mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-13 09:01:54 +00:00
Merge branch 'main' into bb/gui
This commit is contained in:
commit
747caa74f0
139 changed files with 13565 additions and 816 deletions
14
.env.example
14
.env.example
|
|
@ -273,6 +273,20 @@ BROWSER_SESSION_TIMEOUT=300
|
|||
# Browser sessions are automatically closed after this period of no activity
|
||||
BROWSER_INACTIVITY_TIMEOUT=120
|
||||
|
||||
# Camofox local anti-detection browser (Camoufox-based Firefox).
|
||||
# Set CAMOFOX_URL to route the browser tools through a local Camofox server
|
||||
# instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md.
|
||||
# CAMOFOX_URL=http://localhost:9377
|
||||
|
||||
# Externally managed Camofox sessions — when another app owns the visible
|
||||
# Camofox browser, set these so Hermes shares the same userId/profile instead
|
||||
# of creating its own isolated session.
|
||||
# CAMOFOX_USER_ID=
|
||||
# CAMOFOX_SESSION_KEY=
|
||||
# Set to true to reuse an already-open Camofox tab for this identity before
|
||||
# creating a new one (useful for gateway restarts).
|
||||
# CAMOFOX_ADOPT_EXISTING_TAB=false
|
||||
|
||||
# =============================================================================
|
||||
# SESSION LOGGING
|
||||
# =============================================================================
|
||||
|
|
|
|||
5
.github/workflows/tests.yml
vendored
5
.github/workflows/tests.yml
vendored
|
|
@ -55,11 +55,14 @@ jobs:
|
|||
|
||||
e2e:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update && sudo apt-get install -y ripgrep
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,14 @@ def _get_anthropic_sdk():
|
|||
"""Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
|
||||
global _anthropic_sdk
|
||||
if _anthropic_sdk is ...:
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("provider.anthropic", prompt=False)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
# FeatureUnavailable — fall through to ImportError handling below
|
||||
pass
|
||||
try:
|
||||
import anthropic as _sdk
|
||||
_anthropic_sdk = _sdk
|
||||
|
|
|
|||
|
|
@ -382,7 +382,7 @@ _AI_GATEWAY_HEADERS = {
|
|||
# Nous Portal extra_body for product attribution.
|
||||
# Callers should pass this as extra_body in chat.completions.create()
|
||||
# when the auxiliary client is backed by Nous Portal.
|
||||
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent"]}
|
||||
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
|
||||
|
||||
# Set at resolve time — True if the auxiliary client points to Nous Portal
|
||||
auxiliary_is_nous: bool = False
|
||||
|
|
@ -3828,7 +3828,7 @@ def _resolve_task_provider_model(
|
|||
# (e.g. OPENROUTER_API_KEY) instead of locking into "custom".
|
||||
return cfg_provider, resolved_model, cfg_base_url, None, resolved_api_mode
|
||||
if cfg_provider and cfg_provider != "auto":
|
||||
return cfg_provider, resolved_model, None, None, resolved_api_mode
|
||||
return cfg_provider, resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
|
||||
|
||||
return "auto", resolved_model, None, None, resolved_api_mode
|
||||
|
||||
|
|
@ -4026,7 +4026,7 @@ def _build_call_kwargs(
|
|||
# Provider-specific extra_body
|
||||
merged_extra = dict(extra_body or {})
|
||||
if provider == "nous" or auxiliary_is_nous:
|
||||
merged_extra.setdefault("tags", []).extend(["product=hermes-agent"])
|
||||
merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
|
||||
if merged_extra:
|
||||
kwargs["extra_body"] = merged_extra
|
||||
|
||||
|
|
|
|||
106
agent/lsp/__init__.py
Normal file
106
agent/lsp/__init__.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
"""Language Server Protocol (LSP) integration for Hermes Agent.
|
||||
|
||||
Hermes runs full language servers (pyright, gopls, rust-analyzer,
|
||||
typescript-language-server, etc.) as subprocesses and pipes their
|
||||
``textDocument/publishDiagnostics`` output into the post-write lint
|
||||
delta filter used by ``write_file`` and ``patch``.
|
||||
|
||||
LSP is **gated on git workspace detection** — if the agent's cwd is
|
||||
inside a git repository, LSP runs against that workspace; otherwise the
|
||||
file_operations layer falls back to its existing in-process syntax
|
||||
checks.  This keeps users whose cwd is their home directory (e.g. Telegram gateway
|
||||
chats) from spawning daemons they don't need.
|
||||
|
||||
Public API:
|
||||
|
||||
from agent.lsp import get_service
|
||||
|
||||
svc = get_service()
|
||||
if svc and svc.enabled_for(path):
|
||||
await svc.touch_file(path)
|
||||
diags = svc.diagnostics_for(path)
|
||||
|
||||
The bulk of the wiring is internal — most callers only need the layer
|
||||
in :func:`tools.file_operations.FileOperations._check_lint_delta`,
|
||||
which is already wired (see that module).
|
||||
|
||||
Architecture is documented in ``website/docs/user-guide/features/lsp.md``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
from agent.lsp.manager import LSPService
|
||||
|
||||
logger = logging.getLogger("agent.lsp")
|
||||
|
||||
_service: Optional[LSPService] = None
|
||||
_atexit_registered = False
|
||||
_service_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_service() -> Optional[LSPService]:
    """Return the process-wide LSP service singleton, or ``None`` when inactive.

    The service is built lazily by ``LSPService.create_from_config()`` the
    first time this function reaches the creation path.  ``None`` is
    returned when that factory produced no service, or when the service
    reports ``is_active()`` as false.

    The first call that reaches the creation path also registers
    :func:`_atexit_shutdown` with :mod:`atexit` so spawned language servers
    are torn down on a normal interpreter exit.

    The module global is copied into a local before it is inspected:
    :func:`shutdown_service` may reset ``_service`` to ``None`` from another
    thread at any moment, and re-reading the global between the
    ``is not None`` check and the ``is_active()`` call could otherwise
    raise ``AttributeError``.
    """
    global _service, _atexit_registered
    svc = _service
    if svc is None:
        with _service_lock:
            # Re-check under the lock: another thread may have created the
            # service while we were waiting.
            svc = _service
            if svc is None:
                svc = _service = LSPService.create_from_config()
                if not _atexit_registered:
                    # ``atexit`` handlers run in LIFO order on normal
                    # Python exit and on SystemExit, but NOT on os._exit()
                    # or uncaught signals.  That is acceptable here: the
                    # hook exists for clean exits, where language-server
                    # processes would otherwise outlive the parent.
                    atexit.register(_atexit_shutdown)
                    _atexit_registered = True
    if svc is not None and svc.is_active():
        return svc
    return None
|
||||
|
||||
|
||||
def shutdown_service() -> None:
    """Stop the LSP service singleton if one exists.

    The global slot is cleared under ``_service_lock``; the service's
    ``shutdown()`` is then invoked outside the lock.  Errors raised by
    ``shutdown()`` are logged at debug level and not propagated.  Calling
    this repeatedly, or before any service was created, does nothing.
    """
    global _service
    with _service_lock:
        current, _service = _service, None
    if current is None:
        return
    try:
        current.shutdown()
    except Exception as exc:  # noqa: BLE001
        logger.debug("LSP shutdown error: %s", exc)
|
||||
|
||||
|
||||
def _atexit_shutdown() -> None:
    """Interpreter-exit hook that delegates to :func:`shutdown_service`.

    Any failure is reported at debug level only: by the time this runs the
    user has already seen the agent's final output, so a louder message
    would just be clutter.
    """
    try:
        shutdown_service()
    except Exception as exc:  # noqa: BLE001
        logger.debug("atexit LSP shutdown failed: %s", exc)
|
||||
|
||||
|
||||
__all__ = ["get_service", "shutdown_service", "LSPService"]
|
||||
308
agent/lsp/cli.py
Normal file
308
agent/lsp/cli.py
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
"""``hermes lsp`` CLI subcommand.
|
||||
|
||||
Subcommands:
|
||||
|
||||
- ``status`` — show service state, configured servers, install status.
|
||||
- ``install <server_id>`` — eagerly install one server's binary.
|
||||
- ``install-all`` — try to install every server with a known recipe.
|
||||
- ``restart`` — tear down running clients so the next edit re-spawns.
|
||||
- ``which <server_id>`` — print the resolved binary path for one server.
|
||||
- ``list`` — print the registry of supported servers.
|
||||
|
||||
The handlers are kept here (rather than in
|
||||
``hermes_cli/main.py``) so the LSP module ships self-contained.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def register_subparser(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``lsp`` command tree to the given argparse sub-parser action.

    Adds an ``lsp`` parser with the nested commands ``status``, ``list``,
    ``install``, ``install-all``, ``restart`` and ``which``.  The chosen
    nested command is stored as ``args.lsp_command`` and
    :func:`run_lsp_command` is installed as the parser's ``func`` default.
    """
    lsp_parser = subparsers.add_parser(
        "lsp",
        help="Language Server Protocol management",
        description=(
            "Manage the LSP layer that powers post-write semantic "
            "diagnostics in write_file/patch."
        ),
    )
    commands = lsp_parser.add_subparsers(dest="lsp_command")

    # status [--json]
    commands.add_parser("status", help="Show LSP service status").add_argument(
        "--json", action="store_true", help="Emit machine-readable JSON"
    )

    # list [--installed-only]
    commands.add_parser("list", help="List supported language servers").add_argument(
        "--installed-only",
        action="store_true",
        help="Only show servers whose binary is currently available",
    )

    # install <server>
    commands.add_parser("install", help="Install a server binary").add_argument(
        "server", help="Server id (e.g. pyright, gopls)"
    )

    # install-all [--include-manual]
    commands.add_parser(
        "install-all",
        help="Install every server with a known auto-install recipe",
    ).add_argument(
        "--include-manual",
        action="store_true",
        help="Even attempt servers marked manual-install (best effort)",
    )

    # restart (takes no arguments)
    commands.add_parser(
        "restart",
        help="Tear down running LSP clients (next edit re-spawns)",
    )

    # which <server>
    commands.add_parser("which", help="Print binary path for a server").add_argument(
        "server", help="Server id"
    )

    lsp_parser.set_defaults(func=run_lsp_command)
|
||||
|
||||
|
||||
def run_lsp_command(args: argparse.Namespace) -> int:
    """Dispatch ``hermes lsp <subcommand>`` to its handler and return its exit code.

    A missing or empty ``args.lsp_command`` is treated as ``status``.  An
    unrecognised name writes a message to stderr and yields ``2``; a
    ``KeyboardInterrupt`` raised while handling yields ``130``.
    """
    chosen = getattr(args, "lsp_command", None) or "status"
    # Each route is a thunk so argument attributes are read only for the
    # subcommand that actually runs.
    routes = (
        ("status", lambda: _cmd_status(getattr(args, "json", False))),
        ("list", lambda: _cmd_list(getattr(args, "installed_only", False))),
        ("install", lambda: _cmd_install(args.server)),
        ("install-all", lambda: _cmd_install_all(getattr(args, "include_manual", False))),
        ("restart", lambda: _cmd_restart()),
        ("which", lambda: _cmd_which(args.server)),
    )
    try:
        for name, handler in routes:
            if chosen == name:
                return handler()
        sys.stderr.write(f"unknown lsp subcommand: {chosen}\n")
        return 2
    except KeyboardInterrupt:
        return 130
|
||||
|
||||
|
||||
def _cmd_status(emit_json: bool) -> int:
    """Write the LSP service state and the server registry to stdout.

    With ``emit_json`` true, a JSON document with ``service`` and
    ``registry`` keys is emitted; otherwise a plain-text report is printed.
    Returns ``0`` in both modes.
    """
    from agent.lsp import get_service
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import detect_status

    service = get_service()
    if service is None:
        details = {"enabled": False}
    else:
        details = service.get_status()

    if emit_json:
        import json

        registry = [
            {
                "server_id": server.server_id,
                "extensions": list(server.extensions),
                "description": server.description,
                "binary_status": detect_status(_recipe_pkg_for(server.server_id)),
            }
            for server in SERVERS
        ]
        document = {"service": details, "registry": registry}
        sys.stdout.write(json.dumps(document, indent=2) + "\n")
        return 0

    report = [
        "LSP Service",
        "===========",
        f" enabled: {details.get('enabled', False)}",
    ]
    if service is not None:
        report += [
            f" wait_mode: {details.get('wait_mode')}",
            f" wait_timeout: {details.get('wait_timeout')}s",
            f" install_strategy:{details.get('install_strategy')}",
        ]
        active = details.get("clients") or []
        if not active:
            report.append(" active clients: none")
        else:
            report.append(f" active clients: {len(active)}")
            report.extend(
                f" - {c['server_id']:20s} state={c['state']:10s} root={c['workspace_root']}"
                for c in active
            )
        failing = details.get("broken") or []
        if failing:
            report.append(f" broken pairs: {len(failing)}")
            report.extend(f" - {entry}" for entry in failing)
        switched_off = details.get("disabled_servers") or []
        if switched_off:
            report.append(f" disabled in cfg: {', '.join(switched_off)}")

    # Some servers spawn fine but emit no diagnostics without a sidecar
    # binary (bash-language-server -> shellcheck); the registry table below
    # cannot show that, so it gets its own section.
    warnings = _backend_warnings()
    if warnings:
        report += ["", "Backend warnings", "================"]
        report.extend(f" ! {line}" for line in warnings)

    report += ["", "Registered Servers", "=================="]
    glyphs = {
        "installed": "✓",
        "missing": "·",
        "manual-only": "?",
    }
    for server in SERVERS:
        status = detect_status(_recipe_pkg_for(server.server_id))
        glyph = glyphs.get(status, " ")
        summary = ", ".join(list(server.extensions)[:5])
        overflow = len(server.extensions) - 5
        if overflow > 0:
            summary += f", … (+{overflow})"
        report.append(f" {glyph} {server.server_id:24s} [{status:11s}] {summary}")
        if server.description:
            report.append(f" {server.description}")

    sys.stdout.write("\n".join(report) + "\n")
    return 0
|
||||
|
||||
|
||||
def _cmd_list(installed_only: bool) -> int:
    """Print one line per registered server: id, binary status and extensions.

    When ``installed_only`` is true, servers whose status is anything other
    than ``"installed"`` are omitted.  Always returns ``0``.
    """
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import detect_status

    for server in SERVERS:
        state = detect_status(_recipe_pkg_for(server.server_id))
        if not installed_only or state == "installed":
            extensions = ",".join(server.extensions)
            sys.stdout.write(f"{server.server_id:24s} [{state:11s}] {extensions}\n")
    return 0
|
||||
|
||||
|
||||
def _cmd_install(server_id: str) -> int:
    """Install the binary for one server and report the outcome.

    Returns ``0`` when the server was already installed or the install
    produced a path, and ``1`` when ``try_install`` returned ``None``.  In
    the failure case the stderr message distinguishes recipes whose
    strategy is ``"manual"`` from other failures.
    """
    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status

    package = _recipe_pkg_for(server_id)
    if detect_status(package) == "installed":
        sys.stdout.write(f"{server_id} already installed\n")
        return 0

    sys.stdout.write(f"installing {server_id} (pkg={package}) ...\n")
    sys.stdout.flush()  # show progress before the potentially slow install
    installed_path = try_install(package, "auto")
    if installed_path is not None:
        sys.stdout.write(f"installed: {installed_path}\n")
        return 0

    recipe = INSTALL_RECIPES.get(package)
    if recipe and recipe.get("strategy") == "manual":
        message = (
            f"{server_id}: this server requires a manual install. "
            f"See documentation.\n"
        )
    else:
        message = f"{server_id}: install failed (see logs).\n"
    sys.stderr.write(message)
    return 1
|
||||
|
||||
|
||||
def _cmd_install_all(include_manual: bool) -> int:
    """Attempt to install every registered server that has an install recipe.

    Servers without a recipe are skipped, as are recipes whose strategy is
    ``"manual"`` unless ``include_manual`` is true.  Returns ``1`` if any
    attempted install yielded no path, otherwise ``0``.
    """
    from agent.lsp.servers import SERVERS
    from agent.lsp.install import try_install, INSTALL_RECIPES, detect_status

    exit_code = 0
    for server in SERVERS:
        package = _recipe_pkg_for(server.server_id)
        recipe = INSTALL_RECIPES.get(package)
        skip = recipe is None or (
            recipe.get("strategy") == "manual" and not include_manual
        )
        if skip:
            continue
        if detect_status(package) == "installed":
            sys.stdout.write(f" {server.server_id:24s} already installed\n")
            continue

        sys.stdout.write(f" installing {server.server_id} (pkg={package}) ... ")
        sys.stdout.flush()  # the result is appended to this same line
        outcome = try_install(package, "auto")
        if not outcome:
            sys.stdout.write("FAILED\n")
            exit_code = 1
        else:
            sys.stdout.write(f"ok ({outcome})\n")
    return exit_code
|
||||
|
||||
|
||||
def _cmd_restart() -> int:
    """Shut the LSP service down via ``shutdown_service`` and return ``0``."""
    from agent.lsp import shutdown_service as _stop_service

    _stop_service()
    confirmation = "LSP service shut down. Next edit will respawn clients.\n"
    sys.stdout.write(confirmation)
    return 0
|
||||
|
||||
|
||||
def _cmd_which(server_id: str) -> int:
    """Print the resolved binary path for one server.

    The binary name is the recipe's ``bin`` entry when a recipe is found,
    else ``server_id`` itself.  The recipe is looked up first under
    ``server_id`` and then under the key returned by
    :func:`_recipe_pkg_for`, so ids whose recipe key differs resolve the
    same way they do for ``install`` and ``status``.

    The Hermes-managed bin directory is checked before ``PATH``.  Returns
    ``0`` after printing a path, or ``1`` (with a message on stderr) when
    the binary is found in neither place.
    """
    from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir
    import shutil as _shutil

    # Direct key first keeps every previously working lookup unchanged; the
    # alias lookup only adds hits for ids that used to miss.
    recipe = INSTALL_RECIPES.get(server_id) or INSTALL_RECIPES.get(
        _recipe_pkg_for(server_id)
    )
    bin_name = (recipe or {}).get("bin", server_id)

    staged = hermes_lsp_bin_dir() / bin_name
    if staged.exists():
        sys.stdout.write(str(staged) + "\n")
        return 0

    on_path = _shutil.which(bin_name)
    if on_path:
        sys.stdout.write(on_path + "\n")
        return 0

    sys.stderr.write(f"{server_id}: not installed\n")
    return 1
|
||||
|
||||
|
||||
def _recipe_pkg_for(server_id: str) -> str:
    """Translate a registry ``server_id`` into its install-recipe package key.

    Ids without an entry in the alias table are returned unchanged.
    """
    # Kept in the CLI layer (not install.py) because it is only a CLI
    # convenience: most ids already equal their recipe key, and only the
    # handful below differ.
    overrides = {
        "vue-language-server": "@vue/language-server",
        "astro-language-server": "@astrojs/language-server",
        "dockerfile-ls": "dockerfile-language-server-nodejs",
        "typescript": "typescript-language-server",
    }
    try:
        return overrides[server_id]
    except KeyError:
        return server_id
|
||||
|
||||
|
||||
def _backend_warnings() -> list:
    """Collect notes about missing sidecar tools that a language server needs.

    Currently covers one case: ``bash-language-server`` is present (on
    ``PATH`` or in the Hermes-managed bin directory) while ``shellcheck``
    is not on ``PATH``.  Returns a list with one message in that case and
    an empty list otherwise.
    """
    import shutil as _shutil
    from agent.lsp.install import hermes_lsp_bin_dir

    server_bin = "bash-language-server"
    if _shutil.which(server_bin) is None:
        # Not on PATH; fall back to the Hermes-managed install location.
        if not (hermes_lsp_bin_dir() / server_bin).exists():
            return []
    if _shutil.which("shellcheck") is not None:
        return []
    return [
        "bash-language-server is installed but shellcheck is missing — "
        "diagnostics will be empty (apt: shellcheck, brew: shellcheck, "
        "scoop: shellcheck)."
    ]
|
||||
930
agent/lsp/client.py
Normal file
930
agent/lsp/client.py
Normal file
|
|
@ -0,0 +1,930 @@
|
|||
"""Async LSP client over stdin/stdout.
|
||||
|
||||
One :class:`LSPClient` corresponds to one ``(language_server, workspace_root)``
|
||||
pair — exactly what OpenCode keys clients on, and the same shape Claude
|
||||
Code uses. The client owns a child process, drives the JSON-RPC
|
||||
exchange, and exposes:
|
||||
|
||||
- :meth:`open_file` / :meth:`change_file` — text document sync
|
||||
- :meth:`wait_for_diagnostics` — block until the server emits fresh
|
||||
diagnostics for a specific file (or a timeout fires)
|
||||
- :meth:`diagnostics_for` — read the current per-file diagnostic store
|
||||
- :meth:`shutdown` — graceful close + SIGTERM/SIGKILL fallback
|
||||
|
||||
The class is designed for async use from a single asyncio event loop.
|
||||
The :class:`agent.lsp.manager.LSPService` runs an event loop in a
|
||||
background thread so the synchronous file_operations layer can call
|
||||
into it via :func:`agent.lsp.manager.LSPService.touch_file`.
|
||||
|
||||
Implementation notes:
|
||||
|
||||
- Push diagnostics are stored per-URI in :attr:`_push_diagnostics` from
|
||||
``textDocument/publishDiagnostics`` notifications. Pull diagnostics
|
||||
go in :attr:`_pull_diagnostics`. The merged view dedupes by content.
|
||||
|
||||
- Whole-document sync. Even when the server advertises incremental
|
||||
sync, we send a single ``contentChanges`` entry replacing the
|
||||
entire document. Pretending to be incremental while sending a
|
||||
full replacement is well-tolerated by every major server and saves
|
||||
range bookkeeping. See OpenCode's ``client.ts:584-659`` for the
|
||||
same trick.
|
||||
|
||||
- The "touch-file dance": every ``open_file`` call also fires a
|
||||
``workspace/didChangeWatchedFiles`` notification (CREATED on the
|
||||
first open, CHANGED thereafter). Some servers (clangd, eslint)
|
||||
only re-scan when this notification fires, even though the LSP spec
|
||||
doesn't strictly require it.
|
||||
|
||||
- ``ContentModified`` (-32801) errors get retried with exponential
|
||||
backoff up to 3 times. This matches Claude Code's
|
||||
``LSPServerInstance.sendRequest``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
from agent.lsp.protocol import (
|
||||
ERROR_CONTENT_MODIFIED,
|
||||
ERROR_METHOD_NOT_FOUND,
|
||||
LSPProtocolError,
|
||||
LSPRequestError,
|
||||
classify_message,
|
||||
encode_message,
|
||||
make_error_response,
|
||||
make_notification,
|
||||
make_request,
|
||||
make_response,
|
||||
read_message,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("agent.lsp.client")
|
||||
|
||||
# Timeouts (seconds) — mirror OpenCode's constants, scaled to seconds.
|
||||
INITIALIZE_TIMEOUT = 45.0
|
||||
DIAGNOSTICS_DOCUMENT_WAIT = 5.0
|
||||
DIAGNOSTICS_FULL_WAIT = 10.0
|
||||
DIAGNOSTICS_REQUEST_TIMEOUT = 3.0
|
||||
PUSH_DEBOUNCE = 0.15
|
||||
SHUTDOWN_GRACE = 1.0 # seconds between SIGTERM and SIGKILL
|
||||
|
||||
# Retry policy for transient ContentModified errors.
|
||||
MAX_CONTENT_MODIFIED_RETRIES = 3
|
||||
RETRY_BASE_DELAY = 0.5 # 0.5, 1.0, 2.0 — exponential
|
||||
|
||||
|
||||
def file_uri(path: str) -> str:
    """Build a ``file://`` URI from a filesystem path.

    The path is made absolute first and then percent-encoded, leaving
    ``/`` and ``:`` untouched.  On Windows, backslashes become forward
    slashes and a leading slash is added so the drive letter lands in the
    path component (``C:\\foo`` → ``file:///C:/foo``).
    """
    target = os.path.abspath(path)
    if os.name == "nt":
        slashed = target.replace("\\", "/")
        target = slashed if slashed.startswith("/") else "/" + slashed
    encoded = quote(target, safe="/:")
    return f"file://{encoded}"
|
||||
|
||||
|
||||
def uri_to_path(uri: str) -> str:
    """Convert a ``file://`` URI back to a normalised filesystem path.

    Strings that do not start with ``file://`` are returned unchanged.
    On Windows the slash preceding a drive letter is dropped before the
    remainder is percent-decoded and normalised.
    """
    scheme = "file://"
    if not uri.startswith(scheme):
        return uri
    remainder = uri[len(scheme):]
    # "/C:/..." on Windows: slice comparisons are safe on short strings.
    drive_prefixed = remainder[:1] == "/" and remainder[2:3] == ":"
    if os.name == "nt" and drive_prefixed:
        remainder = remainder[1:]
    decoded = unquote(remainder)
    return os.path.normpath(decoded)
|
||||
|
||||
|
||||
def _end_position(text: str) -> Dict[str, int]:
    """Return the LSP Position at the end of ``text``.

    Used to construct a single-range "replace whole document" change for
    ``textDocument/didChange`` regardless of the server's declared sync
    mode.

    Only ``\\n``, ``\\r\\n`` and ``\\r`` count as line terminators, as the LSP
    specification defines them.  ``str.splitlines`` is deliberately not
    used: it also breaks on form feed, ``\\x85``, ``\\u2028`` and others,
    which would inflate the line number.  The column is measured in UTF-16
    code units, the position encoding this client advertises, so a
    character outside the BMP counts as two.

    Returns:
        ``{"line": <zero-based line>, "character": <UTF-16 column>}``.
        A text that ends with a terminator ends at column 0 of the
        following (empty) line.
    """
    if not text:
        return {"line": 0, "character": 0}
    # Normalise the three LSP terminators so a plain split yields one entry
    # per line, including a trailing empty entry after a final terminator.
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    tail = lines[-1]
    # "surrogatepass" keeps lone surrogates from raising during encoding.
    utf16_units = len(tail.encode("utf-16-le", "surrogatepass")) // 2
    return {"line": len(lines) - 1, "character": utf16_units}
|
||||
|
||||
|
||||
class LSPClient:
|
||||
"""Async LSP client tied to one server process and one workspace root.
|
||||
|
||||
Lifecycle:
|
||||
|
||||
c = LSPClient(server_id, workspace_root, command, args, init_options)
|
||||
await c.start() # spawn + initialize
|
||||
ver = await c.open_file("/path/to/foo.py")
|
||||
await c.wait_for_diagnostics("/path/to/foo.py", ver)
|
||||
diags = c.diagnostics_for("/path/to/foo.py")
|
||||
await c.shutdown()
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# construction + lifecycle
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(
    self,
    *,
    server_id: str,
    workspace_root: str,
    command: List[str],
    env: Optional[Dict[str, str]] = None,
    cwd: Optional[str] = None,
    initialization_options: Optional[Dict[str, Any]] = None,
    seed_diagnostics_on_first_push: bool = False,
) -> None:
    """Record the launch configuration and reset all runtime state.

    Nothing is spawned here; the client starts in state ``"stopped"``.
    ``command`` is copied, ``cwd`` falls back to ``workspace_root`` and
    ``initialization_options`` falls back to an empty dict.
    """
    # --- launch configuration -------------------------------------------
    self.server_id = server_id
    self.workspace_root = workspace_root
    self._command = [*command]
    self._env = env
    self._cwd = cwd if cwd else workspace_root
    self._init_options = initialization_options if initialization_options else {}
    self._seed_first_push = seed_diagnostics_on_first_push

    # --- child process and its background tasks ---------------------------
    self._proc: Optional[asyncio.subprocess.Process] = None
    self._stderr_task: Optional[asyncio.Task] = None
    self._reader_task: Optional[asyncio.Task] = None

    # --- JSON-RPC request/response correlation ----------------------------
    self._next_id: int = 0
    self._pending: Dict[int, asyncio.Future] = {}

    # --- server → client requests we answer -------------------------------
    # Deliberately a short explicit table; anything not listed here is
    # answered with method-not-found.
    self._request_handlers: Dict[str, Callable[[Any], Awaitable[Any]]] = {
        "window/workDoneProgress/create": self._handle_work_done_create,
        "workspace/configuration": self._handle_workspace_configuration,
        "client/registerCapability": self._handle_register_capability,
        "client/unregisterCapability": self._handle_unregister_capability,
        "workspace/workspaceFolders": self._handle_workspace_folders,
        "workspace/diagnostic/refresh": self._handle_diagnostic_refresh,
    }
    # --- server → client notifications we act on --------------------------
    # window/showMessage, $/progress and the rest are silently dropped.
    self._notification_handlers: Dict[str, Callable[[Any], None]] = {
        "textDocument/publishDiagnostics": self._handle_publish_diagnostics,
    }

    # --- per-file bookkeeping ---------------------------------------------
    # Open-document state, needed to bump versions on didChange.
    self._files: Dict[str, Dict[str, Any]] = {}
    # Diagnostics are keyed by filesystem path, not by URI.
    self._push_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
    self._pull_diagnostics: Dict[str, List[Dict[str, Any]]] = {}
    # When each path last received a push, for wait-for-fresh logic.
    self._published: Dict[str, float] = {}
    # Version carried by the latest push per path (equals our didChange
    # version when the server echoes it back).
    self._published_version: Dict[str, int] = {}
    # Paths that have already had their first push, for the
    # typescript-style seed-on-first-push behaviour.
    self._first_push_seen: Set[str] = set()
    # Dynamic capability registrations; only diagnostic ones are kept.
    self._diagnostic_registrations: Dict[str, Dict[str, Any]] = {}

    # --- lifecycle state --------------------------------------------------
    self._state: str = "stopped"
    self._initialize_result: Optional[Dict[str, Any]] = None
    self._sync_kind: int = 1  # 1=Full, 2=Incremental
    self._stopping: bool = False

    # --- waiter signalling ------------------------------------------------
    self._push_event = asyncio.Event()
    # Bumped on every publishDiagnostics push.  Waiters snapshot it on
    # entry and recheck their predicate on any increase, which sidesteps
    # the sticky state of asyncio.Event.
    self._push_counter = 0
    # Lets wait_for_diagnostics loop again when the server announces a new
    # dynamic provider.
    self._registration_event = asyncio.Event()
|
||||
|
||||
@property
|
||||
def is_running(self) -> bool:
|
||||
return self._state == "running" and self._proc is not None and self._proc.returncode is None
|
||||
|
||||
@property
|
||||
def state(self) -> str:
|
||||
return self._state
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Spawn the server and complete the initialize handshake.
|
||||
|
||||
Raises any exception encountered during spawn/init. On failure
|
||||
the process is killed and the client is left in state
|
||||
``"error"`` — re-call ``start()`` to retry.
|
||||
"""
|
||||
if self._state in ("running", "starting"):
|
||||
return
|
||||
self._state = "starting"
|
||||
try:
|
||||
await self._spawn()
|
||||
await self._initialize()
|
||||
self._state = "running"
|
||||
except Exception:
|
||||
self._state = "error"
|
||||
await self._cleanup_process()
|
||||
raise
|
||||
|
||||
async def _spawn(self) -> None:
    """Launch the server process and start its stderr and reader tasks.

    The child inherits ``os.environ`` overlaid with the per-client
    ``_env`` mapping, runs in ``_cwd``, and has all three standard
    streams piped.

    Raises:
        LSPProtocolError: when launching raises ``FileNotFoundError``.
    """
    child_env = os.environ.copy()
    if self._env:
        child_env.update(self._env)

    program = self._command[0]
    arguments = self._command[1:]
    pipe = asyncio.subprocess.PIPE
    try:
        self._proc = await asyncio.create_subprocess_exec(
            program,
            *arguments,
            stdin=pipe,
            stdout=pipe,
            stderr=pipe,
            env=child_env,
            cwd=self._cwd,
        )
    except FileNotFoundError as e:
        raise LSPProtocolError(
            f"LSP server binary not found: {self._command[0]} ({e})"
        ) from e

    # stderr must be drained continuously: once the pipe buffer fills, the
    # server blocks on its next write and hangs.
    self._stderr_task = asyncio.create_task(self._drain_stderr())
    # Begin consuming JSON-RPC messages from stdout.
    self._reader_task = asyncio.create_task(self._reader_loop())
|
||||
|
||||
async def _drain_stderr(self) -> None:
|
||||
if self._proc is None or self._proc.stderr is None:
|
||||
return
|
||||
try:
|
||||
while True:
|
||||
line = await self._proc.stderr.readline()
|
||||
if not line:
|
||||
break
|
||||
text = line.decode("utf-8", errors="replace").rstrip()
|
||||
if text:
|
||||
logger.debug("[%s] stderr: %s", self.server_id, text[:1000])
|
||||
except (asyncio.CancelledError, OSError):
|
||||
pass
|
||||
|
||||
async def _reader_loop(self) -> None:
    """Read messages from the server's stdout and route them until it closes.

    Responses go to ``_dispatch_response``, notifications to
    ``_dispatch_notification``, and server-initiated requests are handled
    in their own task via ``_dispatch_request`` so a slow handler does not
    stall reading.  Messages that classify as none of these are logged
    and dropped.

    The loop ends when ``read_message`` returns ``None``, on
    ``LSPProtocolError`` (logged as a warning), or on cancellation /
    ``OSError``.  In every case all still-pending request futures are
    failed with ``LSPProtocolError`` so their awaiters do not hang.
    """
    if self._proc is None or self._proc.stdout is None:
        return
    try:
        while True:
            msg = await read_message(self._proc.stdout)
            if msg is None:
                logger.debug("[%s] server closed stdout cleanly", self.server_id)
                break
            kind, key = classify_message(msg)
            if kind == "response":
                self._dispatch_response(key, msg)
            elif kind == "request":
                task = asyncio.create_task(self._dispatch_request(key, msg))
                # The event loop holds only weak references to tasks, so an
                # unreferenced task may be garbage-collected before it
                # finishes.  Keep a strong reference until it is done.
                # The set is created lazily so this method does not depend
                # on the constructor declaring it.
                tasks = getattr(self, "_request_tasks", None)
                if tasks is None:
                    tasks = self._request_tasks = set()
                tasks.add(task)
                task.add_done_callback(tasks.discard)
            elif kind == "notification":
                self._dispatch_notification(key, msg)
            else:
                logger.warning("[%s] dropping invalid message: %r", self.server_id, msg)
    except LSPProtocolError as e:
        logger.warning("[%s] protocol error in reader loop: %s", self.server_id, e)
    except (asyncio.CancelledError, OSError):
        pass
    finally:
        # Wake up any pending requests so they can fail fast.
        for fut in list(self._pending.values()):
            if not fut.done():
                fut.set_exception(LSPProtocolError("server connection closed"))
        self._pending.clear()
|
||||
|
||||
async def _initialize(self) -> None:
    """Run the ``initialize`` / ``initialized`` handshake.

    Sends ``initialize`` with the workspace root and the client
    capabilities, waiting at most ``INITIALIZE_TIMEOUT`` for the reply.
    The reply is stored in ``_initialize_result`` and the text sync kind
    is derived from its ``capabilities``. ``initialized`` is sent
    afterwards, followed by ``workspace/didChangeConfiguration`` when
    initialization options are set.
    """
    root_uri = file_uri(self.workspace_root)

    workspace_caps = {
        "configuration": True,
        "workspaceFolders": True,
        "didChangeWatchedFiles": {"dynamicRegistration": True},
        "diagnostics": {"refreshSupport": False},
    }
    synchronization_caps = {
        "dynamicRegistration": False,
        "didOpen": True,
        "didChange": True,
        "didSave": True,
        "willSave": False,
        "willSaveWaitUntil": False,
    }
    publish_caps = {
        "relatedInformation": True,
        "tagSupport": {"valueSet": [1, 2]},
        "versionSupport": True,
        "codeDescriptionSupport": True,
        "dataSupport": False,
    }
    text_document_caps = {
        "synchronization": synchronization_caps,
        "diagnostic": {
            "dynamicRegistration": True,
            "relatedDocumentSupport": True,
        },
        "publishDiagnostics": publish_caps,
        "hover": {"contentFormat": ["markdown", "plaintext"]},
        "definition": {"linkSupport": True},
        "references": {},
        "documentSymbol": {"hierarchicalDocumentSymbolSupport": True},
    }
    params = {
        "rootUri": root_uri,
        "rootPath": self.workspace_root,
        "processId": os.getpid(),
        "workspaceFolders": [{"name": "workspace", "uri": root_uri}],
        "initializationOptions": self._init_options,
        "capabilities": {
            "window": {"workDoneProgress": True},
            "workspace": workspace_caps,
            "textDocument": text_document_caps,
            "general": {"positionEncodings": ["utf-16"]},
        },
    }

    reply = await asyncio.wait_for(
        self._send_request("initialize", params),
        timeout=INITIALIZE_TIMEOUT,
    )
    self._initialize_result = reply
    self._sync_kind = self._extract_sync_kind(reply.get("capabilities") or {})

    await self._send_notification("initialized", {})
    if not self._init_options:
        return
    # Some servers (vtsls, eslint) want config pushed via
    # didChangeConfiguration even if it was sent in
    # initializationOptions.
    await self._send_notification(
        "workspace/didChangeConfiguration",
        {"settings": self._init_options},
    )
|
||||
|
||||
@staticmethod
def _extract_sync_kind(capabilities: dict) -> int:
    """Return the text document sync kind advertised by the server.

    ``textDocumentSync`` may be a bare integer or an options object with
    a ``change`` field. Anything else falls back to ``1`` (Full).
    """
    sync = capabilities.get("textDocumentSync")
    candidate = sync.get("change") if isinstance(sync, dict) else sync
    return candidate if isinstance(candidate, int) else 1  # default to Full
|
||||
|
||||
async def shutdown(self) -> None:
    """Stop the server, politely first and forcefully if needed.

    A running client sends ``shutdown`` (waiting at most 2 seconds) and
    then ``exit``; failures in either are ignored. The state is then set
    to ``"stopped"`` and the process is cleaned up. Calling this again
    after the first call is a no-op.
    """
    if self._stopping:
        return
    self._stopping = True
    try:
        if not self.is_running:
            return  # the ``finally`` clause below still runs
        try:
            await asyncio.wait_for(self._send_request("shutdown", None), timeout=2.0)
        except (asyncio.TimeoutError, LSPRequestError, LSPProtocolError):
            pass
        try:
            await self._send_notification("exit", None)
        except Exception:
            pass
    finally:
        self._state = "stopped"
        await self._cleanup_process()
|
||||
|
||||
async def _cleanup_process(self) -> None:
    """Cancel the background tasks and make sure the process is gone.

    The reader task and then the stderr task are cancelled and awaited.
    ``_proc`` is cleared. A process that is still alive is terminated
    and, if it has not exited within ``SHUTDOWN_GRACE``, killed.
    """
    for task in (self._reader_task, self._stderr_task):
        if task is None or task.done():
            continue
        task.cancel()
        try:
            await task
        except (asyncio.CancelledError, Exception):  # noqa: BLE001
            pass

    proc, self._proc = self._proc, None
    if proc is None or proc.returncode is not None:
        return
    try:
        proc.terminate()
        try:
            await asyncio.wait_for(proc.wait(), timeout=SHUTDOWN_GRACE)
        except asyncio.TimeoutError:
            # Still alive after the grace period: escalate to kill.
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                pass
    except ProcessLookupError:
        pass
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# request / notification plumbing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _send_request(self, method: str, params: Any) -> Any:
    """Send a JSON-RPC request and wait for its response.

    Raises ``LSPProtocolError`` when stdin is unavailable or the write
    fails. Otherwise the request's future is awaited; it is resolved or
    failed by whoever completes the entry in ``_pending``. That entry is
    always removed before returning.
    """
    stdin = None if self._proc is None else self._proc.stdin
    if stdin is None or stdin.is_closing():
        raise LSPProtocolError(f"cannot send {method!r}: stdin closed")

    req_id = self._next_id
    self._next_id += 1
    fut: asyncio.Future = asyncio.get_running_loop().create_future()
    self._pending[req_id] = fut

    payload = encode_message(make_request(req_id, method, params))
    try:
        stdin.write(payload)
        await stdin.drain()
    except (BrokenPipeError, ConnectionResetError, OSError) as e:
        self._pending.pop(req_id, None)
        raise LSPProtocolError(f"send failed for {method!r}: {e}") from e

    try:
        return await fut
    finally:
        self._pending.pop(req_id, None)
|
||||
|
||||
async def _send_request_with_retry(self, method: str, params: Any, *, timeout: float) -> Any:
    """Send a request, retrying on ``ContentModified`` (-32801).

    Other errors propagate. The retry policy matches Claude Code's
    ``LSPServerInstance.sendRequest`` — 3 attempts with delays
    0.5s, 1.0s, 2.0s.
    """
    for attempt in range(MAX_CONTENT_MODIFIED_RETRIES + 1):
        try:
            return await asyncio.wait_for(self._send_request(method, params), timeout=timeout)
        except LSPRequestError as err:
            out_of_retries = attempt >= MAX_CONTENT_MODIFIED_RETRIES
            if err.code != ERROR_CONTENT_MODIFIED or out_of_retries:
                raise
            # The delay doubles on each retry.
            backoff = RETRY_BASE_DELAY * (2 ** attempt)
        await asyncio.sleep(backoff)
|
||||
|
||||
async def _send_notification(self, method: str, params: Any) -> None:
    """Send a JSON-RPC notification, best effort.

    Does nothing when stdin is unavailable. Write failures are logged at
    debug level and not raised.
    """
    stdin = None if self._proc is None else self._proc.stdin
    if stdin is None or stdin.is_closing():
        return
    try:
        stdin.write(encode_message(make_notification(method, params)))
        await stdin.drain()
    except (BrokenPipeError, ConnectionResetError, OSError) as e:
        logger.debug("[%s] notify %s failed: %s", self.server_id, method, e)
|
||||
|
||||
async def _send_response(self, req_id: Any, result: Any) -> None:
    """Reply to a server-to-client request with ``result``, best effort.

    Does nothing when stdin is unavailable; write failures are ignored.
    """
    stdin = None if self._proc is None else self._proc.stdin
    if stdin is None or stdin.is_closing():
        return
    try:
        stdin.write(encode_message(make_response(req_id, result)))
        await stdin.drain()
    except (BrokenPipeError, ConnectionResetError, OSError):
        pass
|
||||
|
||||
async def _send_error_response(self, req_id: Any, code: int, message: str) -> None:
    """Reply to a server-to-client request with an error, best effort.

    Does nothing when stdin is unavailable; write failures are ignored.
    """
    stdin = None if self._proc is None else self._proc.stdin
    if stdin is None or stdin.is_closing():
        return
    try:
        stdin.write(encode_message(make_error_response(req_id, code, message)))
        await stdin.drain()
    except (BrokenPipeError, ConnectionResetError, OSError):
        pass
|
||||
|
||||
def _dispatch_response(self, req_id: int, msg: dict) -> None:
    """Resolve the pending future for ``req_id`` from a response message.

    Unknown ids and futures that are already done are ignored. A message
    with an ``error`` member fails the future with ``LSPRequestError``;
    otherwise the future receives ``result``.

    This runs inside the reader loop, so it must not raise on a
    malformed error payload. A non-dict error is stringified into the
    message, and a missing or non-numeric ``code`` falls back to -32000.
    """
    fut = self._pending.get(req_id)
    if fut is None or fut.done():
        return
    if "error" not in msg:
        fut.set_result(msg.get("result"))
        return

    err = msg["error"] or {}
    if not isinstance(err, dict):
        # Non-conforming server: keep whatever it sent as the message.
        err = {"message": err}
    try:
        code = int(err.get("code", -32000))
    except (TypeError, ValueError):
        code = -32000
    fut.set_exception(
        LSPRequestError(
            code=code,
            message=str(err.get("message", "unknown")),
            data=err.get("data"),
        )
    )
|
||||
|
||||
async def _dispatch_request(self, req_id: Any, msg: dict) -> None:
    """Answer one server-to-client request.

    Looks up the handler registered for the message's ``method``. With
    no handler the reply is a method-not-found error. A handler that
    raises produces a -32000 error reply and a warning; otherwise the
    handler's return value is sent back as the result.
    """
    method = msg.get("method", "")
    handler = self._request_handlers.get(method)
    if handler is None:
        await self._send_error_response(req_id, ERROR_METHOD_NOT_FOUND, f"method not found: {method}")
        return
    try:
        result = await handler(msg.get("params"))
    except Exception as e:  # noqa: BLE001 — protocol must not blow up
        logger.warning("[%s] request handler %s failed: %s", self.server_id, method, e)
        await self._send_error_response(req_id, -32000, f"handler failed: {e}")
    else:
        await self._send_response(req_id, result)
|
||||
|
||||
def _dispatch_notification(self, method: str, msg: dict) -> None:
    """Pass a server notification's ``params`` to its registered handler.

    Notifications without a handler are dropped. A handler exception is
    logged at debug level and never propagates.
    """
    handler = self._notification_handlers.get(method)
    if handler is not None:
        try:
            handler(msg.get("params"))
        except Exception as e:  # noqa: BLE001
            logger.debug("[%s] notification handler %s failed: %s", self.server_id, method, e)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# built-in server-→-client request handlers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _handle_work_done_create(self, params: Any) -> Any:
    """Acknowledge a progress-token creation request with a null result.

    Some servers require this acknowledgement; ``params`` is not used.
    """
    return None
|
||||
|
||||
async def _handle_workspace_configuration(self, params: Any) -> Any:
    """Answer ``workspace/configuration`` from the initialization options.

    Returns one entry per requested item. An item without a ``section``
    (or any item when no options are set) gets the whole options object,
    or ``None`` if that is empty. Otherwise the dotted section is walked
    through the nested options and yields ``None`` as soon as a part is
    missing. Non-dict items yield ``None``; non-dict ``params`` yields
    ``[None]``. Mirrors OpenCode's ``client.ts:198-220``.
    """
    if not isinstance(params, dict):
        return [None]

    def resolve(item: Any) -> Any:
        # Look up a single configuration item.
        if not isinstance(item, dict):
            return None
        section = item.get("section")
        if not section or not self._init_options:
            return self._init_options or None
        node: Any = self._init_options
        for part in str(section).split("."):
            if not (isinstance(node, dict) and part in node):
                return None
            node = node[part]
        return node

    return [resolve(item) for item in params.get("items") or []]
|
||||
|
||||
async def _handle_register_capability(self, params: Any) -> Any:
    """Record dynamic registrations for ``textDocument/diagnostic``.

    Each such registration with a truthy ``id`` is stored under that id
    (as a string) and the registration event is set. Other methods and
    malformed entries are ignored. Always returns ``None``.
    """
    if not isinstance(params, dict):
        return None
    registrations = params.get("registrations") or []
    for reg in (entry for entry in registrations if isinstance(entry, dict)):
        reg_id = reg.get("id")
        if reg.get("method") == "textDocument/diagnostic" and reg_id:
            self._diagnostic_registrations[str(reg_id)] = reg
            self._registration_event.set()
    return None
|
||||
|
||||
async def _handle_unregister_capability(self, params: Any) -> Any:
    """Forget diagnostic registrations named in an unregistration request.

    Ids that are not currently recorded are ignored. Always returns
    ``None``.
    """
    if not isinstance(params, dict):
        return None
    # "unregisterations" (sic) is the property name used by the LSP
    # specification; do not "fix" the spelling.
    entries = params.get("unregisterations") or []
    for unreg in (entry for entry in entries if isinstance(entry, dict)):
        reg_id = unreg.get("id")
        if not reg_id:
            continue
        self._diagnostic_registrations.pop(str(reg_id), None)
    return None
|
||||
|
||||
async def _handle_workspace_folders(self, params: Any) -> Any:
    """Report the single workspace folder this client was started for."""
    root_uri = file_uri(self.workspace_root)
    return [{"name": "workspace", "uri": root_uri}]
|
||||
|
||||
async def _handle_diagnostic_refresh(self, params: Any) -> Any:
    """Acknowledge a diagnostic refresh request without acting on it.

    Refresh is not honoured — diagnostics are re-pulled on every
    touchFile instead.
    """
    return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# publishDiagnostics handler
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _handle_publish_diagnostics(self, params: Any) -> None:
    """Store diagnostics pushed by the server for one document.

    Malformed notifications (non-dict params, non-string ``uri``) are
    ignored. The diagnostics, the publish time and, when it is an
    integer, the document version are recorded for the path. Waiters are
    woken unless this is a seeded first push for the path.
    """
    if not isinstance(params, dict):
        return
    uri = params.get("uri")
    if not isinstance(uri, str):
        return
    path = uri_to_path(uri)
    diagnostics = params.get("diagnostics") or []
    if not isinstance(diagnostics, list):
        diagnostics = []
    version = params.get("version")
    loop_time = asyncio.get_event_loop().time()

    # First push: seed without firing the event so a waiter doesn't
    # resolve on the very first push (which arrives before the
    # user-triggered didChange could've produced fresh diagnostics).
    seed_only = self._seed_first_push and path not in self._first_push_seen

    self._first_push_seen.add(path)
    self._push_diagnostics[path] = diagnostics
    self._published[path] = loop_time
    if isinstance(version, int):
        self._published_version[path] = version
    if seed_only:
        return

    # Bump the monotonic push counter and wake every waiter. The Event
    # stays set so any wait already in progress resolves; waiters
    # re-check their predicate after waking and compare against
    # ``_push_counter`` to detect a fresh event.
    self._push_counter += 1
    self._push_event.set()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public file-sync API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def open_file(self, path: str, *, language_id: str = "plaintext") -> int:
    """Sync ``path`` with the server and return its document version.

    The file is read from disk as UTF-8 with undecodable bytes replaced.
    The first call for a path sends ``didChangeWatchedFiles`` (created)
    and ``didOpen`` at version 0. Later calls send
    ``didChangeWatchedFiles`` (changed) and ``didChange`` with the
    version incremented by one. The returned version is what
    ``wait_for_diagnostics`` should match against.

    Raises ``LSPProtocolError`` when the client is not running or the
    file cannot be read.
    """
    if not self.is_running:
        raise LSPProtocolError("client not running")

    abs_path = os.path.abspath(path)
    try:
        text = Path(abs_path).read_text(encoding="utf-8", errors="replace")
    except OSError as e:
        raise LSPProtocolError(f"cannot read {abs_path}: {e}") from e

    uri = file_uri(abs_path)
    tracked = self._files.get(abs_path)

    if tracked is None:
        # First open: didChangeWatchedFiles CREATED + didOpen.
        await self._send_notification(
            "workspace/didChangeWatchedFiles",
            {"changes": [{"uri": uri, "type": 1}]},  # 1 = CREATED
        )
        # Clear any stale push/pull entries — a fresh open should start
        # from scratch.
        for store in (
            self._push_diagnostics,
            self._pull_diagnostics,
            self._published,
            self._published_version,
        ):
            store.pop(abs_path, None)
        document = {
            "uri": uri,
            "languageId": language_id,
            "version": 0,
            "text": text,
        }
        await self._send_notification("textDocument/didOpen", {"textDocument": document})
        self._files[abs_path] = {"version": 0, "text": text}
        return 0

    # Re-open: bump version, fire didChangeWatchedFiles + didChange.
    await self._send_notification(
        "workspace/didChangeWatchedFiles",
        {"changes": [{"uri": uri, "type": 2}]},  # 2 = CHANGED
    )
    new_version = tracked["version"] + 1
    old_text = tracked["text"]
    if self._sync_kind == 2:
        # Incremental sync: replace the whole previous document range.
        whole_document = {
            "start": {"line": 0, "character": 0},
            "end": _end_position(old_text),
        }
        content_changes = [{"range": whole_document, "text": text}]
    else:
        content_changes = [{"text": text}]
    await self._send_notification(
        "textDocument/didChange",
        {
            "textDocument": {"uri": uri, "version": new_version},
            "contentChanges": content_changes,
        },
    )
    self._files[abs_path] = {"version": new_version, "text": text}
    return new_version
|
||||
|
||||
async def save_file(self, path: str) -> None:
    """Notify the server that ``path`` was saved.

    Some linters re-scan only on save. Does nothing when the client is
    not running.
    """
    if not self.is_running:
        return
    uri = file_uri(os.path.abspath(path))
    await self._send_notification("textDocument/didSave", {"textDocument": {"uri": uri}})
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# diagnostics: pull + wait
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _pull_document_diagnostics(self, path: str) -> None:
    """Request ``textDocument/diagnostic`` for one file and store the result.

    The report's ``items`` go into ``_pull_diagnostics`` under the
    file's absolute path; ``relatedDocuments`` entries are stored under
    their own paths. Request errors and timeouts are logged at debug
    level and otherwise ignored (the server may not support the pull
    endpoint), as are replies that are not a dict.
    """
    target = os.path.abspath(path)
    try:
        params: Dict[str, Any] = {"textDocument": {"uri": file_uri(target)}}
        report = await self._send_request_with_retry(
            "textDocument/diagnostic",
            params,
            timeout=DIAGNOSTICS_REQUEST_TIMEOUT,
        )
    except (LSPRequestError, LSPProtocolError, asyncio.TimeoutError) as e:
        logger.debug("[%s] document diagnostic pull failed: %s", self.server_id, e)
        return
    if not isinstance(report, dict):
        return

    items = report.get("items")
    if isinstance(items, list):
        self._pull_diagnostics[target] = items

    related = report.get("relatedDocuments")
    if not isinstance(related, dict):
        return
    for uri, sub_report in related.items():
        if not isinstance(sub_report, dict):
            continue
        sub_items = sub_report.get("items")
        if isinstance(sub_items, list):
            self._pull_diagnostics[uri_to_path(uri)] = sub_items
|
||||
|
||||
async def wait_for_diagnostics(
    self,
    path: str,
    version: int,
    *,
    mode: str = "document",
) -> None:
    """Wait for the server to publish diagnostics for ``path`` at ``version``.

    ``mode`` selects the time budget: ``DIAGNOSTICS_FULL_WAIT`` for
    ``"full"``, ``DIAGNOSTICS_DOCUMENT_WAIT`` otherwise. Each round
    races a document pull against a wait for a fresh push. The method
    returns as soon as a push for this version (or an unversioned push)
    is recorded, or pull results exist for the path. Best-effort —
    returns silently on timeout and does NOT throw if the server doesn't
    support pull diagnostics; the push side still works.
    """
    budget = DIAGNOSTICS_FULL_WAIT if mode == "full" else DIAGNOSTICS_DOCUMENT_WAIT
    loop = asyncio.get_running_loop()
    deadline = loop.time() + budget
    abs_path = os.path.abspath(path)
    # Pause between fruitless rounds. Without it a pull that fails fast
    # (e.g. the server has no textDocument/diagnostic support) would be
    # re-issued back-to-back for the whole budget. Pushes that arrive
    # during the pause are still recorded by the notification handler
    # and picked up by the next round.
    retry_pause = 0.1

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            return

        # Concurrent: document pull + push wait.
        pull_task = asyncio.create_task(self._pull_document_diagnostics(abs_path))
        push_task = asyncio.create_task(self._wait_for_fresh_push(abs_path, version, remaining))
        _done, pending = await asyncio.wait(
            {pull_task, push_task},
            timeout=remaining,
            return_when=asyncio.FIRST_COMPLETED,
        )
        for t in pending:
            t.cancel()
        for t in pending:
            try:
                await t
            except (asyncio.CancelledError, Exception):  # noqa: BLE001
                pass

        # If we got a fresh push for our version, we're done.
        current_v = self._published_version.get(abs_path)
        if abs_path in self._published and (
            current_v is None or current_v >= version
        ):
            return

        # Pull may have populated _pull_diagnostics — that's also
        # success.
        if abs_path in self._pull_diagnostics:
            return

        # Nothing yet: back off briefly, then loop until the budget
        # runs out.
        pause = min(retry_pause, deadline - loop.time())
        if pause > 0:
            await asyncio.sleep(pause)
|
||||
|
||||
async def _wait_for_fresh_push(self, path: str, version: int, timeout: float) -> None:
    """Wait until a publishDiagnostics arrives for ``path`` at ``version``+.

    Returns once a publish for ``path`` is recorded whose version is
    unknown or at least ``version`` (after a short debounce), or when
    ``timeout`` seconds have elapsed.
    """
    clock = asyncio.get_event_loop().time
    deadline = clock() + timeout
    baseline = self._push_counter

    def satisfied() -> bool:
        # A publish is recorded and its version is unknown or new enough.
        seen_version = self._published_version.get(path)
        return path in self._published and (seen_version is None or seen_version >= version)

    while True:
        if satisfied():
            # Debounce — wait a tick in case more diagnostics arrive
            # immediately after. TS often emits in pairs. The counter is
            # snapshotted so we wake on a *new* push, not on the one
            # that satisfied us a moment ago.
            settled_at = self._push_counter
            debounce_deadline = clock() + PUSH_DEBOUNCE
            while self._push_counter == settled_at:
                left = debounce_deadline - clock()
                if left <= 0:
                    break
                self._push_event.clear()
                try:
                    await asyncio.wait_for(self._push_event.wait(), timeout=left)
                except asyncio.TimeoutError:
                    break
            return

        left = deadline - clock()
        if left <= 0:
            return
        if self._push_counter > baseline:
            # New event arrived but predicate still false — re-check
            # immediately without waiting again.
            baseline = self._push_counter
            continue
        self._push_event.clear()
        try:
            await asyncio.wait_for(self._push_event.wait(), timeout=min(left, 0.5))
        except asyncio.TimeoutError:
            pass  # fall through to the next re-check
|
||||
|
||||
def diagnostics_for(self, path: str) -> List[Dict[str, Any]]:
    """Return the merged, de-duplicated diagnostics known for ``path``.

    Pushed diagnostics come first, followed by pulled ones that are not
    content-equal to an earlier entry. Returns an empty list when
    nothing is stored for the file.
    """
    key = os.path.abspath(path)
    pushed = self._push_diagnostics.get(key) or []
    pulled = self._pull_diagnostics.get(key) or []
    return _dedupe(pushed, pulled)
|
||||
|
||||
|
||||
def _dedupe(*lists: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Concatenate diagnostic lists, keeping the first of each duplicate.

    Entries are compared by ``_diagnostic_key``. Non-dict entries are
    dropped. Order of first appearance is preserved.
    """
    # Dicts keep insertion order, so ``setdefault`` retains the first
    # diagnostic seen for each key, in the order it was seen.
    first_by_key: Dict[str, Dict[str, Any]] = {}
    for group in lists:
        for diag in group:
            if isinstance(diag, dict):
                first_by_key.setdefault(_diagnostic_key(diag), diag)
    return list(first_by_key.values())
|
||||
|
||||
|
||||
def _diagnostic_key(d: Dict[str, Any]) -> str:
    """Content-equality key for a diagnostic.

    Matches the structural equality used in claude-code's
    ``areDiagnosticsEqual`` — severity + code + source + message +
    range coords, joined with NUL separators. The range is flattened to
    a ``line:char-line:char`` string so the key is stable across dict
    orderings.

    Diagnostics come from an external server, so a missing or malformed
    ``range`` / ``start`` / ``end`` (anything that is not a dict) is
    treated as empty and its coordinates default to 0. A missing
    severity defaults to 1; a missing code, source or message becomes
    the empty string.
    """

    def as_dict(value: Any) -> Dict[str, Any]:
        # Tolerate non-dict junk where the protocol expects an object.
        return value if isinstance(value, dict) else {}

    rng = as_dict(d.get("range"))
    start = as_dict(rng.get("start"))
    end = as_dict(rng.get("end"))
    code = d.get("code")
    span = (
        f"{start.get('line', 0)}:{start.get('character', 0)}"
        f"-{end.get('line', 0)}:{end.get('character', 0)}"
    )
    return "\x00".join(
        [
            str(d.get("severity") or 1),
            "" if code is None else str(code),
            str(d.get("source") or ""),
            str(d.get("message") or "").strip(),
            span,
        ]
    )
|
||||
|
||||
|
||||
__all__ = [
|
||||
"LSPClient",
|
||||
"file_uri",
|
||||
"uri_to_path",
|
||||
"INITIALIZE_TIMEOUT",
|
||||
"DIAGNOSTICS_DOCUMENT_WAIT",
|
||||
"DIAGNOSTICS_FULL_WAIT",
|
||||
]
|
||||
213
agent/lsp/eventlog.py
Normal file
213
agent/lsp/eventlog.py
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
"""Structured logging with steady-state silence for the LSP layer.
|
||||
|
||||
The LSP layer fires on every write_file/patch. In a busy session
|
||||
that's hundreds of events. We want users to be able to ``rg`` the
|
||||
log for "did LSP fire on that edit?" without drowning in noise.
|
||||
|
||||
The level model:
|
||||
|
||||
- ``DEBUG`` for steady-state events that have no novel signal:
|
||||
``clean``, ``feature off``, ``extension not mapped``, ``no project
|
||||
root for already-announced file``, ``server unavailable for
|
||||
already-announced binary``. These never reach ``agent.log`` at the
|
||||
default INFO threshold.
|
||||
|
||||
- ``INFO`` for state transitions worth surfacing exactly once per
|
||||
session: ``active for <root>`` the first time a (server_id,
|
||||
workspace_root) client starts, ``no project root for <path>``
|
||||
the first time we see that file. Plus every diagnostic event
|
||||
(those are inherently rare and per-edit, exactly what users grep
|
||||
for).
|
||||
|
||||
- ``WARNING`` for action-required failures: ``server unavailable``
|
||||
(binary not on PATH) the first time per (server_id, binary),
|
||||
``no server configured`` once per language. Per-call WARNING for
|
||||
timeouts and unexpected bridge exceptions.
|
||||
|
||||
The dedup is in-process module-level sets. Each set grows at most by
|
||||
the number of distinct (server_id, root) and (server_id, binary)
|
||||
pairs touched in one Python process — bytes of memory in even an
|
||||
aggressive monorepo session. Bounded LRU was rejected: evicting an
|
||||
entry would risk re-firing the WARNING/INFO line we explicitly want
|
||||
to suppress.
|
||||
|
||||
Grep recipe::
|
||||
|
||||
tail -f ~/.hermes/logs/agent.log | rg 'lsp\\['
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from typing import Tuple
|
||||
|
||||
# Dedicated logger name so the documented grep recipe survives a
|
||||
# ``logging.getLogger(__name__)`` rename of any internal module.
|
||||
event_log = logging.getLogger("hermes.lint.lsp")
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Once-per-X dedup sets
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_announce_lock = threading.Lock()
|
||||
_announced_active: set = set() # keys: (server_id, workspace_root)
|
||||
_announced_unavailable: set = set() # keys: (server_id, binary_path_or_name)
|
||||
_announced_no_root: set = set() # keys: (server_id, file_path)
|
||||
_announced_no_server: set = set() # keys: (server_id,)
|
||||
|
||||
|
||||
def _short_path(file_path: str) -> str:
    """Return *file_path* relative to the cwd if it lies inside it.

    Paths outside the current directory — which would need a leading
    ``..`` — and paths that cannot be made relative at all are returned
    unchanged, as are empty values. This keeps log lines short in the
    common case without producing ``../../..`` chains.
    """
    if not file_path:
        return file_path
    try:
        rel = os.path.relpath(file_path)
    except ValueError:
        return file_path
    escapes_cwd = rel == ".." or rel.startswith(".." + os.sep)
    return file_path if escapes_cwd else rel
|
||||
|
||||
|
||||
def _emit(server_id: str, level: int, message: str) -> None:
    """Log *message* at *level* on the LSP event logger.

    Every line is prefixed with ``lsp[<server_id>]`` so the documented
    grep recipe matches it.
    """
    event_log.log(level, "lsp[%s] %s", server_id, message)
|
||||
|
||||
|
||||
def _announce_once(bucket: set, key: Tuple) -> bool:
    """Record *key* in *bucket* and report whether it was new.

    Returns True only for the first caller to present *key*. The check
    and the insert happen under one lock, so concurrent callers cannot
    both be told the key is new.
    """
    with _announce_lock:
        is_new = key not in bucket
        bucket.add(key)  # harmless when the key is already present
        return is_new
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public event helpers — call these from the LSP layer.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def log_clean(server_id: str, file_path: str) -> None:
    """Record that *file_path* produced no diagnostics. DEBUG (silent at default)."""
    shown = _short_path(file_path)
    _emit(server_id, logging.DEBUG, f"clean ({shown})")
|
||||
|
||||
|
||||
def log_disabled(server_id: str, file_path: str, reason: str) -> None:
    """Record that LSP was intentionally skipped for this file. DEBUG.

    *reason* says why (feature off, extension unmapped, backend not
    local, etc.).
    """
    shown = _short_path(file_path)
    _emit(server_id, logging.DEBUG, f"skipped: {reason} ({shown})")
|
||||
|
||||
|
||||
def log_active(server_id: str, workspace_root: str) -> None:
    """Announce an LSP client for (server_id, workspace_root).

    INFO the first time the pair is seen, DEBUG on every later call, so
    "is LSP actually running?" can be answered with a single grep.
    """
    if _announce_once(_announced_active, (server_id, workspace_root)):
        level, text = logging.INFO, f"active for {workspace_root}"
    else:
        level, text = logging.DEBUG, f"reused client for {workspace_root}"
    _emit(server_id, level, text)
|
||||
|
||||
|
||||
def log_diagnostics(server_id: str, file_path: str, count: int) -> None:
    """Record that *count* diagnostics arrived for a file. INFO every time.

    These are the failure signals users actually grep for, and they are
    inherently rare per edit.
    """
    shown = _short_path(file_path)
    _emit(server_id, logging.INFO, f"{count} diags ({shown})")
|
||||
|
||||
|
||||
def log_no_project_root(server_id: str, file_path: str) -> None:
    """Record that a file had no recognised project marker.

    INFO the first time per (server_id, file_path), DEBUG thereafter;
    the message text is the same at both levels.
    """
    first_time = _announce_once(_announced_no_root, (server_id, file_path))
    level = logging.INFO if first_time else logging.DEBUG
    _emit(server_id, level, f"no project root for {_short_path(file_path)}")
|
||||
|
||||
|
||||
def log_server_unavailable(server_id: str, binary_or_pkg: str) -> None:
    """Record that the server binary couldn't be resolved.

    WARNING (with an install hint) once per (server_id, binary), DEBUG
    thereafter so a hundred subsequent edits don't spam the log.
    """
    if not _announce_once(_announced_unavailable, (server_id, binary_or_pkg)):
        _emit(server_id, logging.DEBUG, f"server still unavailable: {binary_or_pkg}")
        return
    hint = "(install via `hermes lsp install <id>` or set lsp.servers.<id>.command)"
    _emit(
        server_id,
        logging.WARNING,
        f"server unavailable: {binary_or_pkg} not found " + hint,
    )
|
||||
|
||||
|
||||
def log_no_server_configured(server_id: str) -> None:
    """Warn that there is no spawn recipe for this language.

    WARNING once per server id; later calls emit nothing.
    """
    newly_seen = _announce_once(_announced_no_server, (server_id,))
    if not newly_seen:
        return
    _emit(server_id, logging.WARNING, "no server configured")
|
||||
|
||||
|
||||
def log_timeout(server_id: str, file_path: str, kind: str = "diagnostics") -> None:
    """Warn that a request to the server timed out.

    WARNING every time — each timeout is a novel event worth surfacing.
    *kind* names what timed out.
    """
    message = f"{kind} timed out for {_short_path(file_path)}"
    _emit(server_id, logging.WARNING, message)
|
||||
|
||||
|
||||
def log_server_error(server_id: str, file_path: str, exc: BaseException) -> None:
    """Warn about an unexpected exception from the LSP layer.

    The message carries the file, the exception's type name and its
    text. WARNING.
    """
    where = _short_path(file_path)
    detail = f"{type(exc).__name__}: {exc}"
    _emit(server_id, logging.WARNING, f"unexpected error for {where}: {detail}")
|
||||
|
||||
|
||||
def log_spawn_failed(server_id: str, workspace_root: str, exc: BaseException) -> None:
    """Warn that the LSP server failed to spawn or initialize.

    The message carries the workspace root, the exception's type name
    and its text. WARNING.
    """
    detail = f"{type(exc).__name__}: {exc}"
    _emit(
        server_id,
        logging.WARNING,
        f"spawn/initialize failed for {workspace_root}: {detail}",
    )
|
||||
|
||||
|
||||
def reset_announce_caches() -> None:
    """Test-only: empty every dedup cache. Production code never calls this.

    All four caches are cleared under the announce lock.
    """
    buckets = (
        _announced_active,
        _announced_unavailable,
        _announced_no_root,
        _announced_no_server,
    )
    with _announce_lock:
        for bucket in buckets:
            bucket.clear()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"event_log",
|
||||
"log_clean",
|
||||
"log_disabled",
|
||||
"log_active",
|
||||
"log_diagnostics",
|
||||
"log_no_project_root",
|
||||
"log_server_unavailable",
|
||||
"log_no_server_configured",
|
||||
"log_timeout",
|
||||
"log_server_error",
|
||||
"log_spawn_failed",
|
||||
"reset_announce_caches",
|
||||
]
|
||||
376
agent/lsp/install.py
Normal file
376
agent/lsp/install.py
Normal file
|
|
@ -0,0 +1,376 @@
|
|||
"""Auto-installation of LSP server binaries.
|
||||
|
||||
Tries to install missing servers using whatever package manager is
|
||||
appropriate. All installs go to a Hermes-owned bin staging dir,
|
||||
``<HERMES_HOME>/lsp/bin/``, so we don't pollute the user's global
|
||||
toolchain.
|
||||
|
||||
Strategies:
|
||||
|
||||
- ``auto`` — attempt to install with the best available package
|
||||
manager. This is the default.
|
||||
- ``manual`` — never install; if a binary is missing, the server is
|
||||
silently skipped and the user is told about it via ``hermes lsp
|
||||
status``.
|
||||
- ``off`` — same as ``manual`` for now (kept distinct so we can
|
||||
evolve behavior later, e.g. logging differently).
|
||||
|
||||
The actual installs happen synchronously the first time a server is
|
||||
needed and concurrent calls to :func:`try_install` for the same
|
||||
package are deduplicated via a per-package lock.
|
||||
|
||||
Failure modes are non-fatal: every install path is wrapped in
|
||||
try/except and returns ``None`` on failure. The tool layer then
|
||||
falls back to its in-process syntax checker, exactly as if the user
|
||||
hadn't enabled LSP at all.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger("agent.lsp.install")
|
||||
|
||||
# Package-name → install-strategy hint registry. Each entry is a
|
||||
# tuple of strategy name + package name + executable name. When the
|
||||
# install completes, we look for the executable in
|
||||
# ``<HERMES_HOME>/lsp/bin/`` first, then on PATH.
|
||||
#
|
||||
# Optional fields:
|
||||
# - ``extra_pkgs``: list of sibling packages to install alongside
|
||||
# ``pkg`` in the same node_modules tree. Used when an LSP server
|
||||
# has a runtime peer dependency that npm doesn't auto-pull (e.g.
|
||||
# typescript-language-server needs ``typescript``).
|
||||
INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
    # --- Python ---------------------------------------------------------
    "pyright": {"strategy": "npm", "pkg": "pyright", "bin": "pyright-langserver"},
    # --- JS/TS family ---------------------------------------------------
    "typescript-language-server": {
        "strategy": "npm",
        "pkg": "typescript-language-server",
        "bin": "typescript-language-server",
        # The server needs the `typescript` SDK (tsserver) importable from
        # the same node_modules tree; without it initialize() fails with
        # "Could not find a valid TypeScript installation".
        "extra_pkgs": ["typescript"],
    },
    "@vue/language-server": {
        "strategy": "npm", "pkg": "@vue/language-server", "bin": "vue-language-server",
    },
    "svelte-language-server": {
        "strategy": "npm", "pkg": "svelte-language-server", "bin": "svelteserver",
    },
    "@astrojs/language-server": {
        "strategy": "npm", "pkg": "@astrojs/language-server", "bin": "astro-ls",
    },
    "yaml-language-server": {
        "strategy": "npm", "pkg": "yaml-language-server", "bin": "yaml-language-server",
    },
    "bash-language-server": {
        "strategy": "npm", "pkg": "bash-language-server", "bin": "bash-language-server",
    },
    "intelephense": {"strategy": "npm", "pkg": "intelephense", "bin": "intelephense"},
    "dockerfile-language-server-nodejs": {
        "strategy": "npm",
        "pkg": "dockerfile-language-server-nodejs",
        "bin": "docker-langserver",
    },
    # --- Go -------------------------------------------------------------
    "gopls": {"strategy": "go", "pkg": "golang.org/x/tools/gopls@latest", "bin": "gopls"},
    # --- Manual-only servers ---------------------------------------------
    # Rust: too heavy to bootstrap (hundreds of MB); users install via rustup.
    "rust-analyzer": {"strategy": "manual", "pkg": "", "bin": "rust-analyzer"},
    # C/C++: clangd ships with LLVM, very heavy.
    "clangd": {"strategy": "manual", "pkg": "", "bin": "clangd"},
    # Lua: LuaLS is platform-specific binaries from GitHub releases; left
    # to the user.
    "lua-language-server": {"strategy": "manual", "pkg": "", "bin": "lua-language-server"},
}
|
||||
|
||||
|
||||
# One lock per package name, created lazily by ``_get_lock``.
_install_locks: Dict[str, threading.Lock] = {}
# Cached outcome of each install attempt (binary path, or None on failure).
_install_results: Dict[str, Optional[str]] = {}
# Guards creation of entries in ``_install_locks``.
_install_lock_meta = threading.Lock()
|
||||
|
||||
|
||||
def hermes_lsp_bin_dir() -> Path:
    """Return the Hermes-owned bin staging dir for LSP servers.

    The directory is ``$HERMES_HOME/lsp/bin``; when ``HERMES_HOME`` is unset
    *or empty* it falls back to ``~/.hermes/lsp/bin``. The directory (and
    any missing parents) is created on every call.
    """
    home = os.environ.get("HERMES_HOME")
    # An empty value must not be used as-is: Path("") / "lsp" / "bin" is a
    # path relative to the current working directory.
    if not home:
        home = os.path.join(os.path.expanduser("~"), ".hermes")
    bin_dir = Path(home) / "lsp" / "bin"
    bin_dir.mkdir(parents=True, exist_ok=True)
    return bin_dir
|
||||
|
||||
|
||||
def _existing_binary(name: str) -> Optional[str]:
    """Find ``name`` in the staging dir first, then on PATH.

    Returns the path as a string, or ``None`` when neither location has an
    executable entry by that name.
    """
    candidate = hermes_lsp_bin_dir() / name
    if candidate.exists() and os.access(candidate, os.X_OK):
        return str(candidate)
    return shutil.which(name) or None
|
||||
|
||||
|
||||
def _get_lock(pkg: str) -> threading.Lock:
    """Return the install lock for ``pkg``, creating it on first request.

    Creation is guarded by ``_install_lock_meta`` so two threads asking for
    the same package always receive the same lock object.
    """
    with _install_lock_meta:
        try:
            return _install_locks[pkg]
        except KeyError:
            created = _install_locks[pkg] = threading.Lock()
            return created
|
||||
|
||||
|
||||
def try_install(pkg: str, strategy: str = "auto") -> Optional[str]:
    """Resolve (and, in ``auto`` mode, install) the binary for ``pkg``.

    Any strategy other than ``"auto"`` (i.e. ``"manual"`` / ``"off"``) only
    probes for an existing binary and returns ``None`` if there is none.

    In ``auto`` mode the outcome — a path or ``None`` — is cached per
    package, so later calls never reinstall. Concurrent calls for the same
    package are serialized on a per-package lock.
    """
    if strategy != "auto":
        # Probe only: look up the executable name from the recipe, if any.
        bin_name = INSTALL_RECIPES.get(pkg, {}).get("bin", pkg)
        return _existing_binary(bin_name)

    # Lock-free fast path for packages already resolved.
    try:
        return _install_results[pkg]
    except KeyError:
        pass

    with _get_lock(pkg):
        # Re-check: another thread may have finished while we waited.
        if pkg not in _install_results:
            _install_results[pkg] = _do_install(pkg)
        return _install_results[pkg]
|
||||
|
||||
|
||||
def _do_install(pkg: str) -> Optional[str]:
    """Resolve ``pkg`` through its recipe, installing if the recipe allows.

    Packages without a recipe are only looked up on PATH. Otherwise an
    already-present binary wins; failing that, the recipe's strategy picks
    the installer (``npm``, ``go``, ``pip``) or gives up (``manual`` or an
    unknown strategy). Returns the binary path or ``None``.
    """
    recipe = INSTALL_RECIPES.get(pkg)
    if recipe is None:
        # Unregistered package: best effort, PATH probe only.
        return shutil.which(pkg)

    bin_name = recipe.get("bin", pkg)
    already_there = _existing_binary(bin_name)
    if already_there:
        return already_there

    strategy = recipe.get("strategy", "manual")
    if strategy == "manual":
        logger.debug("[install] %s requires manual install (recipe=%s)", pkg, recipe)
        return None

    target = recipe.get("pkg", pkg)
    if strategy == "npm":
        return _install_npm(target, bin_name, extra_pkgs=recipe.get("extra_pkgs") or [])
    if strategy == "go":
        return _install_go(target, bin_name)
    if strategy == "pip":
        return _install_pip(target, bin_name)

    logger.warning("[install] unknown strategy %r for %s", strategy, pkg)
    return None
|
||||
|
||||
|
||||
def _install_npm(
    pkg: str,
    bin_name: str,
    extra_pkgs: Optional[list] = None,
) -> Optional[str]:
    """Install an npm package into our staging dir.

    Runs ``npm install --prefix <HERMES_HOME>/lsp`` so the executables land
    in ``<staging>/node_modules/.bin/``, then symlinks the one named
    ``bin_name`` into ``<staging>/bin`` for stable access.

    ``extra_pkgs`` lists sibling packages to install into the same
    ``node_modules`` tree, for servers with runtime peer dependencies that
    npm does not pull automatically (typescript-language-server needs
    ``typescript`` next to it).

    Returns the path of the executable to launch, or ``None`` when npm is
    missing, the install fails or times out, or the executable is not
    found afterwards. Never raises for install failures.
    """
    npm = shutil.which("npm")
    if npm is None:
        logger.info("[install] cannot install %s: npm not on PATH", pkg)
        return None
    staging = hermes_lsp_bin_dir().parent  # <HERMES_HOME>/lsp/
    install_targets = [pkg, *(extra_pkgs or [])]
    try:
        logger.info(
            "[install] npm install --prefix %s %s",
            staging,
            " ".join(install_targets),
        )
        proc = subprocess.run(
            [npm, "install", "--prefix", str(staging), "--silent", "--no-fund", "--no-audit", *install_targets],
            check=False,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.warning("[install] npm install errored for %s: %s", pkg, e)
        return None
    if proc.returncode != 0:
        logger.warning(
            "[install] npm install failed for %s: %s", pkg, proc.stderr.strip()[:500]
        )
        return None

    # Find the bin
    nm_bin = staging / "node_modules" / ".bin" / bin_name
    if os.name == "nt":
        # On Windows npm writes an extensionless sh shim next to the
        # ``.cmd`` shim (see npm cmd-shim). Only the ``.cmd`` one can be
        # launched as a native process, so try it first. The suffix is
        # appended (not ``with_suffix``) so a dotted bin name is kept whole.
        candidates = [nm_bin.with_name(nm_bin.name + ".cmd"), nm_bin]
    else:
        candidates = [nm_bin]
    for c in candidates:
        if not c.exists():
            continue
        link = hermes_lsp_bin_dir() / c.name
        if link.is_symlink() and not link.exists():
            # Dangling link left by an earlier install: replace it,
            # otherwise symlink_to() below fails with FileExistsError.
            try:
                link.unlink()
            except OSError:
                return str(c)
        if not link.exists():
            try:
                link.symlink_to(c)
            except (OSError, NotImplementedError):
                # Symlinks are unavailable (common on Windows). Do NOT copy
                # the shim: it locates its package relative to its own
                # directory, so a copy in ``lsp/bin`` would not run. Use
                # the node_modules path directly.
                return str(c)
        return str(link if link.exists() else c)
    logger.warning("[install] npm install for %s succeeded but bin %s not found", pkg, bin_name)
    return None
|
||||
|
||||
|
||||
def _install_go(pkg: str, bin_name: str) -> Optional[str]:
    """Run ``go install`` for ``pkg`` with GOBIN pointed at the staging bin dir.

    Returns the path of the produced binary, or ``None`` when ``go`` is not
    on PATH, the command fails or times out, or the binary is absent
    afterwards.
    """
    go_exe = shutil.which("go")
    if go_exe is None:
        logger.info("[install] cannot install %s: go not on PATH", pkg)
        return None

    target_dir = hermes_lsp_bin_dir()
    go_env = {**os.environ, "GOBIN": str(target_dir)}
    try:
        logger.info("[install] go install %s (GOBIN=%s)", pkg, target_dir)
        result = subprocess.run(
            [go_exe, "install", pkg],
            check=False,
            capture_output=True,
            text=True,
            timeout=600,
            env=go_env,
        )
    except (subprocess.TimeoutExpired, OSError) as err:
        logger.warning("[install] go install errored for %s: %s", pkg, err)
        return None
    if result.returncode != 0:
        logger.warning(
            "[install] go install failed for %s: %s", pkg, result.stderr.strip()[:500]
        )
        return None

    built = target_dir / bin_name
    if os.name == "nt":
        built = built.with_suffix(".exe")
    if not built.exists():
        logger.warning("[install] go install for %s succeeded but bin %s not found", pkg, bin_name)
        return None
    return str(built)
|
||||
|
||||
|
||||
def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
    """Install a Python package under ``<staging>/python-packages``.

    Uses ``pip install --target`` so the user's site-packages is left
    alone. The console script is then looked up in
    ``<staging>/python-packages/bin/`` and linked (or, failing that, copied)
    into ``<staging>/bin``. Only works for packages that ship a console
    script. Returns the script path, or ``None`` on any failure.
    """
    packages_dir = hermes_lsp_bin_dir().parent / "python-packages"
    packages_dir.mkdir(parents=True, exist_ok=True)
    try:
        logger.info("[install] pip install --target %s %s", packages_dir, pkg)
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--target", str(packages_dir), "--quiet", pkg],
            check=False,
            capture_output=True,
            text=True,
            timeout=300,
        )
    except (subprocess.TimeoutExpired, OSError) as err:
        logger.warning("[install] pip install errored for %s: %s", pkg, err)
        return None
    if result.returncode != 0:
        logger.warning(
            "[install] pip install failed for %s: %s", pkg, result.stderr.strip()[:500]
        )
        return None

    # Look for the script
    script = packages_dir / "bin" / bin_name
    if not script.exists():
        return None
    link = hermes_lsp_bin_dir() / bin_name
    if not link.exists():
        try:
            link.symlink_to(script)
        except (OSError, NotImplementedError):
            # No symlink support: fall back to a copy, then to the original.
            try:
                shutil.copy2(script, link)
            except OSError:
                return str(script)
    return str(link if link.exists() else script)
|
||||
|
||||
|
||||
def detect_status(pkg: str) -> str:
    """Classify ``pkg`` as ``installed``, ``manual-only`` or ``missing``.

    Only probes for the binary; nothing is spawned or installed. Intended
    for the ``hermes lsp status`` overview.
    """
    recipe = INSTALL_RECIPES.get(pkg) or {}
    if _existing_binary(recipe.get("bin", pkg)):
        return "installed"
    return "manual-only" if recipe.get("strategy") == "manual" else "missing"
|
||||
|
||||
|
||||
# Public API of the install module.
__all__ = [
    "INSTALL_RECIPES",
    "try_install",
    "detect_status",
    "hermes_lsp_bin_dir",
]
|
||||
607
agent/lsp/manager.py
Normal file
607
agent/lsp/manager.py
Normal file
|
|
@ -0,0 +1,607 @@
|
|||
"""Service-level orchestration for LSP clients.
|
||||
|
||||
The :class:`LSPService` is the bridge between the synchronous
|
||||
file_operations layer and the async :class:`agent.lsp.client.LSPClient`.
|
||||
|
||||
Design choices:
|
||||
|
||||
- A **single asyncio event loop** runs in a background thread. All
|
||||
client work happens on that loop. Synchronous callers from
|
||||
``tools/file_operations.py`` use :meth:`get_diagnostics_sync` to
|
||||
open + wait + drain in one blocking call.
|
||||
|
||||
- One client per ``(server_id, workspace_root)`` key. Lazy spawn:
|
||||
the first request for a key spawns the client; subsequent requests
|
||||
re-use it.
|
||||
|
||||
- A **broken-set** records ``(server_id, workspace_root)`` pairs that
|
||||
failed to spawn or initialize. These are never retried for the
|
||||
life of the service. Mirrors OpenCode's design.
|
||||
|
||||
- A **delta baseline** map keeps "diagnostics-as-of-the-last-snapshot"
|
||||
per file. ``snapshot_baseline()`` is called BEFORE a write; the
|
||||
next ``get_diagnostics_sync()`` returns only diagnostics that
|
||||
weren't in the baseline. This is the lift from Claude Code's
|
||||
``beforeFileEdited`` / ``getNewDiagnostics`` pattern, except wired
|
||||
to the local LSP layer instead of MCP IDE RPC.
|
||||
|
||||
The service is **off by default** — call :meth:`is_active` to check
|
||||
whether it's actually doing anything. When LSP is disabled in
|
||||
config, when no git workspace can be detected, when all configured
|
||||
servers are missing binaries and auto-install is off, ``is_active``
|
||||
returns False and the file_operations layer falls through to the
|
||||
in-process syntax check.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from concurrent.futures import Future as ConcurrentFuture
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from agent.lsp import eventlog
|
||||
from agent.lsp.client import (
|
||||
DIAGNOSTICS_DOCUMENT_WAIT,
|
||||
LSPClient,
|
||||
file_uri,
|
||||
)
|
||||
from agent.lsp.servers import (
|
||||
ServerContext,
|
||||
ServerDef,
|
||||
SpawnSpec,
|
||||
find_server_for_file,
|
||||
language_id_for,
|
||||
)
|
||||
from agent.lsp.workspace import (
|
||||
clear_cache,
|
||||
is_inside_workspace,
|
||||
resolve_workspace_for_file,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("agent.lsp.manager")
|
||||
|
||||
DEFAULT_IDLE_TIMEOUT = 600 # seconds; servers idle for >10min get reaped
|
||||
|
||||
|
||||
class _BackgroundLoop:
|
||||
"""A daemon thread that owns one asyncio event loop.
|
||||
|
||||
Provides :meth:`run` for synchronous callers — submits a coroutine
|
||||
to the loop and blocks until it finishes (or a timeout fires).
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._ready = threading.Event()
|
||||
|
||||
def start(self) -> None:
|
||||
if self._thread is not None:
|
||||
return
|
||||
self._thread = threading.Thread(
|
||||
target=self._run_forever,
|
||||
name="hermes-lsp-loop",
|
||||
daemon=True,
|
||||
)
|
||||
self._thread.start()
|
||||
self._ready.wait(timeout=5.0)
|
||||
|
||||
def _run_forever(self) -> None:
|
||||
loop = asyncio.new_event_loop()
|
||||
self._loop = loop
|
||||
asyncio.set_event_loop(loop)
|
||||
self._ready.set()
|
||||
try:
|
||||
loop.run_forever()
|
||||
finally:
|
||||
try:
|
||||
loop.close()
|
||||
except Exception: # noqa: BLE001
|
||||
pass
|
||||
|
||||
def run(self, coro, *, timeout: Optional[float] = None) -> Any:
|
||||
"""Submit a coroutine to the loop and block until done.
|
||||
|
||||
Returns the coroutine's result, or raises its exception.
|
||||
"""
|
||||
if self._loop is None:
|
||||
raise RuntimeError("background loop not started")
|
||||
fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||
try:
|
||||
return fut.result(timeout=timeout)
|
||||
except Exception:
|
||||
fut.cancel()
|
||||
raise
|
||||
|
||||
def stop(self) -> None:
|
||||
loop = self._loop
|
||||
if loop is None:
|
||||
return
|
||||
try:
|
||||
loop.call_soon_threadsafe(loop.stop)
|
||||
except RuntimeError:
|
||||
pass
|
||||
if self._thread is not None:
|
||||
self._thread.join(timeout=2.0)
|
||||
self._loop = None
|
||||
self._thread = None
|
||||
|
||||
|
||||
class LSPService:
|
||||
"""The process-wide LSP service.
|
||||
|
||||
Created once via :meth:`create_from_config`; the
|
||||
:func:`agent.lsp.get_service` accessor manages the singleton.
|
||||
Most callers should use that accessor rather than constructing
|
||||
:class:`LSPService` directly.
|
||||
"""
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# construction + factory
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def __init__(
    self,
    *,
    enabled: bool,
    wait_mode: str,
    wait_timeout: float,
    install_strategy: str,
    binary_overrides: Optional[Dict[str, List[str]]] = None,
    env_overrides: Optional[Dict[str, Dict[str, str]]] = None,
    init_overrides: Optional[Dict[str, Dict[str, Any]]] = None,
    disabled_servers: Optional[List[str]] = None,
    idle_timeout: float = DEFAULT_IDLE_TIMEOUT,
) -> None:
    """Store the configuration and, if enabled, start the background loop.

    ``wait_mode`` values other than ``"document"`` / ``"full"`` are
    replaced by ``"document"``. Missing override mappings become empty
    dicts and ``disabled_servers`` becomes a set.
    """
    # --- configuration ---------------------------------------------------
    self._enabled = enabled
    self._wait_mode = "full" if wait_mode == "full" else "document"
    self._wait_timeout = wait_timeout
    self._install_strategy = install_strategy
    self._binary_overrides = binary_overrides or {}
    self._env_overrides = env_overrides or {}
    self._init_overrides = init_overrides or {}
    self._disabled_servers = set(disabled_servers or [])
    self._idle_timeout = idle_timeout

    # --- event loop (only spun up when LSP is enabled) ---------------------
    self._loop = _BackgroundLoop()
    if self._enabled:
        self._loop.start()

    # --- state keyed by (server_id, workspace_root) ------------------------
    self._clients: Dict[Tuple[str, str], LSPClient] = {}
    self._broken: set = set()
    self._spawning: Dict[Tuple[str, str], asyncio.Future] = {}
    self._last_used: Dict[Tuple[str, str], float] = {}
    self._state_lock = threading.Lock()

    # Delta baseline: file path -> diagnostics captured right before a
    # write. ``get_diagnostics_sync`` removes anything found here so the
    # agent only sees errors introduced by the current edit.
    self._delta_baseline: Dict[str, List[Dict[str, Any]]] = {}
|
||||
|
||||
@classmethod
def create_from_config(cls) -> Optional["LSPService"]:
    """Build a service from ``hermes_cli.config`` settings.

    Reads the ``lsp`` section: ``enabled``, ``wait_mode``,
    ``wait_timeout``, ``install_strategy`` and per-server ``servers``
    entries (``disabled``, ``command``, ``env``,
    ``initialization_options``).

    Returns ``None`` if the config can't be loaded. The service
    itself returns ``is_active()`` False when LSP is disabled. A
    ``wait_timeout`` that is not a number (e.g. ``null`` or free text)
    falls back to the default instead of raising.
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception as e:  # noqa: BLE001
        logger.debug("LSP config load failed: %s", e)
        return None

    lsp_cfg = (cfg.get("lsp") or {}) if isinstance(cfg, dict) else {}
    if not isinstance(lsp_cfg, dict):
        lsp_cfg = {}

    enabled = bool(lsp_cfg.get("enabled", True))
    wait_mode = lsp_cfg.get("wait_mode", "document")
    raw_timeout = lsp_cfg.get("wait_timeout", DIAGNOSTICS_DOCUMENT_WAIT)
    try:
        wait_timeout = float(raw_timeout)
    except (TypeError, ValueError):
        # A malformed user value must not abort service creation.
        logger.debug("LSP wait_timeout %r is not a number; using default", raw_timeout)
        wait_timeout = float(DIAGNOSTICS_DOCUMENT_WAIT)
    install_strategy = lsp_cfg.get("install_strategy", "auto")
    servers_cfg = lsp_cfg.get("servers") or {}
    disabled = []
    binary_overrides: Dict[str, List[str]] = {}
    env_overrides: Dict[str, Dict[str, str]] = {}
    init_overrides: Dict[str, Dict[str, Any]] = {}
    if isinstance(servers_cfg, dict):
        for name, sub in servers_cfg.items():
            if not isinstance(sub, dict):
                continue
            if sub.get("disabled"):
                disabled.append(name)
            cmd = sub.get("command")
            if isinstance(cmd, list) and cmd:
                binary_overrides[name] = cmd
            env = sub.get("env")
            if isinstance(env, dict):
                # Environment values must be strings.
                env_overrides[name] = {k: str(v) for k, v in env.items()}
            init = sub.get("initialization_options")
            if isinstance(init, dict):
                init_overrides[name] = init

    return cls(
        enabled=enabled,
        wait_mode=wait_mode,
        wait_timeout=wait_timeout,
        install_strategy=install_strategy,
        binary_overrides=binary_overrides,
        env_overrides=env_overrides,
        init_overrides=init_overrides,
        disabled_servers=disabled,
    )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def is_active(self) -> bool:
|
||||
"""Return True iff this service should be consulted at all."""
|
||||
return self._enabled
|
||||
|
||||
def enabled_for(self, file_path: str) -> bool:
|
||||
"""Return True iff LSP should run for this specific file.
|
||||
|
||||
Gates on workspace detection (file or cwd inside a git worktree),
|
||||
on whether any registered server matches the extension, and
|
||||
on whether the (server_id, workspace_root) pair is in the
|
||||
broken-set from a previous spawn failure.
|
||||
|
||||
Files in already-broken pairs return False so the file_operations
|
||||
layer skips the LSP path entirely — no spawn attempts, no
|
||||
timeout cost — until the service is restarted (``hermes lsp
|
||||
restart``) or the process exits.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return False
|
||||
srv = find_server_for_file(file_path)
|
||||
if srv is None or srv.server_id in self._disabled_servers:
|
||||
return False
|
||||
ws_root, gated_in = resolve_workspace_for_file(file_path)
|
||||
if not (ws_root and gated_in):
|
||||
return False
|
||||
# Broken-set short-circuit. Use the per-server root if we can
|
||||
# compute one cheaply; otherwise fall back to the workspace
|
||||
# root as the broken key (which is what _get_or_spawn would
|
||||
# have used anyway when it failed).
|
||||
try:
|
||||
per_server_root = srv.resolve_root(file_path, ws_root) or ws_root
|
||||
except Exception: # noqa: BLE001
|
||||
per_server_root = ws_root
|
||||
if (srv.server_id, per_server_root) in self._broken:
|
||||
return False
|
||||
return True
|
||||
|
||||
def snapshot_baseline(self, file_path: str) -> None:
|
||||
"""Snapshot current diagnostics for ``file_path`` as the delta baseline.
|
||||
|
||||
Called BEFORE a write so the next ``get_diagnostics_sync()``
|
||||
can filter out pre-existing errors. Best-effort — failures
|
||||
are silently swallowed so a flaky server can't break a write.
|
||||
|
||||
Outer timeouts (e.g. server hangs during initialize) mark the
|
||||
(server_id, workspace_root) pair as broken so subsequent edits
|
||||
skip it instantly instead of re-paying the timeout cost.
|
||||
"""
|
||||
if not self.enabled_for(file_path):
|
||||
return
|
||||
try:
|
||||
diags = self._loop.run(self._snapshot_async(file_path), timeout=8.0)
|
||||
self._delta_baseline[os.path.abspath(file_path)] = diags or []
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("baseline snapshot failed for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
self._delta_baseline[os.path.abspath(file_path)] = []
|
||||
|
||||
def get_diagnostics_sync(
|
||||
self,
|
||||
file_path: str,
|
||||
*,
|
||||
delta: bool = True,
|
||||
timeout: Optional[float] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Synchronously open ``file_path`` in the right server, wait for
|
||||
diagnostics, return them.
|
||||
|
||||
If ``delta`` is True (default), the result is filtered against
|
||||
any baseline previously captured via :meth:`snapshot_baseline`.
|
||||
Diagnostics present in the baseline are removed so the caller
|
||||
only sees errors introduced by the current edit.
|
||||
|
||||
Returns an empty list when LSP is disabled, when no workspace
|
||||
can be detected, when no server matches, or when the server
|
||||
can't be spawned. Never raises.
|
||||
"""
|
||||
if not self.enabled_for(file_path):
|
||||
return []
|
||||
|
||||
# Resolve server_id eagerly so we can emit structured logs even
|
||||
# when the request errors out below.
|
||||
srv = find_server_for_file(file_path)
|
||||
server_id = srv.server_id if srv else "?"
|
||||
|
||||
try:
|
||||
t = timeout if timeout is not None else self._wait_timeout + 2.0
|
||||
diags = self._loop.run(self._open_and_wait_async(file_path), timeout=t) or []
|
||||
except asyncio.TimeoutError as e:
|
||||
eventlog.log_timeout(server_id, file_path)
|
||||
logger.debug("LSP diagnostics timeout for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
return []
|
||||
except Exception as e: # noqa: BLE001
|
||||
eventlog.log_server_error(server_id, file_path, e)
|
||||
logger.debug("LSP diagnostics fetch failed for %s: %s", file_path, e)
|
||||
self._mark_broken_for_file(file_path, e)
|
||||
return []
|
||||
|
||||
abs_path = os.path.abspath(file_path)
|
||||
if delta:
|
||||
baseline = self._delta_baseline.get(abs_path) or []
|
||||
if baseline:
|
||||
seen = {_diag_key(d) for d in baseline}
|
||||
diags = [d for d in diags if _diag_key(d) not in seen]
|
||||
# Roll baseline forward — next call returns deltas relative
|
||||
# to the just-emitted state, mirroring claude-code's
|
||||
# diagnosticTracking.
|
||||
try:
|
||||
fresh = self._loop.run(self._current_diags_async(file_path), timeout=2.0) or []
|
||||
except Exception: # noqa: BLE001
|
||||
fresh = []
|
||||
if fresh:
|
||||
self._delta_baseline[abs_path] = fresh
|
||||
|
||||
if diags:
|
||||
eventlog.log_diagnostics(server_id, file_path, len(diags))
|
||||
else:
|
||||
eventlog.log_clean(server_id, file_path)
|
||||
return diags
|
||||
|
||||
def _mark_broken_for_file(self, file_path: str, exc: BaseException) -> None:
    """Add the file's (server_id, workspace_root) pair to the broken-set.

    Used after the outer ``_loop.run`` call failed or timed out, so later
    edits skip this server immediately instead of paying the timeout
    again. Any client already registered for the pair is removed and shut
    down, and a spawn-failed event is logged the first time the pair is
    marked.

    ``exc`` is the exception the caller caught; it is only logged, never
    re-raised.
    """
    server = find_server_for_file(file_path)
    if server is None:
        return
    workspace, inside = resolve_workspace_for_file(file_path)
    if not workspace or not inside:
        return
    try:
        root = server.resolve_root(file_path, workspace) or workspace
    except Exception:  # noqa: BLE001
        root = workspace

    pair = (server.server_id, root)
    first_failure = pair not in self._broken
    self._broken.add(pair)

    # A client may have been registered before the failure; drop it so a
    # half-initialized instance does not linger in ``_clients``.
    with self._state_lock:
        stale_client = self._clients.pop(pair, None)
    if stale_client is not None:
        try:
            # Bounded wait (1s) for the client to clean up.
            self._loop.run(stale_client.shutdown(), timeout=1.0)
        except Exception:  # noqa: BLE001
            pass

    if first_failure:
        eventlog.log_spawn_failed(server.server_id, root, exc)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
"""Tear down all clients and stop the background loop."""
|
||||
if not self._enabled:
|
||||
return
|
||||
try:
|
||||
self._loop.run(self._shutdown_async(), timeout=10.0)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("LSP shutdown error: %s", e)
|
||||
self._loop.stop()
|
||||
clear_cache()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# async internals
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _snapshot_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Open ``file_path`` in its server, wait for diagnostics, return them.

    Returns an empty list when no client is available or when opening /
    waiting fails. On success the client's last-used time is refreshed.
    """
    client = await self._get_or_spawn(file_path)
    if client is None:
        return []
    try:
        lang = language_id_for(file_path)
        version = await client.open_file(file_path, language_id=lang)
        await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
    except Exception as err:  # noqa: BLE001
        logger.debug("snapshot open/wait failed: %s", err)
        return []
    client_key = (client.server_id, client.workspace_root)
    self._last_used[client_key] = time.time()
    return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _open_and_wait_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Open and save ``file_path`` in its server, then wait for diagnostics.

    Same flow as the baseline snapshot plus a ``save_file`` notification
    between open and wait. Returns an empty list when no client is
    available or any step fails; on success the client's last-used time
    is refreshed.
    """
    client = await self._get_or_spawn(file_path)
    if client is None:
        return []
    try:
        lang = language_id_for(file_path)
        version = await client.open_file(file_path, language_id=lang)
        await client.save_file(file_path)
        await client.wait_for_diagnostics(file_path, version, mode=self._wait_mode)
    except Exception as err:  # noqa: BLE001
        logger.debug("open/wait failed for %s: %s", file_path, err)
        return []
    client_key = (client.server_id, client.workspace_root)
    self._last_used[client_key] = time.time()
    return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _current_diags_async(self, file_path: str) -> List[Dict[str, Any]]:
    """Return the diagnostics an already-running client holds for ``file_path``.

    Never spawns a server: returns an empty list when the workspace or
    server cannot be resolved or when no client is registered for the
    pair.
    """
    ws, gated = resolve_workspace_for_file(file_path)
    srv = find_server_for_file(file_path)
    if not (ws and gated and srv):
        return []
    # Clients are registered under (server_id, per-server root) — the key
    # ``_get_or_spawn`` and ``_mark_broken_for_file`` use. Looking up under
    # the raw workspace root would miss whenever ``resolve_root`` narrows
    # the root, so resolve it the same way here.
    try:
        root = srv.resolve_root(file_path, ws) or ws
    except Exception:  # noqa: BLE001
        root = ws
    with self._state_lock:
        client = self._clients.get((srv.server_id, root))
    if client is None:
        return []
    return list(client.diagnostics_for(file_path))
|
||||
|
||||
async def _get_or_spawn(self, file_path: str) -> Optional[LSPClient]:
    """Return a running client for ``file_path``, spawning one if needed.

    Returns ``None`` when no server handles the file, the server is
    disabled in config, the file is not inside a gated workspace, the
    server's root resolution excludes the file, the (server, root) pair is
    already marked broken, or the spawn fails.

    Concurrent callers for the same (server, root) share one spawn through
    a future stored in ``self._spawning``. That future is always resolved
    before this coroutine exits, so waiters can never hang on it.
    """
    srv = find_server_for_file(file_path)
    if srv is None:
        return None
    if srv.server_id in self._disabled_servers:
        eventlog.log_disabled(srv.server_id, file_path, "disabled in config")
        return None
    ws_root, gated = resolve_workspace_for_file(file_path)
    if not (ws_root and gated):
        eventlog.log_no_project_root(srv.server_id, file_path)
        return None
    per_server_root = srv.resolve_root(file_path, ws_root)
    if per_server_root is None:
        eventlog.log_disabled(
            srv.server_id, file_path, "exclude marker hit (server gated off)"
        )
        return None  # exclude marker hit, server gated off

    key = (srv.server_id, per_server_root)
    if key in self._broken:
        return None
    with self._state_lock:
        client = self._clients.get(key)
        if client is not None and client.is_running:
            eventlog.log_active(srv.server_id, per_server_root)
            return client
        spawning = self._spawning.get(key)
    if spawning is not None:
        try:
            # Shield the shared future: if this waiter's task is cancelled,
            # awaiting the bare future would cancel it for every other
            # waiter and make the spawner's ``set_result`` raise.
            return await asyncio.shield(spawning)
        except Exception:  # noqa: BLE001
            return None

    # Begin spawn
    loop = asyncio.get_running_loop()
    spawn_future: asyncio.Future = loop.create_future()

    def _settle(value: Optional[LSPClient]) -> None:
        # Resolve the shared future at most once.
        if not spawn_future.done():
            spawn_future.set_result(value)

    with self._state_lock:
        self._spawning[key] = spawn_future
    try:
        ctx = ServerContext(
            workspace_root=per_server_root,
            install_strategy=self._install_strategy,
            binary_overrides=self._binary_overrides,
            env_overrides=self._env_overrides,
            init_overrides=self._init_overrides,
        )
        spec = srv.build_spawn(per_server_root, ctx)
        if spec is None:
            # ``build_spawn`` returns None when the binary can't be
            # located (auto-install disabled, manual-only server,
            # or install attempt failed). Surface this once via
            # the structured logger so the user can act on it.
            # NOTE(review): server_id is passed for both arguments;
            # confirm against ``eventlog.log_server_unavailable``.
            eventlog.log_server_unavailable(srv.server_id, srv.server_id)
            self._broken.add(key)
            _settle(None)
            return None
        client = LSPClient(
            server_id=srv.server_id,
            workspace_root=spec.workspace_root,
            command=spec.command,
            env=spec.env,
            cwd=spec.cwd,
            initialization_options=spec.initialization_options,
            seed_diagnostics_on_first_push=spec.seed_diagnostics_on_first_push or srv.seed_first_push,
        )
        try:
            await client.start()
        except Exception as e:  # noqa: BLE001
            eventlog.log_spawn_failed(srv.server_id, per_server_root, e)
            self._broken.add(key)
            _settle(None)
            return None
        with self._state_lock:
            self._clients[key] = client
            self._last_used[key] = time.time()
        eventlog.log_active(srv.server_id, per_server_root)
        _settle(client)
        return client
    finally:
        with self._state_lock:
            self._spawning.pop(key, None)
        # Covers every exit path that did not settle the future itself:
        # an unexpected exception from ``build_spawn`` / ``LSPClient(...)``
        # or cancellation while awaiting ``client.start()``.
        _settle(None)
|
||||
|
||||
async def _shutdown_async(self) -> None:
|
||||
with self._state_lock:
|
||||
clients = list(self._clients.values())
|
||||
self._clients.clear()
|
||||
self._broken.clear()
|
||||
self._last_used.clear()
|
||||
await asyncio.gather(
|
||||
*(c.shutdown() for c in clients),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# status / introspection (used by ``hermes lsp status``)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
    """Return a snapshot of the service for the CLI status command.

    The result holds the service settings, one row per tracked client
    (server id, workspace root, state, running flag), the broken
    (server, root) keys, and the sorted list of disabled server ids.
    """
    client_rows = []
    with self._state_lock:
        for key, lsp in self._clients.items():
            client_rows.append(
                {
                    "server_id": key[0],
                    "workspace_root": key[1],
                    "state": lsp.state,
                    "running": lsp.is_running,
                }
            )
        broken_keys = [*self._broken]
    status = {
        "enabled": self._enabled,
        "wait_mode": self._wait_mode,
        "wait_timeout": self._wait_timeout,
        "install_strategy": self._install_strategy,
        "clients": client_rows,
        "broken": broken_keys,
        "disabled_servers": sorted(self._disabled_servers),
    }
    return status
|
||||
|
||||
|
||||
def _diag_key(d: Dict[str, Any]) -> str:
|
||||
"""Content equality key used for delta filtering. Mirrors
|
||||
:func:`agent.lsp.client._diagnostic_key`."""
|
||||
rng = d.get("range") or {}
|
||||
start = rng.get("start") or {}
|
||||
end = rng.get("end") or {}
|
||||
code = d.get("code")
|
||||
if code is not None and not isinstance(code, str):
|
||||
code = str(code)
|
||||
return "\x00".join(
|
||||
[
|
||||
str(d.get("severity") or 1),
|
||||
str(code or ""),
|
||||
str(d.get("source") or ""),
|
||||
str(d.get("message") or "").strip(),
|
||||
f"{start.get('line', 0)}:{start.get('character', 0)}-{end.get('line', 0)}:{end.get('character', 0)}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["LSPService"]
|
||||
196
agent/lsp/protocol.py
Normal file
196
agent/lsp/protocol.py
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
"""Minimal LSP JSON-RPC 2.0 framer over async streams.
|
||||
|
||||
LSP wire format:
|
||||
|
||||
Content-Length: <bytes>\\r\\n
|
||||
\\r\\n
|
||||
<utf-8 JSON body>
|
||||
|
||||
The body is a JSON-RPC 2.0 envelope: request, response, or notification.
|
||||
|
||||
This module replaces what ``vscode-jsonrpc/node`` would do in a
|
||||
TypeScript implementation. We keep it deliberately small — just the
|
||||
framer + envelope helpers — so :class:`agent.lsp.client.LSPClient` can
|
||||
focus on protocol semantics.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("agent.lsp.protocol")
|
||||
|
||||
# LSP error codes we care about. Full list in
|
||||
# https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#errorCodes
|
||||
ERROR_CONTENT_MODIFIED = -32801
|
||||
ERROR_REQUEST_CANCELLED = -32800
|
||||
ERROR_METHOD_NOT_FOUND = -32601
|
||||
|
||||
|
||||
class LSPProtocolError(Exception):
    """Signal that the LSP wire protocol itself was violated.

    Used for broken framing or a broken envelope. A server that answers
    with a well-formed JSON-RPC error response is protocol-conformant and
    is reported through :class:`LSPRequestError` instead.
    """
|
||||
|
||||
|
||||
class LSPRequestError(Exception):
    """Error response returned by the server for an LSP request.

    Exposes the JSON-RPC ``code`` and ``message`` plus the optional
    ``data`` payload as attributes.
    """

    def __init__(self, code: int, message: str, data: Any = None) -> None:
        summary = "LSP error {}: {}".format(code, message)
        super().__init__(summary)
        self.code, self.message, self.data = code, message, data
|
||||
|
||||
|
||||
def encode_message(obj: dict) -> bytes:
    """Serialize a JSON-RPC envelope into one Content-Length framed message.

    The body is compact UTF-8 JSON (no whitespace after separators, no
    ASCII escaping). ``Content-Length`` is the byte length of that encoded
    body, not its character count.
    """
    payload = json.dumps(obj, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
    prefix = ("Content-Length: %d\r\n\r\n" % len(payload)).encode("ascii")
    return b"".join((prefix, payload))
|
||||
|
||||
|
||||
async def read_message(reader: asyncio.StreamReader) -> Optional[dict]:
    """Read one Content-Length framed JSON-RPC message from the stream.

    Returns the decoded JSON body, or ``None`` on clean EOF (the stream
    ended between messages, before any header byte was read). The reader
    is advanced to just past the JSON body on success.

    Raises:
        LSPProtocolError: on any framing problem. That covers EOF in the
            middle of a header block or body, a header line longer than
            the stream's buffer limit, a header block above 8 KiB, a
            non-ASCII or colon-less header line, a missing, non-integer
            or out-of-range ``Content-Length``, and a body that is not
            valid UTF-8 JSON.
    """
    headers: dict = {}
    header_bytes = 0
    while True:
        try:
            line = await reader.readuntil(b"\r\n")
        except asyncio.IncompleteReadError as e:
            # EOF while reading headers. If we hadn't started a header
            # block, treat as clean EOF; otherwise the framing is bad.
            if not e.partial and not headers:
                return None
            raise LSPProtocolError(
                f"unexpected EOF while reading LSP headers (partial={e.partial!r})"
            ) from e
        except asyncio.LimitOverrunError as e:
            # ``readuntil`` raises this when no CRLF shows up within the
            # stream's buffer limit. Report it as a framing error like
            # every other malformed-header case instead of leaking an
            # asyncio-specific exception to callers.
            raise LSPProtocolError(
                f"LSP header line exceeded the stream buffer limit: {e}"
            ) from e
        # Defensive cap against a server streaming headers without ever
        # emitting CRLF-CRLF. Caps total header bytes at 8 KiB — a
        # well-behaved server fits in well under 200 bytes.
        header_bytes += len(line)
        if header_bytes > 8192:
            raise LSPProtocolError(
                "LSP header block exceeded 8 KiB without terminator"
            )
        line = line[:-2]  # strip CRLF
        if not line:
            break  # blank line ends header block
        try:
            key, sep, value = line.decode("ascii").partition(":")
        except UnicodeDecodeError as e:
            raise LSPProtocolError(f"non-ASCII LSP header: {line!r}") from e
        # ``partition`` returns an empty separator when the line has no
        # colon at all; such a line is not a ``Name: value`` header.
        if not sep or not key.strip():
            raise LSPProtocolError(f"malformed LSP header line: {line!r}")
        headers[key.strip().lower()] = value.strip()

    cl = headers.get("content-length")
    if cl is None:
        raise LSPProtocolError(f"LSP message missing Content-Length: {headers!r}")
    try:
        n = int(cl)
    except ValueError as e:
        raise LSPProtocolError(f"non-integer Content-Length: {cl!r}") from e
    if n < 0 or n > 64 * 1024 * 1024:  # 64 MiB sanity cap
        raise LSPProtocolError(f"unreasonable Content-Length: {n}")

    try:
        body = await reader.readexactly(n)
    except asyncio.IncompleteReadError as e:
        raise LSPProtocolError(
            f"truncated LSP body: expected {n} bytes, got {len(e.partial)}"
        ) from e

    try:
        return json.loads(body.decode("utf-8"))
    except json.JSONDecodeError as e:
        raise LSPProtocolError(f"invalid JSON in LSP body: {e}") from e
    except UnicodeDecodeError as e:
        raise LSPProtocolError(f"non-UTF-8 LSP body: {e}") from e
|
||||
|
||||
|
||||
def make_request(req_id: int, method: str, params: Any) -> dict:
    """Build a JSON-RPC 2.0 request envelope.

    The ``params`` member is left out entirely when ``params`` is ``None``.
    """
    envelope: dict = {"jsonrpc": "2.0", "id": req_id, "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
|
||||
|
||||
|
||||
def make_notification(method: str, params: Any) -> dict:
    """Build a JSON-RPC 2.0 notification envelope (no ``id``).

    The ``params`` member is left out entirely when ``params`` is ``None``.
    """
    envelope: dict = {"jsonrpc": "2.0", "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
|
||||
|
||||
|
||||
def make_response(req_id: Any, result: Any) -> dict:
    """Build a JSON-RPC 2.0 success response envelope.

    ``result`` is always included, even when it is ``None``.
    """
    return dict(jsonrpc="2.0", id=req_id, result=result)
|
||||
|
||||
|
||||
def make_error_response(req_id: Any, code: int, message: str, data: Any = None) -> dict:
    """Build a JSON-RPC 2.0 error response envelope.

    The nested error object carries ``code`` and ``message``; ``data`` is
    added only when it is not ``None``.
    """
    error_obj: dict = dict(code=code, message=message)
    if data is not None:
        error_obj.update(data=data)
    return dict(jsonrpc="2.0", id=req_id, error=error_obj)
|
||||
|
||||
|
||||
def classify_message(msg: dict) -> Tuple[str, Any]:
    """Classify a decoded JSON-RPC message as ``(kind, key)``.

    ``kind`` is ``request``, ``response``, ``notification`` or ``invalid``.
    ``key`` is the id for requests and responses, the method name for
    notifications, and ``None`` for invalid messages. Anything that is not
    a dict, or whose ``jsonrpc`` member is not ``"2.0"``, is invalid.
    """
    if not isinstance(msg, dict) or msg.get("jsonrpc") != "2.0":
        return "invalid", None

    if "id" in msg:
        # An id plus a method is a request; an id plus a result or error
        # is a response; an id on its own is not a valid message.
        if "method" in msg:
            return "request", msg["id"]
        if "result" in msg or "error" in msg:
            return "response", msg["id"]
        return "invalid", None

    if "method" in msg:
        return "notification", msg["method"]
    return "invalid", None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"ERROR_CONTENT_MODIFIED",
|
||||
"ERROR_REQUEST_CANCELLED",
|
||||
"ERROR_METHOD_NOT_FOUND",
|
||||
"LSPProtocolError",
|
||||
"LSPRequestError",
|
||||
"encode_message",
|
||||
"read_message",
|
||||
"make_request",
|
||||
"make_notification",
|
||||
"make_response",
|
||||
"make_error_response",
|
||||
"classify_message",
|
||||
]
|
||||
78
agent/lsp/reporter.py
Normal file
78
agent/lsp/reporter.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
"""Format LSP diagnostics for inclusion in tool output.
|
||||
|
||||
The model sees a compact, severity-filtered, line-bounded summary of
|
||||
diagnostics introduced by the latest edit. Format matches what
|
||||
OpenCode's ``lsp/diagnostic.ts`` and Claude Code's
|
||||
``formatDiagnosticsSummary`` produce — ``<diagnostics>`` blocks with
|
||||
1-indexed line/column, capped at ``MAX_PER_FILE`` errors.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Severity-1 only by default — warnings/info/hints would flood the
|
||||
# agent. Lift this in config under ``lsp.severities`` if needed.
|
||||
SEVERITY_NAMES = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
|
||||
DEFAULT_SEVERITIES = frozenset({1}) # ERROR only
|
||||
|
||||
MAX_PER_FILE = 20
|
||||
MAX_TOTAL_CHARS = 4000
|
||||
|
||||
|
||||
def format_diagnostic(d: Dict[str, Any]) -> str:
    """Render a single diagnostic as ``SEV [line:col] message [code] (source)``.

    Line and column are converted from the 0-based values in the
    diagnostic to 1-based. The ``[code]`` and ``(source)`` suffixes are
    omitted when the respective field is missing or empty.
    """
    label = SEVERITY_NAMES.get(d.get("severity") or 1, "ERROR")
    position = (d.get("range") or {}).get("start") or {}
    row = int(position.get("line", 0)) + 1
    column = int(position.get("character", 0)) + 1
    text = str(d.get("message") or "").rstrip()

    pieces = [f"{label} [{row}:{column}] {text}"]
    code = d.get("code")
    if code not in (None, ""):
        pieces.append(f" [{code}]")
    source = d.get("source")
    if source:
        pieces.append(f" ({source})")
    return "".join(pieces)
|
||||
|
||||
|
||||
def report_for_file(
    file_path: str,
    diagnostics: List[Dict[str, Any]],
    *,
    severities: frozenset = DEFAULT_SEVERITIES,
    max_per_file: int = MAX_PER_FILE,
) -> str:
    """Build a ``<diagnostics file=...>`` block for one file.

    Diagnostics are filtered by ``severities`` (a missing severity counts
    as 1) and at most ``max_per_file`` of them are rendered; the rest are
    summarized in a trailing ``... and N more`` line.

    Returns an empty string when nothing passes the filter, so callers
    can do ``if block:`` to skip empty cases.
    """
    if not diagnostics:
        return ""

    relevant = []
    for diag in diagnostics:
        if (diag.get("severity") or 1) in severities:
            relevant.append(diag)
    if not relevant:
        return ""

    shown = relevant[:max_per_file]
    hidden = len(relevant) - len(shown)
    body = "\n".join(map(format_diagnostic, shown))
    if hidden > 0:
        body = f"{body}\n... and {hidden} more"
    return '<diagnostics file="{}">\n{}\n</diagnostics>'.format(file_path, body)
|
||||
|
||||
|
||||
def truncate(s: str, *, limit: int = MAX_TOTAL_CHARS) -> str:
    """Hard-cap a formatted summary string at ``limit`` characters.

    Strings within the limit are returned unchanged. Longer strings are
    cut and suffixed with a truncation marker so the result is exactly
    ``limit`` characters. When ``limit`` is too small to hold the marker,
    the string is cut at ``limit`` with no marker (empty for ``limit <= 0``).
    """
    if len(s) <= limit:
        return s
    marker = "\n…[truncated]"
    if limit < len(marker):
        # ``limit - len(marker)`` would be negative here, and a negative
        # slice bound keeps almost the whole string, so the result would
        # blow past the cap. Cut hard instead.
        return s[: max(limit, 0)]
    return s[: limit - len(marker)] + marker
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SEVERITY_NAMES",
|
||||
"DEFAULT_SEVERITIES",
|
||||
"MAX_PER_FILE",
|
||||
"format_diagnostic",
|
||||
"report_for_file",
|
||||
"truncate",
|
||||
]
|
||||
1040
agent/lsp/servers.py
Normal file
1040
agent/lsp/servers.py
Normal file
File diff suppressed because it is too large
Load diff
223
agent/lsp/workspace.py
Normal file
223
agent/lsp/workspace.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
"""Workspace and project-root resolution for LSP.
|
||||
|
||||
Two concerns live here:
|
||||
|
||||
1. **Workspace gate** — the upper-level "is this directory a project?"
|
||||
check. Hermes only runs LSP when the cwd (or the file being edited)
|
||||
sits inside a git worktree. Files outside any git root never
|
||||
trigger LSP, even if a server is configured. This keeps Telegram
|
||||
gateway users on user-home cwd's from spawning daemons.
|
||||
|
||||
2. **NearestRoot** — the per-server project-root walk. Each language
|
||||
server cares about a different marker (``pyproject.toml`` for
|
||||
Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
|
||||
wants the directory containing that marker. ``nearest_root()``
|
||||
walks up from a starting path looking for any of a list of marker
|
||||
files, optionally bailing if an exclude marker shows up first.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("agent.lsp.workspace")
|
||||
|
||||
# Cache: normalized start directory → (worktree_root, is_git) so repeated
# calls don't re-stat. Cleared on shutdown. Keys are absolute, normalized
# paths; symlinks are NOT resolved (see ``normalize_path``).
|
||||
_workspace_cache: dict = {}
|
||||
|
||||
|
||||
def normalize_path(path: str) -> str:
    """Return ``path`` in a stable form suitable for use as a map key.

    Expands a leading ``~``, makes the path absolute and collapses
    ``.``/``..`` segments. Symlinks are deliberately NOT resolved —
    symlink stability matters for some LSP servers (rust-analyzer cares
    about Cargo workspace identity) and we want the canonical path the
    user typed when possible.
    """
    expanded = os.path.expanduser(path)
    return os.path.abspath(expanded)
|
||||
|
||||
|
||||
def find_git_worktree(start: str) -> Optional[str]:
    """Find the nearest ancestor of ``start`` that contains a ``.git`` entry.

    ``start`` may be a file (its parent directory is used) or a directory.
    Both a ``.git`` directory and a ``.git`` *file* (as created by
    ``git worktree add``) count. Returns the containing directory as a
    string, or ``None`` when no git root is found.

    Results, including negative ones, are memoized in ``_workspace_cache``
    keyed by the normalized start directory.
    """
    try:
        origin = Path(normalize_path(start))
        if origin.is_file():
            origin = origin.parent
    except (OSError, RuntimeError, ValueError):
        # Pathological input (loop in symlinks, encoding error, etc.) —
        # bail out rather than crash the lint hook.
        return None

    cache_key = str(origin)
    hit = _workspace_cache.get(cache_key)
    if hit is not None:
        return hit[0]

    found: Optional[str] = None
    # Defensive cap: look at no more than 64 directories (the start
    # directory plus its ancestors), so a pathological path can't keep
    # us walking.
    for directory in (origin, *origin.parents)[:64]:
        try:
            has_git = (directory / ".git").exists()
        except OSError:
            # Permission error on a parent dir — bail out cleanly.
            break
        if has_git:
            found = str(directory)
            break

    _workspace_cache[cache_key] = (found, found is not None)
    return found
|
||||
|
||||
|
||||
def is_inside_workspace(path: str, workspace_root: str) -> bool:
    """Return True iff ``path`` is inside (or equal to) ``workspace_root``.

    Uses absolute paths but does not resolve symlinks — a file accessed
    via a symlink that points outside the workspace still counts as
    outside. This is the conservative interpretation; matches LSP
    behaviour where servers reject didOpen for unrelated files.

    Both paths go through ``os.path.normcase`` before comparison, so on
    Windows the check ignores letter case and slash direction. On POSIX
    ``normcase`` is a no-op, so the comparison stays case-sensitive there.
    """
    p = os.path.normcase(normalize_path(path))
    root = os.path.normcase(normalize_path(workspace_root))
    if p == root:
        return True
    # ``commonpath`` compares whole path components, so ``/repo-old`` is
    # not treated as living inside ``/repo``.
    try:
        common = os.path.commonpath([p, root])
    except ValueError:
        # Different drives on Windows.
        return False
    return common == root
|
||||
|
||||
|
||||
def nearest_root(
    start: str,
    markers: Iterable[str],
    *,
    excludes: Optional[Iterable[str]] = None,
    ceiling: Optional[str] = None,
) -> Optional[str]:
    """Walk up from ``start`` looking for any of the given marker files.

    Returns the **directory containing** the first matched marker, or
    ``None`` when the walk passes ``ceiling`` (the ceiling directory
    itself is still checked) or the filesystem root without a match.

    At every level the ``excludes`` are checked before the ``markers``.
    If an exclude marker is present, the result is ``None`` — the server
    is gated off for that file. Mirrors OpenCode's NearestRoot exclude
    semantics (e.g. typescript skips deno projects when ``deno.json`` is
    found before ``package.json``).
    """
    origin = Path(normalize_path(start))
    try:
        if origin.is_file():
            origin = origin.parent
    except (OSError, RuntimeError, ValueError):
        return None
    stop_at = Path(normalize_path(ceiling)) if ceiling else None

    wanted = list(markers)
    blocked = list(excludes) if excludes else []

    def _present(directory: Path, name: str) -> bool:
        # An entry we cannot stat is treated as absent.
        try:
            return (directory / name).exists()
        except OSError:
            return False

    # Defensive cap matching ``find_git_worktree``: inspect at most 64
    # directories (the start directory plus its ancestors).
    for directory in (origin, *origin.parents)[:64]:
        if any(_present(directory, name) for name in blocked):
            return None
        if any(_present(directory, name) for name in wanted):
            return str(directory)
        if stop_at is not None and directory == stop_at:
            return None
    return None
|
||||
|
||||
|
||||
def resolve_workspace_for_file(
    file_path: str,
    *,
    cwd: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
    """Resolve the workspace root for a file.

    Returns ``(workspace_root, gated_in)`` where ``gated_in`` is True iff
    LSP should run for this file at all.

    The git worktree found by walking up from ``cwd`` (default: the
    process cwd) wins when the file lies inside it. Otherwise the worktree
    found by walking up from the file's own location is used, which covers
    edits to a checkout unrelated to the launch directory. When neither
    yields a worktree the result is ``(None, False)``.
    """
    anchor = cwd or os.getcwd()
    launch_root = find_git_worktree(anchor)
    if launch_root is not None and is_inside_workspace(file_path, launch_root):
        return launch_root, True

    # The file is outside the cwd's worktree (or the cwd has none):
    # fall back to the file's own location as the anchor.
    own_root = find_git_worktree(file_path)
    if own_root is None:
        return None, False
    return own_root, True
|
||||
|
||||
|
||||
def clear_cache() -> None:
    """Drop every memoized workspace-resolution result.

    Called on service shutdown so that a later re-initialization starts
    from an empty cache instead of reusing results from the previous
    session.
    """
    _workspace_cache.clear()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"find_git_worktree",
|
||||
"is_inside_workspace",
|
||||
"nearest_root",
|
||||
"normalize_path",
|
||||
"resolve_workspace_for_file",
|
||||
"clear_cache",
|
||||
]
|
||||
|
|
@ -10,7 +10,7 @@ import os
|
|||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
|
@ -1330,27 +1330,66 @@ def _resolve_codex_oauth_context_length(
|
|||
return None
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
def _resolve_nous_context_length(
|
||||
model: str,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
) -> Tuple[Optional[int], str]:
|
||||
"""Resolve Nous Portal model context length.
|
||||
|
||||
Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
|
||||
prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
|
||||
with version normalization (dot↔dash).
|
||||
Tries the live Nous inference endpoint first (authoritative), then falls
|
||||
back to OpenRouter metadata with suffix/version matching.
|
||||
|
||||
Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
|
||||
'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
|
||||
'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6'). Version
|
||||
normalization (dot↔dash) is applied to handle name drifts.
|
||||
|
||||
Returns ``(context_length, source)`` where ``source`` is one of:
|
||||
- ``"portal"`` — live /v1/models response (authoritative)
|
||||
- ``"openrouter"`` — OpenRouter cache fallback (non-authoritative;
|
||||
callers must NOT persist this to the on-disk cache or a single
|
||||
portal blip will freeze the wrong value in forever)
|
||||
- ``""`` — could not resolve
|
||||
"""
|
||||
metadata = fetch_model_metadata() # OpenRouter cache
|
||||
# Exact match first
|
||||
# Portal first — the Nous /models endpoint is authoritative for what our
|
||||
# infrastructure enforces and may differ from OR (e.g. OR reports 1M for
|
||||
# qwen3.6-plus; the portal correctly says 262144). Fall back to the OR
|
||||
# catalog only if the portal doesn't list the model.
|
||||
if base_url:
|
||||
portal_ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if portal_ctx is not None:
|
||||
return portal_ctx, "portal"
|
||||
|
||||
metadata = fetch_model_metadata()
|
||||
|
||||
def _safe_ctx(or_id: str, entry: dict) -> Optional[int]:
|
||||
ctx = entry.get("context_length")
|
||||
if ctx is None:
|
||||
return None
|
||||
if ctx <= 32768 and _model_name_suggests_kimi(or_id):
|
||||
logger.info(
|
||||
"Rejecting OpenRouter metadata context=%s for %r "
|
||||
"(Kimi-family underreport, Nous path); falling through to hardcoded defaults",
|
||||
ctx, or_id,
|
||||
)
|
||||
return None
|
||||
return ctx
|
||||
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length")
|
||||
ctx = _safe_ctx(model, metadata[model])
|
||||
if ctx is not None:
|
||||
return ctx, "openrouter"
|
||||
|
||||
normalized = _normalize_model_version(model).lower()
|
||||
|
||||
for or_id, entry in metadata.items():
|
||||
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
|
||||
if bare.lower() == model.lower() or _normalize_model_version(bare).lower() == normalized:
|
||||
return entry.get("context_length")
|
||||
ctx = _safe_ctx(or_id, entry)
|
||||
if ctx is not None:
|
||||
return ctx, "openrouter"
|
||||
|
||||
# Partial prefix match for cases like gemini-3-flash → gemini-3-flash-preview
|
||||
# Require match to be at a word boundary (followed by -, :, or end of string)
|
||||
model_lower = model.lower()
|
||||
for or_id, entry in metadata.items():
|
||||
bare = or_id.split("/", 1)[1] if "/" in or_id else or_id
|
||||
|
|
@ -1358,9 +1397,11 @@ def _resolve_nous_context_length(model: str) -> Optional[int]:
|
|||
if candidate.startswith(query) and (
|
||||
len(candidate) == len(query) or candidate[len(query)] in "-:."
|
||||
):
|
||||
return entry.get("context_length")
|
||||
ctx = _safe_ctx(or_id, entry)
|
||||
if ctx is not None:
|
||||
return ctx, "openrouter"
|
||||
|
||||
return None
|
||||
return None, ""
|
||||
|
||||
|
||||
def get_model_context_length(
|
||||
|
|
@ -1375,14 +1416,18 @@ def get_model_context_length(
|
|||
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1. Persistent cache (previously discovered via probing). Nous URLs
|
||||
bypass the cache here so step 5b can always reconcile against
|
||||
the authoritative portal /v1/models response.
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
5. Provider-aware lookups (before generic OpenRouter cache):
|
||||
a. Copilot live /models API
|
||||
b. Nous suffix-match via OpenRouter cache
|
||||
b. Nous: live /v1/models probe first (authoritative), then OR
|
||||
cache fallback with suffix/version normalisation. Only
|
||||
portal-derived values are persisted to disk.
|
||||
c. Codex OAuth /models probe
|
||||
d. GMI /models endpoint
|
||||
e. Ollama native /api/show probe (any base_url, provider-agnostic)
|
||||
|
|
@ -1437,6 +1482,28 @@ def get_model_context_length(
|
|||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
# Invalidate stale 32k cache entries for Kimi-family models.
|
||||
elif cached <= 32768 and _model_name_suggests_kimi(model):
|
||||
logger.info(
|
||||
"Dropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); "
|
||||
"re-resolving via hardcoded defaults",
|
||||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
# Nous Portal: the portal /v1/models endpoint is authoritative.
|
||||
# Bypass the persistent cache so step 5b can always reconcile
|
||||
# against it — this corrects pre-fix entries seeded from the
|
||||
# OR catalog (the same OR underreport class that the Kimi/Qwen
|
||||
# DEFAULT_CONTEXT_LENGTHS overrides exist to mitigate) without
|
||||
# touching the on-disk file when the portal is unreachable.
|
||||
# The in-memory 300s endpoint metadata cache makes the per-call
|
||||
# cost amortise to ~0 within a process.
|
||||
elif _infer_provider_from_url(base_url) == "nous":
|
||||
logger.debug(
|
||||
"Bypassing persistent cache for %s@%s (Nous portal authoritative)",
|
||||
model, base_url,
|
||||
)
|
||||
# Fall through; step 5b reconciles and overwrites if portal responds.
|
||||
else:
|
||||
return cached
|
||||
|
||||
|
|
@ -1528,8 +1595,18 @@ def get_model_context_length(
|
|||
pass # Fall through to models.dev
|
||||
|
||||
if effective_provider == "nous":
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
ctx, source = _resolve_nous_context_length(
|
||||
model, base_url=base_url or "", api_key=api_key or ""
|
||||
)
|
||||
if ctx:
|
||||
# Persist ONLY portal-derived values. Caching an OR-fallback
|
||||
# value here would freeze in a wrong number on the first portal
|
||||
# blip / auth glitch and step-1 would short-circuit it forever.
|
||||
# OR's catalog is community-maintained and is precisely why the
|
||||
# Kimi/Qwen DEFAULT_CONTEXT_LENGTHS overrides exist — we don't
|
||||
# want it leaking into the persistent cache for Nous URLs.
|
||||
if base_url and source == "portal":
|
||||
save_context_length(model, base_url, ctx)
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
|
|
@ -1575,14 +1652,6 @@ def get_model_context_length(
|
|||
if model in metadata:
|
||||
or_ctx = metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
# Guard against stale OpenRouter metadata for Kimi-family models.
|
||||
# OpenRouter reports 32768 for moonshotai/kimi-k2.6, but the model
|
||||
# actually supports 262144 (models.dev + official Kimi docs agree).
|
||||
# Providers that host their own Kimi endpoints (Ollama Cloud, Kimi
|
||||
# Coding, Moonshot) would otherwise trip the 64k minimum-context
|
||||
# guard and reject a perfectly capable model.
|
||||
# The filter is narrow: only reject exactly 32768 for Kimi-named
|
||||
# models. If OpenRouter ever updates its data, the stale path
|
||||
# becomes dead code with no impact.
|
||||
if or_ctx == 32768 and _model_name_suggests_kimi(model):
|
||||
logger.info(
|
||||
"Rejecting OpenRouter metadata context=%s for %r "
|
||||
|
|
|
|||
|
|
@ -370,6 +370,17 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
|||
source_url="https://api-docs.deepseek.com/quick_start/pricing",
|
||||
pricing_version="deepseek-pricing-2026-03-16",
|
||||
),
|
||||
(
|
||||
"deepseek",
|
||||
"deepseek-v4-pro",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("1.74"),
|
||||
output_cost_per_million=Decimal("3.48"),
|
||||
cache_read_cost_per_million=Decimal("0.0145"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://api-docs.deepseek.com/quick_start/pricing",
|
||||
pricing_version="deepseek-pricing-2026-05-12",
|
||||
),
|
||||
# Google Gemini
|
||||
(
|
||||
"google",
|
||||
|
|
|
|||
|
|
@ -473,7 +473,7 @@ export default function App() {
|
|||
>
|
||||
<div
|
||||
className={cn(
|
||||
"flex h-14 shrink-0 items-center justify-between gap-2",
|
||||
"flex h-14 shrink-0 items-center justify-between gap-2 px-4",
|
||||
"border-b border-current/20",
|
||||
)}
|
||||
>
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import {
|
|||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { OAuthLoginModal } from "@/components/OAuthLoginModal";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
||||
|
|
@ -55,6 +56,8 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
|
|||
const [loading, setLoading] = useState(true);
|
||||
const [busyId, setBusyId] = useState<string | null>(null);
|
||||
const [loginFor, setLoginFor] = useState<OAuthProvider | null>(null);
|
||||
const [disconnectTarget, setDisconnectTarget] =
|
||||
useState<OAuthProvider | null>(null);
|
||||
const { t } = useI18n();
|
||||
|
||||
const onErrorRef = useRef(onError);
|
||||
|
|
@ -74,10 +77,8 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
|
|||
}, [refresh]);
|
||||
|
||||
const handleDisconnect = async (provider: OAuthProvider) => {
|
||||
if (!confirm(`${t.oauth.disconnect} ${provider.name}?`)) {
|
||||
return;
|
||||
}
|
||||
setBusyId(provider.id);
|
||||
setDisconnectTarget(null);
|
||||
try {
|
||||
await api.disconnectOAuthProvider(provider.id);
|
||||
onSuccess?.(`${provider.name} ${t.oauth.disconnect.toLowerCase()}ed`);
|
||||
|
|
@ -236,7 +237,7 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
|
|||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => handleDisconnect(p)}
|
||||
onClick={() => setDisconnectTarget(p)}
|
||||
disabled={isBusy}
|
||||
prefix={isBusy ? <Spinner /> : <LogOut />}
|
||||
>
|
||||
|
|
@ -266,6 +267,17 @@ export function OAuthProvidersCard({ onError, onSuccess }: Props) {
|
|||
onError={(msg) => onError?.(msg)}
|
||||
/>
|
||||
)}
|
||||
<ConfirmDialog
|
||||
open={disconnectTarget !== null}
|
||||
onCancel={() => setDisconnectTarget(null)}
|
||||
onConfirm={() => {
|
||||
if (disconnectTarget) void handleDisconnect(disconnectTarget);
|
||||
}}
|
||||
title={`${t.oauth.disconnect} ${disconnectTarget?.name ?? ""}?`}
|
||||
description={`This will remove the stored OAuth tokens for ${disconnectTarget?.name ?? "this provider"}. You will need to re-authenticate to use it again.`}
|
||||
destructive
|
||||
confirmLabel={t.oauth.disconnect}
|
||||
/>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
61
apps/dashboard/src/components/ui/checkbox.tsx
Normal file
61
apps/dashboard/src/components/ui/checkbox.tsx
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
import { useState } from "react";
import { cn } from "@/lib/utils";
import { Check } from "lucide-react";
|
||||
|
||||
interface CheckboxProps
|
||||
extends Omit<React.InputHTMLAttributes<HTMLInputElement>, "type"> {
|
||||
label?: React.ReactNode;
|
||||
}
|
||||
|
||||
/**
 * Styled checkbox: a visually hidden native `<input type="checkbox">` paired
 * with a custom box + check icon, all wrapped in a clickable `<label>`.
 *
 * Supports both usages:
 * - controlled: pass `checked` (and `onChange`); the visual follows the prop.
 * - uncontrolled: pass `defaultChecked` (or nothing); the visual follows an
 *   internal mirror of the native input's state, updated on every change.
 *
 * `className` is applied to the custom box, not to the native input.
 * All remaining props are forwarded to the native input.
 */
export function Checkbox({
  className,
  label,
  id,
  checked,
  defaultChecked,
  onChange,
  ...props
}: CheckboxProps) {
  const isControlled = checked !== undefined;

  // In uncontrolled mode the native input owns the value, so mirror it here;
  // otherwise the custom box would stay frozen at `defaultChecked` forever.
  const [uncontrolledChecked, setUncontrolledChecked] = useState(
    defaultChecked ?? false,
  );
  const isChecked = isControlled ? checked : uncontrolledChecked;

  const handleChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    if (!isControlled) {
      setUncontrolledChecked(event.target.checked);
    }
    // Always forward to the caller's handler, controlled or not.
    onChange?.(event);
  };

  return (
    <label
      htmlFor={id}
      className={cn(
        "group flex items-center gap-2.5 cursor-pointer select-none",
        props.disabled && "cursor-not-allowed opacity-50",
      )}
    >
      <span
        className={cn(
          "flex h-4 w-4 shrink-0 items-center justify-center transition-all",
          "border bg-background/40",
          // Focus-visible ring for keyboard accessibility
          "group-has-[:focus-visible]:ring-2 group-has-[:focus-visible]:ring-ring group-has-[:focus-visible]:ring-offset-1",
          isChecked
            ? "border-foreground bg-foreground/20"
            : "border-border group-hover:border-foreground/40",
          className,
        )}
      >
        <Check
          className={cn(
            "h-3 w-3 transition-opacity",
            isChecked
              ? "text-foreground opacity-100"
              : "text-foreground opacity-0",
          )}
        />
      </span>
      <input
        type="checkbox"
        id={id}
        checked={checked}
        // Never pass both `checked` and `defaultChecked` to the same input.
        defaultChecked={isControlled ? undefined : defaultChecked}
        className="sr-only"
        {...props}
        onChange={handleChange}
      />
      {label && <span className="text-sm">{label}</span>}
    </label>
  );
}
|
||||
44
apps/dashboard/src/hooks/useModalBehavior.ts
Normal file
44
apps/dashboard/src/hooks/useModalBehavior.ts
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import { useEffect, useRef } from "react";
|
||||
|
||||
/**
 * Hook that adds standard modal behaviors while `open` is true:
 * - Escape key calls `onClose`
 * - Body scroll is locked (the previous inline `overflow` value is restored
 *   when the modal closes or the component unmounts)
 * - Focus is restored to the element that was focused when the modal opened
 *
 * `onClose` is read through a ref, so callers may pass an inline (unstable)
 * callback. The effect only re-runs when `open` changes; if it depended on
 * `onClose` directly, every parent re-render would run the cleanup, which
 * moves focus back to the previously focused element and so pulls focus out
 * of the modal.
 *
 * Returns a ref to attach to the modal container (for optional future focus trapping).
 */
export function useModalBehavior({
  open,
  onClose,
}: {
  open: boolean;
  onClose: () => void;
}) {
  const containerRef = useRef<HTMLDivElement>(null);

  // Always points at the latest `onClose` without making it an effect dependency.
  const onCloseRef = useRef(onClose);
  useEffect(() => {
    onCloseRef.current = onClose;
  }, [onClose]);

  useEffect(() => {
    if (!open) return;

    // Remember what had focus so it can be restored on close.
    const prevActive = document.activeElement as HTMLElement | null;

    const onKey = (e: KeyboardEvent) => {
      if (e.key === "Escape") {
        e.preventDefault();
        onCloseRef.current();
      }
    };

    document.addEventListener("keydown", onKey);

    // Lock background scrolling; keep the old value so nested/previous
    // settings are put back exactly as they were.
    const prevOverflow = document.body.style.overflow;
    document.body.style.overflow = "hidden";

    return () => {
      document.removeEventListener("keydown", onKey);
      document.body.style.overflow = prevOverflow;
      prevActive?.focus?.();
    };
  }, [open]);

  return containerRef;
}
|
||||
|
|
@ -75,7 +75,7 @@ export const en: Translations = {
|
|||
keys: "Keys",
|
||||
logs: "Logs",
|
||||
models: "Models",
|
||||
profiles: "profiles : multi agents",
|
||||
profiles: "Profiles",
|
||||
plugins: "Plugins",
|
||||
sessions: "Sessions",
|
||||
skills: "Skills",
|
||||
|
|
|
|||
|
|
@ -4,10 +4,12 @@ const BUILTIN: Record<string, keyof Translations["app"]["nav"]> = {
|
|||
"/chat": "chat",
|
||||
"/sessions": "sessions",
|
||||
"/analytics": "analytics",
|
||||
"/models": "models",
|
||||
"/logs": "logs",
|
||||
"/cron": "cron",
|
||||
"/skills": "skills",
|
||||
"/plugins": "plugins",
|
||||
"/profiles": "profiles",
|
||||
"/config": "config",
|
||||
"/env": "keys",
|
||||
"/docs": "documentation",
|
||||
|
|
@ -30,5 +32,10 @@ export function resolvePageTitle(
|
|||
if (key) {
|
||||
return t.app.nav[key];
|
||||
}
|
||||
// Derive title from pathname: "/profiles" → "Profiles"
|
||||
const segment = normalized.slice(1);
|
||||
if (segment) {
|
||||
return segment.charAt(0).toUpperCase() + segment.slice(1);
|
||||
}
|
||||
return t.app.webUi;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ import { Button } from "@nous-research/ui/ui/components/button";
|
|||
import { ListItem } from "@nous-research/ui/ui/components/list-item";
|
||||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { useI18n } from "@/i18n";
|
||||
|
|
@ -117,7 +118,9 @@ export default function ConfigPage() {
|
|||
const [yamlText, setYamlText] = useState("");
|
||||
const [yamlLoading, setYamlLoading] = useState(false);
|
||||
const [yamlSaving, setYamlSaving] = useState(false);
|
||||
const [configPath, setConfigPath] = useState<string | null>(null);
|
||||
const [activeCategory, setActiveCategory] = useState<string>("");
|
||||
const [confirmReset, setConfirmReset] = useState(false);
|
||||
const { toast, showToast } = useToast();
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
const { t } = useI18n();
|
||||
|
|
@ -175,6 +178,10 @@ export default function ConfigPage() {
|
|||
.getDefaults()
|
||||
.then(setDefaults)
|
||||
.catch(() => {});
|
||||
api
|
||||
.getStatus()
|
||||
.then((resp) => setConfigPath(resp.config_path))
|
||||
.catch(() => {});
|
||||
}, []);
|
||||
|
||||
// Set active category when categories load
|
||||
|
|
@ -290,11 +297,17 @@ export default function ConfigPage() {
|
|||
// "reset this tab", not "wipe my entire config.yaml".
|
||||
const scopedFields = isSearching ? searchMatchedFields : activeFields;
|
||||
if (scopedFields.length === 0) return;
|
||||
setConfirmReset(true);
|
||||
};
|
||||
|
||||
const executeReset = () => {
|
||||
if (!defaults || !config) return;
|
||||
setConfirmReset(false);
|
||||
const scopedFields = isSearching ? searchMatchedFields : activeFields;
|
||||
if (scopedFields.length === 0) return;
|
||||
const scopeLabel = isSearching
|
||||
? t.config.searchResults
|
||||
: prettyCategoryName(activeCategory);
|
||||
const message = t.config.confirmResetScope.replace("{scope}", scopeLabel);
|
||||
if (!window.confirm(message)) return;
|
||||
let next: Record<string, unknown> = config;
|
||||
for (const [key] of scopedFields) {
|
||||
next = setNestedValue(next, key, getNestedValue(defaults, key));
|
||||
|
|
@ -408,7 +421,7 @@ export default function ConfigPage() {
|
|||
<div className="flex items-center gap-2">
|
||||
<Settings2 className="h-4 w-4 text-muted-foreground" />
|
||||
<code className="text-xs text-muted-foreground bg-muted/50 px-2 py-0.5">
|
||||
{t.config.configPath}
|
||||
{configPath ?? t.config.configPath}
|
||||
</code>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5">
|
||||
|
|
@ -627,6 +640,22 @@ export default function ConfigPage() {
|
|||
</div>
|
||||
)}
|
||||
<PluginSlot name="config:bottom" />
|
||||
<ConfirmDialog
|
||||
open={confirmReset}
|
||||
onCancel={() => setConfirmReset(false)}
|
||||
onConfirm={executeReset}
|
||||
title={t.config.confirmResetScope.replace(
|
||||
"{scope}",
|
||||
isSearching
|
||||
? t.config.searchResults
|
||||
: prettyCategoryName(activeCategory),
|
||||
)}
|
||||
description={`This will reset ${
|
||||
(isSearching ? searchMatchedFields : activeFields).length
|
||||
} field(s) to their default values.`}
|
||||
destructive
|
||||
confirmLabel={t.config.resetDefaults}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { useCallback, useEffect, useState } from "react";
|
||||
import { Clock, Pause, Play, Plus, Trash2, Zap } from "lucide-react";
|
||||
import { useCallback, useEffect, useLayoutEffect, useState } from "react";
|
||||
import { Clock, Pause, Play, Plus, Trash2, X, Zap } from "lucide-react";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Select, SelectOption } from "@nous-research/ui/ui/components/select";
|
||||
|
|
@ -10,11 +10,13 @@ import type { CronJob } from "@/lib/api";
|
|||
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
|
||||
import { useModalBehavior } from "@/hooks/useModalBehavior";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
||||
function formatTime(iso?: string | null): string {
|
||||
|
|
@ -80,11 +82,18 @@ export default function CronPage() {
|
|||
const [loading, setLoading] = useState(true);
|
||||
const { toast, showToast } = useToast();
|
||||
const { t } = useI18n();
|
||||
const { setEnd } = usePageHeader();
|
||||
|
||||
// New job form state
|
||||
// New job modal state
|
||||
const [createModalOpen, setCreateModalOpen] = useState(false);
|
||||
const [prompt, setPrompt] = useState("");
|
||||
const [schedule, setSchedule] = useState("");
|
||||
const [name, setName] = useState("");
|
||||
const closeCreateModal = useCallback(() => setCreateModalOpen(false), []);
|
||||
const createModalRef = useModalBehavior({
|
||||
open: createModalOpen,
|
||||
onClose: closeCreateModal,
|
||||
});
|
||||
const [deliver, setDeliver] = useState("local");
|
||||
const [creating, setCreating] = useState(false);
|
||||
|
||||
|
|
@ -118,6 +127,7 @@ export default function CronPage() {
|
|||
setSchedule("");
|
||||
setName("");
|
||||
setDeliver("local");
|
||||
setCreateModalOpen(false);
|
||||
loadJobs();
|
||||
} catch (e) {
|
||||
showToast(`${t.config.failedToSave}: ${e}`, "error");
|
||||
|
|
@ -181,6 +191,22 @@ export default function CronPage() {
|
|||
),
|
||||
});
|
||||
|
||||
// Put "Create" button in page header
|
||||
useLayoutEffect(() => {
|
||||
setEnd(
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={() => setCreateModalOpen(true)}
|
||||
>
|
||||
<Plus className="h-3 w-3" />
|
||||
{t.common.create}
|
||||
</Button>,
|
||||
);
|
||||
return () => {
|
||||
setEnd(null);
|
||||
};
|
||||
}, [setEnd, t.common.create, loading]);
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="flex items-center justify-center py-24">
|
||||
|
|
@ -213,86 +239,110 @@ export default function CronPage() {
|
|||
loading={jobDelete.isDeleting}
|
||||
/>
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center gap-2 text-base">
|
||||
<Plus className="h-4 w-4" />
|
||||
{t.cron.newJob}
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="grid gap-4">
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="cron-name">{t.cron.nameOptional}</Label>
|
||||
<Input
|
||||
id="cron-name"
|
||||
placeholder={t.cron.namePlaceholder}
|
||||
value={name}
|
||||
onChange={(e) => setName(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
{/* Create job modal */}
|
||||
{createModalOpen && (
|
||||
<div
|
||||
ref={createModalRef}
|
||||
className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
|
||||
onClick={(e) => e.target === e.currentTarget && setCreateModalOpen(false)}
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby="create-cron-title"
|
||||
>
|
||||
<div className="relative w-full max-w-lg border border-border bg-card shadow-2xl flex flex-col">
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
onClick={() => setCreateModalOpen(false)}
|
||||
className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
|
||||
aria-label="Close"
|
||||
>
|
||||
<X />
|
||||
</Button>
|
||||
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="cron-prompt">{t.cron.prompt}</Label>
|
||||
<textarea
|
||||
id="cron-prompt"
|
||||
className="flex min-h-[80px] w-full border border-input bg-transparent px-3 py-2 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring"
|
||||
placeholder={t.cron.promptPlaceholder}
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
<header className="p-5 pb-3 border-b border-border">
|
||||
<h2
|
||||
id="create-cron-title"
|
||||
className="font-display text-base tracking-wider uppercase"
|
||||
>
|
||||
{t.cron.newJob}
|
||||
</h2>
|
||||
</header>
|
||||
|
||||
<div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
|
||||
<div className="p-5 grid gap-4">
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="cron-schedule">{t.cron.schedule}</Label>
|
||||
<Label htmlFor="cron-name">{t.cron.nameOptional}</Label>
|
||||
<Input
|
||||
id="cron-schedule"
|
||||
placeholder={t.cron.schedulePlaceholder}
|
||||
value={schedule}
|
||||
onChange={(e) => setSchedule(e.target.value)}
|
||||
id="cron-name"
|
||||
autoFocus
|
||||
placeholder={t.cron.namePlaceholder}
|
||||
value={name}
|
||||
onChange={(e) => setName(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="cron-deliver">{t.cron.deliverTo}</Label>
|
||||
<Select
|
||||
id="cron-deliver"
|
||||
value={deliver}
|
||||
onValueChange={(v) => setDeliver(v)}
|
||||
>
|
||||
<SelectOption value="local">
|
||||
{t.cron.delivery.local}
|
||||
</SelectOption>
|
||||
<SelectOption value="telegram">
|
||||
{t.cron.delivery.telegram}
|
||||
</SelectOption>
|
||||
<SelectOption value="discord">
|
||||
{t.cron.delivery.discord}
|
||||
</SelectOption>
|
||||
<SelectOption value="slack">
|
||||
{t.cron.delivery.slack}
|
||||
</SelectOption>
|
||||
<SelectOption value="email">
|
||||
{t.cron.delivery.email}
|
||||
</SelectOption>
|
||||
</Select>
|
||||
<Label htmlFor="cron-prompt">{t.cron.prompt}</Label>
|
||||
<textarea
|
||||
id="cron-prompt"
|
||||
className="flex min-h-[80px] w-full border border-border bg-background/40 px-3 py-2 text-sm font-courier shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-foreground/30 focus-visible:border-foreground/25"
|
||||
placeholder={t.cron.promptPlaceholder}
|
||||
value={prompt}
|
||||
onChange={(e) => setPrompt(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="flex items-end">
|
||||
<div className="grid grid-cols-1 sm:grid-cols-2 gap-4">
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="cron-schedule">{t.cron.schedule}</Label>
|
||||
<Input
|
||||
id="cron-schedule"
|
||||
placeholder={t.cron.schedulePlaceholder}
|
||||
value={schedule}
|
||||
onChange={(e) => setSchedule(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="cron-deliver">{t.cron.deliverTo}</Label>
|
||||
<Select
|
||||
id="cron-deliver"
|
||||
value={deliver}
|
||||
onValueChange={(v) => setDeliver(v)}
|
||||
>
|
||||
<SelectOption value="local">
|
||||
{t.cron.delivery.local}
|
||||
</SelectOption>
|
||||
<SelectOption value="telegram">
|
||||
{t.cron.delivery.telegram}
|
||||
</SelectOption>
|
||||
<SelectOption value="discord">
|
||||
{t.cron.delivery.discord}
|
||||
</SelectOption>
|
||||
<SelectOption value="slack">
|
||||
{t.cron.delivery.slack}
|
||||
</SelectOption>
|
||||
<SelectOption value="email">
|
||||
{t.cron.delivery.email}
|
||||
</SelectOption>
|
||||
</Select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex justify-end">
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={handleCreate}
|
||||
disabled={creating}
|
||||
prefix={<Plus />}
|
||||
className="w-full"
|
||||
prefix={creating ? <Spinner /> : <Plus />}
|
||||
>
|
||||
{creating ? t.common.creating : t.common.create}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex flex-col gap-3">
|
||||
<H2
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||
import { useCallback, useEffect, useLayoutEffect, useMemo, useState } from "react";
|
||||
import {
|
||||
Eye,
|
||||
EyeOff,
|
||||
|
|
@ -35,6 +35,7 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
|
|||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
|
@ -132,7 +133,7 @@ function EnvVarRow({
|
|||
// Compact inline row for unset, non-editing keys (used inside provider groups)
|
||||
if (compact && !info.is_set && !isEditing) {
|
||||
return (
|
||||
<div className="flex items-center justify-between gap-3 py-1.5 opacity-50 hover:opacity-100 transition-opacity">
|
||||
<div className="flex items-center justify-between gap-3 py-1.5 min-w-0 overflow-hidden opacity-50 hover:opacity-100 transition-opacity">
|
||||
<div className="flex items-center gap-2 min-w-0">
|
||||
<span className="font-mono-ui text-[0.7rem] text-muted-foreground">
|
||||
{varKey}
|
||||
|
|
@ -168,7 +169,7 @@ function EnvVarRow({
|
|||
// Non-compact unset row
|
||||
if (!info.is_set && !isEditing) {
|
||||
return (
|
||||
<div className="flex items-center justify-between gap-3 border border-border/50 px-4 py-2.5 opacity-60 hover:opacity-100 transition-opacity">
|
||||
<div className="flex items-center justify-between gap-3 border border-border/50 px-4 py-2.5 min-w-0 overflow-hidden opacity-60 hover:opacity-100 transition-opacity">
|
||||
<div className="flex items-center gap-3 min-w-0">
|
||||
<Label className="font-mono-ui text-[0.7rem] text-muted-foreground">
|
||||
{varKey}
|
||||
|
|
@ -203,7 +204,7 @@ function EnvVarRow({
|
|||
|
||||
// Full expanded row for set keys or keys being edited
|
||||
return (
|
||||
<div className="grid gap-2 border border-border p-4">
|
||||
<div className="grid gap-2 border border-border p-4 min-w-0 overflow-hidden">
|
||||
<div className="flex items-center justify-between gap-2 flex-wrap">
|
||||
<div className="flex items-center gap-2">
|
||||
<Label className="font-mono-ui text-[0.7rem]">{varKey}</Label>
|
||||
|
|
@ -493,6 +494,7 @@ export default function EnvPage() {
|
|||
const [showAdvanced, setShowAdvanced] = useState(true); // Show all providers by default
|
||||
const { toast, showToast } = useToast();
|
||||
const { t } = useI18n();
|
||||
const { setAfterTitle } = usePageHeader();
|
||||
|
||||
useEffect(() => {
|
||||
api
|
||||
|
|
@ -501,6 +503,58 @@ export default function EnvPage() {
|
|||
.catch(() => {});
|
||||
}, []);
|
||||
|
||||
// Scroll-to sub-nav in the page header
|
||||
const sections = useMemo(() => {
|
||||
const items: { id: string; label: string }[] = [
|
||||
{ id: "section-oauth", label: "OAuth" },
|
||||
{ id: "section-providers", label: "Providers" },
|
||||
];
|
||||
if (vars) {
|
||||
const categories = ["tool", "messaging", "setting"];
|
||||
const CATEGORY_LABELS: Record<string, string> = {
|
||||
tool: "Tools",
|
||||
messaging: "Messaging",
|
||||
setting: "Settings",
|
||||
};
|
||||
for (const cat of categories) {
|
||||
const hasEntries = Object.values(vars).some(
|
||||
(info) => info.category === cat,
|
||||
);
|
||||
if (hasEntries) {
|
||||
items.push({ id: `section-${cat}`, label: CATEGORY_LABELS[cat] ?? cat });
|
||||
}
|
||||
}
|
||||
}
|
||||
return items;
|
||||
}, [vars]);
|
||||
|
||||
useLayoutEffect(() => {
|
||||
if (!vars) {
|
||||
setAfterTitle(null);
|
||||
return;
|
||||
}
|
||||
const scrollTo = (id: string) => {
|
||||
document.getElementById(id)?.scrollIntoView({ behavior: "smooth", block: "start" });
|
||||
};
|
||||
setAfterTitle(
|
||||
<nav className="flex items-center gap-1" aria-label="Jump to section">
|
||||
{sections.map((s) => (
|
||||
<button
|
||||
key={s.id}
|
||||
type="button"
|
||||
onClick={() => scrollTo(s.id)}
|
||||
className="cursor-pointer px-2 py-0.5 text-[10px] uppercase tracking-wider text-muted-foreground hover:text-foreground border border-border/50 hover:border-foreground/30 transition-colors"
|
||||
>
|
||||
{s.label}
|
||||
</button>
|
||||
))}
|
||||
</nav>,
|
||||
);
|
||||
return () => {
|
||||
setAfterTitle(null);
|
||||
};
|
||||
}, [vars, sections, setAfterTitle]);
|
||||
|
||||
const handleSave = async (key: string) => {
|
||||
const value = edits[key];
|
||||
if (!value) return;
|
||||
|
|
@ -701,12 +755,14 @@ export default function EnvPage() {
|
|||
</Button>
|
||||
</div>
|
||||
|
||||
<OAuthProvidersCard
|
||||
onError={(msg) => showToast(msg, "error")}
|
||||
onSuccess={(msg) => showToast(msg, "success")}
|
||||
/>
|
||||
<div id="section-oauth">
|
||||
<OAuthProvidersCard
|
||||
onError={(msg) => showToast(msg, "error")}
|
||||
onSuccess={(msg) => showToast(msg, "success")}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<Card>
|
||||
<Card id="section-providers">
|
||||
<CardHeader className="border-b border-border bg-card">
|
||||
<div className="flex items-center gap-2">
|
||||
<Zap className="h-5 w-5 text-muted-foreground" />
|
||||
|
|
@ -750,7 +806,7 @@ export default function EnvPage() {
|
|||
if (totalEntries === 0) return null;
|
||||
|
||||
return (
|
||||
<Card key={category}>
|
||||
<Card key={category} id={`section-${category}`}>
|
||||
<CardHeader className="border-b border-border bg-card">
|
||||
<div className="flex items-center gap-2">
|
||||
<Icon className="h-5 w-5 text-muted-foreground" />
|
||||
|
|
@ -762,7 +818,7 @@ export default function EnvPage() {
|
|||
</CardDescription>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="grid gap-3 pt-4">
|
||||
<CardContent className="grid gap-3 pt-4 overflow-hidden">
|
||||
{setEntries.map(([key, info]) => (
|
||||
<EnvVarRow
|
||||
key={key}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import {
|
|||
Settings2,
|
||||
Star,
|
||||
Wrench,
|
||||
X,
|
||||
Zap,
|
||||
} from "lucide-react";
|
||||
import { api } from "@/lib/api";
|
||||
|
|
@ -25,6 +26,8 @@ import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
|||
import { Stats } from "@nous-research/ui/ui/components/stats";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { useModalBehavior } from "@/hooks/useModalBehavior";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { PluginSlot } from "@/plugins";
|
||||
|
|
@ -91,27 +94,39 @@ function TokenBar({
|
|||
if (total === 0) return null;
|
||||
|
||||
const segments = [
|
||||
{ value: cacheRead, color: "bg-blue-400/60", label: "Cache Read" },
|
||||
{ value: reasoning, color: "bg-purple-400/60", label: "Reasoning" },
|
||||
{ value: input, color: "bg-[#ffe6cb]/70", label: "Input" },
|
||||
{ value: output, color: "bg-emerald-500/70", label: "Output" },
|
||||
{ value: cacheRead, color: "bg-blue-400/60", dotColor: "bg-blue-400", label: "Cache Read" },
|
||||
{ value: reasoning, color: "bg-purple-400/60", dotColor: "bg-purple-400", label: "Reasoning" },
|
||||
{ value: input, color: "bg-[#ffe6cb]/70", dotColor: "bg-[#ffe6cb]", label: "Input" },
|
||||
{ value: output, color: "bg-emerald-500/70", dotColor: "bg-emerald-500", label: "Output" },
|
||||
].filter((s) => s.value > 0);
|
||||
|
||||
return (
|
||||
<div className="space-y-1">
|
||||
<div className="flex h-2 w-full overflow-hidden rounded-sm bg-muted/30">
|
||||
<div className="space-y-1.5">
|
||||
{/* Stacked bar — segments fill proportionally to their share of total */}
|
||||
<div className="relative flex min-h-[1.5rem] w-full items-stretch overflow-hidden">
|
||||
{segments.map((s, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className={`${s.color} transition-all duration-300`}
|
||||
className={`${s.color} relative flex items-center transition-all duration-300`}
|
||||
style={{ width: `${(s.value / total) * 100}%` }}
|
||||
/>
|
||||
>
|
||||
{/* Stepped fill pattern overlay */}
|
||||
<div
|
||||
className="absolute inset-0 opacity-30"
|
||||
style={{
|
||||
backgroundImage:
|
||||
"repeating-linear-gradient(to right, transparent 0 0.4rem, currentColor 0.4rem calc(0.4rem + 1px))",
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Legend */}
|
||||
<div className="flex flex-wrap gap-x-3 gap-y-0.5 text-[10px] text-muted-foreground">
|
||||
{segments.map((s, i) => (
|
||||
<span key={i} className="flex items-center gap-1">
|
||||
<span className={`inline-block h-1.5 w-1.5 rounded-full ${s.color}`} />
|
||||
<span className={`inline-block h-1.5 w-1.5 rounded-full ${s.dotColor}`} />
|
||||
{s.label} {formatTokens(s.value)}
|
||||
</span>
|
||||
))}
|
||||
|
|
@ -378,7 +393,7 @@ function ModelCard({
|
|||
</div>
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-3 pt-0">
|
||||
<CardContent className="space-y-3 pt-3">
|
||||
<TokenBar
|
||||
input={entry.input_tokens}
|
||||
output={entry.output_tokens}
|
||||
|
|
@ -445,6 +460,157 @@ type PickerTarget =
|
|||
| { kind: "main" }
|
||||
| { kind: "aux"; task: string };
|
||||
|
||||
function AuxiliaryTasksModal({
|
||||
aux,
|
||||
refreshKey,
|
||||
onSaved,
|
||||
onClose,
|
||||
}: {
|
||||
aux: AuxiliaryModelsResponse | null;
|
||||
refreshKey: number;
|
||||
onSaved(): void;
|
||||
onClose(): void;
|
||||
}) {
|
||||
const [picker, setPicker] = useState<PickerTarget | null>(null);
|
||||
const [resetBusy, setResetBusy] = useState(false);
|
||||
const [confirmReset, setConfirmReset] = useState(false);
|
||||
const modalRef = useModalBehavior({ open: true, onClose });
|
||||
|
||||
const resetAllAux = async () => {
|
||||
setConfirmReset(false);
|
||||
setResetBusy(true);
|
||||
try {
|
||||
await api.setModelAssignment({
|
||||
scope: "auxiliary",
|
||||
task: "__reset__",
|
||||
provider: "",
|
||||
model: "",
|
||||
});
|
||||
onSaved();
|
||||
} finally {
|
||||
setResetBusy(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={modalRef}
|
||||
className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
|
||||
onClick={(e) => e.target === e.currentTarget && onClose()}
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby="aux-modal-title"
|
||||
>
|
||||
<div className="relative w-full max-w-2xl max-h-[80vh] border border-border bg-card shadow-2xl flex flex-col">
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
onClick={onClose}
|
||||
className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
|
||||
aria-label="Close"
|
||||
>
|
||||
<X />
|
||||
</Button>
|
||||
|
||||
<header className="p-5 pb-3 border-b border-border">
|
||||
<div className="flex items-center justify-between gap-3 pr-8">
|
||||
<h2
|
||||
id="aux-modal-title"
|
||||
className="font-display text-base tracking-wider uppercase"
|
||||
>
|
||||
Auxiliary Tasks
|
||||
</h2>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setConfirmReset(true)}
|
||||
disabled={resetBusy}
|
||||
className="text-[10px] h-6"
|
||||
prefix={resetBusy ? <Spinner /> : null}
|
||||
>
|
||||
Reset all to auto
|
||||
</Button>
|
||||
</div>
|
||||
<p className="text-[10px] text-muted-foreground/80 mt-2">
|
||||
Auxiliary tasks handle side-jobs like vision, session search, and
|
||||
compression. <span className="font-mono">auto</span> means
|
||||
"use the main model". Override per-task when you want a
|
||||
cheap/fast model for a specific job.
|
||||
</p>
|
||||
</header>
|
||||
|
||||
<div className="flex-1 overflow-y-auto p-5 space-y-1">
|
||||
{AUX_TASKS.map((t) => {
|
||||
const cur = aux?.tasks.find((a) => a.task === t.key);
|
||||
const isAuto =
|
||||
!cur || cur.provider === "auto" || !cur.provider;
|
||||
return (
|
||||
<div
|
||||
key={t.key}
|
||||
className="flex items-center justify-between gap-3 px-3 py-2 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
|
||||
>
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="flex items-baseline gap-2">
|
||||
<span className="text-xs font-medium">{t.label}</span>
|
||||
<span className="text-[10px] text-muted-foreground/60">
|
||||
{t.hint}
|
||||
</span>
|
||||
</div>
|
||||
<div className="text-[10px] font-mono text-muted-foreground truncate">
|
||||
{isAuto
|
||||
? "auto (use main model)"
|
||||
: `${cur?.provider} · ${cur?.model || "(provider default)"}`}
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setPicker({ kind: "aux", task: t.key })}
|
||||
className="text-[10px] h-6"
|
||||
>
|
||||
Change
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
|
||||
{picker && picker.kind === "aux" && (
|
||||
<ModelPickerDialog
|
||||
key={`picker-${refreshKey}`}
|
||||
loader={api.getModelOptions}
|
||||
alwaysGlobal
|
||||
title={`Set Auxiliary: ${
|
||||
AUX_TASKS.find((t) => t.key === picker.task)?.label ??
|
||||
picker.task
|
||||
}`}
|
||||
onApply={async ({ provider, model }) => {
|
||||
await api.setModelAssignment({
|
||||
scope: "auxiliary",
|
||||
task: picker.task,
|
||||
provider,
|
||||
model,
|
||||
});
|
||||
onSaved();
|
||||
}}
|
||||
onClose={() => setPicker(null)}
|
||||
/>
|
||||
)}
|
||||
<ConfirmDialog
|
||||
open={confirmReset}
|
||||
onCancel={() => setConfirmReset(false)}
|
||||
onConfirm={() => void resetAllAux()}
|
||||
title="Reset auxiliary models"
|
||||
description="Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set."
|
||||
destructive
|
||||
confirmLabel="Reset all"
|
||||
loading={resetBusy}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function ModelSettingsPanel({
|
||||
aux,
|
||||
refreshKey,
|
||||
|
|
@ -454,9 +620,8 @@ function ModelSettingsPanel({
|
|||
refreshKey: number;
|
||||
onSaved(): void;
|
||||
}) {
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const [auxModalOpen, setAuxModalOpen] = useState(false);
|
||||
const [picker, setPicker] = useState<PickerTarget | null>(null);
|
||||
const [resetBusy, setResetBusy] = useState(false);
|
||||
|
||||
const mainProv = aux?.main.provider ?? "";
|
||||
const mainModel = aux?.main.model ?? "";
|
||||
|
|
@ -476,23 +641,10 @@ function ModelSettingsPanel({
|
|||
onSaved();
|
||||
};
|
||||
|
||||
const resetAllAux = async () => {
|
||||
if (!window.confirm("Reset every auxiliary task to 'auto'? This overrides any per-task overrides you've set.")) {
|
||||
return;
|
||||
}
|
||||
setResetBusy(true);
|
||||
try {
|
||||
await api.setModelAssignment({
|
||||
scope: "auxiliary",
|
||||
task: "__reset__",
|
||||
provider: "",
|
||||
model: "",
|
||||
});
|
||||
onSaved();
|
||||
} finally {
|
||||
setResetBusy(false);
|
||||
}
|
||||
};
|
||||
// Count how many aux tasks have overrides
|
||||
const auxOverrideCount = aux?.tasks.filter(
|
||||
(a) => a.provider && a.provider !== "auto",
|
||||
).length ?? 0;
|
||||
|
||||
return (
|
||||
<Card>
|
||||
|
|
@ -505,21 +657,10 @@ function ModelSettingsPanel({
|
|||
applies to new sessions
|
||||
</span>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setExpanded((v) => !v)}
|
||||
className="text-xs"
|
||||
>
|
||||
{expanded ? "Hide auxiliary" : "Show auxiliary"}
|
||||
<ChevronDown
|
||||
className={`h-3 w-3 transition-transform ${expanded ? "rotate-180" : ""}`}
|
||||
/>
|
||||
</Button>
|
||||
</div>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="space-y-3 pt-0">
|
||||
<CardContent className="space-y-3 pt-3">
|
||||
{/* Main row */}
|
||||
<div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
|
||||
<div className="min-w-0 flex-1">
|
||||
|
|
@ -544,85 +685,41 @@ function ModelSettingsPanel({
|
|||
</Button>
|
||||
</div>
|
||||
|
||||
{/* Auxiliary rows */}
|
||||
{expanded && (
|
||||
<div className="space-y-1 border-t border-border/50 pt-3">
|
||||
<div className="flex items-center justify-between pb-1">
|
||||
<div className="text-[10px] uppercase tracking-wider text-muted-foreground">
|
||||
{/* Auxiliary tasks summary + open modal */}
|
||||
<div className="flex items-center justify-between gap-3 bg-muted/20 border border-border/50 px-3 py-2">
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="flex items-center gap-2 mb-0.5">
|
||||
<Cpu className="h-3 w-3 text-muted-foreground" />
|
||||
<span className="text-xs font-medium uppercase tracking-wider">
|
||||
Auxiliary tasks
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={resetAllAux}
|
||||
disabled={resetBusy}
|
||||
className="text-[10px] h-6"
|
||||
prefix={resetBusy ? <Spinner /> : null}
|
||||
>
|
||||
Reset all to auto
|
||||
</Button>
|
||||
</span>
|
||||
</div>
|
||||
<div className="text-xs font-mono text-muted-foreground truncate">
|
||||
{auxOverrideCount > 0
|
||||
? `${auxOverrideCount} override${auxOverrideCount > 1 ? "s" : ""} · ${AUX_TASKS.length - auxOverrideCount} auto`
|
||||
: `${AUX_TASKS.length} tasks · all auto`}
|
||||
</div>
|
||||
|
||||
<p className="text-[10px] text-muted-foreground/80 pb-2">
|
||||
Auxiliary tasks handle side-jobs like vision, session search, and
|
||||
compression. <span className="font-mono">auto</span> means
|
||||
"use the main model". Override per-task when you want a
|
||||
cheap/fast model for a specific job.
|
||||
</p>
|
||||
|
||||
{AUX_TASKS.map((t) => {
|
||||
const cur = aux?.tasks.find((a) => a.task === t.key);
|
||||
const isAuto =
|
||||
!cur || cur.provider === "auto" || !cur.provider;
|
||||
return (
|
||||
<div
|
||||
key={t.key}
|
||||
className="flex items-center justify-between gap-3 px-3 py-1.5 border border-border/30 bg-card/50 hover:bg-muted/20 transition-colors"
|
||||
>
|
||||
<div className="min-w-0 flex-1">
|
||||
<div className="flex items-baseline gap-2">
|
||||
<span className="text-xs font-medium">{t.label}</span>
|
||||
<span className="text-[10px] text-muted-foreground/60">
|
||||
{t.hint}
|
||||
</span>
|
||||
</div>
|
||||
<div className="text-[10px] font-mono text-muted-foreground truncate">
|
||||
{isAuto
|
||||
? "auto (use main model)"
|
||||
: `${cur?.provider} · ${cur?.model || "(provider default)"}`}
|
||||
</div>
|
||||
</div>
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setPicker({ kind: "aux", task: t.key })}
|
||||
className="text-[10px] h-6"
|
||||
>
|
||||
Change
|
||||
</Button>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
<Button
|
||||
size="sm"
|
||||
outlined
|
||||
onClick={() => setAuxModalOpen(true)}
|
||||
className="text-xs"
|
||||
>
|
||||
Configure
|
||||
</Button>
|
||||
</div>
|
||||
|
||||
{picker && (
|
||||
<ModelPickerDialog
|
||||
key={`picker-${refreshKey}`}
|
||||
loader={api.getModelOptions}
|
||||
alwaysGlobal
|
||||
title={
|
||||
picker.kind === "main"
|
||||
? "Set Main Model"
|
||||
: `Set Auxiliary: ${
|
||||
AUX_TASKS.find((t) => t.key === picker.task)?.label ??
|
||||
picker.task
|
||||
}`
|
||||
}
|
||||
title="Set Main Model"
|
||||
onApply={async ({ provider, model }) => {
|
||||
await applyAssignment({
|
||||
scope: picker.kind === "main" ? "main" : "auxiliary",
|
||||
task: picker.kind === "main" ? "" : picker.task,
|
||||
scope: "main",
|
||||
task: "",
|
||||
provider,
|
||||
model,
|
||||
});
|
||||
|
|
@ -630,6 +727,15 @@ function ModelSettingsPanel({
|
|||
onClose={() => setPicker(null)}
|
||||
/>
|
||||
)}
|
||||
|
||||
{auxModalOpen && (
|
||||
<AuxiliaryTasksModal
|
||||
aux={aux}
|
||||
refreshKey={refreshKey}
|
||||
onSaved={onSaved}
|
||||
onClose={() => setAuxModalOpen(false)}
|
||||
/>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
|
|
@ -725,28 +831,14 @@ export default function ModelsPage() {
|
|||
<div className="flex flex-col gap-6">
|
||||
<PluginSlot name="models:top" />
|
||||
|
||||
<ModelSettingsPanel
|
||||
aux={aux}
|
||||
refreshKey={saveKey}
|
||||
onSaved={onAssigned}
|
||||
/>
|
||||
<div className="grid gap-6 lg:grid-cols-2">
|
||||
<ModelSettingsPanel
|
||||
aux={aux}
|
||||
refreshKey={saveKey}
|
||||
onSaved={onAssigned}
|
||||
/>
|
||||
|
||||
{loading && !data && (
|
||||
<div className="flex items-center justify-center py-24">
|
||||
<Spinner className="text-2xl text-primary" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<Card>
|
||||
<CardContent className="py-6">
|
||||
<p className="text-sm text-destructive text-center">{error}</p>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{data && (
|
||||
<>
|
||||
{data && (
|
||||
<Card>
|
||||
<CardContent className="py-6">
|
||||
<Stats
|
||||
|
|
@ -781,7 +873,25 @@ export default function ModelsPage() {
|
|||
/>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{loading && !data && (
|
||||
<div className="flex items-center justify-center py-24">
|
||||
<Spinner className="text-2xl text-primary" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{error && (
|
||||
<Card>
|
||||
<CardContent className="py-6">
|
||||
<p className="text-sm text-destructive text-center">{error}</p>
|
||||
</CardContent>
|
||||
</Card>
|
||||
)}
|
||||
|
||||
{data && (
|
||||
<>
|
||||
{data.models.length > 0 ? (
|
||||
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-3">
|
||||
{data.models.map((m, i) => (
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { Switch } from "@nous-research/ui/ui/components/switch";
|
|||
import { Spinner } from "@nous-research/ui/ui/components/spinner";
|
||||
import { CommandBlock } from "@nous-research/ui/ui/components/command-block";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { ConfirmDialog } from "@/components/ui/confirm-dialog";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
|
|
@ -393,6 +394,7 @@ function PluginRowCard(props: PluginRowCardProps) {
|
|||
const tabPath = dm?.tab && !dm.tab.hidden ? dm.tab.override ?? dm.tab.path : null;
|
||||
|
||||
const busy = rowBusy === row.name;
|
||||
const [confirmRemove, setConfirmRemove] = useState(false);
|
||||
|
||||
const badgeTone =
|
||||
row.runtime_status === "enabled"
|
||||
|
|
@ -533,18 +535,7 @@ function PluginRowCard(props: PluginRowCardProps) {
|
|||
disabled={busy}
|
||||
ghost
|
||||
size="sm"
|
||||
onClick={() => {
|
||||
const ok =
|
||||
typeof window !== "undefined"
|
||||
? window.confirm(t.pluginsPage.removeConfirm)
|
||||
: false;
|
||||
if (!ok) return;
|
||||
|
||||
void setRuntimeLoading(row.name, async () => {
|
||||
await api.removeAgentPlugin(row.name);
|
||||
showToast(`${row.name} removed`, "success");
|
||||
});
|
||||
}}
|
||||
onClick={() => setConfirmRemove(true)}
|
||||
>
|
||||
|
||||
{busy ? <Spinner /> : <Trash2 className="h-3.5 w-3.5" />}
|
||||
|
|
@ -576,6 +567,21 @@ function PluginRowCard(props: PluginRowCardProps) {
|
|||
) : null}
|
||||
</CardContent>
|
||||
|
||||
<ConfirmDialog
|
||||
open={confirmRemove}
|
||||
onCancel={() => setConfirmRemove(false)}
|
||||
onConfirm={() => {
|
||||
setConfirmRemove(false);
|
||||
void setRuntimeLoading(row.name, async () => {
|
||||
await api.removeAgentPlugin(row.name);
|
||||
showToast(`${row.name} removed`, "success");
|
||||
});
|
||||
}}
|
||||
title={t.pluginsPage.removeConfirm}
|
||||
description={`This will remove the "${row.name}" plugin from your agent.`}
|
||||
destructive
|
||||
confirmLabel={t.common.delete}
|
||||
/>
|
||||
</Card>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,21 @@
|
|||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users } from "lucide-react";
|
||||
import { useCallback, useEffect, useLayoutEffect, useRef, useState } from "react";
|
||||
import { ChevronDown, Pencil, Plus, Terminal, Trash2, Users, X } from "lucide-react";
|
||||
import { H2 } from "@/components/NouiTypography";
|
||||
import { api } from "@/lib/api";
|
||||
import type { ProfileInfo } from "@/lib/api";
|
||||
import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog";
|
||||
import { useToast } from "@/hooks/useToast";
|
||||
import { useConfirmDelete } from "@/hooks/useConfirmDelete";
|
||||
import { useModalBehavior } from "@/hooks/useModalBehavior";
|
||||
import { Toast } from "@/components/Toast";
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
|
||||
import { Card, CardContent } from "@/components/ui/card";
|
||||
import { Badge } from "@nous-research/ui/ui/components/badge";
|
||||
import { Button } from "@nous-research/ui/ui/components/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Checkbox } from "@/components/ui/checkbox";
|
||||
import { useI18n } from "@/i18n";
|
||||
import { usePageHeader } from "@/contexts/usePageHeader";
|
||||
|
||||
// Mirrors hermes_cli/profiles.py::_PROFILE_ID_RE so we can reject obviously
|
||||
// invalid names (uppercase, spaces, …) before round-tripping a doomed POST.
|
||||
|
|
@ -23,11 +26,18 @@ export default function ProfilesPage() {
|
|||
const [loading, setLoading] = useState(true);
|
||||
const { toast, showToast } = useToast();
|
||||
const { t } = useI18n();
|
||||
const { setEnd } = usePageHeader();
|
||||
|
||||
// Create form
|
||||
// Create modal
|
||||
const [createModalOpen, setCreateModalOpen] = useState(false);
|
||||
const [newName, setNewName] = useState("");
|
||||
const [cloneFromDefault, setCloneFromDefault] = useState(true);
|
||||
const [creating, setCreating] = useState(false);
|
||||
const closeCreateModal = useCallback(() => setCreateModalOpen(false), []);
|
||||
const createModalRef = useModalBehavior({
|
||||
open: createModalOpen,
|
||||
onClose: closeCreateModal,
|
||||
});
|
||||
|
||||
// Inline rename state
|
||||
const [renamingFrom, setRenamingFrom] = useState<string | null>(null);
|
||||
|
|
@ -68,6 +78,7 @@ export default function ProfilesPage() {
|
|||
await api.createProfile({ name, clone_from_default: cloneFromDefault });
|
||||
showToast(`${t.profiles.created}: ${name}`, "success");
|
||||
setNewName("");
|
||||
setCreateModalOpen(false);
|
||||
load();
|
||||
} catch (e) {
|
||||
showToast(`${t.status.error}: ${e}`, "error");
|
||||
|
|
@ -170,6 +181,22 @@ export default function ProfilesPage() {
|
|||
|
||||
const pendingName = profileDelete.pendingId;
|
||||
|
||||
// Put "Create" button in page header
|
||||
useLayoutEffect(() => {
|
||||
setEnd(
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={() => setCreateModalOpen(true)}
|
||||
>
|
||||
<Plus className="h-3 w-3" />
|
||||
{t.common.create}
|
||||
</Button>,
|
||||
);
|
||||
return () => {
|
||||
setEnd(null);
|
||||
};
|
||||
}, [setEnd, t.common.create, loading]);
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="flex items-center justify-center py-24">
|
||||
|
|
@ -198,51 +225,75 @@ export default function ProfilesPage() {
|
|||
loading={profileDelete.isDeleting}
|
||||
/>
|
||||
|
||||
{/* Create new profile */}
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center gap-2 text-base">
|
||||
<Plus className="h-4 w-4" />
|
||||
{t.profiles.newProfile}
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="grid gap-4">
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="profile-name">{t.profiles.name}</Label>
|
||||
<Input
|
||||
id="profile-name"
|
||||
placeholder={t.profiles.namePlaceholder}
|
||||
value={newName}
|
||||
onChange={(e) => setNewName(e.target.value)}
|
||||
aria-invalid={
|
||||
newName.trim() !== "" &&
|
||||
!PROFILE_NAME_RE.test(newName.trim())
|
||||
}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{t.profiles.nameRule}
|
||||
</p>
|
||||
</div>
|
||||
{/* Create profile modal */}
|
||||
{createModalOpen && (
|
||||
<div
|
||||
ref={createModalRef}
|
||||
className="fixed inset-0 z-[100] flex items-center justify-center bg-background/85 backdrop-blur-sm p-4"
|
||||
onClick={(e) => e.target === e.currentTarget && setCreateModalOpen(false)}
|
||||
role="dialog"
|
||||
aria-modal="true"
|
||||
aria-labelledby="create-profile-title"
|
||||
>
|
||||
<div className="relative w-full max-w-md border border-border bg-card shadow-2xl flex flex-col">
|
||||
<Button
|
||||
ghost
|
||||
size="icon"
|
||||
onClick={() => setCreateModalOpen(false)}
|
||||
className="absolute right-2 top-2 text-muted-foreground hover:text-foreground"
|
||||
aria-label="Close"
|
||||
>
|
||||
<X />
|
||||
</Button>
|
||||
|
||||
<label className="flex items-center gap-2 text-sm cursor-pointer">
|
||||
<input
|
||||
type="checkbox"
|
||||
<header className="p-5 pb-3 border-b border-border">
|
||||
<h2
|
||||
id="create-profile-title"
|
||||
className="font-display text-base tracking-wider uppercase"
|
||||
>
|
||||
{t.profiles.newProfile}
|
||||
</h2>
|
||||
</header>
|
||||
|
||||
<div className="p-5 grid gap-4">
|
||||
<div className="grid gap-2">
|
||||
<Label htmlFor="profile-name">{t.profiles.name}</Label>
|
||||
<Input
|
||||
id="profile-name"
|
||||
autoFocus
|
||||
placeholder={t.profiles.namePlaceholder}
|
||||
value={newName}
|
||||
onChange={(e) => setNewName(e.target.value)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") handleCreate();
|
||||
}}
|
||||
aria-invalid={
|
||||
newName.trim() !== "" &&
|
||||
!PROFILE_NAME_RE.test(newName.trim())
|
||||
}
|
||||
/>
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{t.profiles.nameRule}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<Checkbox
|
||||
id="clone-from-default"
|
||||
checked={cloneFromDefault}
|
||||
onChange={(e) => setCloneFromDefault(e.target.checked)}
|
||||
label={t.profiles.cloneFromDefault}
|
||||
/>
|
||||
{t.profiles.cloneFromDefault}
|
||||
</label>
|
||||
|
||||
<div>
|
||||
<Button onClick={handleCreate} disabled={creating}>
|
||||
<Plus className="h-3 w-3" />
|
||||
{creating ? t.common.creating : t.common.create}
|
||||
</Button>
|
||||
<div className="flex justify-end">
|
||||
<Button size="sm" onClick={handleCreate} disabled={creating}>
|
||||
<Plus className="h-3 w-3" />
|
||||
{creating ? t.common.creating : t.common.create}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* List */}
|
||||
<div className="flex flex-col gap-3">
|
||||
|
|
|
|||
45
cli.py
45
cli.py
|
|
@ -3669,7 +3669,7 @@ class HermesCLI:
|
|||
if self.show_timestamps:
|
||||
label = f"{label} {datetime.now().strftime('%H:%M')}"
|
||||
w = shutil.get_terminal_size().columns
|
||||
fill = w - 2 - len(label)
|
||||
fill = w - 2 - HermesCLI._status_bar_display_width(label)
|
||||
_cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
|
||||
|
||||
self._stream_buf += text
|
||||
|
|
@ -4214,12 +4214,34 @@ class HermesCLI:
|
|||
ChatConsole().print(f"[bold red]Failed to initialize agent: {e}[/]")
|
||||
return False
|
||||
|
||||
def _show_security_advisories(self):
|
||||
"""Show a startup banner if any unacked security advisories match.
|
||||
|
||||
Renders a single bold-red box on stderr (so piped stdout remains
|
||||
clean) listing the worst hit and pointing at ``hermes doctor``.
|
||||
Banner-cache rate-limits this to once per 24h per advisory; full
|
||||
remediation lives behind ``hermes doctor`` so the banner stays
|
||||
small.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.security_advisories import (
|
||||
detect_compromised,
|
||||
startup_banner,
|
||||
)
|
||||
hits = detect_compromised()
|
||||
banner = startup_banner(hits)
|
||||
if banner:
|
||||
# Print to stderr — keeps stdout clean for piped automation,
|
||||
# and Rich's banner rendering already wrote to stdout above.
|
||||
print(banner, file=sys.stderr, flush=True)
|
||||
except Exception:
|
||||
# Never let the security banner block startup. Failures are
|
||||
# logged at DEBUG by the advisory module.
|
||||
pass
|
||||
|
||||
def show_banner(self):
|
||||
"""Display the welcome banner in Claude Code style."""
|
||||
self.console.clear()
|
||||
|
||||
# Get context length for display before branching so it remains
|
||||
# available to the low-context warning logic in compact mode too.
|
||||
ctx_len = None
|
||||
if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
|
||||
ctx_len = self.agent.context_compressor.context_length
|
||||
|
|
@ -8783,6 +8805,9 @@ class HermesCLI:
|
|||
elif parts[i] == "--source" and i + 1 < len(parts):
|
||||
source = parts[i + 1]
|
||||
i += 2
|
||||
elif parts[i].isdigit():
|
||||
days = int(parts[i])
|
||||
i += 1
|
||||
else:
|
||||
i += 1
|
||||
|
||||
|
|
@ -10368,7 +10393,7 @@ class HermesCLI:
|
|||
label = " ⚕ Hermes "
|
||||
if self.show_timestamps:
|
||||
label = f"{label}{datetime.now().strftime('%H:%M')} "
|
||||
fill = w - 2 - len(label)
|
||||
fill = w - 2 - HermesCLI._status_bar_display_width(label)
|
||||
_cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
|
||||
_cprint(f"{_STREAM_PAD}{sentence.rstrip()}")
|
||||
|
||||
|
|
@ -11016,10 +11041,9 @@ class HermesCLI:
|
|||
pass
|
||||
|
||||
self.show_banner()
|
||||
|
||||
# One-line Honcho session indicator (TTY-only, not captured by agent).
|
||||
# Only show when the user explicitly configured Honcho for Hermes
|
||||
# (not auto-enabled from a stray HONCHO_API_KEY env var).
|
||||
# Surface any active supply-chain security advisories right after the
|
||||
# welcome banner. Quiet/single-query paths call this themselves.
|
||||
self._show_security_advisories()
|
||||
# If resuming a session, load history and display it immediately
|
||||
# so the user has context before typing their first message.
|
||||
if self._resumed:
|
||||
|
|
@ -13528,6 +13552,9 @@ def main(
|
|||
_query_label = query or ("[image attached]" if single_query_images else "")
|
||||
if _query_label:
|
||||
cli.console.print(f"[bold blue]Query:[/] {_query_label}")
|
||||
# Surface security advisories before the agent runs — short
|
||||
# banner, doesn't depend on the welcome banner being shown.
|
||||
cli._show_security_advisories()
|
||||
cli.chat(query, images=single_query_images or None)
|
||||
cli._print_exit_summary()
|
||||
return
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ _HOME_TARGET_ENV_VARS = {
|
|||
"weixin": "WEIXIN_HOME_CHANNEL",
|
||||
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
|
||||
"qqbot": "QQBOT_HOME_CHANNEL",
|
||||
"whatsapp": "WHATSAPP_HOME_CHANNEL",
|
||||
}
|
||||
|
||||
# Legacy env var names kept for back-compat. Each entry is the current
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
Hermes Gateway - Multi-platform messaging integration.
|
||||
|
||||
This module provides a unified gateway for connecting the Hermes agent
|
||||
to various messaging platforms (Telegram, Discord, WhatsApp) with:
|
||||
to various messaging platforms (Telegram, Discord, WhatsApp, Weixin, and more) with:
|
||||
- Session management (persistent conversations with reset policies)
|
||||
- Dynamic context injection (agent knows where messages come from)
|
||||
- Delivery routing (cron job outputs to appropriate channels)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
Gateway configuration management.
|
||||
|
||||
Handles loading and validating configuration for:
|
||||
- Connected platforms (Telegram, Discord, WhatsApp)
|
||||
- Connected platforms (Telegram, Discord, WhatsApp, Weixin, and more)
|
||||
- Home channels for each platform
|
||||
- Session reset policies
|
||||
- Delivery preferences
|
||||
|
|
|
|||
|
|
@ -1168,6 +1168,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
agent_ref=agent_ref,
|
||||
gateway_session_key=gateway_session_key,
|
||||
))
|
||||
# Ensure SSE drain loops can terminate without relying on polling
|
||||
# agent_task.done(), which can race with queue timeout checks.
|
||||
agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
|
||||
|
||||
return await self._write_sse_chat_completion(
|
||||
request, completion_id, model_name, created, _stream_q,
|
||||
|
|
@ -2197,6 +2200,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
agent_ref=agent_ref,
|
||||
gateway_session_key=gateway_session_key,
|
||||
))
|
||||
# Ensure SSE drain loops can terminate without relying on polling
|
||||
# agent_task.done(), which can race with queue timeout checks.
|
||||
agent_task.add_done_callback(lambda _fut: _stream_q.put(None))
|
||||
|
||||
response_id = f"resp_{uuid.uuid4().hex[:28]}"
|
||||
model_name = body.get("model", self._model_name)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
Base platform adapter interface.
|
||||
|
||||
All platform adapters (Telegram, Discord, WhatsApp) inherit from this
|
||||
All platform adapters (Telegram, Discord, WhatsApp, Weixin, and more) inherit from this
|
||||
and implement the required methods.
|
||||
"""
|
||||
|
||||
|
|
@ -1743,6 +1743,55 @@ class BasePlatformAdapter(ABC):
|
|||
"""
|
||||
return SendResult(success=False, error="Not supported")
|
||||
|
||||
async def send_clarify(
|
||||
self,
|
||||
chat_id: str,
|
||||
question: str,
|
||||
choices: Optional[list],
|
||||
clarify_id: str,
|
||||
session_key: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
"""Send a clarify prompt to the user.
|
||||
|
||||
Two render modes:
|
||||
|
||||
* **Multiple choice** (``choices`` is a non-empty list) — adapters
|
||||
that override this should render inline buttons (one per choice
|
||||
plus a final "Other" / free-text option). Button callbacks
|
||||
MUST resolve via
|
||||
``tools.clarify_gateway.resolve_gateway_clarify(clarify_id, response)``
|
||||
with the chosen string. Picking the "Other" button calls
|
||||
``mark_awaiting_text(clarify_id)`` so the next message in the
|
||||
session is captured as the response.
|
||||
|
||||
* **Open-ended** (``choices`` is None or empty) — render the
|
||||
question as a plain text message; the next user message in the
|
||||
session is captured by the gateway's text-intercept and
|
||||
resolves the clarify automatically (see
|
||||
``GatewayRunner._maybe_intercept_clarify_text``).
|
||||
|
||||
The default implementation falls back to a numbered text list,
|
||||
which works on every platform — the user replies with a number
|
||||
("2") or with the literal choice text, and the gateway intercepts
|
||||
and resolves. Adapters with native button UIs (Telegram, Discord)
|
||||
SHOULD override this for a richer UX.
|
||||
"""
|
||||
if choices:
|
||||
lines = [f"❓ {question}", ""]
|
||||
for i, choice in enumerate(choices, start=1):
|
||||
lines.append(f" {i}. {choice}")
|
||||
lines.append("")
|
||||
lines.append("Reply with the number, the option text, or your own answer.")
|
||||
text = "\n".join(lines)
|
||||
else:
|
||||
text = f"❓ {question}"
|
||||
return await self.send(
|
||||
chat_id=chat_id,
|
||||
content=text,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
async def send_private_notice(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -2831,6 +2880,58 @@ class BasePlatformAdapter(ABC):
|
|||
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
|
||||
return
|
||||
|
||||
# Clarify text-capture bypass: if the agent is blocked on a
|
||||
# clarify_tool call awaiting a free-form text response (open-
|
||||
# ended clarify, or user picked "Other"), the next non-command
|
||||
# message in this session MUST reach the runner so the
|
||||
# clarify-intercept can resolve it and unblock the agent.
|
||||
#
|
||||
# Without this bypass: the message gets queued in
|
||||
# _pending_messages AND triggers an interrupt, killing the
|
||||
# agent run mid-clarify and discarding the user's answer.
|
||||
# Same shape as the /approve deadlock fix (PR #4926) — both
|
||||
# cases are "agent thread blocked on Event.wait, message must
|
||||
# reach the resolver before being treated as a new turn."
|
||||
if not cmd:
|
||||
try:
|
||||
from tools import clarify_gateway as _clarify_mod
|
||||
_has_text_clarify = (
|
||||
_clarify_mod.get_pending_for_session(session_key) is not None
|
||||
)
|
||||
except Exception:
|
||||
_has_text_clarify = False
|
||||
|
||||
if _has_text_clarify:
|
||||
logger.debug(
|
||||
"[%s] Routing message to clarify text-intercept for %s",
|
||||
self.name, session_key,
|
||||
)
|
||||
try:
|
||||
_thread_meta = _thread_metadata_for_source(
|
||||
event.source, _reply_anchor_for_event(event)
|
||||
)
|
||||
response = await self._message_handler(event)
|
||||
_text, _eph_ttl = self._unwrap_ephemeral(response)
|
||||
if _text:
|
||||
_r = await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=_text,
|
||||
reply_to=_reply_anchor_for_event(event),
|
||||
metadata=_thread_meta,
|
||||
)
|
||||
if _eph_ttl > 0 and _r.success and _r.message_id:
|
||||
self._schedule_ephemeral_delete(
|
||||
chat_id=event.source.chat_id,
|
||||
message_id=_r.message_id,
|
||||
ttl_seconds=_eph_ttl,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Clarify text-intercept dispatch failed: %s",
|
||||
self.name, e, exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
if self._busy_session_handler is not None:
|
||||
try:
|
||||
if await self._busy_session_handler(event, session_key):
|
||||
|
|
|
|||
|
|
@ -86,8 +86,32 @@ def _clean_discord_id(entry: str) -> str:
|
|||
|
||||
|
||||
def check_discord_requirements() -> bool:
|
||||
"""Check if Discord dependencies are available."""
|
||||
return DISCORD_AVAILABLE
|
||||
"""Check if Discord dependencies are available.
|
||||
|
||||
Lazy-installs discord.py via ``tools.lazy_deps.ensure("platform.discord")``
|
||||
on first call if not present. After successful install, re-binds module
|
||||
globals so ``DISCORD_AVAILABLE`` becomes True.
|
||||
"""
|
||||
global DISCORD_AVAILABLE, discord, DiscordMessage, Intents, commands
|
||||
if DISCORD_AVAILABLE:
|
||||
return True
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("platform.discord", prompt=False)
|
||||
except Exception:
|
||||
return False
|
||||
try:
|
||||
import discord as _discord
|
||||
from discord import Message as _DM, Intents as _Intents
|
||||
from discord.ext import commands as _commands
|
||||
except ImportError:
|
||||
return False
|
||||
discord = _discord
|
||||
DiscordMessage = _DM
|
||||
Intents = _Intents
|
||||
commands = _commands
|
||||
DISCORD_AVAILABLE = True
|
||||
return True
|
||||
|
||||
|
||||
def _build_allowed_mentions():
|
||||
|
|
|
|||
|
|
@ -103,8 +103,58 @@ _TELEGRAM_IMAGE_EXT_TO_MIME = {
|
|||
|
||||
|
||||
def check_telegram_requirements() -> bool:
    """Report whether the Telegram SDK is usable, lazy-installing it if needed.

    When ``TELEGRAM_AVAILABLE`` is already true this is just a flag check.
    Otherwise the function asks ``tools.lazy_deps.ensure("platform.telegram")``
    to install python-telegram-bot without prompting, re-imports the SDK and
    rebinds the module-level names listed in the ``global`` statements so
    later lookups of those globals resolve to the real SDK objects.

    Returns:
        True when the SDK names are bound and ``TELEGRAM_AVAILABLE`` is set;
        False when the lazy install raised or the follow-up import failed.
    """
    global TELEGRAM_AVAILABLE, Update, Bot, Message, InlineKeyboardButton
    global InlineKeyboardMarkup, LinkPreviewOptions, Application
    global CommandHandler, CallbackQueryHandler, TelegramMessageHandler
    global ContextTypes, filters, ParseMode, ChatType, HTTPXRequest

    if TELEGRAM_AVAILABLE:
        return True

    # Best-effort install: any failure here simply means "not available".
    try:
        from tools.lazy_deps import ensure as _lazy_ensure
        _lazy_ensure("platform.telegram", prompt=False)
    except Exception:
        return False

    # Import into private names first so a partial failure leaves the
    # module globals untouched.
    try:
        from telegram import Update as _Update, Bot as _Bot, Message as _Message
        from telegram import InlineKeyboardButton as _IKB, InlineKeyboardMarkup as _IKM
        try:
            from telegram import LinkPreviewOptions as _LPO
        except ImportError:
            # LinkPreviewOptions is treated as optional; fall back to None.
            _LPO = None
        from telegram.ext import (
            Application as _App, CommandHandler as _CH,
            CallbackQueryHandler as _CQH,
            MessageHandler as _MH,
            ContextTypes as _CT, filters as _filters,
        )
        from telegram.constants import ParseMode as _PM, ChatType as _CtT
        from telegram.request import HTTPXRequest as _HR
    except ImportError:
        return False

    Update = _Update
    Bot = _Bot
    Message = _Message
    InlineKeyboardButton = _IKB
    InlineKeyboardMarkup = _IKM
    LinkPreviewOptions = _LPO
    Application = _App
    CommandHandler = _CH
    CallbackQueryHandler = _CQH
    TelegramMessageHandler = _MH
    ContextTypes = _CT
    filters = _filters
    ParseMode = _PM
    ChatType = _CtT
    HTTPXRequest = _HR
    TELEGRAM_AVAILABLE = True
    return True
|
||||
|
||||
|
||||
# Matches every character that MarkdownV2 requires to be backslash-escaped
|
||||
|
|
@ -377,6 +427,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# Slash-confirm button state: confirm_id → session_key (for /reload-mcp
|
||||
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
|
||||
self._slash_confirm_state: Dict[str, str] = {}
|
||||
# Clarify button state: clarify_id → session_key (for the clarify tool's
|
||||
# multiple-choice prompts; see GatewayRunner clarify_callback wiring).
|
||||
self._clarify_state: Dict[str, str] = {}
|
||||
# Notification mode for message sends.
|
||||
# "important" — only final responses, approvals, and slash confirmations
|
||||
# trigger notifications; tool progress, streaming, status
|
||||
|
|
@ -2165,6 +2218,80 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Post a clarify question to a chat, with inline buttons when choices exist.

    With a non-empty ``choices`` the message carries one numbered button per
    option (labels longer than 60 characters are cut to 57 plus "...") and
    a trailing "✏️ Other (type answer)" button. Callback payloads have the
    form ``cl:<clarify_id>:<index>`` or ``cl:<clarify_id>:other``. With no
    choices the question is sent as HTML text without a keyboard.

    After a successful send, ``clarify_id`` is mapped to ``session_key`` in
    ``self._clarify_state``.

    Returns:
        A successful ``SendResult`` carrying the sent message id, or a failed
        one when the bot is not connected or anything raises while building
        or sending the message (the failure is logged as a warning).
    """
    if not self._bot:
        return SendResult(success=False, error="Not connected")

    def _clip(raw_choice) -> str:
        # Button label is capped (~64 chars in practice); keep it short.
        caption = str(raw_choice)
        return caption if len(caption) <= 60 else caption[:57] + "..."

    try:
        prompt_html = f"❓ {_html.escape(question)}"
        thread_id = self._metadata_thread_id(metadata)

        send_args: Dict[str, Any] = {
            "chat_id": int(chat_id),
            "text": prompt_html,
            "parse_mode": ParseMode.HTML,
            **self._link_preview_kwargs(),
        }

        if choices:
            # Telegram caps callback_data at 64 bytes; keep "cl:<id>:<idx>"
            # short.
            keyboard = [
                [
                    InlineKeyboardButton(
                        f"{position + 1}. {_clip(option)}",
                        callback_data=f"cl:{clarify_id}:{position}",
                    )
                ]
                for position, option in enumerate(choices)
            ]
            keyboard.append([
                InlineKeyboardButton(
                    "✏️ Other (type answer)",
                    callback_data=f"cl:{clarify_id}:other",
                )
            ])
            send_args["reply_markup"] = InlineKeyboardMarkup(keyboard)

        reply_to_id = self._reply_to_message_id_for_send(None, metadata)
        send_args["reply_to_message_id"] = reply_to_id
        thread_args = self._thread_kwargs_for_send(
            chat_id,
            thread_id,
            metadata,
            reply_to_message_id=reply_to_id,
        )
        send_args.update(thread_args)

        sent = await self._send_message_with_thread_fallback(**send_args)
        # Remember which session this prompt belongs to for button callbacks.
        self._clarify_state[clarify_id] = session_key
        return SendResult(success=True, message_id=str(sent.message_id))
    except Exception as e:
        logger.warning("[%s] send_clarify failed: %s", self.name, e)
        return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_model_picker(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -2650,6 +2777,111 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
|
||||
return
|
||||
|
||||
# --- Clarify callbacks (cl:clarify_id:idx | cl:clarify_id:other) ---
|
||||
if data.startswith("cl:"):
|
||||
parts = data.split(":", 2)
|
||||
if len(parts) == 3:
|
||||
clarify_id = parts[1]
|
||||
choice_token = parts[2]
|
||||
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(
|
||||
caller_id,
|
||||
chat_id=query_chat_id,
|
||||
chat_type=str(query_chat_type) if query_chat_type is not None else None,
|
||||
thread_id=str(query_thread_id) if query_thread_id is not None else None,
|
||||
user_name=query_user_name,
|
||||
):
|
||||
await query.answer(text="⛔ You are not authorized to answer this prompt.")
|
||||
return
|
||||
|
||||
session_key = self._clarify_state.get(clarify_id)
|
||||
if not session_key:
|
||||
await query.answer(text="This prompt has already been resolved.")
|
||||
return
|
||||
|
||||
user_display = getattr(query.from_user, "first_name", "User")
|
||||
|
||||
if choice_token == "other":
|
||||
# Flip into text-capture mode and tell the user to type
|
||||
# their answer. The gateway's text-intercept will pick
|
||||
# up the next message in this session and resolve the
|
||||
# clarify. Do NOT pop _clarify_state yet — we still
|
||||
# need it if the user is slow to respond and the entry
|
||||
# is cleared by something else.
|
||||
try:
|
||||
from tools.clarify_gateway import mark_awaiting_text
|
||||
mark_awaiting_text(clarify_id)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] mark_awaiting_text failed: %s", self.name, exc)
|
||||
|
||||
await query.answer(text="✏️ Type your answer in the chat.")
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"❓ {query.message.text or ''}\n\n<i>Awaiting typed response from {_html.escape(user_display)}…</i>",
|
||||
parse_mode=ParseMode.HTML,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
# Numeric choice → resolve immediately with the chosen text
|
||||
try:
|
||||
idx = int(choice_token)
|
||||
except (ValueError, TypeError):
|
||||
await query.answer(text="Invalid choice.")
|
||||
return
|
||||
|
||||
# Look up the choice text from the entry registered in the
|
||||
# clarify primitive. Fall back to the index if the entry
|
||||
# has been cleaned up (race with timeout / session reset).
|
||||
resolved_text: Optional[str] = None
|
||||
try:
|
||||
from tools.clarify_gateway import _entries as _clarify_entries # type: ignore
|
||||
entry = _clarify_entries.get(clarify_id)
|
||||
if entry and entry.choices and 0 <= idx < len(entry.choices):
|
||||
resolved_text = entry.choices[idx]
|
||||
except Exception:
|
||||
resolved_text = None
|
||||
|
||||
if resolved_text is None:
|
||||
# Race: entry vanished. Echo the index as a number so
|
||||
# the agent at least sees an intentional response
|
||||
# rather than nothing.
|
||||
resolved_text = f"choice {idx + 1}"
|
||||
|
||||
# Pop state and resolve
|
||||
self._clarify_state.pop(clarify_id, None)
|
||||
try:
|
||||
from tools.clarify_gateway import resolve_gateway_clarify
|
||||
resolved = resolve_gateway_clarify(clarify_id, resolved_text)
|
||||
except Exception as exc:
|
||||
logger.error("[%s] resolve_gateway_clarify failed: %s", self.name, exc)
|
||||
resolved = False
|
||||
|
||||
await query.answer(text=f"✓ {resolved_text[:60]}")
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"❓ {_html.escape(query.message.text or '')}\n\n<b>{_html.escape(user_display)}:</b> {_html.escape(resolved_text)}",
|
||||
parse_mode=ParseMode.HTML,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if resolved:
|
||||
logger.info(
|
||||
"Telegram clarify button resolved (id=%s, choice=%r, user=%s)",
|
||||
clarify_id, resolved_text, user_display,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Telegram clarify button: resolve_gateway_clarify returned False (id=%s)",
|
||||
clarify_id,
|
||||
)
|
||||
return
|
||||
|
||||
# --- Update prompt callbacks ---
|
||||
if not data.startswith("update_prompt:"):
|
||||
return
|
||||
|
|
@ -4529,6 +4761,27 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, e)
|
||||
return False
|
||||
|
||||
async def _clear_reactions(self, chat_id: str, message_id: str) -> bool:
|
||||
"""Clear all reactions from a Telegram message.
|
||||
|
||||
Calling ``set_message_reaction`` with ``reaction=None`` (or an empty
|
||||
sequence) is the documented Bot API way to remove all bot-set
|
||||
reactions on a message — equivalent to Bot API 10.0's
|
||||
``deleteMessageReaction`` but supported in PTB 22.6 already.
|
||||
"""
|
||||
if not self._bot:
|
||||
return False
|
||||
try:
|
||||
await self._bot.set_message_reaction(
|
||||
chat_id=int(chat_id),
|
||||
message_id=int(message_id),
|
||||
reaction=None,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("[%s] clear reactions failed: %s", self.name, e)
|
||||
return False
|
||||
|
||||
async def on_processing_start(self, event: MessageEvent) -> None:
|
||||
"""Add an in-progress reaction when message processing begins."""
|
||||
if not self._reactions_enabled():
|
||||
|
|
@ -4543,12 +4796,23 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
|
||||
Unlike Discord (additive reactions), Telegram's set_message_reaction
|
||||
replaces all existing reactions in one call — no remove step needed.
|
||||
|
||||
On CANCELLED outcomes (e.g. the user runs ``/stop``, or a session is
|
||||
interrupted mid-flight), we explicitly clear the 👀 in-progress
|
||||
reaction so it doesn't linger on the user's message indefinitely.
|
||||
Without this clear, the only way to remove the 👀 was to wait for
|
||||
another agent run to swap it to 👍/👎 — which never happens if the
|
||||
cancellation was the last activity in the chat.
|
||||
"""
|
||||
if not self._reactions_enabled():
|
||||
return
|
||||
chat_id = getattr(event.source, "chat_id", None)
|
||||
message_id = getattr(event, "message_id", None)
|
||||
if chat_id and message_id and outcome != ProcessingOutcome.CANCELLED:
|
||||
if not (chat_id and message_id):
|
||||
return
|
||||
if outcome == ProcessingOutcome.CANCELLED:
|
||||
await self._clear_reactions(chat_id, message_id)
|
||||
else:
|
||||
await self._set_reaction(
|
||||
chat_id,
|
||||
message_id,
|
||||
|
|
|
|||
133
gateway/run.py
133
gateway/run.py
|
|
@ -3275,6 +3275,30 @@ class GatewayRunner:
|
|||
write_runtime_status(gateway_state="starting", exit_reason=None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Log any active supply-chain security advisories. Operators see this
|
||||
# in gateway.log and `hermes status` surfaces it; we do NOT block
|
||||
# startup or surface it inline to user messages, since the gateway
|
||||
# operator is the one who can act on it (uninstall the package,
|
||||
# rotate credentials). See hermes_cli/security_advisories.py.
|
||||
try:
|
||||
from hermes_cli.security_advisories import (
|
||||
detect_compromised,
|
||||
gateway_log_message,
|
||||
)
|
||||
_adv_hits = detect_compromised()
|
||||
_adv_msg = gateway_log_message(_adv_hits)
|
||||
if _adv_msg:
|
||||
logger.warning("%s", _adv_msg)
|
||||
logger.warning(
|
||||
"Run `hermes doctor` on the gateway host for full "
|
||||
"remediation steps."
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"security advisory check failed at gateway startup",
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Warn if no user allowlists are configured and open access is not opted in
|
||||
_builtin_allowed_vars = (
|
||||
|
|
@ -5804,6 +5828,37 @@ class GatewayRunner:
|
|||
)
|
||||
_update_prompts.pop(_quick_key, None)
|
||||
|
||||
# Intercept messages that are responses to a pending clarify
|
||||
# request that is awaiting free-form text (either an open-ended
|
||||
# clarify with no choices, or one where the user picked the
|
||||
# "Other" button). The first non-empty user message in the
|
||||
# session resolves the clarify and unblocks the agent thread —
|
||||
# we do NOT route it to the agent as a new turn.
|
||||
try:
|
||||
from tools import clarify_gateway as _clarify_mod
|
||||
_pending_clarify = _clarify_mod.get_pending_for_session(_quick_key)
|
||||
except Exception:
|
||||
_pending_clarify = None
|
||||
if _pending_clarify is not None:
|
||||
_raw_clarify_reply = (event.text or "").strip()
|
||||
# Skip slash commands — the user clearly wanted to issue a
|
||||
# command, not answer the clarify. Leave the clarify pending
|
||||
# so the user can retry; if it times out, the agent unblocks
|
||||
# with an empty response.
|
||||
if _raw_clarify_reply and not _raw_clarify_reply.startswith("/"):
|
||||
_resolved = _clarify_mod.resolve_gateway_clarify(
|
||||
_pending_clarify.clarify_id, _raw_clarify_reply,
|
||||
)
|
||||
if _resolved:
|
||||
logger.info(
|
||||
"Gateway intercepted clarify text response (session=%s, id=%s)",
|
||||
_quick_key, _pending_clarify.clarify_id,
|
||||
)
|
||||
# Acknowledge with empty string so adapters that emit
|
||||
# the agent's response don't double-post. The agent
|
||||
# itself will produce the next user-facing message.
|
||||
return ""
|
||||
|
||||
# Intercept messages that are responses to a pending /reload-mcp
|
||||
# (or future) slash-confirm prompt. Recognized confirm replies are
|
||||
# /approve, /always, /cancel (plus short aliases). Anything else
|
||||
|
|
@ -14933,6 +14988,76 @@ class GatewayRunner:
|
|||
if _pdc is not None:
|
||||
_pdc[session_key] = _release_bg_review_messages
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Clarify callback: present a clarify prompt and block on a response.
|
||||
#
|
||||
# Runs on the agent's worker thread (see clarify_tool's synchronous
|
||||
# callback contract). Bridges sync→async by scheduling the
|
||||
# adapter's send_clarify on the gateway event loop, then blocks on
|
||||
# the clarify primitive's threading.Event with a configurable
|
||||
# timeout. Returns the user's response string, or a sentinel
|
||||
# explaining that no response arrived (so the agent can adapt
|
||||
# rather than hang forever).
|
||||
# ------------------------------------------------------------------
|
||||
def _clarify_callback_sync(question: str, choices) -> str:
    """Deliver a clarify prompt through the adapter and block for the answer.

    Runs synchronously on the calling (agent) thread: registers the prompt
    with ``tools.clarify_gateway``, schedules the adapter's ``send_clarify``
    on the gateway event loop, then waits for a response up to the
    configured clarify timeout.

    Args:
        question: Text of the question to show the user.
        choices: Optional iterable of choices; falsy means open-ended.

    Returns:
        The user's response; ``""`` when no status adapter is available;
        ``"[clarify prompt could not be delivered]"`` when the send failed or
        timed out; or ``"[user did not respond within Nm]"`` when no
        response arrived.
    """
    from tools import clarify_gateway as _clarify_mod
    import uuid as _uuid

    if not _status_adapter:
        return ""

    clarify_id = _uuid.uuid4().hex[:10]
    # Materialize once so a one-shot iterable yields the same options for
    # both the registry entry and the rendered buttons.
    choice_list = list(choices) if choices else None
    _clarify_mod.register(
        clarify_id=clarify_id,
        session_key=session_key or "",
        question=question,
        choices=choice_list,
    )

    # Pause typing — like approval, we don't want a "thinking..."
    # status to obscure the prompt or block the user from typing
    # an "Other" response on platforms that disable input while
    # typing is active (Slack Assistant API).
    try:
        _status_adapter.pause_typing_for_chat(_status_chat_id)
    except Exception:
        pass

    send_ok = False
    fut = None
    try:
        fut = asyncio.run_coroutine_threadsafe(
            _status_adapter.send_clarify(
                chat_id=_status_chat_id,
                question=question,
                # Separate copy so the adapter never shares the registered list.
                choices=list(choice_list) if choice_list else None,
                clarify_id=clarify_id,
                session_key=session_key or "",
                metadata=_status_thread_metadata,
            ),
            _loop_for_step,
        )
        result = fut.result(timeout=15)
        send_ok = bool(getattr(result, "success", False))
    except Exception as exc:
        logger.warning("Clarify send failed: %s", exc)
        send_ok = False
        # A timed-out send is still scheduled on the loop; cancel it so the
        # prompt is not posted after we report a delivery failure.
        if fut is not None:
            fut.cancel()

    if not send_ok:
        # Couldn't deliver the prompt — clean up and return a sentinel so
        # the agent can fall back to a sensible default rather than hanging.
        _clarify_mod.clear_session(session_key or "")
        return "[clarify prompt could not be delivered]"

    timeout = _clarify_mod.get_clarify_timeout()
    response = _clarify_mod.wait_for_response(clarify_id, timeout=float(timeout))
    if response is None or response == "":
        # Timeout or session-boundary cancellation
        return f"[user did not respond within {int(timeout / 60)}m]"
    return response
|
||||
|
||||
agent.clarify_callback = _clarify_callback_sync
|
||||
|
||||
# Store agent reference for interrupt support
|
||||
agent_holder[0] = agent
|
||||
# Capture the full tool definitions for transcript logging
|
||||
|
|
@ -15204,6 +15329,14 @@ class GatewayRunner:
|
|||
result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
|
||||
finally:
|
||||
unregister_gateway_notify(_approval_session_key)
|
||||
# Cancel any pending clarify entries so blocked agent
|
||||
# threads don't hang past the end of the run (interrupt,
|
||||
# completion, gateway shutdown). Idempotent.
|
||||
try:
|
||||
from tools.clarify_gateway import clear_session as _clear_clarify_session
|
||||
_clear_clarify_session(_approval_session_key)
|
||||
except Exception:
|
||||
pass
|
||||
reset_current_session_key(_approval_session_token)
|
||||
result_holder[0] = result
|
||||
|
||||
|
|
|
|||
|
|
@ -124,16 +124,33 @@ def get_process_start_time(pid: int) -> Optional[int]:
|
|||
|
||||
|
||||
def _read_process_cmdline(pid: int) -> Optional[str]:
|
||||
"""Return the process command line as a space-separated string."""
|
||||
"""Return the process command line as a space-separated string.
|
||||
|
||||
On Linux, reads /proc/<pid>/cmdline directly. On macOS and other
|
||||
platforms without /proc, falls back to ``ps -p <pid> -o command=``.
|
||||
"""
|
||||
cmdline_path = Path(f"/proc/{pid}/cmdline")
|
||||
try:
|
||||
raw = cmdline_path.read_bytes()
|
||||
except (FileNotFoundError, PermissionError, OSError):
|
||||
return None
|
||||
pass
|
||||
else:
|
||||
if raw:
|
||||
return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
|
||||
|
||||
if not raw:
|
||||
return None
|
||||
return raw.replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["ps", "-p", str(pid), "-o", "command="],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
return result.stdout.strip()
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _looks_like_gateway_process(pid: int) -> bool:
|
||||
|
|
@ -594,6 +611,22 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
|
|||
and current_start != existing.get("start_time")
|
||||
):
|
||||
stale = True
|
||||
# When start_time comparison is unavailable (macOS / Windows
|
||||
# have no /proc, so both sides are None), fall back to
|
||||
# checking the live process command line. When cmdline is
|
||||
# also unreadable (Windows has no ps), consult the lock
|
||||
# record's own argv — the gateway writes it at startup and
|
||||
# it's the only identity signal on platforms without ps.
|
||||
# Both oracles must indicate "not a gateway" to mark stale.
|
||||
if (
|
||||
not stale
|
||||
and existing.get("start_time") is None
|
||||
and current_start is None
|
||||
and not _looks_like_gateway_process(existing_pid)
|
||||
):
|
||||
live_cmdline = _read_process_cmdline(existing_pid)
|
||||
if live_cmdline is not None or not _record_looks_like_gateway(existing):
|
||||
stale = True
|
||||
# Check if process is stopped (Ctrl+Z / SIGTSTP) — stopped
|
||||
# processes still appear alive to _pid_exists but are not
|
||||
# actually running. Treat them as stale so --replace works.
|
||||
|
|
|
|||
|
|
@ -4046,6 +4046,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
|
|||
return get_qwen_auth_status()
|
||||
if target == "google-gemini-cli":
|
||||
return get_gemini_oauth_auth_status()
|
||||
if target == "minimax-oauth":
|
||||
return get_minimax_oauth_auth_status()
|
||||
if target == "copilot-acp":
|
||||
return get_external_process_provider_status(target)
|
||||
# API-key providers
|
||||
|
|
@ -4757,6 +4759,20 @@ def _minimax_request_user_code(
|
|||
return payload
|
||||
|
||||
|
||||
def _minimax_expired_in_looks_like_unix_ms(expired_in: int, *, now_ms: int) -> bool:
|
||||
"""True if ``expired_in`` is plausibly a unix-ms absolute time (vs TTL seconds)."""
|
||||
return int(expired_in) > (now_ms // 2)
|
||||
|
||||
|
||||
def _minimax_resolve_token_expiry_unix(expired_in: int, *, now: datetime) -> float:
    """Convert MiniMax's ``expired_in`` into an expiry time in unix seconds.

    ``expired_in`` is either an absolute unix-millisecond timestamp or a TTL
    in seconds. A timestamp is scaled down to seconds; a TTL is added to
    ``now``, with a floor of one second.
    """
    value = int(expired_in)
    current_ms = int(now.timestamp() * 1000)
    is_epoch_ms = _minimax_expired_in_looks_like_unix_ms(value, now_ms=current_ms)
    if not is_epoch_ms:
        return now.timestamp() + max(1, value)
    return value / 1000.0
|
||||
|
||||
|
||||
def _minimax_poll_token(
|
||||
client: httpx.Client, *, portal_base_url: str, client_id: str,
|
||||
user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
|
||||
|
|
@ -4765,12 +4781,11 @@ def _minimax_poll_token(
|
|||
# Defensive parsing: if it's small enough to be a duration, treat as seconds.
|
||||
import time as _time
|
||||
now_ms = int(_time.time() * 1000)
|
||||
if expired_in > now_ms // 2:
|
||||
# Looks like a unix-ms timestamp.
|
||||
deadline = expired_in / 1000.0
|
||||
raw = int(expired_in)
|
||||
if _minimax_expired_in_looks_like_unix_ms(raw, now_ms=now_ms):
|
||||
deadline = raw / 1000.0
|
||||
else:
|
||||
# Treat as duration in seconds from now.
|
||||
deadline = _time.time() + max(1, expired_in)
|
||||
deadline = _time.time() + max(1, raw)
|
||||
interval = max(2.0, (interval_ms or 2000) / 1000.0)
|
||||
|
||||
while _time.time() < deadline:
|
||||
|
|
@ -4884,8 +4899,10 @@ def _minimax_oauth_login(
|
|||
)
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
expires_in_s = int(token_data["expired_in"])
|
||||
expires_at = now.timestamp() + expires_in_s
|
||||
expires_at_unix = _minimax_resolve_token_expiry_unix(
|
||||
int(token_data["expired_in"]), now=now,
|
||||
)
|
||||
expires_in_s = max(0, int(expires_at_unix - now.timestamp()))
|
||||
|
||||
auth_state = {
|
||||
"provider": "minimax-oauth",
|
||||
|
|
@ -4899,7 +4916,7 @@ def _minimax_oauth_login(
|
|||
"refresh_token": token_data["refresh_token"],
|
||||
"resource_url": token_data.get("resource_url"),
|
||||
"obtained_at": now.isoformat(),
|
||||
"expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
|
||||
"expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
|
||||
"expires_in": expires_in_s,
|
||||
}
|
||||
|
||||
|
|
@ -4960,14 +4977,16 @@ def _refresh_minimax_oauth_state(
|
|||
relogin_required=True,
|
||||
)
|
||||
now_dt = datetime.now(timezone.utc)
|
||||
expires_in_s = int(payload["expired_in"])
|
||||
expires_at_unix = _minimax_resolve_token_expiry_unix(
|
||||
int(payload["expired_in"]), now=now_dt,
|
||||
)
|
||||
expires_in_s = max(0, int(expires_at_unix - now_dt.timestamp()))
|
||||
new_state = dict(state)
|
||||
new_state.update({
|
||||
"access_token": payload["access_token"],
|
||||
"refresh_token": payload.get("refresh_token", state["refresh_token"]),
|
||||
"obtained_at": now_dt.isoformat(),
|
||||
"expires_at": datetime.fromtimestamp(now_dt.timestamp() + expires_in_s,
|
||||
tz=timezone.utc).isoformat(),
|
||||
"expires_at": datetime.fromtimestamp(expires_at_unix, tz=timezone.utc).isoformat(),
|
||||
"expires_in": expires_in_s,
|
||||
})
|
||||
_minimax_save_auth_state(new_state)
|
||||
|
|
@ -5252,6 +5271,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
|||
get_curated_nous_model_ids, get_pricing_for_provider,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
union_with_portal_free_recommendations,
|
||||
union_with_portal_paid_recommendations,
|
||||
)
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
|
||||
|
|
@ -5260,19 +5280,27 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
|||
if model_ids:
|
||||
pricing = get_pricing_for_provider("nous")
|
||||
free_tier = check_nous_free_tier()
|
||||
_portal_for_recs = auth_state.get("portal_base_url", "")
|
||||
if free_tier:
|
||||
# The Portal's freeRecommendedModels endpoint is the
|
||||
# source of truth for what's free *right now*. Augment
|
||||
# the curated list with anything new the Portal flags
|
||||
# as free so users on older Hermes builds still see
|
||||
# newly-launched free models without a CLI release.
|
||||
_portal_for_recs = auth_state.get("portal_base_url", "")
|
||||
model_ids, pricing = union_with_portal_free_recommendations(
|
||||
model_ids, pricing, _portal_for_recs,
|
||||
)
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True,
|
||||
)
|
||||
else:
|
||||
# Paid-tier mirror: pull paidRecommendedModels so newly
|
||||
# launched paid models surface in the picker even if
|
||||
# the in-repo curated list and docs-hosted manifest
|
||||
# haven't caught up yet.
|
||||
model_ids, pricing = union_with_portal_paid_recommendations(
|
||||
model_ids, pricing, _portal_for_recs,
|
||||
)
|
||||
_portal = auth_state.get("portal_base_url", "")
|
||||
if model_ids:
|
||||
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
|
||||
|
|
|
|||
|
|
@ -375,10 +375,12 @@ def auth_add_command(args) -> None:
|
|||
return
|
||||
|
||||
if provider == "minimax-oauth":
|
||||
from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
|
||||
creds = resolve_minimax_oauth_runtime_credentials()
|
||||
creds = auth_mod._minimax_oauth_login(
|
||||
open_browser=not getattr(args, "no_browser", False),
|
||||
timeout_seconds=getattr(args, "timeout", None) or 15.0,
|
||||
)
|
||||
label = (getattr(args, "label", None) or "").strip() or label_from_token(
|
||||
creds["api_key"],
|
||||
creds["access_token"],
|
||||
_oauth_default_label(provider, len(pool.entries()) + 1),
|
||||
)
|
||||
entry = PooledCredential(
|
||||
|
|
@ -388,8 +390,9 @@ def auth_add_command(args) -> None:
|
|||
auth_type=AUTH_TYPE_OAUTH,
|
||||
priority=0,
|
||||
source=f"{SOURCE_MANUAL}:minimax_oauth",
|
||||
access_token=creds["api_key"],
|
||||
base_url=creds.get("base_url"),
|
||||
access_token=creds["access_token"],
|
||||
refresh_token=creds.get("refresh_token"),
|
||||
base_url=creds.get("inference_base_url"),
|
||||
)
|
||||
pool.add_entry(entry)
|
||||
print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
|
||||
|
|
|
|||
|
|
@ -468,20 +468,23 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
|
|||
|
||||
Telegram command names cannot contain hyphens, so they are replaced with
|
||||
underscores. Aliases are skipped -- Telegram shows one menu entry per
|
||||
canonical command. Commands that require arguments are skipped because
|
||||
selecting a Telegram BotCommand sends only ``/command`` and would execute
|
||||
an incomplete command.
|
||||
canonical command.
|
||||
|
||||
Plugin-registered slash commands are included so plugins get native
|
||||
autocomplete in Telegram without touching core code.
|
||||
Built-in commands that require arguments (e.g. /queue, /steer, /background)
|
||||
are **included** because their handlers return usage text when selected
|
||||
without a payload, making them discoverable via autocomplete.
|
||||
|
||||
Plugin-registered slash commands that require arguments are **excluded**
|
||||
because plugins may not provide a no-arg usage fallback.
|
||||
"""
|
||||
overrides = _resolve_config_gates()
|
||||
result: list[tuple[str, str]] = []
|
||||
for cmd in COMMAND_REGISTRY:
|
||||
if not _is_gateway_available(cmd, overrides):
|
||||
continue
|
||||
if _requires_argument(cmd.args_hint):
|
||||
continue
|
||||
# Built-in arg-taking commands are included — their handlers show
|
||||
# usage text when invoked without arguments, and hiding them from
|
||||
# the menu hurts discoverability (issue #24312).
|
||||
tg_name = _sanitize_telegram_name(cmd.name)
|
||||
if tg_name:
|
||||
result.append((tg_name, cmd.description))
|
||||
|
|
@ -1359,9 +1362,9 @@ class SlashCommandCompleter(Completer):
|
|||
try:
|
||||
proc = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=2,
|
||||
cwd=cwd,
|
||||
cwd=cwd, encoding="utf-8", errors="replace",
|
||||
)
|
||||
if proc.returncode == 0 and proc.stdout.strip():
|
||||
if proc.returncode == 0 and proc.stdout and proc.stdout.strip():
|
||||
raw = proc.stdout.strip().split("\n")
|
||||
# Store relative paths
|
||||
for p in raw[:5000]:
|
||||
|
|
|
|||
|
|
@ -477,6 +477,12 @@ DEFAULT_CONFIG = {
|
|||
# threshold before escalating to a full timeout. The warning fires
|
||||
# once per run and does not interrupt the agent. 0 = disable warning.
|
||||
"gateway_timeout_warning": 900,
|
||||
# Maximum time (seconds) the gateway will block an agent waiting for
|
||||
# a clarify-tool response from the user. Hit this and the agent
|
||||
# unblocks with "[user did not respond within Xm]" so it can adapt
|
||||
# rather than pinning the running-agent guard forever. CLI clarify
|
||||
# blocks indefinitely (input() is synchronous) and ignores this.
|
||||
"clarify_timeout": 600,
|
||||
# Periodic "still working" notification interval (seconds).
|
||||
# Sends a status message every N seconds so the user knows the
|
||||
# agent hasn't died during long tasks. 0 = disable notifications.
|
||||
|
|
@ -628,6 +634,12 @@ DEFAULT_CONFIG = {
|
|||
# so the server maps it to a persistent Firefox profile automatically.
|
||||
# When false (default), each session gets a random userId (ephemeral).
|
||||
"managed_persistence": False,
|
||||
# Optional externally managed Camofox identity. Useful when another
|
||||
# app owns the visible browser and Hermes should operate in it.
|
||||
"user_id": "",
|
||||
"session_key": "",
|
||||
# Rehydrate tab_id from Camofox before creating a new tab.
|
||||
"adopt_existing_tab": False,
|
||||
},
|
||||
},
|
||||
|
||||
|
|
@ -917,6 +929,14 @@ DEFAULT_CONFIG = {
|
|||
"persistent_output": True,
|
||||
"persistent_output_max_lines": 200,
|
||||
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
|
||||
# File-mutation verifier footer. When true (default), the agent
|
||||
# appends a one-line advisory to its final response whenever a
|
||||
# write_file / patch call failed during the turn and was never
|
||||
# superseded by a successful write to the same path. This catches
|
||||
# the "batch of parallel patches, half fail, model claims success"
|
||||
# class of over-claim that otherwise forces users to run
|
||||
# `git status` to verify edits landed. Set false to suppress.
|
||||
"file_mutation_verifier": True,
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
# UI language for static user-facing messages (approval prompts, a
|
||||
|
|
@ -1338,6 +1358,21 @@ DEFAULT_CONFIG = {
|
|||
"domains": [],
|
||||
"shared_files": [],
|
||||
},
|
||||
# Acknowledged supply-chain security advisories. Each entry is the
|
||||
# ID of an advisory the user has read and acted on (uninstalled the
|
||||
# compromised package, rotated credentials). Acked advisories no
|
||||
# longer trigger the startup banner. Add via `hermes doctor --ack
|
||||
# <id>`; remove by editing the list directly. See
|
||||
# ``hermes_cli/security_advisories.py`` for the catalog.
|
||||
"acked_advisories": [],
|
||||
# Allow Hermes to lazy-install opt-in backend packages from PyPI
|
||||
# the first time the user enables a backend that needs them
|
||||
# (e.g. installing ``elevenlabs`` when the user picks ElevenLabs as
|
||||
# their TTS provider). Set to false to require explicit
|
||||
# ``pip install`` for everything beyond the base set — appropriate
|
||||
# for restricted networks, audited environments, or air-gapped
|
||||
# systems where any runtime install is unacceptable.
|
||||
"allow_lazy_installs": True,
|
||||
},
|
||||
|
||||
"cron": {
|
||||
|
|
@ -1476,6 +1511,53 @@ DEFAULT_CONFIG = {
|
|||
"backup_keep": 5,
|
||||
},
|
||||
|
||||
# Language Server Protocol — semantic diagnostics from real
|
||||
# language servers (pyright, gopls, rust-analyzer, etc.) wired
|
||||
# into the post-write lint check used by ``write_file`` and
|
||||
# ``patch``.
|
||||
#
|
||||
# LSP is gated on git-workspace detection: when the agent's
|
||||
# cwd (or the file being edited) is inside a git worktree, LSP
|
||||
# runs against that workspace. When neither is in a git repo,
|
||||
# LSP stays dormant and the in-process syntax check is the only
|
||||
# tier — handy for Telegram/Discord chats where the cwd is the
|
||||
# user's home directory.
|
||||
"lsp": {
|
||||
# Master toggle. Setting this to false disables the entire
|
||||
# subsystem — no servers spawn, no background event loop, no
|
||||
# cost.
|
||||
"enabled": True,
|
||||
|
||||
# Diagnostic-wait mode for the post-write check.
|
||||
# ``"document"`` waits up to ``wait_timeout`` seconds for the
|
||||
# current file's diagnostics; ``"full"`` additionally requests
|
||||
# workspace-wide diagnostics (slower).
|
||||
"wait_mode": "document",
|
||||
"wait_timeout": 5.0,
|
||||
|
||||
# How to handle missing server binaries.
|
||||
# ``"auto"`` — try to install via npm/go/pip into
|
||||
# ``<HERMES_HOME>/lsp/bin/`` on first use.
|
||||
# ``"manual"`` — only use binaries already on PATH.
|
||||
# ``"off"`` — alias for ``manual``.
|
||||
"install_strategy": "auto",
|
||||
|
||||
# Per-server overrides. Each key is a server_id from the
|
||||
# registry (``pyright``, ``typescript``, ``gopls``,
|
||||
# ``rust-analyzer``, etc.) and accepts:
|
||||
# disabled: true
|
||||
# — skip this server even when its extensions match
|
||||
# command: ["full/path/to/server", "--stdio"]
|
||||
# — pin a custom binary path; bypasses auto-install
|
||||
# env: {"KEY": "value"}
|
||||
# — extra env vars passed to the spawned process
|
||||
# initialization_options: {...}
|
||||
# — merged into the LSP ``initializationOptions``
|
||||
# Empty by default; the registry defaults work for typical
|
||||
# setups.
|
||||
"servers": {},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 23,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -287,7 +287,8 @@ def _build_apikey_providers_list() -> list:
|
|||
(_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
|
||||
if _pp.base_url else None
|
||||
)
|
||||
_static.append((_label, _key_vars, _models_url, _base_var, True))
|
||||
_hc = getattr(_pp, "supports_health_check", True)
|
||||
_static.append((_label, _key_vars, _models_url, _base_var, _hc))
|
||||
except Exception:
|
||||
pass
|
||||
return _static
|
||||
|
|
@ -296,19 +297,101 @@ def _build_apikey_providers_list() -> list:
|
|||
def run_doctor(args):
|
||||
"""Run diagnostic checks."""
|
||||
should_fix = getattr(args, 'fix', False)
|
||||
ack_target = getattr(args, 'ack', None)
|
||||
|
||||
# Doctor runs from the interactive CLI, so CLI-gated tool availability
|
||||
# checks (like cronjob management) should see the same context as `hermes`.
|
||||
os.environ.setdefault("HERMES_INTERACTIVE", "1")
|
||||
|
||||
|
||||
# Handle `hermes doctor --ack <id>` as a fast path. Persist the ack and
|
||||
# return without running the rest of the diagnostics — the user has
|
||||
# already seen the advisory and just wants to silence it.
|
||||
if ack_target:
|
||||
from hermes_cli.security_advisories import (
|
||||
ADVISORIES,
|
||||
ack_advisory,
|
||||
)
|
||||
valid_ids = {a.id for a in ADVISORIES}
|
||||
if ack_target not in valid_ids:
|
||||
print(color(
|
||||
f"Unknown advisory ID: {ack_target!r}. Known IDs: "
|
||||
f"{', '.join(sorted(valid_ids)) or '(none)'}",
|
||||
Colors.RED,
|
||||
))
|
||||
sys.exit(2)
|
||||
if ack_advisory(ack_target):
|
||||
print(color(
|
||||
f" ✓ Acknowledged advisory {ack_target}. "
|
||||
f"It will no longer trigger startup banners.",
|
||||
Colors.GREEN,
|
||||
))
|
||||
else:
|
||||
print(color(
|
||||
f" ✗ Failed to persist ack for {ack_target}. "
|
||||
f"Check ~/.hermes/config.yaml is writable.",
|
||||
Colors.RED,
|
||||
))
|
||||
sys.exit(1)
|
||||
return
|
||||
|
||||
issues = []
|
||||
manual_issues = [] # issues that can't be auto-fixed
|
||||
fixed_count = 0
|
||||
|
||||
|
||||
print()
|
||||
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
|
||||
print(color("│ 🩺 Hermes Doctor │", Colors.CYAN))
|
||||
print(color("└─────────────────────────────────────────────────────────┘", Colors.CYAN))
|
||||
|
||||
# =========================================================================
|
||||
# Check: Security advisories (RUNS FIRST — these are the most urgent)
|
||||
# =========================================================================
|
||||
print()
|
||||
print(color("◆ Security Advisories", Colors.CYAN, Colors.BOLD))
|
||||
try:
|
||||
from hermes_cli.security_advisories import (
|
||||
detect_compromised,
|
||||
filter_unacked,
|
||||
full_remediation_text,
|
||||
get_acked_ids,
|
||||
)
|
||||
all_hits = detect_compromised()
|
||||
fresh_hits = filter_unacked(all_hits)
|
||||
if fresh_hits:
|
||||
for hit in fresh_hits:
|
||||
check_fail(
|
||||
f"{hit.advisory.title}",
|
||||
f"({hit.package}=={hit.installed_version})",
|
||||
)
|
||||
# Print the full remediation block, indented under the
|
||||
# check_fail header so it reads as a single section.
|
||||
for line in full_remediation_text(hit):
|
||||
if line:
|
||||
print(f" {color(line, Colors.YELLOW)}")
|
||||
else:
|
||||
print()
|
||||
# Funnel into the action list so the summary block surfaces it
|
||||
# for users who scroll past the section.
|
||||
manual_issues.append(
|
||||
f"Resolve security advisory {hit.advisory.id}: "
|
||||
f"uninstall {hit.package}=={hit.installed_version} and "
|
||||
f"rotate credentials, then run "
|
||||
f"`hermes doctor --ack {hit.advisory.id}`."
|
||||
)
|
||||
# Acked-but-still-installed: show as informational so the user
|
||||
# knows the package is still on disk after the ack.
|
||||
acked_ids = get_acked_ids()
|
||||
for h in all_hits:
|
||||
if h.advisory.id in acked_ids:
|
||||
check_warn(
|
||||
f"{h.package}=={h.installed_version} still installed "
|
||||
f"(advisory {h.advisory.id} acknowledged)",
|
||||
)
|
||||
else:
|
||||
check_ok("No active security advisories")
|
||||
except Exception as e:
|
||||
# Never let a bug in the advisory check block the rest of doctor.
|
||||
check_warn(f"Security advisory check failed: {e}")
|
||||
|
||||
# =========================================================================
|
||||
# Check: Python version
|
||||
|
|
|
|||
|
|
@ -2164,7 +2164,7 @@ Environment="PATH={sane_path}"
|
|||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=always
|
||||
RestartSec=60
|
||||
RestartSec=5
|
||||
RestartMaxDelaySec=300
|
||||
RestartSteps=5
|
||||
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
|
||||
|
|
@ -2199,7 +2199,7 @@ Environment="PATH={sane_path}"
|
|||
Environment="VIRTUAL_ENV={venv_dir}"
|
||||
Environment="HERMES_HOME={hermes_home}"
|
||||
Restart=always
|
||||
RestartSec=60
|
||||
RestartSec=5
|
||||
RestartMaxDelaySec=300
|
||||
RestartSteps=5
|
||||
RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
|
||||
|
|
@ -3658,6 +3658,15 @@ def _all_platforms() -> list[dict]:
|
|||
``hermes setup gateway`` without needing the gateway to be running.
|
||||
Built-ins keep their dict shape; plugin entries are adapted to the same
|
||||
shape with ``_registry_entry`` holding the source.
|
||||
|
||||
Platform-specific gating: some platforms can't be configured on
|
||||
every host. Currently:
|
||||
- Matrix is hidden on Windows. The [matrix] extra pulls
|
||||
``mautrix[encryption]`` -> ``python-olm``, which has no Windows
|
||||
wheel and needs ``make`` + libolm to build from sdist. There's
|
||||
no native Windows path that works, so we don't offer it in the
|
||||
picker. Users who want Matrix on Windows can run hermes under
|
||||
WSL.
|
||||
"""
|
||||
# Populate the registry so plugin platforms are visible. Idempotent.
|
||||
# Bundled platform plugins (``kind: platform``) auto-load unconditionally,
|
||||
|
|
@ -3671,6 +3680,11 @@ def _all_platforms() -> list[dict]:
|
|||
logger.debug("plugin discovery failed during platform enumeration: %s", e)
|
||||
|
||||
platforms = [dict(p) for p in _PLATFORMS]
|
||||
|
||||
# Drop platforms that can't function on this host. See docstring.
|
||||
if sys.platform == "win32":
|
||||
platforms = [p for p in platforms if p.get("key") != "matrix"]
|
||||
|
||||
by_key = {p["key"]: p for p in platforms}
|
||||
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -2607,6 +2607,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
|||
check_nous_free_tier,
|
||||
partition_nous_models_by_tier,
|
||||
union_with_portal_free_recommendations,
|
||||
union_with_portal_paid_recommendations,
|
||||
)
|
||||
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
|
|
@ -2662,6 +2663,10 @@ def _model_flow_nous(config, current_model="", args=None):
|
|||
# with the Portal's freeRecommendedModels list so newly-launched free
|
||||
# models show up even if this CLI build's hardcoded curated list and
|
||||
# docs-hosted manifest haven't caught up yet.
|
||||
#
|
||||
# For paid users: mirror the same idea with paidRecommendedModels so
|
||||
# newly-launched paid models surface in the picker too — independent
|
||||
# of CLI release cadence.
|
||||
unavailable_models: list[str] = []
|
||||
if free_tier:
|
||||
model_ids, pricing = union_with_portal_free_recommendations(
|
||||
|
|
@ -2670,6 +2675,10 @@ def _model_flow_nous(config, current_model="", args=None):
|
|||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True
|
||||
)
|
||||
else:
|
||||
model_ids, pricing = union_with_portal_paid_recommendations(
|
||||
model_ids, pricing, _nous_portal_url,
|
||||
)
|
||||
|
||||
if not model_ids and not unavailable_models:
|
||||
print("No models available for Nous Portal after filtering.")
|
||||
|
|
@ -9412,7 +9421,7 @@ def main():
|
|||
gateway_parser = subparsers.add_parser(
|
||||
"gateway",
|
||||
help="Messaging gateway management",
|
||||
description="Manage the messaging gateway (Telegram, Discord, WhatsApp)",
|
||||
description="Manage the messaging gateway (Telegram, Discord, WhatsApp, Weixin, and more)",
|
||||
)
|
||||
gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
|
||||
|
||||
|
|
@ -9555,6 +9564,17 @@ def main():
|
|||
|
||||
gateway_parser.set_defaults(func=cmd_gateway)
|
||||
|
||||
# =========================================================================
|
||||
# lsp command
|
||||
# =========================================================================
|
||||
try:
|
||||
from agent.lsp.cli import register_subparser as _lsp_register
|
||||
_lsp_register(subparsers)
|
||||
except Exception as _lsp_err: # noqa: BLE001
|
||||
# LSP is optional infrastructure — never let a registration
|
||||
# failure break the CLI overall.
|
||||
logger.debug("LSP CLI registration failed: %s", _lsp_err)
|
||||
|
||||
# =========================================================================
|
||||
# setup command
|
||||
# =========================================================================
|
||||
|
|
@ -10117,6 +10137,16 @@ def main():
|
|||
doctor_parser.add_argument(
|
||||
"--fix", action="store_true", help="Attempt to fix issues automatically"
|
||||
)
|
||||
doctor_parser.add_argument(
|
||||
"--ack",
|
||||
metavar="ADVISORY_ID",
|
||||
default=None,
|
||||
help=(
|
||||
"Acknowledge a security advisory by ID and exit. After ack, the "
|
||||
"advisory will no longer trigger startup banners. Run `hermes "
|
||||
"doctor` first to see active advisories and their IDs."
|
||||
),
|
||||
)
|
||||
doctor_parser.set_defaults(func=cmd_doctor)
|
||||
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -621,6 +621,71 @@ def union_with_portal_free_recommendations(
|
|||
return (augmented_ids, augmented_pricing)
|
||||
|
||||
|
||||
def union_with_portal_paid_recommendations(
    curated_ids: list[str],
    pricing: dict[str, dict[str, str]],
    portal_base_url: str = "",
    *,
    force_refresh: bool = False,
) -> tuple[list[str], dict[str, dict[str, str]]]:
    """Augment curated list with the Portal's ``paidRecommendedModels``.

    Mirror of :func:`union_with_portal_free_recommendations` for paid-tier
    users. Newly-launched paid models advertised by the Portal surface in
    the picker even when this build's curated list does not contain them.

    Args:
        curated_ids: model IDs already selected for the picker.
        pricing: pricing table keyed by model ID. Copied, never modified —
            no pricing entries are synthesized for paid models.
        portal_base_url: forwarded to ``fetch_nous_recommended_models``.
        force_refresh: forwarded to ``fetch_nous_recommended_models``.

    Returns:
        ``(model_ids, pricing)`` as fresh containers. Portal paid
        recommendations missing from ``curated_ids`` are *prepended* (so the
        picker shows them first), each at most once and in Portal order.

    A failing fetch, a non-dict payload, or a missing/empty
    ``paidRecommendedModels`` list degrades to returning copies of the
    inputs unchanged — never block the picker on a Portal-side hiccup.
    """
    try:
        payload = fetch_nous_recommended_models(
            portal_base_url, force_refresh=force_refresh
        )
    except Exception:
        return (list(curated_ids), dict(pricing))

    paid_block = payload.get("paidRecommendedModels") if isinstance(payload, dict) else None
    if not isinstance(paid_block, list) or not paid_block:
        return (list(curated_ids), dict(pricing))

    # Track everything already emitted so a model the Portal lists twice
    # is only prepended once.
    seen = set(curated_ids)
    new_ones: list[str] = []
    for entry in paid_block:
        name = _extract_model_name(entry)
        if name and name not in seen:
            seen.add(name)
            new_ones.append(name)

    return (new_ones + list(curated_ids), dict(pricing))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TTL cache for free-tier detection — avoids repeated API calls within a
|
||||
# session while still picking up upgrades quickly.
|
||||
|
|
|
|||
|
|
@ -205,6 +205,14 @@ def _resolve_runtime_from_pool_entry(
|
|||
elif provider == "google-gemini-cli":
|
||||
api_mode = "chat_completions"
|
||||
base_url = base_url or "cloudcode-pa://google"
|
||||
elif provider == "minimax-oauth":
|
||||
# MiniMax OAuth tokens are valid only against the Anthropic Messages
|
||||
# compatible endpoint. Do not honor stale model.api_mode values from a
|
||||
# prior OpenAI-compatible provider, or the client will hit
|
||||
# /chat/completions under /anthropic and receive a bare nginx 404.
|
||||
api_mode = "anthropic_messages"
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
base_url = base_url or (pconfig.inference_base_url if pconfig else "")
|
||||
elif provider == "anthropic":
|
||||
api_mode = "anthropic_messages"
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
|
|
|
|||
451
hermes_cli/security_advisories.py
Normal file
451
hermes_cli/security_advisories.py
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
"""
|
||||
Security advisory checker for Hermes Agent.
|
||||
|
||||
Detects known-compromised Python packages installed in the active venv
|
||||
(supply-chain attacks like the Mini Shai-Hulud worm of May 2026 that
|
||||
poisoned ``mistralai 2.4.6`` on PyPI) and surfaces remediation guidance to
|
||||
the user.
|
||||
|
||||
Design goals:
|
||||
|
||||
- **Cheap.** A single ``importlib.metadata.version()`` call per advisory
|
||||
package. Safe to run on every CLI startup.
|
||||
- **Loud when it matters, silent otherwise.** If no compromised package is
|
||||
installed, the user sees nothing.
|
||||
- **Acknowledgeable.** Once the user has read and acted on an advisory they
|
||||
can dismiss it via ``hermes doctor --ack <id>``; the ack is persisted to
|
||||
``config.security.acked_advisories`` and survives restart.
|
||||
- **Extensible.** Adding a new advisory is one entry in ``ADVISORIES``;
|
||||
adding a new compromised version is a one-line edit. No code changes
|
||||
needed when the next worm hits.
|
||||
|
||||
The check is invoked from three places:
|
||||
|
||||
1. ``hermes doctor`` (and ``hermes doctor --ack <id>``)
|
||||
2. CLI startup banner (one short line, then full guidance via
|
||||
``hermes doctor``)
|
||||
3. Gateway startup (logged to gateway.log; first interactive message gets
|
||||
a one-line operator banner)
|
||||
|
||||
This module is intentionally dependency-free beyond the stdlib so it can
|
||||
run in environments where the rest of Hermes failed to import.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Advisory catalog
|
||||
#
|
||||
# Each advisory is a community-facing security warning about one or more
|
||||
# specific package versions that are known to be compromised. To add a new
|
||||
# advisory:
|
||||
#
|
||||
# 1. Append a new ``Advisory`` to ``ADVISORIES`` below
|
||||
# 2. Set ``compromised`` to a tuple of ``(pkg_name, frozenset_of_versions)``
|
||||
# — version strings must match what ``importlib.metadata.version()``
|
||||
# returns. Use an empty frozenset to flag *any installed version*
|
||||
# (rare; only when the maintainer namespace itself is compromised).
|
||||
# 3. Write 2-4 short ``remediation`` lines a non-expert can copy/paste.
|
||||
#
|
||||
# Do NOT remove old advisories. Once an advisory ships, leave it in place so
|
||||
# users running an older release with the compromised package still get
|
||||
# warned. Mark superseded ones via ``superseded_by`` if needed.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Advisory:
    """One security advisory entry.

    Attributes:
        id: stable identifier used for acks (e.g. ``shai-hulud-2026-05``).
            Lowercase-hyphen, never reused.
        title: one-line headline shown in banners.
        summary: 1-3 sentence description of what was compromised and how.
        url: reference URL (Socket advisory, GitHub advisory, PyPI page).
        compromised: tuple of ``(package_name, frozenset_of_versions)``
            pairs. Empty frozenset means "any version of this package is
            considered suspect" — use sparingly.
        remediation: ordered list of steps the user should take. First step
            should be the uninstall command; subsequent steps the credential
            audit / rotation guidance.
        published: ISO date string for sort order.
        severity: one of ``low`` / ``medium`` / ``high`` / ``critical``;
            defaults to ``high``. Printed in the remediation block.
    """

    id: str
    title: str
    summary: str
    url: str
    compromised: tuple[tuple[str, frozenset[str]], ...]
    remediation: tuple[str, ...]
    published: str = ""
    severity: str = "high"  # low / medium / high / critical
|
||||
|
||||
|
||||
# Catalog of known advisories. ``detect_compromised`` walks this tuple in
# order, so hits are reported in catalog order and the first hit is the one
# named in the short startup banner.
ADVISORIES: tuple[Advisory, ...] = (
    Advisory(
        id="shai-hulud-2026-05",
        title="Mini Shai-Hulud worm — mistralai 2.4.6 compromised on PyPI",
        summary=(
            "PyPI quarantined the mistralai package on 2026-05-12 after a "
            "malicious 2.4.6 release. The worm steals credentials from "
            "environment variables and credential files (~/.npmrc, ~/.pypirc, "
            "~/.aws/credentials, GitHub PATs, cloud SDK tokens) and exfils "
            "them to a hardcoded webhook. If you ran any Python process that "
            "imported mistralai 2.4.6 — including hermes when configured "
            "with provider=mistral for TTS or STT — assume those credentials "
            "are exposed."
        ),
        url="https://socket.dev/blog/mini-shai-hulud-worm-pypi",
        compromised=(
            ("mistralai", frozenset({"2.4.6"})),
        ),
        remediation=(
            "Run: pip uninstall -y mistralai (or: uv pip uninstall mistralai)",
            "Rotate API keys in ~/.hermes/.env (OpenRouter, Anthropic, OpenAI, "
            "Nous, GitHub, AWS, Google, Mistral, etc.).",
            "Audit ~/.npmrc, ~/.pypirc, ~/.aws/credentials, ~/.config/gh/hosts.yml, "
            "and any other credential files for tokens that may have been read.",
            "Check GitHub for unexpected new SSH keys, deploy keys, or webhook "
            "additions on repos you have admin on.",
            "After cleanup: hermes doctor --ack shai-hulud-2026-05 to dismiss "
            "this warning.",
        ),
        published="2026-05-12",
        severity="critical",
    ),
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Detection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AdvisoryHit:
    """One package-version match against an advisory.

    Attributes:
        advisory: the catalog entry that matched.
        package: name of the installed package that triggered the match.
        installed_version: version string reported for that package.
    """

    advisory: Advisory
    package: str
    installed_version: str
|
||||
|
||||
|
||||
def _installed_version(pkg_name: str) -> Optional[str]:
    """Look up the installed version of ``pkg_name``.

    Goes through ``importlib.metadata`` rather than pip, so it also works in
    venvs that ship without pip (e.g. uv-created ones).

    Returns:
        The version string, or ``None`` when the package is not installed
        or its metadata could not be read.
    """
    try:
        import importlib.metadata as _metadata
    except ImportError:  # py<3.8 — Hermes requires 3.10+ but defensive.
        return None

    try:
        found = _metadata.version(pkg_name)
    except _metadata.PackageNotFoundError:
        found = None
    except Exception:
        # Corrupt metadata can surface as ValueError or OSError. Advisory
        # checking must never take down the CLI startup path.
        logger.debug("importlib.metadata.version(%s) raised", pkg_name, exc_info=True)
        found = None
    return found
|
||||
|
||||
|
||||
def detect_compromised(
    advisories: Iterable[Advisory] = ADVISORIES,
) -> list[AdvisoryHit]:
    """Collect every advisory match among the installed packages.

    A package matches when it is installed and either its version is listed
    in the advisory's compromised set, or that set is empty (meaning *any*
    installed version is suspect). Hits come back in catalog order.
    """
    found: list[AdvisoryHit] = []
    for entry in advisories:
        for name, flagged in entry.compromised:
            current = _installed_version(name)
            if current is not None and (not flagged or current in flagged):
                found.append(
                    AdvisoryHit(
                        advisory=entry,
                        package=name,
                        installed_version=current,
                    )
                )
    return found
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Acknowledgement persistence
|
||||
#
|
||||
# Acks live under ``security.acked_advisories`` in config.yaml as a list of
|
||||
# advisory IDs. The list is the only state — no per-host data, no
|
||||
# timestamps, no fingerprints. Users sharing a config.yaml across machines
|
||||
# (rare but possible) get the same dismissal everywhere, which is the
|
||||
# correct behavior for a global advisory.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def get_acked_ids() -> set[str]:
    """Return the set of advisory IDs the user has dismissed.

    Reads ``security.acked_advisories`` from the loaded config. Entries are
    stringified and stripped; blank entries are dropped.

    Returns an empty set when the config cannot be loaded or has an
    unexpected shape (config not a mapping, ``security`` not a mapping,
    ``acked_advisories`` not a list). Don't block startup just because
    config is broken — the advisory will keep firing until config is
    repaired, which is fine.
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
    except Exception:
        logger.debug("Could not load config for advisory acks", exc_info=True)
        return set()

    # Guard each level: a hand-edited config may hold a scalar where a
    # mapping is expected, and ``.get`` on it would raise outside the try.
    sec = cfg.get("security") if isinstance(cfg, dict) else None
    if not isinstance(sec, dict):
        return set()
    raw = sec.get("acked_advisories")
    if not isinstance(raw, list):
        return set()
    return {text for text in (str(item).strip() for item in raw) if text}
|
||||
|
||||
|
||||
def ack_advisory(advisory_id: str) -> bool:
    """Persist an ack for ``advisory_id``. Returns True on success.

    The ID is stripped first; a blank ID returns False. The ack is appended
    to ``security.acked_advisories`` and the config is saved.

    Idempotent — acking an already-acked ID is a no-op: the config is not
    rewritten and True is returned.

    Returns False (after logging) when the config module cannot be imported
    or loading/saving the config raises.
    """
    advisory_id = advisory_id.strip()
    if not advisory_id:
        return False
    try:
        from hermes_cli.config import load_config, save_config
    except Exception:
        logger.warning("Could not import config module to persist ack")
        return False
    try:
        cfg = load_config()
        sec = cfg.get("security")
        if sec is None:
            # Covers both a missing key and an empty ``security:`` entry,
            # which YAML loads as None rather than as an empty mapping.
            sec = cfg["security"] = {}
        existing = sec.get("acked_advisories") or []
        if not isinstance(existing, list):
            existing = []
        if advisory_id in existing:
            # Already acked: nothing changed, so don't rewrite the file.
            return True
        existing.append(advisory_id)
        sec["acked_advisories"] = existing
        save_config(cfg)
        return True
    except Exception:
        logger.exception("Failed to persist advisory ack for %s", advisory_id)
        return False
|
||||
|
||||
|
||||
def filter_unacked(hits: list[AdvisoryHit]) -> list[AdvisoryHit]:
    """Drop hits whose advisory ID the user has already acknowledged.

    An empty input short-circuits to ``[]`` without reading the config.
    """
    if hits:
        dismissed = get_acked_ids()
        return [hit for hit in hits if hit.advisory.id not in dismissed]
    return []
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Rendering helpers
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _term_supports_color() -> bool:
    """Report whether ANSI colour should be emitted on stdout.

    False when the ``NO_COLOR`` environment variable is set to a non-empty
    value or when stdout is not a TTY; True otherwise.
    """
    colour_disabled = bool(os.environ.get("NO_COLOR"))
    return not colour_disabled and bool(sys.stdout.isatty())
|
||||
|
||||
|
||||
def short_banner_lines(hits: list[AdvisoryHit]) -> list[str]:
    """Build the short startup-banner text for ``hits``.

    Returns an empty list for no hits; otherwise three lines (headline,
    detected package, pointer to ``hermes doctor``), plus a fourth line
    inserted after the headline when more than one hit is active. The
    headline names the *first* hit in ``hits``. Caller is responsible for
    color/styling.
    """
    if not hits:
        return []

    lead = hits[0]
    extra = len(hits) - 1

    banner = [f"SECURITY ADVISORY [{lead.advisory.id}]: {lead.advisory.title}"]
    if extra:
        plural = "ies" if extra > 1 else "y"
        banner.append(f" ({extra} additional advisor{plural} also active.)")
    banner.append(f" Detected: {lead.package}=={lead.installed_version}")
    banner.append(" Run 'hermes doctor' for remediation steps.")
    return banner
|
||||
|
||||
|
||||
def full_remediation_text(hit: AdvisoryHit) -> list[str]:
    """Render one hit as a list of lines: header, summary, numbered steps.

    Empty strings in the result mark blank separator lines.
    """
    adv = hit.advisory
    header = [
        f"=== {adv.title} ===",
        f"ID: {adv.id} Severity: {adv.severity} Published: {adv.published}",
        f"Detected: {hit.package}=={hit.installed_version}",
        f"Reference: {adv.url}",
        "",
        adv.summary,
        "",
        "Remediation:",
    ]
    steps = [f" {number}. {text}" for number, text in enumerate(adv.remediation, start=1)]
    return header + steps
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Startup-banner gating
|
||||
#
|
||||
# We do NOT want to hammer the user with the banner on every command. Once
|
||||
# they've seen it inside a 24h window we cache that fact in
|
||||
# ``~/.hermes/cache/advisory_banner_seen`` (a single line per advisory ID:
|
||||
# ``<id> <unix_epoch_seconds>``, where the timestamp is a float).
|
||||
#
|
||||
# Acked advisories never re-banner. Cached-but-not-acked advisories
|
||||
# re-banner after 24h so the user doesn't fully forget.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
_BANNER_CACHE_FILE = "advisory_banner_seen"
|
||||
_BANNER_REPEAT_HOURS = 24
|
||||
|
||||
|
||||
def _banner_cache_path() -> Optional[Path]:
    """Return the banner-cache file path, creating its directory if needed.

    The file lives in the ``cache`` directory under the Hermes home.
    Returns ``None`` when the home cannot be resolved or the directory
    cannot be created.
    """
    try:
        from hermes_constants import get_hermes_home

        directory = Path(get_hermes_home()).joinpath("cache")
        directory.mkdir(parents=True, exist_ok=True)
        return directory.joinpath(_BANNER_CACHE_FILE)
    except Exception:
        return None
|
||||
|
||||
|
||||
def _read_banner_cache() -> dict[str, float]:
    """Load the banner cache as ``{advisory_id: last_shown_epoch_seconds}``.

    Each line holds an advisory ID and a float timestamp separated by
    whitespace; blank or malformed lines are skipped. A missing file or any
    read error yields an empty dict.
    """
    cache_file = _banner_cache_path()
    if cache_file is None or not cache_file.exists():
        return {}

    stamps: dict[str, float] = {}
    try:
        for raw_line in cache_file.read_text(encoding="utf-8").splitlines():
            fields = raw_line.strip().split(None, 1)
            if len(fields) != 2:
                # Blank line or a line without a timestamp column.
                continue
            key, stamp = fields
            try:
                stamps[key] = float(stamp)
            except ValueError:
                continue
    except Exception:
        return {}
    return stamps
|
||||
|
||||
|
||||
def _write_banner_cache(seen: dict[str, float]) -> None:
    """Write ``seen`` to the banner cache, one ``<id> <timestamp>`` per line.

    Best-effort: does nothing when the cache path is unavailable, and a
    failed write is only logged at debug level.
    """
    target = _banner_cache_path()
    if target is None:
        return
    try:
        body = "\n".join(f"{aid} {ts}" for aid, ts in seen.items()) + "\n"
        target.write_text(body, encoding="utf-8")
    except Exception:
        logger.debug("Could not write advisory banner cache", exc_info=True)
|
||||
|
||||
|
||||
def hits_due_for_banner(
    hits: list[AdvisoryHit],
    *,
    repeat_hours: int = _BANNER_REPEAT_HOURS,
) -> list[AdvisoryHit]:
    """Return only hits whose banner is due (not acked, not recently shown).

    Args:
        hits: advisory hits, typically from ``detect_compromised``.
        repeat_hours: minimum number of hours between two banners for the
            same advisory ID.

    Returns:
        The unacked hits whose advisory was last shown more than
        ``repeat_hours`` ago (or never), in their original order. Every hit
        belonging to a due advisory is included, so an advisory that
        matches several packages reports all of them.

    Side effect: stamps the banner cache for any advisory that's about to
    be shown. Callers should subsequently render the result.
    """
    import time

    fresh = filter_unacked(hits)
    if not fresh:
        return []
    now = time.time()
    cache = _read_banner_cache()
    cutoff = now - (repeat_hours * 3600)

    # Decide dueness per advisory ID from the cache as read, *before*
    # stamping it. Stamping inside the loop would make a second hit under
    # the same advisory look recently shown and drop it.
    due_ids = {
        hit.advisory.id
        for hit in fresh
        if cache.get(hit.advisory.id, 0.0) < cutoff
    }
    if not due_ids:
        return []

    due = [hit for hit in fresh if hit.advisory.id in due_ids]
    for advisory_id in due_ids:
        cache[advisory_id] = now
    _write_banner_cache(cache)
    return due
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Public entry points used by doctor / CLI / gateway
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def render_doctor_section(hits: list[AdvisoryHit]) -> tuple[bool, list[str]]:
    """Build the security-advisory section shown by ``hermes doctor``.

    Returns:
        ``(has_problems, lines)``. With no unacked hits this is ``False``
        plus a single all-clear line. Otherwise it is ``True`` plus the full
        remediation text of each unacked hit, with one empty line between
        consecutive hits. Printing and colouring are left to the caller.
    """
    active = filter_unacked(hits)
    if not active:
        return False, ["No active security advisories. ✓"]

    rendered: list[str] = []
    for position, hit in enumerate(active):
        if position > 0:
            # Blank separator between advisories, none before the first.
            rendered.append("")
        rendered.extend(full_remediation_text(hit))
    return True, rendered
|
||||
|
||||
|
||||
def startup_banner(hits: list[AdvisoryHit]) -> Optional[str]:
    """Produce the startup banner text, or ``None`` when no banner is due.

    Delegates the "is it due" decision to ``hits_due_for_banner``, which
    updates the banner cache as a side effect. On colour-capable terminals
    the text is wrapped in a bold-red ANSI escape pair.
    """
    due = hits_due_for_banner(hits)
    if not due:
        return None

    text = "\n".join(short_banner_lines(due))
    if not _term_supports_color():
        return text
    return "\x1b[1;31m" + text + "\x1b[0m"
|
||||
|
||||
|
||||
def gateway_log_message(hits: list[AdvisoryHit]) -> Optional[str]:
    """Summarise unacked advisories as one log line for gateway operators.

    Returns ``None`` when nothing is unacked. A single hit is described in
    detail (id, package, installed version, title, URL). Several hits are
    reduced to a count plus the list of advisory ids.
    """
    active = filter_unacked(hits)
    if not active:
        return None

    if len(active) != 1:
        ids = ", ".join(h.advisory.id for h in active)
        return (
            f"{len(active)} security advisories active "
            f"(IDs: {ids}). "
            "Run `hermes doctor` on the gateway host for details."
        )

    only = active[0]
    advisory = only.advisory
    return (
        f"Security advisory [{advisory.id}] active: "
        f"{only.package}=={only.installed_version} matches {advisory.title}. "
        f"See {advisory.url}"
    )
|
||||
|
|
@ -205,15 +205,9 @@ TOOL_CATEGORIES = {
|
|||
],
|
||||
"tts_provider": "elevenlabs",
|
||||
},
|
||||
{
|
||||
"name": "Mistral (Voxtral TTS)",
|
||||
"badge": "paid",
|
||||
"tag": "Multilingual, native Opus",
|
||||
"env_vars": [
|
||||
{"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
|
||||
],
|
||||
"tts_provider": "mistral",
|
||||
},
|
||||
# Mistral (Voxtral TTS) temporarily hidden — `mistralai` PyPI
|
||||
# package is currently quarantined (malicious 2.4.6 release on
|
||||
# 2026-05-12). Restore this entry once PyPI un-quarantines.
|
||||
{
|
||||
"name": "Google Gemini TTS",
|
||||
"badge": "preview",
|
||||
|
|
|
|||
|
|
@ -59,10 +59,22 @@ try:
|
|||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
except ImportError:
|
||||
raise SystemExit(
|
||||
"Web UI requires fastapi and uvicorn.\n"
|
||||
f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
|
||||
)
|
||||
# First try lazy-installing the dashboard extras. Only the user actually
|
||||
# running `hermes dashboard` needs fastapi+uvicorn; lazy install keeps
|
||||
# them out of every other install path. After install, re-import.
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("tool.dashboard", prompt=False)
|
||||
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, Response
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import BaseModel
|
||||
except Exception:
|
||||
raise SystemExit(
|
||||
"Web UI requires fastapi and uvicorn.\n"
|
||||
f"Install with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'"
|
||||
)
|
||||
|
||||
WEB_DIST = Path(os.environ["HERMES_WEB_DIST"]) if "HERMES_WEB_DIST" in os.environ else Path(__file__).parent / "web_dist"
|
||||
_log = logging.getLogger(__name__)
|
||||
|
|
@ -280,7 +292,9 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
|
|||
"stt.provider": {
|
||||
"type": "select",
|
||||
"description": "Speech-to-text provider",
|
||||
"options": ["local", "groq", "openai", "mistral", "xai", "elevenlabs"],
|
||||
# "mistral" temporarily removed — mistralai PyPI package quarantined
|
||||
# (malicious 2.4.6 release on 2026-05-12). Restore once available.
|
||||
"options": ["local", "groq", "openai", "xai", "elevenlabs"],
|
||||
},
|
||||
"stt.elevenlabs.model_id": {
|
||||
"type": "select",
|
||||
|
|
@ -2808,6 +2822,7 @@ def _minimax_poller(session_id: str) -> None:
|
|||
"""
|
||||
from hermes_cli.auth import (
|
||||
_minimax_poll_token,
|
||||
_minimax_resolve_token_expiry_unix,
|
||||
_minimax_save_auth_state,
|
||||
MINIMAX_OAUTH_GLOBAL_INFERENCE,
|
||||
MINIMAX_OAUTH_SCOPE,
|
||||
|
|
@ -2845,8 +2860,10 @@ def _minimax_poller(session_id: str) -> None:
|
|||
# dashboard path; cn-region operators can still use the CLI
|
||||
# flow which supports `--region cn`.
|
||||
now = datetime.now(timezone.utc)
|
||||
expires_in_s = int(token_data["expired_in"])
|
||||
expires_at_ts = now.timestamp() + expires_in_s
|
||||
expires_at_ts = _minimax_resolve_token_expiry_unix(
|
||||
int(token_data["expired_in"]), now=now,
|
||||
)
|
||||
expires_in_s = max(0, int(expires_at_ts - now.timestamp()))
|
||||
auth_state = {
|
||||
"provider": "minimax-oauth",
|
||||
"region": sess.get("region", "global"),
|
||||
|
|
@ -4802,6 +4819,9 @@ def _get_dashboard_plugins(force_rescan: bool = False) -> list:
|
|||
global _dashboard_plugins_cache
|
||||
if _dashboard_plugins_cache is None or force_rescan:
|
||||
_dashboard_plugins_cache = _discover_dashboard_plugins()
|
||||
elif _dashboard_plugins_cache:
|
||||
if any(not Path(p["_dir"]).is_dir() for p in _dashboard_plugins_cache):
|
||||
_dashboard_plugins_cache = _discover_dashboard_plugins()
|
||||
return _dashboard_plugins_cache
|
||||
|
||||
|
||||
|
|
@ -5213,11 +5233,33 @@ def start_server(
|
|||
if open_browser:
|
||||
import webbrowser
|
||||
|
||||
def _open():
|
||||
time.sleep(1.0)
|
||||
webbrowser.open(f"http://{host}:{port}")
|
||||
# On headless Linux (no DISPLAY or WAYLAND_DISPLAY) some registered
|
||||
# browsers are TUI programs (links, lynx, www-browser) that try to
|
||||
# take over the terminal. That can send SIGHUP to the server process
|
||||
# and cause an immediate exit even though uvicorn bound successfully.
|
||||
# Skip the auto-open attempt on headless systems and let the user
|
||||
# open the URL manually. macOS and Windows are always considered
|
||||
# display-capable.
|
||||
_has_display = (
|
||||
sys.platform != "linux"
|
||||
or bool(os.environ.get("DISPLAY"))
|
||||
or bool(os.environ.get("WAYLAND_DISPLAY"))
|
||||
)
|
||||
|
||||
threading.Thread(target=_open, daemon=True).start()
|
||||
if _has_display:
|
||||
def _open():
|
||||
try:
|
||||
time.sleep(1.0)
|
||||
webbrowser.open(f"http://{host}:{port}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
threading.Thread(target=_open, daemon=True).start()
|
||||
else:
|
||||
_log.debug(
|
||||
"Skipping browser-open: no DISPLAY or WAYLAND_DISPLAY detected "
|
||||
"(headless Linux). Pass --no-open to suppress this detection."
|
||||
)
|
||||
|
||||
print(f" Hermes Web UI → http://{host}:{port}")
|
||||
uvicorn.run(app, host=host, port=port, log_level="warning")
|
||||
|
|
|
|||
14
plugins/example-dashboard/dashboard/manifest.json
Normal file
14
plugins/example-dashboard/dashboard/manifest.json
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"name": "example",
|
||||
"label": "Example",
|
||||
"description": "Example dashboard plugin — used by test suite for auth coverage",
|
||||
"icon": "Sparkles",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/example",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"slots": [],
|
||||
"entry": "dist/index.js",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
17
plugins/example-dashboard/dashboard/plugin_api.py
Normal file
17
plugins/example-dashboard/dashboard/plugin_api.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
"""Example dashboard plugin — backend API routes.
|
||||
|
||||
Mounted at /api/plugins/example/ by the dashboard plugin system.
|
||||
|
||||
This minimal plugin exists so the test suite has a stable, side-effect-free
|
||||
GET endpoint to verify that plugin API routes work with auth.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/hello")
async def hello():
    """Return a fixed greeting payload showing that plugin API routes work."""
    greeting = {
        "message": "Hello from the example plugin!",
        "plugin": "example",
        "version": "1.0.0",
    }
    return greeting
|
||||
|
|
@ -875,6 +875,13 @@ class HindsightMemoryProvider(MemoryProvider):
|
|||
"Hindsight local runtime is unavailable"
|
||||
+ (f": {reason}" if reason else "")
|
||||
)
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("memory.hindsight", prompt=False)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as _e:
|
||||
raise ImportError(str(_e))
|
||||
from hindsight import HindsightEmbedded
|
||||
HindsightEmbedded.__del__ = lambda self: None
|
||||
llm_provider = self._config.get("llm_provider", "")
|
||||
|
|
|
|||
|
|
@ -687,12 +687,28 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
"For local instances, set HONCHO_BASE_URL instead."
|
||||
)
|
||||
|
||||
# Lazy-install the honcho SDK on demand. ensure() honors
|
||||
# security.allow_lazy_installs (default true). On failure we surface
|
||||
# the original ImportError-shape message so existing callers still get
|
||||
# the "go run hermes honcho setup" hint they used to.
|
||||
try:
|
||||
from tools.lazy_deps import FeatureUnavailable, ensure as _lazy_ensure
|
||||
_lazy_ensure("memory.honcho", prompt=False)
|
||||
except ImportError:
|
||||
# lazy_deps module missing — fall through to the raw import below.
|
||||
pass
|
||||
except Exception:
|
||||
# FeatureUnavailable or unexpected error. Don't crash here; let the
|
||||
# actual import attempt produce the canonical error message.
|
||||
pass
|
||||
|
||||
try:
|
||||
from honcho import Honcho
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"honcho-ai is required for Honcho integration. "
|
||||
"Install it with: pip install honcho-ai"
|
||||
"Install it with: pip install honcho-ai "
|
||||
"(or run `hermes honcho setup` to configure)."
|
||||
)
|
||||
|
||||
# Allow config.yaml honcho.base_url to override the SDK's environment
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ xiaomi = ProviderProfile(
|
|||
aliases=("mimo", "xiaomi-mimo"),
|
||||
env_vars=("XIAOMI_API_KEY",),
|
||||
base_url="https://api.xiaomimimo.com/v1",
|
||||
supports_health_check=False, # /v1/models returns 401 even with valid key
|
||||
)
|
||||
|
||||
register_provider(xiaomi)
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ class ProviderProfile:
|
|||
base_url: str = ""
|
||||
models_url: str = "" # explicit models endpoint; falls back to {base_url}/models
|
||||
auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
|
||||
supports_health_check: bool = True # False → doctor skips /models probe for this provider
|
||||
|
||||
# ── Model catalog ─────────────────────────────────────────
|
||||
# fallback_models: curated list shown in /model picker when live fetch fails.
|
||||
|
|
|
|||
213
pyproject.toml
213
pyproject.toml
|
|
@ -11,44 +11,55 @@ requires-python = ">=3.11"
|
|||
authors = [{ name = "Nous Research" }]
|
||||
license = { text = "MIT" }
|
||||
dependencies = [
|
||||
# Core — pinned to known-good ranges to limit supply chain attack surface
|
||||
"openai>=2.21.0,<3",
|
||||
"anthropic>=0.39.0,<1",
|
||||
"python-dotenv>=1.2.1,<2",
|
||||
"fire>=0.7.1,<1",
|
||||
"httpx[socks]>=0.28.1,<1",
|
||||
"rich>=14.3.3,<15",
|
||||
"tenacity>=9.1.4,<10",
|
||||
"pyyaml>=6.0.2,<7",
|
||||
"ruamel.yaml>=0.18.16,<0.19",
|
||||
"requests>=2.33.0,<3", # CVE-2026-25645
|
||||
"jinja2>=3.1.5,<4",
|
||||
"pydantic>=2.12.5,<3",
|
||||
# Core — every direct dep is exact-pinned to ==X.Y.Z (no ranges).
|
||||
# Rationale: ranges allow PyPI to ship a fresh version of a transitive
|
||||
# at any time without a code review on our side. Exact pins mean the
|
||||
# only way a new package version reaches a user is via an intentional
|
||||
# update on our end (bump the pin in this file, regenerate uv.lock).
|
||||
# This was tightened on 2026-05-12 in response to the Mini Shai-Hulud
|
||||
# worm hitting mistralai 2.4.6 on PyPI; if that release had been
|
||||
# captured by `mistralai>=2.3.0,<3` rather than an exact pin, every
|
||||
# install in the hours before the quarantine would have pulled it.
|
||||
#
|
||||
# When updating: bump the version below AND regenerate uv.lock with
|
||||
# `uv lock` so the transitive resolution stays consistent. Don't
|
||||
# introduce ranges back without a written justification.
|
||||
#
|
||||
# Scope rule: only packages used by EVERY hermes session belong here.
|
||||
# Anything that's provider-specific (`anthropic`, `firecrawl-py`,
|
||||
# `exa-py`, `fal-client`, `edge-tts`, `parallel-web`) belongs in an
|
||||
# extra and gets lazy-installed via `tools/lazy_deps.py` when the
|
||||
# user picks that backend. Smaller `dependencies` = smaller blast
|
||||
# radius for the next supply-chain attack.
|
||||
"openai==2.24.0",
|
||||
"python-dotenv==1.2.1",
|
||||
"fire==0.7.1",
|
||||
"httpx[socks]==0.28.1",
|
||||
"rich==14.3.3",
|
||||
"tenacity==9.1.4",
|
||||
"pyyaml==6.0.3",
|
||||
"ruamel.yaml==0.18.17",
|
||||
"requests==2.33.0", # CVE-2026-25645
|
||||
"jinja2==3.1.6",
|
||||
"pydantic==2.12.5",
|
||||
# Interactive CLI (prompt_toolkit is used directly by cli.py)
|
||||
"prompt_toolkit>=3.0.52,<4",
|
||||
# Tools
|
||||
"exa-py>=2.9.0,<3",
|
||||
"firecrawl-py>=4.16.0,<5",
|
||||
"parallel-web>=0.4.2,<1",
|
||||
"fal-client>=0.13.1,<1",
|
||||
"prompt_toolkit==3.0.52",
|
||||
# Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
|
||||
"croniter>=6.0.0,<7",
|
||||
# Text-to-speech (Edge TTS is free, no API key needed)
|
||||
"edge-tts>=7.2.7,<8",
|
||||
"croniter==6.0.0",
|
||||
# Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
|
||||
"PyJWT[crypto]>=2.12.0,<3", # CVE-2026-32597
|
||||
"PyJWT[crypto]==2.12.1", # CVE-2026-32597
|
||||
# Windows has no IANA tzdata shipped with the OS, so Python's ``zoneinfo``
|
||||
# (PEP 615) raises ``ZoneInfoNotFoundError`` for every non-UTC timezone
|
||||
# out of the box. ``tzdata`` ships the Olson database as a data package
|
||||
# Python resolves automatically. No-op on Linux/macOS (which have
|
||||
# /usr/share/zoneinfo). Credits: PR #13182 (@sprmn24).
|
||||
"tzdata>=2023.3; sys_platform == 'win32'",
|
||||
"tzdata==2025.3; sys_platform == 'win32'",
|
||||
# Cross-platform process / PID management. `psutil` is the canonical
|
||||
# answer for "is this PID alive" and process-tree walking across Linux,
|
||||
# macOS and Windows. It replaces POSIX-only idioms like `os.kill(pid, 0)`
|
||||
# (which is a silent killer on Windows — see CONTRIBUTING.md) and
|
||||
# `os.killpg` (which doesn't exist on Windows).
|
||||
"psutil>=5.9.0,<8",
|
||||
"psutil==7.2.2",
|
||||
"fastapi>=0.104.0,<1",
|
||||
"uvicorn[standard]>=0.24.0,<1",
|
||||
"ptyprocess>=0.7.0,<1; sys_platform != 'win32'",
|
||||
|
|
@ -56,45 +67,78 @@ dependencies = [
|
|||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
modal = ["modal>=1.0.0,<2"]
|
||||
daytona = ["daytona>=0.148.0,<1"]
|
||||
vercel = ["vercel>=0.5.7,<0.6.0"]
|
||||
hindsight = ["hindsight-client>=0.4.22"]
|
||||
dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "pytest-split>=0.9,<1", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
|
||||
messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
|
||||
# Native Anthropic provider — only needed when provider=anthropic (not via
|
||||
# OpenRouter or other aggregators).
|
||||
anthropic = ["anthropic==0.86.0"]
|
||||
# Web search backends — each only loaded when the user picks it as their
|
||||
# search provider (configured via `hermes tools` or config.yaml).
|
||||
exa = ["exa-py==2.10.2"]
|
||||
firecrawl = ["firecrawl-py==4.17.0"]
|
||||
parallel-web = ["parallel-web==0.4.2"]
|
||||
# Image generation backends
|
||||
fal = ["fal-client==0.13.1"]
|
||||
# Edge TTS — default TTS provider but still optional (users can pick
|
||||
# ElevenLabs / OpenAI / MiniMax instead).
|
||||
edge-tts = ["edge-tts==7.2.7"]
|
||||
modal = ["modal==1.3.4"]
|
||||
daytona = ["daytona==0.155.0"]
|
||||
vercel = ["vercel==0.5.7"]
|
||||
hindsight = ["hindsight-client==0.6.1"]
|
||||
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
|
||||
messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
|
||||
cron = [] # croniter is now a core dependency; this extra kept for back-compat
|
||||
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
||||
matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29", "aiohttp-socks>=0.10,<1"]
|
||||
cli = ["simple-term-menu>=1.0,<2"]
|
||||
tts-premium = ["elevenlabs>=1.0,<2"]
|
||||
slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1"]
|
||||
matrix = ["mautrix[encryption]==0.21.0", "Markdown==3.10.2", "aiosqlite==0.22.1", "asyncpg==0.31.0", "aiohttp-socks==0.11.0"]
|
||||
cli = ["simple-term-menu==1.6.6"]
|
||||
tts-premium = ["elevenlabs==1.59.0"]
|
||||
voice = [
|
||||
# Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
|
||||
# so keep it out of the base install for source-build packagers like Homebrew.
|
||||
"faster-whisper>=1.0.0,<2",
|
||||
"sounddevice>=0.4.6,<1",
|
||||
"numpy>=1.24.0,<3",
|
||||
"faster-whisper==1.2.1",
|
||||
"sounddevice==0.5.5",
|
||||
"numpy==2.4.3",
|
||||
]
|
||||
pty = [
    # Kept as a no-op back-compat alias — `ptyprocess` and `pywinpty` are now
    # in the main `dependencies` list (with the same platform markers), so
    # any existing `pip install hermes-agent[pty]` invocations resolve cleanly
    # without pulling in extra packages.
]
|
||||
honcho = ["honcho-ai>=2.0.1,<3"]
|
||||
mcp = ["mcp>=1.2.0,<2"]
|
||||
homeassistant = ["aiohttp>=3.9.0,<4"]
|
||||
sms = ["aiohttp>=3.9.0,<4"]
|
||||
honcho = ["honcho-ai==2.0.1"]
|
||||
mcp = ["mcp==1.26.0"]
|
||||
homeassistant = ["aiohttp==3.13.3"]
|
||||
sms = ["aiohttp==3.13.3"]
|
||||
# Computer use — macOS background desktop control via cua-driver (MCP stdio).
|
||||
# The cua-driver binary itself is installed via `hermes tools` post-setup
|
||||
# (curl install script); this extra just pins the MCP client used to talk
|
||||
# to it, which is already provided by the `mcp` extra.
|
||||
computer-use = ["mcp>=1.2.0,<2"]
|
||||
acp = ["agent-client-protocol>=0.9.0,<1.0"]
|
||||
mistral = ["mistralai>=2.3.0,<3"]
|
||||
bedrock = ["boto3>=1.35.0,<2"]
|
||||
computer-use = ["mcp==1.26.0"]
|
||||
acp = ["agent-client-protocol==0.9.0"]
|
||||
# mistral: extra REMOVED 2026-05-12 — `mistralai` PyPI project quarantined
|
||||
# after malicious 2.4.6 release (Mini Shai-Hulud worm). Every version of
|
||||
# `mistralai` returns 404 on PyPI right now, so any pin we'd write is
|
||||
# unresolvable, which breaks `uv lock --check` in CI.
|
||||
#
|
||||
# To restore once PyPI un-quarantines:
|
||||
# 1. Verify the new release is clean (read the changelog, check Socket
|
||||
# advisory page, confirm no malicious code review findings).
|
||||
# 2. Add back: mistral = ["mistralai==<verified-version>"]
|
||||
# 3. Re-enable Mistral in:
|
||||
# - tools/lazy_deps.py (LAZY_DEPS["tts.mistral"], LAZY_DEPS["stt.mistral"])
|
||||
# - hermes_cli/tools_config.py (un-hide from provider picker)
|
||||
# - hermes_cli/web_server.py (re-add to dashboard STT options)
|
||||
# - tools/transcription_tools.py / tools/tts_tool.py (drop disabled stubs)
|
||||
# 4. Run `uv lock` to regenerate transitives.
|
||||
# 5. Optionally re-add to [all] only after a few days of clean operation.
|
||||
bedrock = ["boto3==1.42.89"]
|
||||
termux = [
|
||||
# Baseline Android / Termux path for reliable fresh installs.
|
||||
"python-telegram-bot[webhooks]>=22.6,<23",
|
||||
"python-telegram-bot[webhooks]==22.6",
|
||||
"hermes-agent[cron]",
|
||||
"hermes-agent[cli]",
|
||||
"hermes-agent[pty]",
|
||||
|
|
@ -103,80 +147,75 @@ termux = [
|
|||
"hermes-agent[acp]",
|
||||
]
|
||||
termux-all = [
|
||||
# Best-effort "install all" profile for Termux: include broad extras that
|
||||
# are known to resolve on Android, while intentionally excluding extras that
|
||||
# currently hard-fail from missing/broken Android wheels/toolchains.
|
||||
#
|
||||
# Excluded for now:
|
||||
# - matrix (mautrix[encryption] -> python-olm build failures on Termux)
|
||||
# - voice (faster-whisper chain requires ctranslate2/av builds not packaged)
|
||||
# Best-effort "install all" profile for Termux. Same policy as [all]:
|
||||
# only includes extras that aren't covered by `tools/lazy_deps.py`.
|
||||
# Backends like telegram/slack/dingtalk/feishu/honcho lazy-install at
|
||||
# first use, so they're no longer eager-installed here.
|
||||
"hermes-agent[termux]",
|
||||
"hermes-agent[messaging]",
|
||||
"hermes-agent[slack]",
|
||||
"hermes-agent[tts-premium]",
|
||||
"hermes-agent[dingtalk]",
|
||||
"hermes-agent[feishu]",
|
||||
"hermes-agent[google]",
|
||||
"hermes-agent[mistral]",
|
||||
"hermes-agent[bedrock]",
|
||||
"hermes-agent[homeassistant]",
|
||||
"hermes-agent[sms]",
|
||||
"hermes-agent[web]",
|
||||
]
|
||||
dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
|
||||
feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
|
||||
dingtalk = ["dingtalk-stream==0.24.3", "alibabacloud-dingtalk==2.2.42", "qrcode==7.4.2"]
|
||||
feishu = ["lark-oapi==1.5.3", "qrcode==7.4.2"]
|
||||
google = [
|
||||
# Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
|
||||
# Sheets, Docs). Declared here so packagers (Nix, Homebrew) ship them with
|
||||
# the [all] extra and users don't hit runtime `pip install` paths that fail
|
||||
# in environments without pip (e.g. Nix-managed Python).
|
||||
"google-api-python-client>=2.100,<3",
|
||||
"google-auth-oauthlib>=1.0,<2",
|
||||
"google-auth-httplib2>=0.2,<1",
|
||||
"google-api-python-client==2.194.0",
|
||||
"google-auth-oauthlib==1.3.1",
|
||||
"google-auth-httplib2==0.3.1",
|
||||
]
|
||||
youtube = [
|
||||
# Required by skills/media/youtube-content and
|
||||
# optional-skills/productivity/memento-flashcards (youtube_quiz.py).
|
||||
# Without this declaration uv sync omits the package and both skills fail
|
||||
# at first invocation with ModuleNotFoundError (issue #22243).
|
||||
"youtube-transcript-api>=1.2.0",
|
||||
"youtube-transcript-api==1.2.4",
|
||||
]
|
||||
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
|
||||
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
|
||||
web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"]
|
||||
rl = [
|
||||
"atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
|
||||
"tinker @ git+https://github.com/thinking-machines-lab/tinker.git@30517b667f18a3dfb7ef33fb56cf686d5820ba2b",
|
||||
"fastapi>=0.104.0,<1",
|
||||
"uvicorn[standard]>=0.24.0,<1",
|
||||
"wandb>=0.15.0,<1",
|
||||
"fastapi==0.133.1",
|
||||
"uvicorn[standard]==0.41.0",
|
||||
"wandb==0.25.1",
|
||||
]
|
||||
yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
|
||||
all = [
|
||||
"hermes-agent[modal]",
|
||||
"hermes-agent[daytona]",
|
||||
"hermes-agent[vercel]",
|
||||
"hermes-agent[messaging]",
|
||||
# matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
|
||||
# modern macOS (archived libolm, C++ errors with Clang 21+). On Linux the
|
||||
# [matrix] extra's own marker pulls in the [e2e] variant automatically.
|
||||
"hermes-agent[matrix]; sys_platform == 'linux'",
|
||||
# Policy (2026-05-12): `[all]` includes only extras that genuinely
|
||||
# CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
|
||||
# session can use, things needed before the agent loop is alive
|
||||
# (terminal/CLI), and skill deps that packagers (Nix, AUR, Homebrew)
|
||||
# need in the wheel. Anything an opt-in backend (provider, search,
|
||||
# TTS, image, memory, messaging platform, terminal sandbox) needs
|
||||
# MUST live exclusively in `LAZY_DEPS` and resolve at first use —
|
||||
# otherwise one quarantined PyPI release breaks every fresh install.
|
||||
#
|
||||
# Removed from [all] on 2026-05-12 (covered by lazy-install):
|
||||
# anthropic, exa, firecrawl, parallel-web, fal, edge-tts,
|
||||
# modal, daytona, vercel, messaging (telegram/discord/slack),
|
||||
# matrix, slack, honcho, voice (faster-whisper),
|
||||
# dingtalk, feishu, bedrock, tts-premium (elevenlabs)
|
||||
#
|
||||
# Why: the matrix extra in particular pulls `mautrix[encryption]`
|
||||
# which depends on `python-olm`. python-olm has Linux-only wheels and
|
||||
# no native build path on Windows or modern macOS. With matrix in
|
||||
# [all], `uv sync --locked` on Windows tried to build it from sdist
|
||||
# and failed on `make`. Lazy-install routes that build to first use,
|
||||
# where the user is expected to have a toolchain available.
|
||||
"hermes-agent[cron]",
|
||||
"hermes-agent[cli]",
|
||||
"hermes-agent[dev]",
|
||||
"hermes-agent[tts-premium]",
|
||||
"hermes-agent[slack]",
|
||||
"hermes-agent[pty]",
|
||||
"hermes-agent[honcho]",
|
||||
"hermes-agent[mcp]",
|
||||
"hermes-agent[homeassistant]",
|
||||
"hermes-agent[sms]",
|
||||
"hermes-agent[acp]",
|
||||
"hermes-agent[voice]",
|
||||
"hermes-agent[dingtalk]",
|
||||
"hermes-agent[feishu]",
|
||||
"hermes-agent[google]",
|
||||
"hermes-agent[mistral]",
|
||||
"hermes-agent[bedrock]",
|
||||
"hermes-agent[web]",
|
||||
"hermes-agent[youtube]",
|
||||
]
|
||||
|
|
|
|||
219
run_agent.py
219
run_agent.py
|
|
@ -347,6 +347,10 @@ _PARALLEL_SAFE_TOOLS = frozenset({
|
|||
# File tools can run concurrently when they target independent paths.
|
||||
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
|
||||
|
||||
# Tools that mutate files on disk. Used by the per-turn verifier that
|
||||
# surfaces silently-failed file edits so the model can't over-claim success.
|
||||
_FILE_MUTATING_TOOLS = frozenset({"write_file", "patch"})
|
||||
|
||||
# Maximum number of concurrent worker threads for parallel tool execution.
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
|
|
@ -524,6 +528,68 @@ def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
|
|||
value["text_summary"] = value["text_summary"] + hint
|
||||
|
||||
|
||||
def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
    """List the file paths targeted by a ``write_file`` or ``patch`` call.

    ``write_file`` and ``patch`` in replace mode target ``args["path"]``.
    ``patch`` in V4A patch mode may touch several files, so the patch text is
    scanned for ``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:``
    headers and every named path is returned in order of appearance.

    Returns an empty list for tools that are not file-mutating, for unknown
    patch modes, and when no path can be determined.
    """
    if tool_name not in _FILE_MUTATING_TOOLS:
        return []

    # write_file is always single-path; patch falls back to replace mode
    # when no mode is given.
    if tool_name == "write_file":
        mode = "replace"
    else:
        mode = args.get("mode") or "replace"

    if mode == "replace":
        target = args.get("path")
        return [str(target)] if target else []

    if mode != "patch":
        return []

    patch_text = args.get("patch") or ""
    if not isinstance(patch_text, str) or not patch_text:
        return []

    import re as _re

    header = _re.compile(
        r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
        _re.MULTILINE,
    )
    candidates = (found.group(1).strip() for found in header.finditer(patch_text))
    return [candidate for candidate in candidates if candidate]
|
||||
|
||||
|
||||
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
|
||||
"""Pull a one-line error summary out of a tool result for footer display."""
|
||||
text = _multimodal_text_summary(result) if result is not None else ""
|
||||
if not isinstance(text, str):
|
||||
try:
|
||||
text = str(text)
|
||||
except Exception:
|
||||
return ""
|
||||
# Try to parse JSON and pull the ``error`` field — tool handlers return
|
||||
# ``{"success": false, "error": "..."}``; raw string wins if parse fails.
|
||||
stripped = text.strip()
|
||||
if stripped.startswith("{"):
|
||||
try:
|
||||
import json as _json
|
||||
data = _json.loads(stripped)
|
||||
if isinstance(data, dict) and isinstance(data.get("error"), str):
|
||||
text = data["error"]
|
||||
except Exception:
|
||||
pass
|
||||
# Collapse whitespace, trim to max_len.
|
||||
text = " ".join(text.split())
|
||||
if len(text) > max_len:
|
||||
text = text[: max_len - 1] + "…"
|
||||
return text
|
||||
|
||||
|
||||
def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Strip image blobs from a message for trajectory saving.
|
||||
|
||||
|
|
@ -5346,6 +5412,103 @@ class AIAgent:
|
|||
self._pending_steer = None
|
||||
return text
|
||||
|
||||
def _record_file_mutation_result(
    self,
    tool_name: str,
    args: Dict[str, Any],
    result: Any,
    is_error: bool,
) -> None:
    """Track a ``write_file`` / ``patch`` outcome for the turn-end verifier.

    A failure stores ``{"tool", "error_preview"}`` under each targeted path.
    A success clears any failure previously stored for those paths, because
    the model recovered within the turn. Nothing happens for non-mutating
    tools, when the per-turn ``_turn_failed_file_mutations`` dict is not set
    on the instance, or when no target path can be extracted.
    """
    if tool_name not in _FILE_MUTATING_TOOLS:
        return
    failures = getattr(self, "_turn_failed_file_mutations", None)
    if failures is None:
        return
    paths = _extract_file_mutation_targets(tool_name, args)
    if not paths:
        return

    if not is_error:
        for path in paths:
            failures.pop(path, None)
        return

    preview = _extract_error_preview(result)
    for path in paths:
        # Keep the FIRST error seen for a path: a later failure with a
        # different message must not overwrite the original.
        failures.setdefault(
            path,
            {"tool": tool_name, "error_preview": preview},
        )
|
||||
|
||||
def _file_mutation_verifier_enabled(self) -> bool:
    """Return whether the per-turn file-mutation verifier footer is on.

    Resolution order:

    1. ``HERMES_FILE_MUTATION_VERIFIER`` env var, when set. Any value other
       than ``0`` / ``false`` / ``no`` / ``off`` (case-insensitive,
       surrounding whitespace ignored) enables the verifier.
    2. ``display.file_mutation_verifier`` from the persisted config loaded
       via ``hermes_cli.config.load_config``. String values are read with
       the same off-tokens as the env var, so a quoted ``"false"`` disables
       the verifier instead of being truthy; an empty string still counts
       as off. Non-string values are coerced with ``bool()``.
    3. Default ``True``.

    Exposed as a method so tests can patch a single seam without reaching
    into the private ``_turn_failed_file_mutations`` state dict.

    Returns:
        ``True`` when the footer should be appended. Never raises; any
        unexpected failure falls back to the default.
    """
    off_tokens = ("0", "false", "no", "off")
    try:
        import os as _os

        env = _os.environ.get("HERMES_FILE_MUTATION_VERIFIER")
        if env is not None:
            return env.strip().lower() not in off_tokens

        # Read from the persisted config so gateway and CLI share the same
        # setting. Imported lazily to avoid a startup-time import cycle.
        try:
            from hermes_cli.config import load_config as _load_config

            _cfg = _load_config() or {}
        except Exception:
            _cfg = {}

        _display = _cfg.get("display") if isinstance(_cfg, dict) else None
        if isinstance(_display, dict) and "file_mutation_verifier" in _display:
            _value = _display["file_mutation_verifier"]
            if isinstance(_value, str):
                # bool("false") is True; interpret strings by content.
                _token = _value.strip().lower()
                return bool(_token) and _token not in off_tokens
            return bool(_value)
    except Exception:
        # Deliberate best-effort: a broken config must not disable the
        # safety footer or break the turn.
        pass
    return True  # safe default: verifier on
|
||||
|
||||
@staticmethod
def _format_file_mutation_failure_footer(
    failed: Dict[str, Dict[str, Any]],
    max_shown: int = 10,
) -> str:
    """Render the per-turn failed-mutation dict as a user-facing footer.

    Args:
        failed: Mapping of path -> ``{"tool": ..., "error_preview": ...}``
            for every file mutation that failed and was not later
            superseded by a success. Iterated in insertion order.
        max_shown: Maximum number of paths listed individually (default
            10). Negative values are treated as 0.

    Returns:
        A multi-line string: one warning header, one bullet per listed
        path (with its error preview, or ``failed`` when there is none),
        and a trailing ``… and N more`` bullet when paths were omitted.
        Returns an empty string when ``failed`` is empty so callers can
        concatenate unconditionally.
    """
    if not failed:
        return ""

    from itertools import islice

    limit = max(0, max_shown)
    lines = [
        "⚠️ File-mutation verifier: "
        f"{len(failed)} file(s) were NOT modified this turn despite any "
        "wording above that may suggest otherwise. Run `git status` or "
        "`read_file` to confirm."
    ]
    for path, info in islice(failed.items(), limit):
        preview = (info.get("error_preview") or "").strip()
        tool = info.get("tool") or "patch"
        # Fall back to a generic marker when no preview was captured.
        detail = preview or "failed"
        lines.append(f" • {path} — [{tool}] {detail}")

    remaining = len(failed) - min(limit, len(failed))
    if remaining > 0:
        lines.append(f" • … and {remaining} more")
    return "\n".join(lines)
|
||||
|
||||
def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
|
||||
"""Append any pending /steer text to the last tool result in this turn.
|
||||
|
||||
|
|
@ -10872,6 +11035,17 @@ class AIAgent:
|
|||
result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier.
|
||||
# `blocked` calls never actually ran — don't let a guardrail
|
||||
# block count as either a failure or a success.
|
||||
if not blocked:
    try:
        self._record_file_mutation_result(
            function_name, function_args, function_result, is_error,
        )
    except Exception as _ver_err:
        # Best-effort bookkeeping: a verifier failure must never break
        # tool dispatch. Use the module logger (not the root logger) so
        # the message follows the same logger configuration as the
        # surrounding tool logs.
        logger.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not blocked and self.tool_progress_callback:
|
||||
try:
|
||||
self.tool_progress_callback(
|
||||
|
|
@ -11298,6 +11472,18 @@ class AIAgent:
|
|||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier. See
|
||||
# the concurrent path for the rationale; both paths must feed
|
||||
# the same state so the footer reflects every tool call in the
|
||||
# turn, not just the parallel ones.
|
||||
if not _execution_blocked:
    try:
        self._record_file_mutation_result(
            function_name, function_args, function_result, _is_error_result,
        )
    except Exception as _ver_err:
        # Best-effort bookkeeping: a verifier failure must never break
        # tool dispatch. Use the module logger (not the root logger) so
        # the message follows the same logger configuration as the
        # surrounding tool logs.
        logger.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not _execution_blocked and self.tool_progress_callback:
|
||||
try:
|
||||
self.tool_progress_callback(
|
||||
|
|
@ -11995,6 +12181,14 @@ class AIAgent:
|
|||
truncated_response_prefix = ""
|
||||
compression_attempts = 0
|
||||
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
|
||||
|
||||
# Per-turn file-mutation verifier state. Keyed by resolved path;
|
||||
# each failed ``write_file`` / ``patch`` call records the error
|
||||
# preview. Later successful writes to the same path remove the
|
||||
# entry (the model recovered). At end-of-turn, any entries still
|
||||
# present are surfaced in an advisory footer so the model cannot
|
||||
# over-claim success while the file is actually unchanged on disk.
|
||||
self._turn_failed_file_mutations: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# Record the execution thread so interrupt()/clear_interrupt() can
|
||||
# scope the tool-level interrupt signal to THIS agent's thread only.
|
||||
|
|
@ -15310,6 +15504,31 @@ class AIAgent:
|
|||
else:
|
||||
logger.info(_diag_msg, *_diag_args)
|
||||
|
||||
# File-mutation verifier footer.
|
||||
# If one or more ``write_file`` / ``patch`` calls failed during this
|
||||
# turn and were never superseded by a successful write to the same
|
||||
# path, append an advisory footer to the assistant response. This
|
||||
# catches the specific case — reported by Ben Eng (#15524-adjacent)
|
||||
# — where a model issues a batch of parallel patches, half of them
|
||||
# fail with "Could not find old_string", and the model summarises
|
||||
# the turn claiming every file was edited. The user then has to
|
||||
# manually run ``git status`` to catch the lie. With this footer
# any still-failed mutation is surfaced at the end of the turn, so an
# over-claim by the model cannot reach the user uncontradicted.
|
||||
#
|
||||
# Gate: only applied when a real text response exists for this
|
||||
# turn and the user didn't interrupt. Empty/interrupted turns
|
||||
# already have other surface text that shouldn't be augmented.
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
_failed = getattr(self, "_turn_failed_file_mutations", None) or {}
|
||||
if _failed and self._file_mutation_verifier_enabled():
|
||||
footer = self._format_file_mutation_failure_footer(_failed)
|
||||
if footer:
|
||||
final_response = final_response.rstrip() + "\n\n" + footer
|
||||
except Exception as _ver_err:
|
||||
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
|
||||
|
||||
# Plugin hook: transform_llm_output
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can transform the LLM's output text before it's returned.
|
||||
|
|
|
|||
|
|
@ -793,30 +793,100 @@ function Install-Dependencies {
|
|||
# Tell uv to install into our venv (no activation needed)
|
||||
$env:VIRTUAL_ENV = "$InstallDir\venv"
|
||||
}
|
||||
|
||||
# Install main package. Tiered fallback so a single flaky git+https dep
|
||||
# (atroposlib / tinker in the [rl] extra) doesn't silently drop
|
||||
# dashboard/MCP/cron/messaging extras. Each tier's stdout/stderr is
|
||||
|
||||
# Hash-verified install (Tier 0) — when uv.lock is present, prefer
|
||||
# `uv sync --locked`. The lockfile records SHA256 hashes for every
|
||||
# transitive dependency, so a compromised transitive (different hash
|
||||
# than what we shipped) is REJECTED by the resolver. This is the
|
||||
# *only* path that protects against the "direct dep is fine, but the
|
||||
# dep's dep got worm-poisoned overnight" failure mode. The
|
||||
# `uv pip install` tiers below re-resolve transitives fresh from PyPI
|
||||
# without any hash verification — they exist to keep installs working
|
||||
# when the lockfile is stale, missing, or out-of-sync with the
|
||||
# current extras spec, NOT because they're equivalent in posture.
|
||||
if (Test-Path "uv.lock") {
|
||||
Write-Info "Trying tier: hash-verified (uv.lock) ..."
|
||||
# Critical flag choice: `--extra all`, NOT `--all-extras`.
|
||||
# --all-extras = every [project.optional-dependencies] key,
|
||||
# bypassing the curated [all] extra. On Windows
|
||||
# that means [matrix] -> python-olm (no wheel,
|
||||
# needs `make` to build from sdist) and the
|
||||
# install fails.
|
||||
# --extra all = just the [all] extra's contents (curated).
|
||||
& $UvCmd sync --extra all --locked
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
Write-Success "Main package installed (hash-verified via uv.lock)"
|
||||
$script:InstalledTier = "hash-verified (uv.lock)"
|
||||
# Skip the rest of the tiered cascade — we already have a
|
||||
# complete, hash-verified install.
|
||||
$skipPipFallback = $true
|
||||
} else {
|
||||
Write-Warn "uv.lock sync failed (lockfile may be stale), falling back to PyPI resolve..."
|
||||
$skipPipFallback = $false
|
||||
}
|
||||
} else {
|
||||
Write-Info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
|
||||
$skipPipFallback = $false
|
||||
}
|
||||
|
||||
# Install main package. Tiered fallback so a single flaky transitive
|
||||
# doesn't silently drop everything. Each tier's stdout/stderr is
|
||||
# preserved — no Out-Null swallowing — so the user can see what failed.
|
||||
#
|
||||
# Tier 1: [all] — everything, including RL git+https deps (best case).
|
||||
# Tier 2: [core-extras] synthesised locally — all PyPI-only extras we
|
||||
# ship (web, mcp, cron, cli, voice, messaging, slack, dev, acp,
|
||||
# pty, homeassistant, sms, tts-premium, honcho, google, mistral,
|
||||
# bedrock, dingtalk, feishu, modal, daytona, vercel). Drops [rl]
|
||||
# and [matrix] (linux-only) which are the usual failure culprits.
|
||||
# Tier 3: [web,mcp,cron,cli,messaging,dev] — the minimum we strongly
|
||||
# believe a user expects `hermes dashboard` / slash commands /
|
||||
# cron / messaging platforms to work out of the box.
|
||||
# Tier 4: bare `.` — last-resort so at least the core CLI launches.
|
||||
# Tier 1: [all] — the curated extra in pyproject.toml.
|
||||
# Tier 2: [all] minus the currently-broken extras list ($brokenExtras).
|
||||
# Edit $brokenExtras below when something on PyPI breaks; this
|
||||
# lets users keep the rest of [all] when one transitive is
|
||||
# unavailable. The list of [all]'s contents is parsed from
|
||||
# pyproject.toml at runtime — there is NO hand-mirrored copy
|
||||
# to drift out of sync.
|
||||
# Tier 3: bare `.` — last-resort so at least the core CLI launches.
|
||||
|
||||
# Currently-broken extras. Edit this list when an upstream package
|
||||
# gets quarantined / yanked / breaks resolution. Empty means everything
|
||||
# in [all] should be installable; populate with the names of extras
|
||||
# whose deps are temporarily unavailable.
|
||||
$brokenExtras = @()
|
||||
|
||||
# Parse [project.optional-dependencies].all from pyproject.toml.
|
||||
# tomllib is stdlib on Python 3.11+ which the bootstrap guarantees.
|
||||
$pythonExeForParse = if (-not $NoVenv) { "$InstallDir\venv\Scripts\python.exe" } else { (& $UvCmd python find $PythonVersion) }
|
||||
$allExtras = @()
|
||||
if (Test-Path $pythonExeForParse) {
|
||||
$parsed = & $pythonExeForParse -c @"
|
||||
import re, sys, tomllib
|
||||
try:
|
||||
with open('pyproject.toml', 'rb') as fh:
|
||||
data = tomllib.load(fh)
|
||||
specs = data['project']['optional-dependencies']['all']
|
||||
out = []
|
||||
for s in specs:
|
||||
m = re.search(r'hermes-agent\[([\w-]+)\]', s)
|
||||
if m: out.append(m.group(1))
|
||||
print(','.join(out))
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
"@ 2>$null
|
||||
if ($LASTEXITCODE -eq 0 -and $parsed) {
|
||||
$allExtras = $parsed.Trim().Split(',')
|
||||
}
|
||||
}
|
||||
if (-not $allExtras -or $allExtras.Count -eq 0) {
|
||||
Write-Warn "Could not parse [all] from pyproject.toml; Tier 2 will be a no-op."
|
||||
$safeAll = "all"
|
||||
} else {
|
||||
$safeAll = ($allExtras | Where-Object { $brokenExtras -notcontains $_ }) -join ","
|
||||
}
|
||||
$brokenLabel = if ($brokenExtras) { ($brokenExtras -join ", ") } else { "none" }
|
||||
|
||||
$installTiers = @(
|
||||
@{ Name = "all (with RL/matrix extras)"; Spec = ".[all]" },
|
||||
@{ Name = "PyPI-only extras (no git deps)"; Spec = ".[web,mcp,cron,cli,voice,messaging,slack,dev,acp,pty,homeassistant,sms,tts-premium,honcho,google,mistral,bedrock,dingtalk,feishu,modal,daytona,vercel]" },
|
||||
@{ Name = "dashboard + core platforms"; Spec = ".[web,mcp,cron,cli,messaging,dev]" },
|
||||
@{ Name = "all"; Spec = ".[all]" },
|
||||
@{ Name = "all minus known-broken ($brokenLabel)"; Spec = ".[$safeAll]" },
|
||||
@{ Name = "core only (no extras)"; Spec = "." }
|
||||
)
|
||||
$installed = $false
|
||||
foreach ($tier in $installTiers) {
|
||||
$installed = $skipPipFallback
|
||||
if (-not $skipPipFallback) {
|
||||
foreach ($tier in $installTiers) {
|
||||
Write-Info "Trying tier: $($tier.Name) ..."
|
||||
& $UvCmd pip install -e $tier.Spec
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
|
|
@ -826,6 +896,7 @@ function Install-Dependencies {
|
|||
break
|
||||
}
|
||||
Write-Warn "Tier '$($tier.Name)' failed (exit $LASTEXITCODE). Trying next tier..."
|
||||
}
|
||||
}
|
||||
if (-not $installed) {
|
||||
throw "Failed to install hermes-agent package even with no extras. Inspect the uv pip install output above."
|
||||
|
|
|
|||
|
|
@ -366,7 +366,27 @@ install_uv() {
|
|||
|
||||
# Install uv
|
||||
log_info "Installing uv (fast Python package manager)..."
|
||||
if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
|
||||
# Capture installer output so a failure shows the user WHY (network,
|
||||
# glibc mismatch on old distros, missing curl, ~/.local/bin not
|
||||
# writable, disk full, corp proxy / TLS interception, etc.) instead
|
||||
# of the previous "✗ Failed to install uv" with zero diagnostic.
|
||||
#
|
||||
# Two-stage: download the installer, then run it. Piping
|
||||
# `curl | sh` masks curl failures (sh exits 0 on empty stdin)
|
||||
# and conflates network errors with installer errors.
|
||||
local _uv_install_log _uv_installer
|
||||
_uv_install_log="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-install.$$.log")"
|
||||
_uv_installer="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-installer.$$.sh")"
|
||||
if ! curl -LsSf https://astral.sh/uv/install.sh -o "$_uv_installer" 2>"$_uv_install_log"; then
|
||||
log_error "Failed to download uv installer from https://astral.sh/uv/install.sh"
|
||||
log_info "curl output:"
|
||||
sed 's/^/ /' "$_uv_install_log" >&2
|
||||
log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
|
||||
rm -f "$_uv_install_log" "$_uv_installer"
|
||||
exit 1
|
||||
fi
|
||||
if sh "$_uv_installer" >>"$_uv_install_log" 2>&1; then
|
||||
rm -f "$_uv_installer"
|
||||
# uv installs to ~/.local/bin by default
|
||||
if [ -x "$HOME/.local/bin/uv" ]; then
|
||||
UV_CMD="$HOME/.local/bin/uv"
|
||||
|
|
@ -375,15 +395,22 @@ install_uv() {
|
|||
elif command -v uv &> /dev/null; then
|
||||
UV_CMD="uv"
|
||||
else
|
||||
log_error "uv installed but not found on PATH"
|
||||
log_error "uv installer reported success but binary not found on PATH"
|
||||
log_info "Installer output:"
|
||||
sed 's/^/ /' "$_uv_install_log" >&2
|
||||
log_info "Try adding ~/.local/bin to your PATH and re-running"
|
||||
rm -f "$_uv_install_log"
|
||||
exit 1
|
||||
fi
|
||||
rm -f "$_uv_install_log"
|
||||
UV_VERSION=$($UV_CMD --version 2>/dev/null)
|
||||
log_success "uv installed ($UV_VERSION)"
|
||||
else
|
||||
log_error "Failed to install uv"
|
||||
log_info "Installer output:"
|
||||
sed 's/^/ /' "$_uv_install_log" >&2
|
||||
log_info "Install manually: https://docs.astral.sh/uv/getting-started/installation/"
|
||||
rm -f "$_uv_install_log" "$_uv_installer"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
|
@ -1060,20 +1087,151 @@ install_deps() {
|
|||
fi
|
||||
|
||||
# Install the main package in editable mode with all extras.
|
||||
# Try [all] first, fall back to base install if extras have issues.
|
||||
ALL_INSTALL_LOG=$(mktemp)
|
||||
if ! $UV_CMD pip install -e ".[all]" 2>"$ALL_INSTALL_LOG"; then
|
||||
log_warn "Full install (.[all]) failed, trying base install..."
|
||||
log_info "Reason: $(tail -5 "$ALL_INSTALL_LOG" | head -3)"
|
||||
rm -f "$ALL_INSTALL_LOG"
|
||||
if ! $UV_CMD pip install -e "."; then
|
||||
log_error "Package installation failed."
|
||||
log_info "Check that build tools are installed: sudo apt install build-essential python3-dev"
|
||||
log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'"
|
||||
exit 1
|
||||
#
|
||||
# Hash-verified install (Tier 0) — when uv.lock is present, prefer
|
||||
# `uv sync --locked`. The lockfile records SHA256 hashes for every
|
||||
# transitive, so a compromised transitive (different hash than what
|
||||
# we shipped) is REJECTED by the resolver. This is the *only* path
|
||||
# that protects against the "direct dep is fine, but the dep's dep
|
||||
# got worm-poisoned overnight" failure mode. All `uv pip install`
|
||||
# tiers below re-resolve transitives fresh from PyPI without any
|
||||
# hash verification — they exist to keep installs working when the
|
||||
# lockfile is stale, missing, or out-of-sync with the current
|
||||
# extras spec, NOT because they're equivalent in posture.
|
||||
if [ -f "uv.lock" ]; then
|
||||
log_info "Trying tier: hash-verified (uv.lock) ..."
|
||||
log_info "(this resolves + downloads the curated [all] set — first run on a"
|
||||
log_info " fresh venv can take 1-5 minutes; uv prints progress below)"
|
||||
# Stream uv's progress directly to the user instead of swallowing
|
||||
# it with `2>"$(mktemp)"`. Two reasons:
|
||||
# 1. `--extra all --locked` against a fresh venv has to pull
|
||||
# every transitive — silencing stderr makes the install
|
||||
# look frozen for minutes on slow networks. Users see
|
||||
# "Trying tier: hash-verified ..." and assume it's hung.
|
||||
# 2. The previous `2>"$(mktemp)"` substituted the path at
|
||||
# command-build time but never saved it, so on failure the
|
||||
# uv error message was unreachable — the user just got the
|
||||
# generic "lockfile may be stale" warning.
|
||||
#
|
||||
# Critical flag choice: `--extra all`, NOT `--all-extras`.
|
||||
# --all-extras = every [project.optional-dependencies] key.
|
||||
# This bypasses the curated `[all]` extra
|
||||
# entirely and pulls e.g. [matrix] (which
|
||||
# needs python-olm + make on Windows) and
|
||||
# [rl] (git+https deps that fail offline).
|
||||
# --extra all = install just the `[all]` extra's contents.
|
||||
# This respects the curation in pyproject.toml.
|
||||
# uv's own progress UI handles TTY detection and downgrades
|
||||
# gracefully when stdout/stderr aren't terminals.
|
||||
if UV_PROJECT_ENVIRONMENT="$INSTALL_DIR/venv" $UV_CMD sync --extra all --locked; then
|
||||
log_success "Main package installed (hash-verified via uv.lock)"
|
||||
log_success "All dependencies installed"
|
||||
return 0
|
||||
fi
|
||||
log_warn "uv.lock sync failed (see uv output above), falling back to PyPI resolve..."
|
||||
else
|
||||
rm -f "$ALL_INSTALL_LOG"
|
||||
log_info "uv.lock not found — falling back to PyPI resolve (no hash verification)"
|
||||
fi
|
||||
|
||||
# Multi-tier fallback. The point of the tiers is that ONE compromised
|
||||
# PyPI package (a worm-poisoned release that gets quarantined, like
|
||||
# mistralai 2.4.6 in May 2026) shouldn't be able to silently demote a
|
||||
# fresh install all the way down to "core only" — the user should keep
|
||||
# everything else they signed up for.
|
||||
#
|
||||
# Tier 1: [all] — the curated extra in pyproject.toml.
|
||||
# Tier 2: [all] minus the currently-broken extras list (_BROKEN_EXTRAS).
|
||||
# Edit _BROKEN_EXTRAS below when something on PyPI breaks; this
|
||||
# lets users keep the rest of [all] when one transitive is
|
||||
# unavailable. The list of [all]'s contents is parsed from
|
||||
# pyproject.toml at runtime — there is NO hand-mirrored copy
|
||||
# to drift out of sync. If you want to change what [all]
|
||||
# contains, edit pyproject.toml only.
|
||||
# Tier 3: bare `.` — last-resort so at least the core CLI launches.
|
||||
# Skipped tiers like "PyPI-only extras (no git deps)" used to
|
||||
# exist to dodge [rl] / [matrix] git+sdist deps; those are no
|
||||
# longer in [all] post-2026-05-12 lazy-install migration, so
|
||||
# a separate PyPI-only tier had no remaining content.
|
||||
local _BROKEN_EXTRAS=() # populate when an extra becomes unresolvable
|
||||
|
||||
# Parse [project.optional-dependencies].all from pyproject.toml.
|
||||
# tomllib is stdlib on Python 3.11+ which uv's bootstrap guarantees.
|
||||
# If parsing fails, Tier 2 falls back to plain ".[all]" (there is no hand-maintained list) — defensive only.
|
||||
local _ALL_EXTRAS_CSV
|
||||
_ALL_EXTRAS_CSV="$(
|
||||
"$PYTHON_PATH" - <<'PY' 2>/dev/null
|
||||
import re, sys, tomllib
|
||||
try:
|
||||
with open("pyproject.toml", "rb") as fh:
|
||||
data = tomllib.load(fh)
|
||||
specs = data["project"]["optional-dependencies"]["all"]
|
||||
extras = []
|
||||
for s in specs:
|
||||
m = re.search(r"hermes-agent\[([\w-]+)\]", s)
|
||||
if m:
|
||||
extras.append(m.group(1))
|
||||
print(",".join(extras))
|
||||
except Exception as e:
|
||||
print("", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
PY
|
||||
)"
|
||||
if [ -z "$_ALL_EXTRAS_CSV" ]; then
|
||||
log_warn "Could not parse [all] from pyproject.toml; falling back to .[all] only."
|
||||
_ALL_EXTRAS_CSV=""
|
||||
fi
|
||||
|
||||
# Build "[all] minus broken" spec by filtering the parsed list.
|
||||
local _SAFE_SPEC=".[all]"
|
||||
if [ -n "$_ALL_EXTRAS_CSV" ] && [ "${#_BROKEN_EXTRAS[@]}" -gt 0 ]; then
|
||||
local _SAFE_EXTRAS=()
|
||||
local _e _b _skip
|
||||
IFS=',' read -ra _ALL_EXTRAS_ARR <<< "$_ALL_EXTRAS_CSV"
|
||||
for _e in "${_ALL_EXTRAS_ARR[@]}"; do
|
||||
_skip=false
|
||||
for _b in "${_BROKEN_EXTRAS[@]}"; do
|
||||
if [ "$_e" = "$_b" ]; then _skip=true; break; fi
|
||||
done
|
||||
if [ "$_skip" = false ]; then _SAFE_EXTRAS+=("$_e"); fi
|
||||
done
|
||||
_SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
|
||||
fi
|
||||
|
||||
ALL_INSTALL_LOG=$(mktemp)
|
||||
local _installed=false
|
||||
local _tier_name=""
|
||||
|
||||
# install_tier NAME SPEC
# Attempt one `uv pip install -e SPEC` tier. stderr is captured in
# $ALL_INSTALL_LOG. On success, records the tier in the enclosing
# function's _installed / _tier_name variables and returns 0; on failure,
# prints the first lines of the captured output and returns 1.
install_tier() {
    local tier_label="$1"
    local pip_spec="$2"

    log_info "Trying tier: $tier_label ..."
    if ! $UV_CMD pip install -e "$pip_spec" 2>"$ALL_INSTALL_LOG"; then
        log_warn "Tier '$tier_label' failed. Top of pip output:"
        head -5 "$ALL_INSTALL_LOG" | sed 's/^/ /' >&2
        return 1
    fi

    log_success "Main package installed ($tier_label)"
    _installed=true
    _tier_name="$tier_label"
    return 0
}
|
||||
|
||||
install_tier "all" ".[all]" \
|
||||
|| install_tier "all minus known-broken (${_BROKEN_EXTRAS[*]:-none})" "$_SAFE_SPEC" \
|
||||
|| install_tier "core only (no extras)" "."
|
||||
|
||||
rm -f "$ALL_INSTALL_LOG"
|
||||
|
||||
if [ "$_installed" = false ]; then
|
||||
log_error "Package installation failed even with no extras."
|
||||
log_info "Check that build tools are installed: sudo apt install build-essential python3-dev"
|
||||
log_info "Then re-run: cd $INSTALL_DIR && uv pip install -e '.[all]'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Tier 1 is registered by install_tier under the name "all". Any other
# recorded tier name means a fallback tier was used, so some extras may be
# missing. (Comparing against the old "all (with RL/matrix extras)" label
# made this warning fire even when the full [all] install succeeded.)
if [ "$_tier_name" != "all" ]; then
    log_warn "Note: installed via fallback tier ($_tier_name)."
    log_info "Some optional features may be missing. After resolving any"
    log_info "PyPI/network issue, re-run: $UV_CMD pip install -e '.[all]'"
fi
|
||||
|
||||
log_success "Main package installed"
|
||||
|
|
|
|||
|
|
@ -53,12 +53,15 @@ AUTHOR_MAP = {
|
|||
"421774554@qq.com": "wuli666",
|
||||
"harish.kukreja@gmail.com": "counterposition",
|
||||
"1046611633@qq.com": "zhengyn0001",
|
||||
"db@project-aeon.com": "db-aeon",
|
||||
"ahmed@abadr.net": "ahmedbadr3",
|
||||
"cleo@edaphic.xyz": "curiouscleo",
|
||||
"hirokazu.ogawa@kwansei.ac.jp": "hrkzogw",
|
||||
"datapod.k@gmail.com": "dandacompany",
|
||||
"treydong.zh@gmail.com": "TreyDong",
|
||||
"kyanam.preetham@gmail.com": "pkyanam",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0",
|
||||
"hugosequier@gmail.com": "Hugo-SEQUIER",
|
||||
"128259593+Gutslabs@users.noreply.github.com": "Gutslabs",
|
||||
"50326054+nocturnum91@users.noreply.github.com": "nocturnum91",
|
||||
|
|
@ -137,6 +140,22 @@ AUTHOR_MAP = {
|
|||
"tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc",
|
||||
"leon@agentlinker.ai": "agentlinker",
|
||||
"santoshhumagain1887@gmail.com": "npmisantosh",
|
||||
"39641663+luarss@users.noreply.github.com": "luarss",
|
||||
"16263913+zccyman@users.noreply.github.com": "zccyman",
|
||||
"ahmetosrak@Ahmet-MacBook-Air.local": "Osraka",
|
||||
"98612432+Osraka@users.noreply.github.com": "Osraka",
|
||||
"112634774+ryptotalent@users.noreply.github.com": "ryptotalent",
|
||||
"270097726+hookinglau@users.noreply.github.com": "hookinglau",
|
||||
"5029547+AllynSheep@users.noreply.github.com": "AllynSheep",
|
||||
"allyn0306@gmail.com": "AllynSheep",
|
||||
"46887634+aqilaziz@users.noreply.github.com": "aqilaziz",
|
||||
"gonzes7@gmail.com": "aqilaziz",
|
||||
"6966326+laoli-no1@users.noreply.github.com": "laoli-no1",
|
||||
"laoli_no1@163.com": "laoli-no1",
|
||||
"39730900+NorethSea@users.noreply.github.com": "NorethSea",
|
||||
"963979204@qq.com": "NorethSea",
|
||||
"2283389+JamesX88@users.noreply.github.com": "JamesX88",
|
||||
"JamesX88@users.noreply.github.com": "JamesX88",
|
||||
"novax635@gmail.com": "novax635",
|
||||
"krionex1@gmail.com": "Krionex",
|
||||
"rxdxxxx@users.noreply.github.com": "rxdxxxx",
|
||||
|
|
|
|||
|
|
@ -82,7 +82,22 @@ else
|
|||
echo -e "${GREEN}✓${NC} uv found ($UV_VERSION)"
|
||||
else
|
||||
echo -e "${CYAN}→${NC} Installing uv..."
|
||||
if curl -LsSf https://astral.sh/uv/install.sh | sh 2>/dev/null; then
|
||||
# Capture installer output so a failure shows the user WHY
|
||||
# (network, glibc mismatch on old distros, missing curl, disk
|
||||
# full, etc.) instead of "✗ Failed to install uv" with zero
|
||||
# diagnostic. Two-stage to avoid `curl | sh` masking curl
|
||||
# failures (sh exits 0 on empty stdin under no pipefail).
|
||||
_uv_log="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-install.$$.log")"
|
||||
_uv_installer="$(mktemp 2>/dev/null || echo "/tmp/hermes-uv-installer.$$.sh")"
|
||||
if ! curl -LsSf https://astral.sh/uv/install.sh -o "$_uv_installer" 2>"$_uv_log"; then
|
||||
echo -e "${RED}✗${NC} Failed to download uv installer."
|
||||
sed 's/^/ /' "$_uv_log" >&2
|
||||
echo -e "${CYAN}→${NC} Install manually: https://docs.astral.sh/uv/"
|
||||
rm -f "$_uv_log" "$_uv_installer"
|
||||
exit 1
|
||||
fi
|
||||
if sh "$_uv_installer" >>"$_uv_log" 2>&1; then
|
||||
rm -f "$_uv_installer"
|
||||
if [ -x "$HOME/.local/bin/uv" ]; then
|
||||
UV_CMD="$HOME/.local/bin/uv"
|
||||
elif [ -x "$HOME/.cargo/bin/uv" ]; then
|
||||
|
|
@ -90,14 +105,22 @@ else
|
|||
fi
|
||||
|
||||
if [ -n "$UV_CMD" ]; then
|
||||
rm -f "$_uv_log"
|
||||
UV_VERSION=$($UV_CMD --version 2>/dev/null)
|
||||
echo -e "${GREEN}✓${NC} uv installed ($UV_VERSION)"
|
||||
else
|
||||
echo -e "${RED}✗${NC} uv installed but not found. Add ~/.local/bin to PATH and retry."
|
||||
echo -e "${RED}✗${NC} uv installer reported success but binary not found. Add ~/.local/bin to PATH and retry."
|
||||
echo -e "${CYAN}→${NC} Installer output:"
|
||||
sed 's/^/ /' "$_uv_log" >&2
|
||||
rm -f "$_uv_log"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo -e "${RED}✗${NC} Failed to install uv. Visit https://docs.astral.sh/uv/"
|
||||
echo -e "${RED}✗${NC} Failed to install uv."
|
||||
echo -e "${CYAN}→${NC} Installer output:"
|
||||
sed 's/^/ /' "$_uv_log" >&2
|
||||
echo -e "${CYAN}→${NC} Install manually: https://docs.astral.sh/uv/"
|
||||
rm -f "$_uv_log" "$_uv_installer"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
|
@ -183,17 +206,63 @@ if is_termux; then
|
|||
else
|
||||
# Prefer uv sync with lockfile (hash-verified installs) when available,
|
||||
# fall back to pip install for compatibility or when lockfile is stale.
|
||||
#
|
||||
# Multi-tier pip fallback. Goal: ONE compromised PyPI package
|
||||
# (mistralai 2.4.6 in May 2026 → quarantined) shouldn't silently demote
|
||||
# a fresh setup to "core only". Edit _BROKEN_EXTRAS when a transitive
|
||||
# breaks; users keep voice / honcho / google / slack / matrix etc. even
|
||||
# if mistral can't resolve.
|
||||
_BROKEN_EXTRAS=() # populate when an extra becomes unresolvable
|
||||
_ALL_EXTRAS=(
|
||||
modal daytona vercel messaging matrix cron cli dev tts-premium slack
|
||||
pty honcho mcp homeassistant sms acp voice dingtalk feishu google
|
||||
bedrock web youtube
|
||||
)
|
||||
_SAFE_EXTRAS=()
|
||||
for _e in "${_ALL_EXTRAS[@]}"; do
|
||||
_skip=false
|
||||
for _b in "${_BROKEN_EXTRAS[@]}"; do
|
||||
[ "$_e" = "$_b" ] && _skip=true && break
|
||||
done
|
||||
[ "$_skip" = false ] && _SAFE_EXTRAS+=("$_e")
|
||||
done
|
||||
_SAFE_SPEC=".[$(IFS=,; echo "${_SAFE_EXTRAS[*]}")]"
|
||||
# Try each pip install tier in order — full [all], [all] minus the
# known-broken extras ($_SAFE_SPEC), then the bare package — stopping at
# the first that succeeds. Returns 0 on success, otherwise the exit status
# of the last attempted install.
_try_install() {
    local _spec _rc=1
    for _spec in ".[all]" "$_SAFE_SPEC" "."; do
        $UV_CMD pip install -e "$_spec" && return 0
        _rc=$?
    done
    return "$_rc"
}
|
||||
|
||||
if [ -f "uv.lock" ]; then
|
||||
# Hash-verified install (preferred). The lockfile records SHA256
|
||||
# hashes for every transitive — a compromised transitive would have
|
||||
# a different hash and be REJECTED by uv. This is the only path
|
||||
# that protects against transitive-package supply-chain attacks
|
||||
# (the direct deps in pyproject.toml are exact-pinned, but
|
||||
# `uv pip install` re-resolves transitives fresh from PyPI).
|
||||
echo -e "${CYAN}→${NC} Using uv.lock for hash-verified installation..."
|
||||
UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>/dev/null && \
|
||||
echo -e "${GREEN}✓${NC} Dependencies installed (lockfile verified)" || {
|
||||
echo -e "${YELLOW}⚠${NC} Lockfile install failed (may be outdated), falling back to pip install..."
|
||||
$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
|
||||
echo -e "${GREEN}✓${NC} Dependencies installed"
|
||||
}
|
||||
echo -e "${CYAN}→${NC} (first run on a fresh venv can take 1-5 minutes; uv prints progress below)"
|
||||
# Critical flag choice: `--extra all`, NOT `--all-extras`. The
|
||||
# latter installs every [project.optional-dependencies] key,
|
||||
# bypassing the curated [all] extra and pulling backends like
|
||||
# [matrix] (python-olm needs make on Windows) and [rl] (git+https
|
||||
# deps that fail offline). See pyproject.toml's [all] for the
|
||||
# curated set, and tools/lazy_deps.py for backends that install
|
||||
# at first use.
|
||||
# Also: stream stderr through directly so the user sees uv's
|
||||
# progress UI instead of staring at a frozen prompt.
|
||||
if UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --extra all --locked; then
|
||||
echo -e "${GREEN}✓${NC} Dependencies installed (hash-verified via uv.lock)"
|
||||
else
|
||||
echo -e "${YELLOW}⚠${NC} Lockfile sync failed (see uv output above)."
|
||||
echo -e "${YELLOW}⚠${NC} Falling back to PyPI resolve — transitives will NOT be hash-verified."
|
||||
_try_install
|
||||
echo -e "${GREEN}✓${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
|
||||
fi
|
||||
else
|
||||
$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
|
||||
echo -e "${GREEN}✓${NC} Dependencies installed"
|
||||
echo -e "${YELLOW}⚠${NC} uv.lock not found — installing without hash verification of transitives."
|
||||
_try_install
|
||||
echo -e "${GREEN}✓${NC} Dependencies installed (transitives re-resolved, not hash-verified)"
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ Your job description says "route, don't execute." The rules that enforce that:
|
|||
- **For any concrete task, create a Kanban task and assign it.** Every single time.
|
||||
- **Split multi-lane requests before creating cards.** A user prompt can contain several independent workstreams. Extract those lanes first, then create one card per lane instead of bundling unrelated work into a single implementer card.
|
||||
- **Run independent lanes in parallel.** If two cards do not need each other's output, leave them unlinked so the dispatcher can fan them out. Link only true data dependencies.
|
||||
- **Never create dependent work as independent ready cards.** If a card must wait for another card, pass `parents=[...]` in the original `kanban_create` call. Do not create it first and link it later, and do not rely on prose like "wait for T1" inside the body.
|
||||
- **If no specialist fits the available profiles, ask the user which profile to create or which existing profile to use.** Do not invent profile names; the dispatcher will silently drop unknown assignees.
|
||||
- **Decompose, route, and summarize — that's the whole job.**
|
||||
|
||||
|
|
@ -67,7 +68,7 @@ Before creating anything, draft the graph out loud (in your response to the user
|
|||
2. Map each lane to one of the profiles you discovered in Step 0. If a lane doesn't fit any existing profile, ask the user which to use or create.
|
||||
3. Decide whether each lane is independent or gated by another lane.
|
||||
4. Create independent lanes as parallel cards with no parent links.
|
||||
5. Create synthesis/review/integration cards with parent links to the lanes they depend on.
|
||||
5. Create synthesis/review/integration cards with parent links to the lanes they depend on. A child created with unfinished parents starts in `todo`; the dispatcher promotes it to `ready` only after every parent is done.
|
||||
|
||||
Examples of prompts that should fan out (using placeholder profile names — substitute whatever exists on the user's setup):
|
||||
|
||||
|
|
@ -115,6 +116,8 @@ t4 = kanban_create(
|
|||
|
||||
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
|
||||
|
||||
If the task graph has dependencies, create the parent cards first, capture their returned ids, and include those ids in the child card's `parents` list during the child `kanban_create` call. Avoid creating all cards in parallel and linking them afterward; that creates a window where the dispatcher can claim a child before its inputs exist.
|
||||
|
||||
### Step 4 — Complete your own task
|
||||
|
||||
If you were spawned as a task yourself (e.g. a planner profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
|
||||
|
|
|
|||
1
tests/agent/lsp/__init__.py
Normal file
1
tests/agent/lsp/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Pytest helpers for LSP-related tests."""
|
||||
159
tests/agent/lsp/_mock_lsp_server.py
Normal file
159
tests/agent/lsp/_mock_lsp_server.py
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
#!/usr/bin/env python3
|
||||
"""A minimal in-process LSP server used by tests.
|
||||
|
||||
Speaks just enough LSP to drive :class:`agent.lsp.client.LSPClient`
|
||||
through a full lifecycle: ``initialize``, ``initialized``,
|
||||
``textDocument/didOpen``, ``textDocument/didChange``, then a
|
||||
``textDocument/publishDiagnostics`` notification followed by
|
||||
``shutdown`` + ``exit``.
|
||||
|
||||
Behaviour (all behaviours selectable via env var ``MOCK_LSP_SCRIPT``):
|
||||
|
||||
- ``"clean"`` — initialize, accept didOpen/didChange, push empty
|
||||
diagnostics on every open/change, exit cleanly on shutdown.
|
||||
- ``"errors"`` — same as ``clean`` but the published diagnostics
|
||||
carry one severity-1 entry pointing at line 0:0.
|
||||
- ``"crash"`` — exit immediately after responding to ``initialize``
|
||||
(simulates a crashing server).
|
||||
- ``"slow"`` — same as ``clean`` but sleeps 1s before responding to
|
||||
``initialize`` (lets us test timeout behaviour).
|
||||
|
||||
The script writes JSON-RPC framed messages to stdout and reads from
|
||||
stdin. No third-party dependencies — uses only stdlib so it runs
|
||||
under whatever Python the test process picks up.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def read_message():
    """Read one Content-Length framed JSON-RPC message from stdin.

    Header lines are consumed up to the blank separator line, then exactly
    ``Content-Length`` bytes of body are read and decoded as UTF-8 JSON.

    Returns:
        The decoded JSON value, or ``None`` when stdin hits EOF — either
        before the header block is complete or part-way through the body.

    Raises:
        KeyError: the header block carries no ``Content-Length`` field.
        ValueError: ``Content-Length`` is not an integer, or the body is
            not valid UTF-8 JSON.
    """
    stream = sys.stdin.buffer
    headers = {}
    while True:
        line = stream.readline()
        if not line:
            # EOF before the blank separator line: the peer went away.
            return None
        line = line.rstrip(b"\r\n")
        if not line:
            # Blank line terminates the header block.
            break
        k, _, v = line.decode("ascii").partition(":")
        headers[k.strip().lower()] = v.strip()
    n = int(headers["content-length"])
    body = stream.read(n)
    if len(body) < n:
        # A short read means the stream closed mid-message. Report it as
        # EOF rather than feeding a truncated document to the JSON parser.
        return None
    return json.loads(body.decode("utf-8"))
|
||||
|
||||
|
||||
def write_message(obj):
    """Serialize *obj* as compact JSON and emit it on stdout with LSP framing.

    The frame is a ``Content-Length`` header, a blank line, then the UTF-8
    body. Stdout is flushed so the peer sees the message immediately.
    """
    out = sys.stdout.buffer
    payload = json.dumps(obj, separators=(",", ":")).encode("utf-8")
    header = f"Content-Length: {len(payload)}\r\n\r\n"
    out.write(header.encode("ascii"))
    out.write(payload)
    out.flush()
|
||||
|
||||
|
||||
def main():
    """Serve mock LSP traffic until stdin closes or ``exit`` is received.

    The scripted behaviour comes from the ``MOCK_LSP_SCRIPT`` environment
    variable (default ``"clean"``). Requests that are not recognised get a
    JSON-RPC ``-32601`` error; unrecognised notifications are dropped.

    Returns:
        ``0`` in every termination path, for use as the process exit code.
    """
    mode = os.environ.get("MOCK_LSP_SCRIPT", "clean")

    # Notifications that are accepted and deliberately ignored.
    ignored = (
        "initialized",
        "workspace/didChangeConfiguration",
        "workspace/didChangeWatchedFiles",
        "textDocument/didSave",
    )

    while True:
        msg = read_message()
        if msg is None:
            return 0

        has_id = "id" in msg
        method = msg.get("method")

        if has_id and method == "initialize":
            if mode == "slow":
                time.sleep(1.0)
            capabilities = {
                "textDocumentSync": 1,  # Full
                "diagnosticProvider": {"interFileDependencies": False, "workspaceDiagnostics": False},
            }
            result = {
                "capabilities": capabilities,
                "serverInfo": {"name": "mock-lsp", "version": "0.1"},
            }
            write_message({"jsonrpc": "2.0", "id": msg["id"], "result": result})
            if mode == "crash":
                # Simulate a server that dies right after the handshake.
                return 0
            continue

        if method in ignored:
            continue

        if method in ("textDocument/didOpen", "textDocument/didChange"):
            document = (msg.get("params") or {}).get("textDocument") or {}
            if mode == "errors":
                found = [
                    {
                        "range": {
                            "start": {"line": 0, "character": 0},
                            "end": {"line": 0, "character": 5},
                        },
                        "severity": 1,
                        "code": "MOCK001",
                        "source": "mock-lsp",
                        "message": "synthetic error from mock-lsp",
                    }
                ]
            else:
                found = []
            notification = {
                "jsonrpc": "2.0",
                "method": "textDocument/publishDiagnostics",
                "params": {
                    "uri": document.get("uri", ""),
                    "version": document.get("version", 0),
                    "diagnostics": found,
                },
            }
            write_message(notification)
            continue

        if method == "textDocument/diagnostic":
            # Pull endpoint — always reports an empty full result.
            empty_report = {"kind": "full", "items": []}
            write_message({"jsonrpc": "2.0", "id": msg["id"], "result": empty_report})
            continue

        if method == "shutdown":
            write_message({"jsonrpc": "2.0", "id": msg["id"], "result": None})
            continue

        if method == "exit":
            return 0

        # Anything else: only requests (messages with an id) get a reply.
        if has_id:
            failure = {"code": -32601, "message": f"method not found: {method}"}
            write_message({"jsonrpc": "2.0", "id": msg["id"], "error": failure})
|
||||
|
||||
|
||||
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
108
tests/agent/lsp/test_backend_gate.py
Normal file
108
tests/agent/lsp/test_backend_gate.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
"""Integration test: LSP layer is skipped on non-local backends.
|
||||
|
||||
The host-side LSP server can't see files inside a Docker/Modal/SSH
|
||||
sandbox. When the agent's terminal env isn't ``LocalEnvironment``,
|
||||
the file_operations layer must skip both ``snapshot_baseline`` and
|
||||
``get_diagnostics_sync`` calls — falling back to the in-process
|
||||
syntax check exactly as if LSP were disabled.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp import eventlog
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset():
    """Reset the eventlog announce caches around every test in this module.

    The caches are cleared before the test so earlier tests cannot influence
    it, and again afterwards so state created here does not leak into tests
    that run later in the same process.
    """
    eventlog.reset_announce_caches()
    yield
    eventlog.reset_announce_caches()
|
||||
|
||||
|
||||
def test_local_only_helper_returns_true_for_local_env():
    """``_lsp_local_only`` reports True when the backend is a ``LocalEnvironment``."""
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    local_env = LocalEnvironment(cwd="/tmp")
    file_ops = ShellFileOperations(local_env)
    assert file_ops._lsp_local_only() is True
|
||||
|
||||
|
||||
def test_local_only_helper_returns_false_for_non_local_env():
    """A stand-in for a Docker/Modal/SSH backend makes the helper report False."""
    from tools.file_operations import ShellFileOperations

    # A plain MagicMock is not an instance of LocalEnvironment, which is all
    # this check needs from a "remote" backend.
    sandbox_env = MagicMock()
    sandbox_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
    sandbox_env.cwd = "/sandbox"

    file_ops = ShellFileOperations(sandbox_env)
    assert file_ops._lsp_local_only() is False
|
||||
|
||||
|
||||
def test_snapshot_baseline_skipped_for_non_local(monkeypatch):
    """On a non-local backend the LSP service's ``snapshot_baseline``
    must never be invoked."""
    from tools.file_operations import ShellFileOperations

    sandbox_env = MagicMock()
    sandbox_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
    sandbox_env.cwd = "/sandbox"
    file_ops = ShellFileOperations(sandbox_env)

    recorded_paths = []

    class FakeService:
        def snapshot_baseline(self, path):
            recorded_paths.append(path)

    def fake_get_service():
        return FakeService()

    monkeypatch.setattr("agent.lsp.get_service", fake_get_service)

    file_ops._snapshot_lsp_baseline("/sandbox/x.py")
    assert recorded_paths == [], "snapshot must be skipped for non-local backends"
|
||||
|
||||
|
||||
def test_maybe_lsp_diagnostics_returns_empty_for_non_local(monkeypatch):
    """On a non-local backend ``_maybe_lsp_diagnostics`` returns an empty
    string and never touches the LSP service."""
    from tools.file_operations import ShellFileOperations

    sandbox_env = MagicMock()
    sandbox_env.execute = MagicMock(return_value=MagicMock(exit_code=0, stdout=""))
    sandbox_env.cwd = "/sandbox"
    file_ops = ShellFileOperations(sandbox_env)

    service_calls = []

    class FakeService:
        def enabled_for(self, path):
            service_calls.append(("enabled_for", path))
            return True

        def get_diagnostics_sync(self, path, **kw):
            service_calls.append(("get_diagnostics_sync", path))
            return [{"severity": 1, "message": "should not see this"}]

    def fake_get_service():
        return FakeService()

    monkeypatch.setattr("agent.lsp.get_service", fake_get_service)

    output = file_ops._maybe_lsp_diagnostics("/sandbox/x.py")
    assert output == ""
    assert service_calls == [], "service must not be queried for non-local backends"
|
||||
|
||||
|
||||
def test_snapshot_baseline_called_for_local_env(tmp_path, monkeypatch):
    """With a local backend the baseline snapshot is forwarded to the service
    with the exact path it was given."""
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    target_path = str(tmp_path / "x.py")

    recorded_paths = []

    class FakeService:
        def snapshot_baseline(self, path):
            recorded_paths.append(path)

    def fake_get_service():
        return FakeService()

    monkeypatch.setattr("agent.lsp.get_service", fake_get_service)

    file_ops._snapshot_lsp_baseline(target_path)
    assert recorded_paths == [target_path]
|
||||
213
tests/agent/lsp/test_broken_set.py
Normal file
213
tests/agent/lsp/test_broken_set.py
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
"""Tests for the broken-set short-circuit added to handle outer-timeout failures.
|
||||
|
||||
When ``snapshot_baseline`` or ``get_diagnostics_sync`` time out from the
|
||||
service layer (because a language server hangs during initialize, or
|
||||
the binary is wedged), the inner spawn task is cancelled — but the
|
||||
inner exception handler that adds to ``_broken`` never runs. Without
|
||||
the service-layer fallback added in this module, every subsequent
|
||||
edit re-pays the full timeout cost until the process exits.
|
||||
|
||||
This module verifies:
|
||||
- ``_mark_broken_for_file`` adds the right key
|
||||
- ``enabled_for`` short-circuits on broken keys
|
||||
- a missing binary is broken-set'd after one snapshot attempt
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp.manager import LSPService
|
||||
from agent.lsp.servers import SERVERS, ServerContext, ServerDef, SpawnSpec
|
||||
from agent.lsp.workspace import clear_cache
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_workspace_cache():
    """Clear the workspace cache before and after every test in this module."""
    # Setup: start from an empty cache.
    clear_cache()
    yield
    # Teardown: drop whatever the test cached.
    clear_cache()
|
||||
|
||||
|
||||
def _make_git_workspace(tmp_path: Path) -> Path:
|
||||
"""Build a minimal git repo with a pyproject so pyright's root resolver fires."""
|
||||
repo = tmp_path / "repo"
|
||||
repo.mkdir()
|
||||
(repo / ".git").mkdir()
|
||||
(repo / "pyproject.toml").write_text("[project]\nname='t'\n")
|
||||
return repo
|
||||
|
||||
|
||||
def test_mark_broken_for_file_adds_correct_key(tmp_path, monkeypatch):
    """Marking a file broken records a ``(server_id, per_server_root)`` key
    in the service's broken-set."""
    project = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(project))
    module = project / "x.py"
    module.write_text("")

    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        service._mark_broken_for_file(str(module), RuntimeError("simulated"))
        # pyproject.toml makes the repo root the pyright workspace root.
        expected_key = ("pyright", str(project))
        assert expected_key in service._broken
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_enabled_for_returns_false_after_broken(tmp_path, monkeypatch):
    """``enabled_for`` flips from True to False for a file once its
    (server_id, root) pair has been marked broken."""
    project = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(project))
    module = project / "x.py"
    module.write_text("")
    module_path = str(module)

    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        # Before the failure the file is served by LSP.
        assert service.enabled_for(module_path) is True
        service._mark_broken_for_file(module_path, RuntimeError("simulated"))
        # After it, the broken-set short-circuits the lookup.
        assert service.enabled_for(module_path) is False
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_enabled_for_other_file_in_same_project_also_skipped(tmp_path, monkeypatch):
    """Because the broken key is (server_id, root), a failure on one file
    disables the server for its sibling files in the same project too."""
    project = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(project))
    first = project / "a.py"
    first.write_text("")
    second = project / "b.py"
    second.write_text("")

    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        service._mark_broken_for_file(str(first), RuntimeError("simulated"))
        # The triggering file and its sibling are both skipped.
        assert service.enabled_for(str(first)) is False
        assert service.enabled_for(str(second)) is False
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_unrelated_project_not_affected_by_broken(tmp_path, monkeypatch):
    """A broken mark recorded for project A leaves project B enabled."""
    project_a = _make_git_workspace(tmp_path)

    # Second, independent project built by hand next to the first one.
    project_b = tmp_path / "repo-b"
    project_b.mkdir()
    (project_b / ".git").mkdir()
    (project_b / "pyproject.toml").write_text("[project]\nname='b'\n")

    file_a = project_a / "x.py"
    file_a.write_text("")
    file_b = project_b / "x.py"
    file_b.write_text("")

    monkeypatch.chdir(str(project_a))
    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        service._mark_broken_for_file(str(file_a), RuntimeError("simulated"))
        # Project A is now skipped...
        assert service.enabled_for(str(file_a)) is False
        # ...while project B, keyed on a different root, is untouched.
        monkeypatch.chdir(str(project_b))
        assert service.enabled_for(str(file_b)) is True
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_mark_broken_handles_missing_server_silently(tmp_path):
    """For an extension with no registered server, ``_mark_broken_for_file``
    neither raises nor records anything."""
    unknown_file = str(tmp_path / "weird.xyz")
    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        # Must be a silent no-op: nothing is registered for .xyz.
        service._mark_broken_for_file(unknown_file, RuntimeError("x"))
        assert len(service._broken) == 0
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_mark_broken_handles_no_workspace_silently(tmp_path):
    """A file that lives outside any git worktree has no workspace, so
    marking it broken adds no key."""
    orphan = tmp_path / "orphan.py"
    orphan.write_text("")

    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        service._mark_broken_for_file(str(orphan), RuntimeError("x"))
        assert len(service._broken) == 0
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_snapshot_failure_marks_broken_via_outer_timeout(tmp_path, monkeypatch):
    """End-to-end: a failing ``snapshot_baseline`` call leaves the file's
    (server_id, root) pair in the broken-set, so ``enabled_for`` returns
    False afterwards."""
    project = _make_git_workspace(tmp_path)
    monkeypatch.chdir(str(project))
    module = project / "x.py"
    module.write_text("")
    module_path = str(module)

    service_options = {
        "enabled": True,
        "wait_mode": "document",
        "wait_timeout": 2.0,
        "install_strategy": "manual",
    }
    service = LSPService(**service_options)
    try:
        # Replacement for the inner snapshot coroutine that always fails.
        async def boom(_path):
            raise RuntimeError("outer-timeout simulated")

        with patch.object(service, "_snapshot_async", boom):
            assert service.enabled_for(module_path) is True
            service.snapshot_baseline(module_path)

        # The failure must have been recorded, and the file is now skipped.
        assert ("pyright", str(project)) in service._broken
        assert service.enabled_for(module_path) is False
    finally:
        service.shutdown()
|
||||
143
tests/agent/lsp/test_client_e2e.py
Normal file
143
tests/agent/lsp/test_client_e2e.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
"""End-to-end client tests against the in-process mock LSP server.
|
||||
|
||||
Spins up :file:`_mock_lsp_server.py` as an actual subprocess, drives
|
||||
it through real LSP traffic, and asserts diagnostic flow. This is
|
||||
the closest thing we have to integration coverage without requiring
|
||||
pyright/gopls/etc. to be installed in CI.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp.client import LSPClient
|
||||
|
||||
|
||||
MOCK_SERVER = str(Path(__file__).parent / "_mock_lsp_server.py")
|
||||
|
||||
|
||||
def _client(workspace: Path, script: str = "clean") -> LSPClient:
    """Build an ``LSPClient`` that launches the mock server under the current
    interpreter, rooted at *workspace* and running the given *script*."""
    root = str(workspace)
    child_env = {
        "MOCK_LSP_SCRIPT": script,
        "PYTHONPATH": os.environ.get("PYTHONPATH", ""),
    }
    launch_command = [sys.executable, MOCK_SERVER]
    return LSPClient(
        server_id=f"mock-{script}",
        workspace_root=root,
        command=launch_command,
        env=child_env,
        cwd=root,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_lifecycle_clean(tmp_path: Path):
    """Happy path: start, open a file, receive empty diagnostics, shut down."""
    source = tmp_path / "x.py"
    source.write_text("print('hi')\n")
    source_path = str(source)

    lsp = _client(tmp_path, "clean")
    await lsp.start()
    try:
        assert lsp.is_running
        opened_version = await lsp.open_file(source_path, language_id="python")
        assert opened_version == 0
        await lsp.wait_for_diagnostics(source_path, opened_version, mode="document")
        reported = lsp.diagnostics_for(source_path)
        assert reported == []
    finally:
        await lsp.shutdown()
    assert not lsp.is_running
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_receives_published_errors(tmp_path: Path):
    """The single diagnostic pushed by the ``errors`` script reaches the
    client with its fields intact."""
    source = tmp_path / "x.py"
    source.write_text("print('hi')\n")
    source_path = str(source)

    lsp = _client(tmp_path, "errors")
    await lsp.start()
    try:
        opened_version = await lsp.open_file(source_path, language_id="python")
        await lsp.wait_for_diagnostics(source_path, opened_version, mode="document")
        reported = lsp.diagnostics_for(source_path)
        assert len(reported) == 1
        entry = reported[0]
        assert entry["severity"] == 1
        assert entry["code"] == "MOCK001"
        assert entry["source"] == "mock-lsp"
        assert "synthetic error" in entry["message"]
    finally:
        await lsp.shutdown()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_didchange_bumps_version(tmp_path: Path):
    """Opening an already-open file a second time increments the document
    version by one and still yields a single diagnostic."""
    source = tmp_path / "x.py"
    source.write_text("print('hi')\n")
    source_path = str(source)

    lsp = _client(tmp_path, "errors")
    await lsp.start()
    try:
        first_version = await lsp.open_file(source_path, language_id="python")
        source.write_text("print('hi 2')\n")
        # Second open of the same path goes down the didChange route.
        second_version = await lsp.open_file(source_path, language_id="python")
        assert second_version == first_version + 1
        await lsp.wait_for_diagnostics(source_path, second_version, mode="document")
        # The mock pushed one diagnostic per event; the merged view keeps
        # one entry (push store keyed by file path).
        reported = lsp.diagnostics_for(source_path)
        assert len(reported) == 1
    finally:
        await lsp.shutdown()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_handles_crashing_server(tmp_path: Path):
    """When the server exits right after initialize, subsequent requests
    fail gracefully (not hang).

    Any exception raised by ``open_file`` is acceptable. The only failure
    is the call still being pending when the two-second deadline expires.
    """
    f = tmp_path / "x.py"
    f.write_text("")

    client = _client(tmp_path, "crash")
    await client.start()  # should succeed (mock answers initialize before crashing)
    try:
        # Give the OS a moment to deliver the EOF.
        await asyncio.sleep(0.2)

        # asyncio.wait reports "still pending" separately from "finished
        # with an error", so a hang cannot be mistaken for a graceful
        # failure the way a blanket ``except Exception`` would allow.
        attempt = asyncio.ensure_future(
            client.open_file(str(f), language_id="python")
        )
        _, still_pending = await asyncio.wait({attempt}, timeout=2.0)
        if still_pending:
            attempt.cancel()
            # Let the cancellation settle before reporting the failure.
            await asyncio.gather(attempt, return_exceptions=True)
            pytest.fail("open_file hung against a crashed server")

        # Finished in time. Retrieve the outcome so asyncio does not warn
        # about an unretrieved exception; its content is irrelevant here.
        if not attempt.cancelled():
            attempt.exception()
    finally:
        # Always reap the subprocess, even when the assertion above fails.
        await client.shutdown()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_shutdown_idempotent(tmp_path: Path):
    """A second ``shutdown`` on an already stopped client must not raise."""
    source = tmp_path / "x.py"
    source.write_text("")

    lsp = _client(tmp_path, "clean")
    await lsp.start()
    # First call does the real shutdown; the second must not raise.
    await lsp.shutdown()
    await lsp.shutdown()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_client_diagnostics_are_deduped(tmp_path: Path):
    """Three identical pushes for one file leave exactly one diagnostic."""
    source = tmp_path / "x.py"
    source.write_text("")
    source_path = str(source)

    lsp = _client(tmp_path, "errors")
    await lsp.start()
    try:
        rounds = 3
        for _ in range(rounds):
            current_version = await lsp.open_file(source_path, language_id="python")
            await lsp.wait_for_diagnostics(source_path, current_version, mode="document")
        # Each notification overwrites the push store entry, so only the
        # latest diagnostic remains.
        reported = lsp.diagnostics_for(source_path)
        assert len(reported) == 1
    finally:
        await lsp.shutdown()
|
||||
146
tests/agent/lsp/test_diagnostics_field.py
Normal file
146
tests/agent/lsp/test_diagnostics_field.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Tests for the ``lsp_diagnostics`` field on WriteResult / PatchResult.
|
||||
|
||||
The field exists so the agent can read syntax errors (``lint``) and
|
||||
semantic errors (``lsp_diagnostics``) as separate signals rather than
|
||||
having LSP output prepended to the lint string.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.environments.local import LocalEnvironment
|
||||
from tools.file_operations import (
|
||||
PatchResult,
|
||||
ShellFileOperations,
|
||||
WriteResult,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dataclass shape
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_writeresult_lsp_diagnostics_optional():
    """A default-constructed ``WriteResult`` has ``lsp_diagnostics`` of None."""
    default_result = WriteResult()
    assert default_result.lsp_diagnostics is None
|
||||
|
||||
|
||||
def test_writeresult_to_dict_omits_field_when_none():
    """An unset ``lsp_diagnostics`` is left out of the serialized dict."""
    serialized = WriteResult(bytes_written=10).to_dict()
    assert "lsp_diagnostics" not in serialized
|
||||
|
||||
|
||||
def test_writeresult_to_dict_includes_field_when_set():
    """A populated ``lsp_diagnostics`` is serialized unchanged."""
    result = WriteResult(bytes_written=10, lsp_diagnostics="<diagnostics>...</diagnostics>")
    serialized = result.to_dict()
    assert serialized["lsp_diagnostics"] == "<diagnostics>...</diagnostics>"
|
||||
|
||||
|
||||
def test_patchresult_to_dict_includes_field_when_set():
    """``PatchResult.to_dict`` carries a populated ``lsp_diagnostics`` through."""
    result = PatchResult(success=True, lsp_diagnostics="ERROR [1:1] thing")
    serialized = result.to_dict()
    assert serialized["lsp_diagnostics"] == "ERROR [1:1] thing"
|
||||
|
||||
|
||||
def test_patchresult_to_dict_omits_field_when_none():
    """An unset ``lsp_diagnostics`` is left out of ``PatchResult.to_dict``."""
    serialized = PatchResult(success=True).to_dict()
    assert "lsp_diagnostics" not in serialized
|
||||
|
||||
|
||||
def test_patchresult_to_dict_omits_field_when_empty_string():
    """An empty-string ``lsp_diagnostics`` is treated like None and is not
    serialized, so the agent never sees an empty field."""
    serialized = PatchResult(success=True, lsp_diagnostics="").to_dict()
    assert "lsp_diagnostics" not in serialized
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Channel separation: lint and lsp_diagnostics stay independent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_lint_and_lsp_diagnostics_are_separate_channels():
    """One ``WriteResult`` can hold a syntax-error lint and an LSP diagnostic
    block together, each serialized under its own key."""
    lint_payload = {"status": "error", "output": "SyntaxError: ..."}
    result = WriteResult(
        bytes_written=42,
        lint=lint_payload,
        lsp_diagnostics="<diagnostics>ERROR [1:5] type mismatch</diagnostics>",
    )
    serialized = result.to_dict()
    assert "lint" in serialized
    assert "lsp_diagnostics" in serialized
    assert serialized["lint"]["output"] == "SyntaxError: ..."
    assert "type mismatch" in serialized["lsp_diagnostics"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# write_file populates the field via _maybe_lsp_diagnostics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_write_file_populates_lsp_diagnostics_when_layer_returns_block(tmp_path):
    """A non-empty block from the LSP layer lands in ``lsp_diagnostics`` and
    is kept out of ``lint.output``."""
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = tmp_path / "x.py"

    block = "<diagnostics file=\"x.py\">\nERROR [1:1] problem\n</diagnostics>"

    with patch.object(file_ops, "_maybe_lsp_diagnostics", return_value=block):
        outcome = file_ops.write_file(str(destination), "x = 1\n")

    assert outcome.lsp_diagnostics == block
    # "x = 1" passes the syntax check, so lint stays clean and must not
    # have absorbed the LSP block.
    assert outcome.lint == {"status": "ok", "output": ""}
|
||||
|
||||
|
||||
def test_write_file_lsp_diagnostics_none_when_layer_returns_empty(tmp_path):
    """An empty string from the LSP layer leaves ``lsp_diagnostics`` as None."""
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = tmp_path / "x.py"

    with patch.object(file_ops, "_maybe_lsp_diagnostics", return_value=""):
        outcome = file_ops.write_file(str(destination), "x = 1\n")

    assert outcome.lsp_diagnostics is None
|
||||
|
||||
|
||||
def test_write_file_skips_lsp_when_syntax_failed(tmp_path):
    """When the syntax check reports errors the LSP layer is not consulted:
    a file that does not parse yields no useful semantic diagnostics."""
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = tmp_path / "broken.py"
    unparsable_source = "def x(:\n"  # syntax error

    with patch.object(file_ops, "_maybe_lsp_diagnostics") as lsp_layer:
        outcome = file_ops.write_file(str(destination), unparsable_source)

    assert lsp_layer.call_count == 0
    assert outcome.lsp_diagnostics is None
    assert outcome.lint["status"] == "error"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# patch_replace propagates the field from the inner write_file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_patch_replace_propagates_lsp_diagnostics(tmp_path):
    """The ``lsp_diagnostics`` produced by the write inside ``patch_replace``
    is carried onto the returned ``PatchResult``."""
    file_ops = ShellFileOperations(LocalEnvironment(cwd=str(tmp_path)))
    destination = tmp_path / "x.py"
    destination.write_text("x = 1\n")

    block = "<diagnostics>ERROR [1:5] semantic issue</diagnostics>"

    with patch.object(file_ops, "_maybe_lsp_diagnostics", return_value=block):
        outcome = file_ops.patch_replace(str(destination), "x = 1", "x = 2")

    assert outcome.success is True
    assert outcome.lsp_diagnostics == block
|
||||
199
tests/agent/lsp/test_eventlog.py
Normal file
199
tests/agent/lsp/test_eventlog.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
"""Tests for the structured logging dedup model.
|
||||
|
||||
The contract: a 1000-write session in one project should emit exactly
|
||||
ONE INFO line ("active for <root>") at the default INFO threshold.
|
||||
Steady-state events stay at DEBUG; first-time-seen events surface
|
||||
once at INFO/WARNING.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp import eventlog
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset():
    """Reset the eventlog announce caches before and after every test."""
    # Setup: forget anything announced by earlier tests.
    eventlog.reset_announce_caches()
    yield
    # Teardown: do not leak this test's announcements.
    eventlog.reset_announce_caches()
|
||||
|
||||
|
||||
@pytest.fixture
def caplog_lsp(caplog):
    """Return ``caplog`` with the ``hermes.lint.lsp`` logger lowered to DEBUG."""
    caplog.set_level(logging.DEBUG, logger="hermes.lint.lsp")
    return caplog
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Steady-state silence (DEBUG)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_clean_emits_at_debug(caplog_lsp):
    """Ten ``log_clean`` calls give ten DEBUG records and none at INFO+."""
    for _ in range(10):
        eventlog.log_clean("pyright", "/proj/x.py")

    captured = caplog_lsp.records
    at_or_above_info = [rec for rec in captured if rec.levelno >= logging.INFO]
    debug_total = sum(1 for rec in captured if rec.levelno == logging.DEBUG)

    assert at_or_above_info == []
    assert debug_total == 10
|
||||
|
||||
|
||||
def test_disabled_emits_at_debug(caplog_lsp):
    """Every record produced by ``log_disabled`` must be at DEBUG level.

    The explicit non-empty check stops the ``all(...)`` assertion from
    passing vacuously when nothing was captured at all.
    """
    eventlog.log_disabled("pyright", "/x.py", "feature off")
    eventlog.log_disabled("pyright", "/x.py", "ext not mapped")

    records = caplog_lsp.records
    assert records, "log_disabled produced no log records"
    assert all(r.levelno == logging.DEBUG for r in records)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State transitions: INFO once, DEBUG thereafter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_active_for_fires_once_per_root(caplog_lsp):
    """Fifty ``log_active`` calls for one server/root give one 'active for' INFO."""
    for _ in range(50):
        eventlog.log_active("pyright", "/proj")

    announcements = 0
    for record in caplog_lsp.records:
        if record.levelno == logging.INFO and "active for" in record.getMessage():
            announcements += 1

    assert announcements == 1
|
||||
|
||||
|
||||
def test_active_for_fires_per_distinct_root(caplog_lsp):
    """Two different roots for the same server produce two INFO records."""
    for root in ("/proj-a", "/proj-b"):
        eventlog.log_active("pyright", root)

    info_total = sum(
        1 for record in caplog_lsp.records if record.levelno == logging.INFO
    )
    assert info_total == 2
|
||||
|
||||
|
||||
def test_active_for_separate_per_server(caplog_lsp):
    """Two different servers on the same root produce two INFO records."""
    for server_id in ("pyright", "typescript"):
        eventlog.log_active(server_id, "/proj")

    info_total = sum(
        1 for record in caplog_lsp.records if record.levelno == logging.INFO
    )
    assert info_total == 2
|
||||
|
||||
|
||||
def test_no_project_root_fires_once_per_path(caplog_lsp):
    """Five ``log_no_project_root`` calls for one path give a single INFO record."""
    repeat = 5
    while repeat:
        eventlog.log_no_project_root("pyright", "/orphan.py")
        repeat -= 1

    info_records = list(
        filter(lambda rec: rec.levelno == logging.INFO, caplog_lsp.records)
    )
    assert len(info_records) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Diagnostics events fire INFO every time
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_diagnostics_always_info(caplog_lsp):
    """Each of five ``log_diagnostics`` calls yields an INFO record mentioning 'diags'."""
    for index in range(5):
        eventlog.log_diagnostics("pyright", f"/x{index}.py", 1)

    info_records = [
        record for record in caplog_lsp.records if record.levelno == logging.INFO
    ]
    assert len(info_records) == 5
    for record in info_records:
        assert "diags" in record.getMessage()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action-required: WARNING once, DEBUG thereafter (or per call for novel events)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_server_unavailable_warns_once_per_binary(caplog_lsp):
    """Twenty reports of the same missing binary give one WARNING that names it."""
    for _ in range(20):
        eventlog.log_server_unavailable("pyright", "pyright-langserver")

    warning_records = [
        record for record in caplog_lsp.records if record.levelno == logging.WARNING
    ]
    assert len(warning_records) == 1
    first_message = warning_records[0].getMessage()
    assert "pyright-langserver" in first_message
|
||||
|
||||
|
||||
def test_server_unavailable_separate_per_binary(caplog_lsp):
    """Two different missing binaries each get their own WARNING."""
    missing = (
        ("pyright", "pyright-langserver"),
        ("typescript", "typescript-language-server"),
    )
    for server_id, binary in missing:
        eventlog.log_server_unavailable(server_id, binary)

    warning_total = sum(
        1 for record in caplog_lsp.records if record.levelno == logging.WARNING
    )
    assert warning_total == 2
|
||||
|
||||
|
||||
def test_no_server_configured_warns_once(caplog_lsp):
    """Ten ``log_no_server_configured`` calls for one server give one WARNING."""
    for _ in range(10):
        eventlog.log_no_server_configured("pyright")

    warning_total = sum(
        1 for record in caplog_lsp.records if record.levelno == logging.WARNING
    )
    assert warning_total == 1
|
||||
|
||||
|
||||
def test_timeout_warns_every_call(caplog_lsp):
    """Three ``log_timeout`` calls give three WARNING records (no dedup)."""
    for _ in range(3):
        eventlog.log_timeout("pyright", "/x.py")

    warning_levels = [
        record.levelno
        for record in caplog_lsp.records
        if record.levelno == logging.WARNING
    ]
    assert len(warning_levels) == 3
|
||||
|
||||
|
||||
def test_server_error_warns_every_call(caplog_lsp):
    """Three ``log_server_error`` calls give three WARNING records (no dedup)."""
    for _ in range(3):
        failure = RuntimeError("boom")
        eventlog.log_server_error("pyright", "/x.py", failure)

    warning_records = [
        record for record in caplog_lsp.records if record.levelno == logging.WARNING
    ]
    assert len(warning_records) == 3
|
||||
|
||||
|
||||
def test_spawn_failed_warns(caplog_lsp):
    """``log_spawn_failed`` emits one WARNING containing 'spawn/initialize failed'."""
    cause = FileNotFoundError("nope")
    eventlog.log_spawn_failed("pyright", "/proj", cause)

    warning_records = [
        record for record in caplog_lsp.records if record.levelno == logging.WARNING
    ]
    assert len(warning_records) == 1
    assert "spawn/initialize failed" in warning_records[0].getMessage()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Format: log lines all carry the lsp[<server_id>] prefix for grep
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_log_lines_use_lsp_prefix(caplog_lsp):
    """Every captured message must start with the ``lsp[`` prefix.

    The non-empty check guards against the loop of assertions passing
    vacuously when no record was captured.
    """
    eventlog.log_clean("pyright", "/x.py")
    eventlog.log_active("pyright", "/proj")
    eventlog.log_diagnostics("typescript", "/y.ts", 2)

    records = caplog_lsp.records
    assert records, "no log records were captured"
    for r in records:
        assert r.getMessage().startswith("lsp[")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Steady-state contract: 1000 clean writes → 1 INFO at most
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_thousand_clean_writes_emit_one_info(caplog_lsp):
    """One activation plus 1000 clean events leaves exactly one INFO record.

    That single INFO record must be the 'active for' announcement.
    """
    eventlog.log_active("pyright", "/proj")
    for _ in range(1000):
        eventlog.log_clean("pyright", "/proj/x.py")

    info_messages = [
        record.getMessage()
        for record in caplog_lsp.records
        if record.levelno == logging.INFO
    ]
    assert len(info_messages) == 1
    assert "active for" in info_messages[0]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Path shortening
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_short_path_uses_relative_when_inside_cwd(tmp_path, monkeypatch):
    """A file directly inside the cwd is shortened to its bare name."""
    monkeypatch.chdir(tmp_path)
    inside = tmp_path / "x.py"
    inside.write_text("")

    assert eventlog._short_path(str(inside)) == "x.py"
|
||||
|
||||
|
||||
def test_short_path_keeps_absolute_when_outside(tmp_path, monkeypatch):
    """A path outside the cwd must not be rewritten into a ``../`` form.

    The original opened with a conditional ``chdir`` into ``tmp_path / "a"``.
    That directory never exists in a fresh ``tmp_path``, and the following
    ``chdir(tmp_path)`` would have overridden it anyway, so the dead
    statement is dropped.
    """
    monkeypatch.chdir(tmp_path)
    other = "/var/log/foo.txt"
    out = eventlog._short_path(other)
    # Outside cwd: keeps absolute (no leading "../")
    assert out == "/var/log/foo.txt" or not out.startswith("..")
|
||||
|
||||
|
||||
def test_short_path_handles_empty_string():
    """An empty path comes back as an empty string."""
    shortened = eventlog._short_path("")
    assert shortened == ""
|
||||
279
tests/agent/lsp/test_install_and_lint_fixes.py
Normal file
279
tests/agent/lsp/test_install_and_lint_fixes.py
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
"""Tests for follow-up fixes to the LSP integration (PR after #24168).
|
||||
|
||||
Covers:
|
||||
|
||||
1. ``typescript-language-server`` install recipe pulls in ``typescript``
|
||||
alongside the server, so the npm install command targets both.
|
||||
2. ``hermes lsp status`` surfaces a ``Backend warnings`` section when
|
||||
bash-language-server is installed but ``shellcheck`` is missing.
|
||||
3. ``_check_lint`` returns ``skipped`` (not ``error``) when the linter
|
||||
command exists on PATH but couldn't actually run — e.g. ``npx tsc``
|
||||
without the typescript SDK installed. This is what unblocks the
|
||||
LSP semantic tier on TypeScript files when the user doesn't also
|
||||
have a project-level ``tsc``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
from contextlib import redirect_stdout
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp.install import INSTALL_RECIPES
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix 1: typescript install recipe carries the typescript SDK
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_typescript_recipe_includes_typescript_sdk():
    """The typescript-language-server recipe lists ``typescript`` in ``extra_pkgs``."""
    extra_packages = (
        INSTALL_RECIPES["typescript-language-server"].get("extra_pkgs") or []
    )
    failure_hint = (
        "typescript-language-server requires the `typescript` SDK as a "
        "sibling install — without it `initialize` fails with "
        "'Could not find a valid TypeScript installation'."
    )
    assert "typescript" in extra_packages, failure_hint
|
||||
|
||||
|
||||
def test_install_npm_passes_extras_to_npm_command(tmp_path, monkeypatch):
    """The npm command must list the package and its extras after ``install``."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    captured = {}

    def record_run(cmd, **kwargs):
        # Report success without creating any binary; the return value
        # of _install_npm is not inspected by this test.
        captured["cmd"] = cmd
        return MagicMock(returncode=0, stderr="")

    def only_npm_on_path(name):
        if name == "npm":
            return "/usr/bin/npm"
        return None

    from agent.lsp import install as install_mod

    monkeypatch.setattr(install_mod.subprocess, "run", record_run)
    monkeypatch.setattr(install_mod.shutil, "which", only_npm_on_path)

    install_mod._install_npm(
        "typescript-language-server",
        "typescript-language-server",
        extra_pkgs=["typescript"],
    )

    npm_cmd = captured["cmd"]
    assert "typescript-language-server" in npm_cmd
    assert "typescript" in npm_cmd
    # Both names have to sit in install-target position, i.e. after "install".
    install_position = npm_cmd.index("install")
    assert npm_cmd.index("typescript-language-server") > install_position
    assert npm_cmd.index("typescript") > install_position
|
||||
|
||||
|
||||
def test_install_npm_works_without_extras(tmp_path, monkeypatch):
    """With no ``extra_pkgs`` the only install target on the command is the package."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    captured = {}

    def record_run(cmd, **kwargs):
        captured["cmd"] = cmd
        return MagicMock(returncode=0, stderr="")

    def only_npm_on_path(name):
        if name == "npm":
            return "/usr/bin/npm"
        return None

    from agent.lsp import install as install_mod

    monkeypatch.setattr(install_mod.subprocess, "run", record_run)
    monkeypatch.setattr(install_mod.shutil, "which", only_npm_on_path)

    install_mod._install_npm("pyright", "pyright-langserver")

    npm_cmd = captured["cmd"]
    assert "pyright" in npm_cmd

    # What remains after dropping flags, the npm executable, the
    # "install" verb and the prefix directory are the install targets.
    install_targets = []
    for token in npm_cmd:
        if token.startswith("-"):
            continue
        if token in (
            "install", "--prefix", str(install_mod.hermes_lsp_bin_dir().parent),
            "/usr/bin/npm",
        ):
            continue
        install_targets.append(token)
    assert install_targets == ["pyright"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix 2: ``hermes lsp status`` surfaces shellcheck-missing for bash
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_backend_warnings_quiet_when_bash_not_installed(tmp_path, monkeypatch):
    """With ``shutil.which`` finding nothing, ``_backend_warnings`` returns []."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from agent.lsp import cli as lsp_cli

    with patch("shutil.which", return_value=None):
        warnings_found = lsp_cli._backend_warnings()

    assert warnings_found == []
|
||||
|
||||
|
||||
def test_backend_warnings_quiet_when_bash_and_shellcheck_both_present(tmp_path, monkeypatch):
    """With every binary resolvable, ``_backend_warnings`` returns []."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from agent.lsp import cli as lsp_cli

    def everything_on_path(name):
        # Any lookup succeeds, so both bash-language-server and
        # shellcheck count as installed.
        return f"/usr/bin/{name}"

    with patch("shutil.which", side_effect=everything_on_path):
        warnings_found = lsp_cli._backend_warnings()

    assert warnings_found == []
|
||||
|
||||
|
||||
def test_backend_warnings_fires_when_bash_installed_but_shellcheck_missing(tmp_path, monkeypatch):
    """bash-language-server found but nothing else: exactly one warning naming both tools."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from agent.lsp import cli as lsp_cli

    def only_bash_server(name):
        # Everything except bash-language-server (shellcheck included)
        # is reported as missing.
        return "/fake/bin/bash-language-server" if name == "bash-language-server" else None

    with patch("shutil.which", side_effect=only_bash_server):
        warnings_found = lsp_cli._backend_warnings()

    assert len(warnings_found) == 1
    lowered = warnings_found[0].lower()
    assert "shellcheck" in lowered
    assert "bash-language-server" in lowered
|
||||
|
||||
|
||||
def test_status_output_includes_backend_warnings_section(tmp_path, monkeypatch):
    """The text printed by ``_cmd_status`` contains 'Backend warnings' and 'shellcheck'."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    def only_bash_server(name):
        # bash-language-server resolves; every other lookup
        # (shellcheck included) comes back empty.
        return "/fake/bin/bash-language-server" if name == "bash-language-server" else None

    from agent.lsp import cli as lsp_cli

    sink = io.StringIO()
    with patch("shutil.which", side_effect=only_bash_server):
        with redirect_stdout(sink):
            lsp_cli._cmd_status(emit_json=False)

    printed = sink.getvalue()
    assert "Backend warnings" in printed
    assert "shellcheck" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix 3: tier-1 lint treats unusable linters as ``skipped``, not ``error``
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_npx_tsc_missing_treated_as_skipped():
    """The npx 'not the tsc command' banner is classified as an unusable linter.

    Per the module docstring, this classification is what lets
    ``_check_lint`` report ``skipped`` rather than ``error`` when
    ``npx tsc`` cannot run.
    """
    from tools.file_operations import _looks_like_linter_unusable

    banner_lines = [
        " \n",
        " This is not the tsc command you are looking for \n",
        " \n",
        "\n",
        "To get access to the TypeScript compiler, tsc, from the command line either:\n",
        "- Use npm install typescript to first add TypeScript to your project before using npx\n",
    ]
    npx_failure_output = "".join(banner_lines)

    verdict = _looks_like_linter_unusable("npx", npx_failure_output)
    assert verdict is True
|
||||
|
||||
|
||||
def test_real_lint_error_not_classified_as_unusable():
    """A real tsc type-error report is not classified as an unusable linter."""
    from tools.file_operations import _looks_like_linter_unusable

    report_lines = [
        "bad.ts:5:1 - error TS2322: Type 'number' is not assignable to type 'string'.\n",
        "5 const x: string = greet(42);\n",
        " ~~~~~~~~~~~~~~~\n",
    ]
    real_error = "".join(report_lines)

    verdict = _looks_like_linter_unusable("npx", real_error)
    assert verdict is False
|
||||
|
||||
|
||||
def test_unknown_base_cmd_returns_false():
    """An unrecognised or empty base command is never reported as unusable."""
    from tools.file_operations import _looks_like_linter_unusable

    cases = (("eslint", "any output"), ("", "anything"))
    for base_cmd, output in cases:
        assert _looks_like_linter_unusable(base_cmd, output) is False
|
||||
|
||||
|
||||
def test_check_lint_returns_skipped_when_npx_tsc_unusable(tmp_path):
    """``_check_lint`` reports ``skipped`` when the linter prints the npx banner.

    ``_exec`` is stubbed to exit with code 1 and the npx banner on stdout;
    ``_has_command`` is stubbed to True so the linter counts as present.
    """
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    ts_file = tmp_path / "bad.ts"
    ts_file.write_text("const x: string = 42;\n")

    file_ops = ShellFileOperations(LocalEnvironment())

    # Output npx prints when ``tsc`` is requested but not installed.
    npx_banner = (
        " \n"
        " This is not the tsc command you are looking for \n"
    )

    def failing_exec(cmd, **kwargs):
        return MagicMock(exit_code=1, stdout=npx_banner)

    with patch.object(file_ops, "_exec", side_effect=failing_exec):
        with patch.object(file_ops, "_has_command", return_value=True):
            lint = file_ops._check_lint(str(ts_file))

    assert lint.skipped is True, (
        f"expected skipped (so LSP runs); got success={lint.success}, "
        f"output={lint.output!r}"
    )
    assert "not usable" in (lint.message or "")
|
||||
|
||||
|
||||
def test_check_lint_returns_error_for_real_ts_type_errors(tmp_path):
    """A real tsc error report is a failed, non-skipped lint result.

    ``_exec`` is stubbed to exit with code 1 and a TS2322 report on stdout.
    """
    from tools.environments.local import LocalEnvironment
    from tools.file_operations import ShellFileOperations

    ts_file = tmp_path / "bad.ts"
    ts_file.write_text("const x: string = 42;\n")

    file_ops = ShellFileOperations(LocalEnvironment())

    real_tsc_error = "".join(
        [
            "bad.ts:1:7 - error TS2322: Type 'number' is not assignable to type 'string'.\n",
            "1 const x: string = 42;\n",
            " ~\n",
            "Found 1 error.\n",
        ]
    )

    def failing_exec(cmd, **kwargs):
        return MagicMock(exit_code=1, stdout=real_tsc_error)

    with patch.object(file_ops, "_exec", side_effect=failing_exec):
        with patch.object(file_ops, "_has_command", return_value=True):
            lint = file_ops._check_lint(str(ts_file))

    assert lint.skipped is False
    assert lint.success is False
    assert "TS2322" in lint.output
|
||||
|
||||
|
||||
if __name__ == "__main__":  # pragma: no cover
    # Direct execution: hand this file to pytest in verbose mode.
    pytest.main([__file__, "-v"])
|
||||
144
tests/agent/lsp/test_lifecycle.py
Normal file
144
tests/agent/lsp/test_lifecycle.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
"""Tests for service-singleton lifecycle: atexit handler, idempotent shutdown.
|
||||
|
||||
These cover the exit-cleanup behavior added to plug the language-server
|
||||
process leak — without the atexit hook, ``hermes chat`` exits while
|
||||
pyright/gopls/etc. are still alive on the host.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import lsp as lsp_module
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_singleton():
    """Reset the module-level service and atexit flag around every test.

    The tests in this file share process-global state on ``lsp_module``
    (``_service`` and ``_atexit_registered``). Clearing both before and
    after each test keeps the outcome independent of execution order.
    """

    def _clear():
        lsp_module._service = None
        lsp_module._atexit_registered = False

    _clear()
    yield
    _clear()
|
||||
|
||||
|
||||
def test_get_service_registers_atexit_handler_once(monkeypatch):
    """Three ``get_service`` calls register exactly one atexit handler.

    Every call must return the same service object, and the single
    registered callable must be ``lsp_module._atexit_shutdown``.
    """
    service_stub = MagicMock()
    service_stub.is_active.return_value = True
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: service_stub),
    )

    # Stand-in for atexit.register that only records what was passed.
    registered = []
    monkeypatch.setattr(atexit, "register", registered.append)

    results = [lsp_module.get_service() for _ in range(3)]

    for result in results:
        assert result is service_stub
    assert len(registered) == 1
    # The registered callable must be the internal shutdown wrapper.
    assert registered[0] is lsp_module._atexit_shutdown
|
||||
|
||||
|
||||
def test_atexit_shutdown_calls_shutdown_service(monkeypatch):
    """``_atexit_shutdown`` calls ``shutdown_service`` exactly once."""
    invocations = []

    def record_shutdown():
        invocations.append("shutdown")

    monkeypatch.setattr(lsp_module, "shutdown_service", record_shutdown)
    lsp_module._atexit_shutdown()

    assert invocations == ["shutdown"]
|
||||
|
||||
|
||||
def test_atexit_shutdown_swallows_exceptions(monkeypatch):
    """A ``RuntimeError`` raised by ``shutdown_service`` must not escape the wrapper."""

    def failing_shutdown():
        raise RuntimeError("server already dead")

    monkeypatch.setattr(lsp_module, "shutdown_service", failing_shutdown)
    # Returning normally from this call is the whole assertion.
    lsp_module._atexit_shutdown()
|
||||
|
||||
|
||||
def test_shutdown_service_idempotent(monkeypatch):
    """Two ``shutdown_service`` calls shut the underlying service down once.

    The second call must neither raise nor invoke ``shutdown`` again.
    """
    service_stub = MagicMock()
    service_stub.is_active.return_value = True
    service_stub.shutdown = MagicMock()
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: service_stub),
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)

    lsp_module.get_service()
    for _ in range(2):
        lsp_module.shutdown_service()  # neither call may raise

    assert service_stub.shutdown.call_count == 1
|
||||
|
||||
|
||||
def test_shutdown_service_no_op_when_never_started():
    """``shutdown_service`` must not raise when no service was ever created."""
    # Returning normally is the whole assertion.
    lsp_module.shutdown_service()
|
||||
|
||||
|
||||
def test_shutdown_service_swallows_exception(monkeypatch):
    """A ``RuntimeError`` from the service's ``shutdown`` must not propagate."""
    service_stub = MagicMock()
    service_stub.is_active.return_value = True
    service_stub.shutdown = MagicMock(side_effect=RuntimeError("kill -9 already"))
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: service_stub),
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)

    lsp_module.get_service()
    # Returning normally is the whole assertion.
    lsp_module.shutdown_service()
|
||||
|
||||
|
||||
def test_get_service_returns_none_for_inactive_service(monkeypatch):
    """``get_service`` returns None, on repeated calls too, if ``is_active()`` is False."""
    inactive_stub = MagicMock()
    inactive_stub.is_active.return_value = False
    monkeypatch.setattr(
        lsp_module.LSPService,
        "create_from_config",
        classmethod(lambda cls: inactive_stub),
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)

    first = lsp_module.get_service()
    assert first is None
    # A repeated call must answer the same way.
    second = lsp_module.get_service()
    assert second is None
|
||||
|
||||
|
||||
def test_get_service_returns_none_when_create_fails(monkeypatch):
    """``get_service`` returns None when ``create_from_config`` yields None."""

    def no_service(cls):
        return None

    monkeypatch.setattr(
        lsp_module.LSPService, "create_from_config", classmethod(no_service)
    )
    monkeypatch.setattr(atexit, "register", lambda fn: None)

    result = lsp_module.get_service()
    assert result is None
|
||||
197
tests/agent/lsp/test_protocol.py
Normal file
197
tests/agent/lsp/test_protocol.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
"""Tests for the LSP protocol framing layer.
|
||||
|
||||
The framer is small but load-bearing — Content-Length parsing is the
|
||||
single most common reason for hand-rolled LSP clients to silently
|
||||
deadlock. These tests exercise:
|
||||
|
||||
- exact wire format of outgoing messages (encode_message)
|
||||
- partial-read tolerance + EOF handling (read_message)
|
||||
- envelope helpers (request, response, notification, error)
|
||||
- message classification
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from agent.lsp.protocol import (
|
||||
ERROR_CONTENT_MODIFIED,
|
||||
ERROR_METHOD_NOT_FOUND,
|
||||
LSPProtocolError,
|
||||
LSPRequestError,
|
||||
classify_message,
|
||||
encode_message,
|
||||
make_error_response,
|
||||
make_notification,
|
||||
make_request,
|
||||
make_response,
|
||||
read_message,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# encode_message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_encode_message_uses_compact_separators_and_utf8():
    """Check header/body framing, declared length, round-trip and compact JSON."""
    payload = {"jsonrpc": "2.0", "id": 1, "method": "x", "params": {"k": "ä"}}
    wire = encode_message(payload)

    # The header block ends with CRLF CRLF and must decode as plain ASCII.
    body_start = wire.index(b"\r\n\r\n") + 4
    header_text = wire[:body_start].decode("ascii")
    body_bytes = wire[body_start:]
    assert "Content-Length:" in header_text

    # Declared length must equal the actual number of body bytes.
    after_name = header_text.split("Content-Length:")[1]
    declared_length = int(after_name.split("\r\n")[0].strip())
    assert declared_length == len(body_bytes)

    # Body is UTF-8 JSON that round-trips to the input message.
    assert json.loads(body_bytes.decode("utf-8")) == payload

    # Compact separators: no space between key and value.
    assert b'"id":1' in body_bytes
|
||||
|
||||
|
||||
def test_encode_message_handles_unicode_in_strings():
    """Non-ASCII text: declared length matches the body bytes and the body round-trips."""
    payload = {"jsonrpc": "2.0", "method": "log", "params": {"text": "🚀 ünıcödé"}}
    wire = encode_message(payload)

    body_start = wire.index(b"\r\n\r\n") + 4
    body_bytes = wire[body_start:]

    # The first header line is parsed as b"<name>: <length>".
    first_line = wire[: wire.index(b"\r\n")]
    declared_length = int(first_line.split(b": ")[1])

    assert declared_length == len(body_bytes)
    assert json.loads(body_bytes.decode("utf-8")) == payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# read_message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _stream_from_bytes(data: bytes) -> asyncio.StreamReader:
    """Return a fresh ``asyncio.StreamReader`` holding ``data`` followed by EOF."""
    stream = asyncio.StreamReader()
    # Feed the whole payload, then mark end-of-stream.
    stream.feed_data(data)
    stream.feed_eof()
    return stream
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_read_message_round_trip():
    """A message encoded with ``encode_message`` is read back unchanged."""
    original = {"jsonrpc": "2.0", "method": "ping"}
    wire = encode_message(original)
    stream = await _stream_from_bytes(wire)
    decoded = await read_message(stream)
    assert decoded == original
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_read_message_clean_eof_returns_none():
    """Reading from a stream that is already at EOF returns ``None``."""
    empty_stream = await _stream_from_bytes(b"")
    result = await read_message(empty_stream)
    assert result is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_read_message_truncated_body_raises():
    """A frame whose body is three bytes short raises ``LSPProtocolError``."""
    complete = encode_message({"jsonrpc": "2.0", "method": "x"})
    # Drop the last three bytes so the body is shorter than declared.
    stream = await _stream_from_bytes(complete[:-3])
    with pytest.raises(LSPProtocolError):
        await read_message(stream)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_read_message_missing_content_length_raises():
    """A header block with no Content-Length header raises ``LSPProtocolError``."""
    frame_without_length = b"X-Other: 5\r\n\r\n12345"
    stream = await _stream_from_bytes(frame_without_length)
    with pytest.raises(LSPProtocolError):
        await read_message(stream)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_read_message_two_messages_back_to_back():
    """Two concatenated frames are read one at a time, in order."""
    frames = [
        encode_message({"jsonrpc": "2.0", "method": "a"}),
        encode_message({"jsonrpc": "2.0", "method": "b"}),
    ]
    stream = await _stream_from_bytes(frames[0] + frames[1])

    first = await read_message(stream)
    assert first["method"] == "a"
    second = await read_message(stream)
    assert second["method"] == "b"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_read_message_rejects_runaway_header():
    """Header lines with no blank-line terminator raise an error mentioning '8 KiB'.

    About 12 KiB of header lines are fed with no CRLF-CRLF terminator;
    ``read_message`` must raise ``LSPProtocolError`` instead of reading
    forever.
    """
    junk_line = b"X-Junk: " + b"A" * 200 + b"\r\n"
    flood = junk_line * 60  # ~12 KiB worth
    stream = await _stream_from_bytes(flood)

    with pytest.raises(LSPProtocolError) as caught:
        await read_message(stream)

    assert "8 KiB" in str(caught.value)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# envelope helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_make_request_includes_id_and_method():
    """``make_request`` builds the full envelope: jsonrpc, id, method, params."""
    expected = {"jsonrpc": "2.0", "id": 7, "method": "ping", "params": {"v": 1}}
    assert make_request(7, "ping", {"v": 1}) == expected
|
||||
|
||||
|
||||
def test_make_request_omits_params_when_none():
    """Passing ``None`` as params leaves the ``params`` key out of the envelope."""
    envelope = make_request(7, "ping", None)
    assert "params" not in envelope
|
||||
|
||||
|
||||
def test_make_notification_omits_id():
    """A notification envelope has a ``method`` but no ``id`` key."""
    envelope = make_notification("log", {"line": "hi"})
    assert "id" not in envelope
    assert envelope["method"] == "log"
|
||||
|
||||
|
||||
def test_make_response_carries_result():
    """A response envelope echoes the request id and carries the result."""
    envelope = make_response(7, {"ok": True})
    assert envelope["id"] == 7
    assert envelope["result"] == {"ok": True}
|
||||
|
||||
|
||||
def test_make_error_response_shape():
    """The ``error`` member carries the code, message and data that were passed in."""
    envelope = make_error_response(
        7, ERROR_CONTENT_MODIFIED, "stale", {"hint": "retry"}
    )
    error = envelope["error"]
    assert error["code"] == ERROR_CONTENT_MODIFIED
    assert error["message"] == "stale"
    assert error["data"] == {"hint": "retry"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# classify_message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_classify_message_request():
    """A message with both ``id`` and ``method`` classifies as ("request", id)."""
    verdict = classify_message({"jsonrpc": "2.0", "id": 1, "method": "x"})
    assert verdict == ("request", 1)
|
||||
|
||||
|
||||
def test_classify_message_response():
    """A message with ``id`` and ``result`` classifies as ("response", id)."""
    verdict = classify_message({"jsonrpc": "2.0", "id": 1, "result": None})
    assert verdict == ("response", 1)
|
||||
|
||||
|
||||
def test_classify_message_notification():
    """A message with ``method`` but no ``id`` classifies as ("notification", method)."""
    verdict = classify_message({"jsonrpc": "2.0", "method": "log"})
    assert verdict == ("notification", "log")
|
||||
|
||||
|
||||
def test_classify_message_invalid():
    """A missing ``jsonrpc`` key or a non-"2.0" version classifies as invalid."""
    missing_version = classify_message({"id": 1})
    assert missing_version[0] == "invalid"

    wrong_version = classify_message({"jsonrpc": "1.0", "method": "x"})
    assert wrong_version[0] == "invalid"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LSPRequestError
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_lsp_request_error_carries_code_and_data():
    """``LSPRequestError`` exposes its arguments as ``code``, ``message`` and ``data``."""
    error = LSPRequestError(ERROR_METHOD_NOT_FOUND, "no", {"x": 1})
    observed = (error.code, error.message, error.data)
    assert observed == (ERROR_METHOD_NOT_FOUND, "no", {"x": 1})
|
||||
94
tests/agent/lsp/test_reporter.py
Normal file
94
tests/agent/lsp/test_reporter.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
"""Tests for the diagnostic reporter (formatting layer)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from agent.lsp.reporter import (
|
||||
DEFAULT_SEVERITIES,
|
||||
MAX_PER_FILE,
|
||||
format_diagnostic,
|
||||
report_for_file,
|
||||
truncate,
|
||||
)
|
||||
|
||||
|
||||
def _diag(line=0, col=0, sev=1, code="E001", source="ls", msg="oops"):
|
||||
return {
|
||||
"range": {
|
||||
"start": {"line": line, "character": col},
|
||||
"end": {"line": line, "character": col + 1},
|
||||
},
|
||||
"severity": sev,
|
||||
"code": code,
|
||||
"source": source,
|
||||
"message": msg,
|
||||
}
|
||||
|
||||
|
||||
def test_format_diagnostic_uses_one_indexed_position():
    """Zero-based LSP positions are rendered one-based."""
    rendered = format_diagnostic(_diag(line=4, col=2))
    # line 4 / character 2 (0-based) must be displayed as 5:3
    assert "[5:3]" in rendered
|
||||
|
||||
|
||||
def test_format_diagnostic_includes_severity_label():
    """Each numeric severity is rendered with its label as the line prefix."""
    labels = {1: "ERROR", 2: "WARN", 3: "INFO", 4: "HINT"}
    for severity, label in labels.items():
        assert format_diagnostic(_diag(sev=severity)).startswith(label)
|
||||
|
||||
|
||||
def test_format_diagnostic_includes_code_and_source():
    """The code appears in brackets and the source in parentheses."""
    rendered = format_diagnostic(_diag(code="X42", source="src"))
    for fragment in ("[X42]", "(src)"):
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_format_diagnostic_omits_missing_optional_fields():
    """Without ``code``/``source`` no extra brackets or parentheses are emitted."""
    bare = {
        "range": {
            "start": {"line": 0, "character": 0},
            "end": {"line": 0, "character": 0},
        },
        "severity": 1,
        "message": "bare",
    }
    rendered = format_diagnostic(bare)
    # Everything after the first "]" is what follows the position marker.
    after_position = rendered.split("]", 1)[1]
    assert "[" not in after_position  # no extra brackets after the position
    assert "(" not in rendered
|
||||
|
||||
|
||||
def test_report_for_file_returns_empty_when_only_warnings():
    """The default severity filter is ERROR-only, so warnings yield no report."""
    warnings_only = [_diag(sev=2)]
    assert report_for_file("/x.py", warnings_only) == ""
|
||||
|
||||
|
||||
def test_report_for_file_emits_block_with_errors():
    """An error diagnostic is wrapped in a ``<diagnostics>`` block for the file."""
    report = report_for_file("/x.py", [_diag(msg="real error")])
    expected_fragments = (
        "<diagnostics file=\"/x.py\">",
        "real error",
        "</diagnostics>",
    )
    for fragment in expected_fragments:
        assert fragment in report
|
||||
|
||||
|
||||
def test_report_for_file_caps_at_max_per_file():
    """Diagnostics beyond ``MAX_PER_FILE`` are summarised as "and N more"."""
    overflow = 5
    diags = [_diag(line=n) for n in range(MAX_PER_FILE + overflow)]
    assert "and 5 more" in report_for_file("/x.py", diags)
|
||||
|
||||
|
||||
def test_report_for_file_respects_custom_severities():
    """Passing ``severities`` explicitly lets warnings into the report."""
    allowed = frozenset({1, 2})
    report = report_for_file(
        "/x.py", [_diag(sev=2, msg="warn")], severities=allowed
    )
    assert "warn" in report
|
||||
|
||||
|
||||
def test_truncate_below_limit_unchanged():
    """Text shorter than the limit is returned as-is."""
    text = "abc" * 100
    result = truncate(text, limit=4000)
    assert result == text
|
||||
|
||||
|
||||
def test_truncate_above_limit_appends_marker():
    """Over-long text ends with the marker and stays within the limit."""
    shortened = truncate("x" * 10000, limit=200)
    assert shortened.endswith("[truncated]")
    # The marker counts toward the limit.
    assert len(shortened) <= 200
|
||||
149
tests/agent/lsp/test_service.py
Normal file
149
tests/agent/lsp/test_service.py
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
"""Tests for the synchronous LSPService wrapper.
|
||||
|
||||
Drives the service through ``snapshot_baseline`` →
|
||||
``get_diagnostics_sync`` against the mock LSP server, exercising the
|
||||
delta filter that ``tools/file_operations._check_lint_delta`` relies
|
||||
on.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp.manager import LSPService
|
||||
from agent.lsp.servers import (
|
||||
SERVERS,
|
||||
ServerContext,
|
||||
ServerDef,
|
||||
SpawnSpec,
|
||||
find_server_for_file,
|
||||
)
|
||||
|
||||
|
||||
MOCK_SERVER = str(Path(__file__).parent / "_mock_lsp_server.py")
|
||||
|
||||
|
||||
def _install_mock_server(monkeypatch, script: str = "errors", server_id: str = "pyright"):
    """Replace one registered server with a wrapper that spawns the mock.

    We reuse ``pyright`` so .py files route to it.  This keeps the
    test free of any LSP toolchain dependency.

    This is a generator: the first ``next()`` installs the replacement into
    ``SERVERS``; resuming it again (or closing / garbage-collecting it)
    restores the original entry.

    Args:
        monkeypatch: Accepted for call-site compatibility; unused, because the
            ``SERVERS`` list element is patched and restored by hand.
        script: Value passed to the mock process as ``MOCK_LSP_SCRIPT``.
        server_id: ``server_id`` of the registered server to replace.

    Raises:
        LookupError: If no entry in ``SERVERS`` has ``server_id``.
    """
    # Use a default with next(): a bare StopIteration inside a generator would
    # surface as an opaque RuntimeError (PEP 479).
    target_index = next(
        (i for i, s in enumerate(SERVERS) if s.server_id == server_id),
        None,
    )
    if target_index is None:
        raise LookupError(f"no registered LSP server with id {server_id!r}")
    original = SERVERS[target_index]

    def _spawn(root: str, ctx: ServerContext) -> SpawnSpec:
        # Launch the mock script with the current interpreter.
        env = {"MOCK_LSP_SCRIPT": script}
        return SpawnSpec(
            command=[sys.executable, MOCK_SERVER],
            workspace_root=root,
            cwd=root,
            env=env,
            initialization_options={},
        )

    replacement = ServerDef(
        server_id=server_id,
        extensions=original.extensions,
        resolve_root=lambda fp, ws: ws,  # always use workspace root
        build_spawn=_spawn,
        seed_first_push=False,
        description="mock " + server_id,
    )
    # Patch the SERVERS list element directly + restore on teardown.
    SERVERS[target_index] = replacement
    try:
        yield
    finally:
        # Restore even if the generator is closed early, so a failing setup
        # cannot leak the mock into other tests.
        SERVERS[target_index] = original
|
||||
|
||||
|
||||
@pytest.fixture
def mock_pyright(monkeypatch, tmp_path):
    """Install the mock as ``pyright`` and create a fake git workspace.

    Yields the repository directory; the original server entry is restored
    on teardown.
    """
    repo = tmp_path / "repo"
    (repo / ".git").mkdir(parents=True)  # creates the repo dir as well
    (repo / "pyproject.toml").write_text("")  # so pyright's root resolver finds it
    monkeypatch.chdir(str(repo))

    installer = _install_mock_server(monkeypatch, "errors", "pyright")
    next(installer)  # run the install half
    yield repo
    # Resume the generator to run its restore half; it is expected to finish.
    next(installer, None)
|
||||
|
||||
|
||||
def test_service_returns_empty_when_disabled(tmp_path):
    """A disabled service is inactive and reports no diagnostics."""
    source_file = tmp_path / "x.py"
    source_file.write_text("")

    service = LSPService(
        enabled=False,
        wait_mode="document",
        wait_timeout=2.0,
        install_strategy="auto",
    )
    assert not service.is_active()
    assert service.get_diagnostics_sync(str(source_file)) == []
    service.shutdown()
|
||||
|
||||
|
||||
def test_service_skips_files_outside_workspace(tmp_path):
    """Files outside any git worktree must not trigger LSP."""
    loose_file = tmp_path / "x.py"
    loose_file.write_text("")

    service = LSPService(
        enabled=True,
        wait_mode="document",
        wait_timeout=2.0,
        install_strategy="manual",
    )
    # No .git anywhere — service should report not enabled for this file.
    assert not service.enabled_for(str(loose_file))
    service.shutdown()
|
||||
|
||||
|
||||
def test_service_e2e_delta_filter(mock_pyright):
    """End-to-end: snapshot baseline → wait → delta returned."""
    target = mock_pyright / "x.py"
    target.write_text("print('hi')\n")
    path = str(target)

    service = LSPService(
        enabled=True,
        wait_mode="document",
        wait_timeout=3.0,
        install_strategy="manual",
    )
    try:
        assert service.enabled_for(path)
        # Baseline first — server pushes 1 error.
        service.snapshot_baseline(path)
        # Re-poll: same error is in baseline, so delta is empty.
        delta = service.get_diagnostics_sync(path)
        assert delta == []
    finally:
        service.shutdown()
|
||||
|
||||
|
||||
def test_service_status_includes_clients(mock_pyright):
    """After a diagnostics request the status lists the pyright client."""
    target = mock_pyright / "x.py"
    target.write_text("")

    service = LSPService(
        enabled=True,
        wait_mode="document",
        wait_timeout=3.0,
        install_strategy="manual",
    )
    try:
        # Requesting diagnostics first so a client exists to be reported.
        service.get_diagnostics_sync(str(target))
        status = service.get_status()
        assert status["enabled"] is True
        assert any(
            client["server_id"] == "pyright" for client in status["clients"]
        )
    finally:
        service.shutdown()
|
||||
139
tests/agent/lsp/test_workspace.py
Normal file
139
tests/agent/lsp/test_workspace.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
"""Tests for workspace + project-root resolution."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.lsp.workspace import (
|
||||
clear_cache,
|
||||
find_git_worktree,
|
||||
is_inside_workspace,
|
||||
nearest_root,
|
||||
normalize_path,
|
||||
resolve_workspace_for_file,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear():
    """Call ``clear_cache`` before and after every test in this module."""
    # Before: presumably drops state cached by earlier tests.
    clear_cache()
    yield
    # After: leave nothing behind for whichever test runs next.
    clear_cache()
|
||||
|
||||
|
||||
def test_find_git_worktree_returns_none_outside_repo(tmp_path: Path):
    """A directory with no ``.git`` above it has no worktree."""
    plain_dir = tmp_path / "sub"
    plain_dir.mkdir()
    result = find_git_worktree(str(plain_dir))
    assert result is None
|
||||
|
||||
|
||||
def test_find_git_worktree_finds_dotgit(tmp_path: Path):
    """The worktree is found from a nested directory below the ``.git`` dir."""
    repo = tmp_path / "repo"
    (repo / ".git").mkdir(parents=True)  # creates the repo dir as well
    nested = repo / "src" / "deep"
    nested.mkdir(parents=True)
    assert find_git_worktree(str(nested)) == str(repo)
|
||||
|
||||
|
||||
def test_find_git_worktree_handles_dotgit_file(tmp_path: Path):
    """``.git`` can also be a file (gitfile pointing into a worktree)."""
    repo = tmp_path / "repo"
    repo.mkdir()
    gitfile = repo / ".git"
    gitfile.write_text("gitdir: /elsewhere\n")
    repo_str = str(repo)
    assert find_git_worktree(repo_str) == repo_str
|
||||
|
||||
|
||||
def test_is_inside_workspace_true_for_subpath(tmp_path: Path):
    """A file nested below the root counts as inside the workspace."""
    root = tmp_path / "p"
    nested_dir = root / "x"
    nested_dir.mkdir(parents=True)  # creates root too
    target = nested_dir / "y.py"
    target.write_text("")
    assert is_inside_workspace(str(target), str(root))
|
||||
|
||||
|
||||
def test_is_inside_workspace_false_for_unrelated(tmp_path: Path):
    """A file in a sibling directory is not inside the workspace."""
    workspace = tmp_path / "a"
    elsewhere = tmp_path / "b"
    for directory in (workspace, elsewhere):
        directory.mkdir()
    outsider = elsewhere / "x.py"
    outsider.write_text("")
    assert not is_inside_workspace(str(outsider), str(workspace))
|
||||
|
||||
|
||||
def test_nearest_root_finds_first_marker(tmp_path: Path):
    """The directory holding the marker file is returned as the root."""
    root = tmp_path / "p"
    package_dir = root / "src" / "pkg"
    package_dir.mkdir(parents=True)
    (root / "pyproject.toml").write_text("")

    module_path = str(package_dir / "mod.py")
    assert nearest_root(module_path, ["pyproject.toml"]) == str(root)
|
||||
|
||||
|
||||
def test_nearest_root_excludes_take_priority(tmp_path: Path):
    """If an exclude marker matches first, return None."""
    root = tmp_path / "p"
    deno_dir = root / "deno-app"
    deno_dir.mkdir(parents=True)
    (deno_dir / "deno.json").write_text("{}")
    (root / "package.json").write_text("{}")  # would match if not for exclude

    result = nearest_root(
        str(deno_dir / "main.ts"),
        ["package.json"],
        excludes=["deno.json"],
    )
    assert result is None
|
||||
|
||||
|
||||
def test_nearest_root_returns_none_when_no_marker(tmp_path: Path):
    """With no marker file in reach there is no root."""
    target = tmp_path / "x.py"
    target.write_text("")
    result = nearest_root(str(target), ["pyproject.toml"])
    assert result is None
|
||||
|
||||
|
||||
def test_resolve_workspace_for_file_uses_cwd_first(tmp_path: Path, monkeypatch):
    """With cwd inside the repo, the repo is the resolved, gated workspace."""
    repo = tmp_path / "repo"
    (repo / ".git").mkdir(parents=True)
    target = repo / "x.py"
    target.write_text("")

    # cwd is inside the repo
    monkeypatch.chdir(str(repo))

    workspace_root, is_gated = resolve_workspace_for_file(str(target))
    assert workspace_root == str(repo)
    assert is_gated is True
|
||||
|
||||
|
||||
def test_resolve_workspace_for_file_no_repo_returns_none(tmp_path: Path, monkeypatch):
    """Neither cwd nor the file is in a repo: no workspace, not gated."""
    monkeypatch.chdir(str(tmp_path))
    target = tmp_path / "x.py"
    target.write_text("")

    workspace_root, is_gated = resolve_workspace_for_file(str(target))
    assert workspace_root is None
    assert is_gated is False
|
||||
|
||||
|
||||
def test_resolve_workspace_falls_back_to_file_location(tmp_path: Path, monkeypatch):
    """When cwd isn't a git repo but the file is inside one, we still
    discover the workspace from the file's path."""
    # cwd: a plain directory with no .git
    loose_dir = tmp_path / "loose"
    loose_dir.mkdir()
    monkeypatch.chdir(str(loose_dir))

    # file: lives in a separate directory that does have .git
    repo = tmp_path / "actual-repo"
    (repo / ".git").mkdir(parents=True)
    target = repo / "x.py"
    target.write_text("")

    workspace_root, is_gated = resolve_workspace_for_file(str(target))
    assert workspace_root == str(repo)
    assert is_gated is True
|
||||
|
||||
|
||||
def test_normalize_path_expands_tilde(monkeypatch):
    """``~`` in a path is expanded to the user's home directory.

    ``os.path.expanduser`` reads ``HOME`` on POSIX but ``USERPROFILE`` on
    Windows (Python 3.8+ ignores ``HOME`` there).  Both variables are pinned
    so the expectation does not depend on the host platform.  This assumes
    ``normalize_path`` expands ``~`` via the standard library — TODO confirm.
    """
    for home_var in ("HOME", "USERPROFILE"):
        monkeypatch.setenv(home_var, "/home/user")
    p = normalize_path("~/x.py")
    assert p == os.path.abspath("/home/user/x.py")
|
||||
|
|
@ -660,6 +660,7 @@ class TestAuxiliaryPoolAwareness:
|
|||
with (
|
||||
patch("agent.auxiliary_client.load_pool", return_value=_Pool()),
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai,
|
||||
patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
|
||||
):
|
||||
from agent.auxiliary_client import _try_nous
|
||||
|
||||
|
|
|
|||
|
|
@ -473,6 +473,240 @@ class TestCodexOAuthContextLength:
|
|||
assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Nous Portal context-window resolution (provider="nous")
|
||||
# =========================================================================
|
||||
|
||||
class TestNousPortalContextResolution:
    """Nous Portal /v1/models is authoritative for what Nous infra enforces
    and may diverge from the OpenRouter catalog.

    Invariants this class pins down:
      1. Portal value wins over the OR fallback.
      2. Portal-derived values are persisted to disk.
      3. OR-fallback values are NEVER persisted — otherwise a single portal
         blip would freeze the wrong value in via step-1 cache short-circuit.
      4. Pre-fix persistent-cache entries (seeded from the OR catalog) are
         bypassed at step 1 and overwritten once the portal responds.
      5. Pre-fix persistent-cache entries SURVIVE on disk when the portal
         is unreachable — no opportunistic invalidation that loses the only
         value we have.

    Mock argument order: ``patch`` decorators apply bottom-up, so in every
    test ``mock_or`` is the patched ``fetch_model_metadata`` (used here as the
    OpenRouter catalog) and ``mock_portal`` is the patched
    ``fetch_endpoint_model_metadata`` (used here as the portal listing).
    """

    def setup_method(self):
        """Empty the in-memory endpoint metadata caches before each test."""
        import agent.model_metadata as mm
        mm._endpoint_model_metadata_cache.clear()
        mm._endpoint_model_metadata_cache_time.clear()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_wins_over_openrouter_catalog(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """The motivating case: OR catalog says 1M for qwen3.6-plus, but
        the Nous portal correctly enforces 262144.  Portal must win."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url="https://inference-api.nousresearch.com/v1",
            api_key="fake-token",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Portal must override OR catalog; got {ctx} (OR leak?)"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_portal_value_is_persisted_to_disk(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Portal-derived value should land in the persistent cache so
        cross-process callers (e.g. child agents) see the same value."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144
        persisted = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert persisted.get(f"qwen3.6-plus@{base_url}") == 262_144, (
            "Portal-derived value should be persisted to disk"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_openrouter_fallback_is_not_persisted(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal can't resolve a model (network blip, auth glitch,
        model not yet listed) we fall back to the OR catalog so the agent
        keeps working — but we must NOT write the OR value to disk.  Once
        cached on disk, step-1 short-circuits forever and the user is stuck
        with the wrong number until they manually clear the cache."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        mock_portal.return_value = {}  # portal unreachable / model unknown
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        base_url = "https://inference-api.nousresearch.com/v1"
        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 1_000_000, "OR fallback should still serve the request"
        # Either no cache file was written, or it holds no context lengths.
        assert not cache_file.exists() or not yaml.safe_load(
            cache_file.read_text()
        ).get("context_lengths", {}), (
            "OR-fallback values must NOT be persisted — a single portal blip "
            "would otherwise freeze the wrong value in via step-1 cache hit"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_is_bypassed_and_overwritten_by_portal(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Users upgrading from pre-fix builds have ``qwen3.6-plus@…nous… =
        1000000`` (OR-derived) sitting in their cache file.  Step 1 must
        NOT short-circuit on that entry — step 5b reconciles against the
        portal and overwrites the persistent value with 262144."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_key = f"qwen3.6-plus@{base_url}"
        other_key = "other-model@https://api.openai.com/v1"
        cache_file.write_text(yaml.dump({"context_lengths": {
            stale_key: 1_000_000,  # pre-fix OR-derived value
            other_key: 128_000,    # unrelated, must survive
        }}))

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        ctx = mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )
        assert ctx == 262_144, (
            f"Stale OR-derived cache entry should not have leaked through; got {ctx}"
        )

        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert remaining.get(stale_key) == 262_144, (
            "Portal value should have overwritten the stale entry on disk"
        )
        assert remaining.get(other_key) == 128_000, (
            "Unrelated cache entries must not be touched"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_stale_cache_survives_when_portal_unreachable(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """When the portal is unreachable AND we have a (potentially stale)
        on-disk cache entry, the entry must survive untouched — we don't
        want a transient outage to delete the only value we have.  The
        request itself still gets served via OR fallback for this call."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        existing_key = f"qwen3.6-plus@{base_url}"
        cache_file.write_text(yaml.dump({"context_lengths": {
            existing_key: 1_000_000,
        }}))

        mock_portal.return_value = {}  # portal unreachable
        mock_or.return_value = {
            "qwen/qwen3.6-plus": {"context_length": 1_000_000},
        }

        mm.get_model_context_length(
            model="qwen3.6-plus",
            base_url=base_url,
            api_key="fake",
            provider="nous",
        )

        remaining = yaml.safe_load(cache_file.read_text()).get("context_lengths", {})
        assert remaining.get(existing_key) == 1_000_000, (
            "Persistent cache entry must survive a transient portal outage"
        )

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_bypass_keyed_on_url_not_provider_string(
        self, mock_or, mock_portal, tmp_path, monkeypatch
    ):
        """Some call sites pass ``provider=""`` or ``provider="openrouter"``
        when the user is really on Nous Portal (e.g. cred-pool fallback).
        The Nous-URL bypass must trigger off the URL host, not the provider
        string, so the portal-first resolver still runs in that case."""
        import agent.model_metadata as mm
        cache_file = tmp_path / "context_length_cache.yaml"
        monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file)

        base_url = "https://inference-api.nousresearch.com/v1"
        stale_cache = yaml.dump({"context_lengths": {
            f"qwen3.6-plus@{base_url}": 1_000_000,  # stale
        }})

        mock_portal.return_value = {
            "qwen3.6-plus": {"context_length": 262_144},
        }
        mock_or.return_value = {}

        for provider_arg in ("", "openrouter", "custom"):
            # Re-seed the stale entry on every iteration.  A successful
            # portal lookup overwrites it with 262144, so without re-seeding
            # only the first provider value would exercise the bypass; later
            # ones would pass on a plain disk-cache hit.
            cache_file.write_text(stale_cache)
            mm._endpoint_model_metadata_cache.clear()
            mm._endpoint_model_metadata_cache_time.clear()
            ctx = mm.get_model_context_length(
                model="qwen3.6-plus",
                base_url=base_url,
                api_key="fake",
                provider=provider_arg,
            )
            assert ctx == 262_144, (
                f"URL-based Nous detection must fire for provider={provider_arg!r}; "
                f"got {ctx}"
            )
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# get_model_context_length — resolution order
|
||||
# =========================================================================
|
||||
|
|
|
|||
|
|
@ -190,3 +190,37 @@ def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
|
|||
|
||||
assert float(entry.input_cost_per_million) == 0.5
|
||||
assert float(entry.output_cost_per_million) == 2.0
|
||||
|
||||
|
||||
def test_deepseek_v4_pro_pricing_entry_exists():
    """Regression test: deepseek-v4-pro must have a pricing entry.

    Before this fix, deepseek-v4-pro sessions showed as unknown cost
    in hermes insights because the _OFFICIAL_DOCS_PRICING table had no
    entry for that model.  See #24218.
    """
    entry = get_pricing_entry("deepseek-v4-pro", provider="deepseek")

    assert entry is not None
    assert entry.input_cost_per_million is not None
    assert entry.output_cost_per_million is not None

    # Expected per-million rates, checked in the same order as before.
    expected_rates = {
        "input_cost_per_million": 1.74,
        "output_cost_per_million": 3.48,
        "cache_read_cost_per_million": 0.0145,
    }
    for attribute, rate in expected_rates.items():
        assert float(getattr(entry, attribute)) == rate
|
||||
|
||||
|
||||
def test_deepseek_v4_pro_estimate_usage_cost():
    """Ensure deepseek-v4-pro sessions get a dollar estimate, not unknown."""
    usage = CanonicalUsage(input_tokens=1000000, output_tokens=500000)
    estimate = estimate_usage_cost("deepseek-v4-pro", usage, provider="deepseek")

    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None
    # 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
    assert float(estimate.amount_usd) == 3.48
|
||||
|
|
|
|||
43
tests/cli/test_cli_insights_command.py
Normal file
43
tests/cli/test_cli_insights_command.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from cli import HermesCLI
|
||||
|
||||
|
||||
class _InsightsEngineStub:
|
||||
calls = []
|
||||
|
||||
def __init__(self, db):
|
||||
self.db = db
|
||||
|
||||
def generate(self, *, days=30, source=None):
|
||||
self.calls.append({"days": days, "source": source})
|
||||
return {"days": days, "source": source}
|
||||
|
||||
def format_terminal(self, report):
|
||||
return f"days={report['days']} source={report['source']}"
|
||||
|
||||
|
||||
def _run_show_insights(command: str):
    """Run ``HermesCLI._show_insights`` with the DB and engine stubbed out.

    Returns the recorded ``generate`` calls and the mock DB handle.
    """
    _InsightsEngineStub.calls = []  # start every run with an empty call log
    db = MagicMock()
    # Build the CLI object without running __init__.
    cli_obj = HermesCLI.__new__(HermesCLI)
    with patch("hermes_state.SessionDB", return_value=db):
        with patch("agent.insights.InsightsEngine", _InsightsEngineStub):
            cli_obj._show_insights(command)
    return _InsightsEngineStub.calls, db
|
||||
|
||||
|
||||
def test_cli_insights_accepts_positional_days(capsys):
    """``/insights 7`` treats the bare number as the day window."""
    recorded, db = _run_show_insights("/insights 7")
    printed = capsys.readouterr().out

    assert recorded == [{"days": 7, "source": None}]
    db.close.assert_called_once()
    assert "days=7 source=None" in printed
|
||||
|
||||
|
||||
def test_cli_insights_keeps_days_flag_and_source(capsys):
    """The explicit ``--days`` and ``--source`` flags are passed through."""
    recorded, db = _run_show_insights("/insights --days 14 --source discord")
    printed = capsys.readouterr().out

    assert recorded == [{"days": 14, "source": "discord"}]
    db.close.assert_called_once()
    assert "days=14 source=discord" in printed
|
||||
|
|
@ -222,6 +222,9 @@ def make_runner(platform: Platform, session_entry: SessionEntry = None) -> "Gate
|
|||
runner._capture_gateway_honcho_if_configured = lambda *a, **kw: None
|
||||
runner._emit_gateway_run_progress = AsyncMock()
|
||||
|
||||
# Disable destructive slash confirm gate so /new executes immediately
|
||||
runner._read_user_config = lambda: {"approvals": {"destructive_slash_confirm": False}}
|
||||
|
||||
runner.pairing_store = MagicMock()
|
||||
runner.pairing_store._is_rate_limited = MagicMock(return_value=False)
|
||||
runner.pairing_store.generate_code = MagicMock(return_value="ABC123")
|
||||
|
|
|
|||
|
|
@ -681,6 +681,56 @@ class TestChatCompletionsEndpoint:
|
|||
assert "[DONE]" in body
|
||||
assert "Hello!" in body
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_task_done_callback_enqueues_eos_for_chat_completions(self, adapter):
    """Regression guard for #24451: completion callback must signal SSE EOS."""

    class _FakeTask:
        """Task double that only records registered done-callbacks."""

        def __init__(self):
            self.callbacks = []

        def add_done_callback(self, cb):
            self.callbacks.append(cb)

    fake_task = _FakeTask()

    def _fake_ensure_future(coro):
        # We short-circuit task scheduling in this unit test.
        coro.close()
        return fake_task

    agent_result = (
        {"final_response": "ok", "messages": [], "api_calls": 1},
        {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
    )
    request_body = {
        "model": "test",
        "messages": [{"role": "user", "content": "hi"}],
        "stream": True,
    }

    app = _create_app(adapter)
    async with TestClient(TestServer(app)) as cli:
        with (
            patch.object(adapter, "_run_agent", new=AsyncMock(return_value=agent_result)),
            patch("gateway.platforms.api_server.asyncio.ensure_future", side_effect=_fake_ensure_future),
            patch.object(adapter, "_write_sse_chat_completion", new_callable=AsyncMock) as mock_write_sse,
        ):
            mock_write_sse.return_value = web.Response(status=200, text="ok")
            resp = await cli.post("/v1/chat/completions", json=request_body)
            assert resp.status == 200

        # Exactly one done-callback must have been registered on the task.
        assert len(fake_task.callbacks) == 1
        # The stream queue is the fifth positional argument to the SSE writer.
        stream_q = mock_write_sse.call_args.args[4]
        assert stream_q.empty()
        # Firing the callback must enqueue the end-of-stream sentinel (None).
        done_callback = fake_task.callbacks[0]
        done_callback(fake_task)
        assert stream_q.get_nowait() is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_sends_keepalive_during_quiet_tool_gap(self, adapter):
|
||||
"""Idle SSE streams should send keepalive comments while tools run silently."""
|
||||
|
|
@ -1676,6 +1726,52 @@ class TestResponsesStreaming:
|
|||
assert "Hello" in body
|
||||
assert " world" in body
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_task_done_callback_enqueues_eos_for_responses(self, adapter):
    """Regression guard for #24451 on /v1/responses streaming path."""

    class _FakeTask:
        """Task double that only records registered done-callbacks."""

        def __init__(self):
            self.callbacks = []

        def add_done_callback(self, cb):
            self.callbacks.append(cb)

    fake_task = _FakeTask()

    def _fake_ensure_future(coro):
        # We short-circuit task scheduling in this unit test.
        coro.close()
        return fake_task

    agent_result = (
        {"final_response": "ok", "messages": [], "api_calls": 1},
        {"input_tokens": 1, "output_tokens": 1, "total_tokens": 2},
    )
    request_body = {"model": "hermes-agent", "input": "hi", "stream": True}

    app = _create_app(adapter)
    async with TestClient(TestServer(app)) as cli:
        with (
            patch.object(adapter, "_run_agent", new=AsyncMock(return_value=agent_result)),
            patch("gateway.platforms.api_server.asyncio.ensure_future", side_effect=_fake_ensure_future),
            patch.object(adapter, "_write_sse_responses", new_callable=AsyncMock) as mock_write_sse,
        ):
            mock_write_sse.return_value = web.Response(status=200, text="ok")
            resp = await cli.post("/v1/responses", json=request_body)
            assert resp.status == 200

        # Exactly one done-callback must have been registered on the task.
        assert len(fake_task.callbacks) == 1
        # On this path the stream queue reaches the SSE writer as a keyword.
        stream_q = mock_write_sse.call_args.kwargs["stream_q"]
        assert stream_q.empty()
        # Firing the callback must enqueue the end-of-stream sentinel (None).
        done_callback = fake_task.callbacks[0]
        done_callback(fake_task)
        assert stream_q.get_nowait() is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_emits_function_call_and_output_items(self, adapter):
|
||||
app = _create_app(adapter)
|
||||
|
|
@ -3061,4 +3157,3 @@ class TestSessionKeyHeader:
|
|||
assert resp.status == 200
|
||||
data = await resp.json()
|
||||
assert data["features"]["session_key_header"] == "X-Hermes-Session-Key"
|
||||
|
||||
|
|
|
|||
|
|
@ -176,8 +176,8 @@ class TestStreamingConfig:
|
|||
"fresh_final_after_seconds": "oops",
|
||||
}
|
||||
)
|
||||
assert restored.edit_interval == 1.0
|
||||
assert restored.buffer_threshold == 40
|
||||
assert restored.edit_interval == 0.8
|
||||
assert restored.buffer_threshold == 24
|
||||
assert restored.fresh_final_after_seconds == 60.0
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -444,6 +444,93 @@ class TestScopedLocks:
|
|||
assert acquired is False
|
||||
assert existing["pid"] == 99999
|
||||
|
||||
def test_acquire_scoped_lock_replaces_pid_reused_by_unrelated_process(self, tmp_path, monkeypatch):
    """A lock whose PID now belongs to an unrelated program is stale.

    macOS regression scenario: neither the lock record nor the live
    process exposes a start time (both ``None``), the PID is reported as
    alive, and the live command line is an unrelated binary. The lock
    must be taken over by the current process.
    """
    locks_dir = tmp_path / "locks"
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(locks_dir))
    locks_dir.mkdir(parents=True, exist_ok=True)
    lock_path = locks_dir / "telegram-bot-token-2bb80d537b1da3e3.lock"

    stale_record = {
        "pid": 873,
        "start_time": None,
        "kind": "hermes-gateway",
        "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
    }
    lock_path.write_text(json.dumps(stale_record))

    # The liveness probe goes through ``status._pid_exists`` (not
    # ``os.kill``), so that is the hook stubbed here.  The cmdline stub
    # stands in for ``ps`` reporting the unrelated program that now owns
    # the PID, which is what marks the PID as reused.
    process_stubs = {
        "_pid_exists": lambda pid: True,
        "_get_process_start_time": lambda pid: None,
        "_looks_like_gateway_process": lambda pid: False,
        "_read_process_cmdline": lambda pid: "/usr/libexec/bluetoothuserd",
    }
    for helper_name, stub in process_stubs.items():
        monkeypatch.setattr(status, helper_name, stub)

    acquired, _previous = status.acquire_scoped_lock(
        "telegram-bot-token",
        "secret",
        metadata={"platform": "telegram"},
    )

    assert acquired is True
    written = json.loads(lock_path.read_text())
    assert written["pid"] == os.getpid()
    assert written["metadata"]["platform"] == "telegram"
|
||||
|
||||
def test_acquire_scoped_lock_keeps_lock_when_cmdline_unreadable_but_record_is_gateway(self, tmp_path, monkeypatch):
    """An unreadable live command line must not invalidate a gateway lock.

    Windows regression scenario: ``ps`` is unavailable, so the cmdline
    reader yields ``None`` and the gateway-process check is ``False`` for
    every PID.  With start times ``None`` on both sides, the stale check
    has to fall back to the ``argv`` stored in the lock record itself and
    keep the lock.
    """
    locks_dir = tmp_path / "locks"
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(locks_dir))
    locks_dir.mkdir(parents=True, exist_ok=True)
    lock_path = locks_dir / "telegram-bot-token-2bb80d537b1da3e3.lock"

    gateway_record = {
        "pid": 99999,
        "start_time": None,
        "kind": "hermes-gateway",
        "argv": ["hermes_cli/main.py", "gateway", "run"],
    }
    lock_path.write_text(json.dumps(gateway_record))

    # ``_read_process_cmdline`` -> None and ``_looks_like_gateway_process``
    # -> False model a platform where the process cannot be inspected.
    process_stubs = {
        "_pid_exists": lambda pid: True,
        "_get_process_start_time": lambda pid: None,
        "_looks_like_gateway_process": lambda pid: False,
        "_read_process_cmdline": lambda pid: None,
    }
    for helper_name, stub in process_stubs.items():
        monkeypatch.setattr(status, helper_name, stub)

    acquired, existing = status.acquire_scoped_lock(
        "telegram-bot-token",
        "secret",
        metadata={"platform": "telegram"},
    )

    assert acquired is False
    assert existing["pid"] == 99999
|
||||
|
||||
def test_acquire_scoped_lock_keeps_lock_when_pid_reused_by_gateway(self, tmp_path, monkeypatch):
    """With ``start_time`` of ``None``, a live PID that looks like a gateway keeps the lock."""
    locks_dir = tmp_path / "locks"
    monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(locks_dir))
    locks_dir.mkdir(parents=True, exist_ok=True)
    lock_path = locks_dir / "telegram-bot-token-2bb80d537b1da3e3.lock"

    gateway_record = {
        "pid": 99999,
        "start_time": None,
        "kind": "hermes-gateway",
        "argv": ["/Users/user/.hermes/hermes-agent/hermes_cli/main.py", "gateway", "run", "--replace"],
    }
    lock_path.write_text(json.dumps(gateway_record))

    # The PID is alive, has no readable start time, and is recognised as
    # a gateway process.
    process_stubs = {
        "_pid_exists": lambda pid: True,
        "_get_process_start_time": lambda pid: None,
        "_looks_like_gateway_process": lambda pid: True,
    }
    for helper_name, stub in process_stubs.items():
        monkeypatch.setattr(status, helper_name, stub)

    acquired, existing = status.acquire_scoped_lock(
        "telegram-bot-token",
        "secret",
        metadata={"platform": "telegram"},
    )

    assert acquired is False
    assert existing["pid"] == 99999
|
||||
|
||||
def test_acquire_scoped_lock_replaces_stale_record(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
|
||||
lock_path = tmp_path / "locks" / "telegram-bot-token-2bb80d537b1da3e3.lock"
|
||||
|
|
@ -811,3 +898,46 @@ class TestPlannedStopMarker:
|
|||
ok = status.write_planned_stop_marker(target_pid=12345)
|
||||
|
||||
assert ok is False
|
||||
|
||||
|
||||
class TestReadProcessCmdlinePsFallback:
    """Tests for _read_process_cmdline falling back to ps on non-Linux.

    Each test replaces ``status.Path.read_bytes`` (the /proc read) and,
    where relevant, ``status.subprocess.run`` (the ``ps`` call) with
    stubs, then checks what ``status._read_process_cmdline`` returns.
    """

    @staticmethod
    def _raise_file_not_found(_path):
        """Stand-in for ``Path.read_bytes`` that always raises ``FileNotFoundError``."""
        raise FileNotFoundError

    @staticmethod
    def _ps_stub(returncode, stdout):
        """Build a ``subprocess.run`` replacement returning a fixed result object."""

        def _run(args, **kwargs):
            return SimpleNamespace(returncode=returncode, stdout=stdout)

        return _run

    def test_ps_fallback_when_proc_unavailable(self, monkeypatch):
        """When the /proc read fails, the stripped ``ps`` output is returned."""
        monkeypatch.setattr(status.Path, "read_bytes", self._raise_file_not_found)
        monkeypatch.setattr(
            status.subprocess,
            "run",
            self._ps_stub(0, "/usr/libexec/bluetoothuserd\n"),
        )

        assert status._read_process_cmdline(873) == "/usr/libexec/bluetoothuserd"

    def test_ps_fallback_returns_none_on_failure(self, monkeypatch):
        """A non-zero ``ps`` exit with empty output yields ``None``."""
        monkeypatch.setattr(status.Path, "read_bytes", self._raise_file_not_found)
        monkeypatch.setattr(status.subprocess, "run", self._ps_stub(1, ""))

        assert status._read_process_cmdline(99999) is None

    def test_proc_cmdline_takes_priority_over_ps(self, monkeypatch):
        """A successful /proc read is used and is read exactly once."""
        seen = []

        def _read_from_proc(_path):
            seen.append("proc")
            return b"python\x00hermes_cli/main.py\x00gateway\x00"

        monkeypatch.setattr(status.Path, "read_bytes", _read_from_proc)

        cmdline = status._read_process_cmdline(12345)

        assert "hermes_cli/main.py" in cmdline
        assert seen == ["proc"]

    def test_ps_fallback_used_when_proc_returns_empty(self, monkeypatch):
        """Empty /proc content falls through to the ``ps`` output."""

        def _empty_proc(_path):
            return b""

        monkeypatch.setattr(status.Path, "read_bytes", _empty_proc)
        monkeypatch.setattr(
            status.subprocess,
            "run",
            self._ps_stub(0, "python hermes_cli/main.py gateway run\n"),
        )

        cmdline = status._read_process_cmdline(12345)

        assert "hermes_cli/main.py" in cmdline
|
||||
|
|
|
|||
451
tests/gateway/test_telegram_clarify_buttons.py
Normal file
451
tests/gateway/test_telegram_clarify_buttons.py
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
"""Tests for Telegram inline keyboard clarify buttons.
|
||||
|
||||
Mirrors test_telegram_approval_buttons.py for the new ``send_clarify`` and
|
||||
``cl:`` callback dispatch added in feat/clarify-gateway-buttons.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ensure the repo root is importable
|
||||
# ---------------------------------------------------------------------------
|
||||
_repo = str(Path(__file__).resolve().parents[2])
|
||||
if _repo not in sys.path:
|
||||
sys.path.insert(0, _repo)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Minimal Telegram mock so TelegramAdapter can be imported (mirrors
|
||||
# test_telegram_approval_buttons.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
def _ensure_telegram_mock():
    """Register a ``MagicMock`` stand-in for the ``telegram`` package.

    Does nothing when ``sys.modules["telegram"]`` is already an object with
    a ``__file__`` attribute (i.e. something that looks like a real,
    file-backed module).  Otherwise a single mock is registered under
    ``telegram``, ``telegram.ext``, ``telegram.constants`` and
    ``telegram.request``, and its ``error`` attribute under
    ``telegram.error``.  ``setdefault`` is used, so entries that already
    exist in ``sys.modules`` are left untouched.
    """
    loaded = sys.modules.get("telegram")
    if loaded is not None and hasattr(loaded, "__file__"):
        return

    mod = MagicMock()
    mod.ext.ContextTypes.DEFAULT_TYPE = type(None)

    # Plain-string constants that would otherwise be auto-generated mocks.
    parse_modes = {"MARKDOWN": "Markdown", "MARKDOWN_V2": "MarkdownV2", "HTML": "HTML"}
    for attr, value in parse_modes.items():
        setattr(mod.constants.ParseMode, attr, value)

    chat_types = {
        "PRIVATE": "private",
        "GROUP": "group",
        "SUPERGROUP": "supergroup",
        "CHANNEL": "channel",
    }
    for attr, value in chat_types.items():
        setattr(mod.constants.ChatType, attr, value)

    # Real exception classes so ``except``/``raise`` work against them.
    mod.error.NetworkError = type("NetworkError", (OSError,), {})
    mod.error.TimedOut = type("TimedOut", (OSError,), {})
    mod.error.BadRequest = type("BadRequest", (Exception,), {})

    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
        sys.modules.setdefault(name, mod)
    sys.modules.setdefault("telegram.error", mod.error)
|
||||
|
||||
|
||||
_ensure_telegram_mock()
|
||||
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
|
||||
|
||||
def _make_adapter(extra=None):
    """Return a ``TelegramAdapter`` whose ``_bot`` and ``_app`` are mocks.

    ``extra`` is forwarded as the platform config's ``extra`` value; any
    falsy value is replaced by a fresh empty dict.
    """
    extra_settings = extra if extra else {}
    adapter = TelegramAdapter(
        PlatformConfig(enabled=True, token="test-token", extra=extra_settings)
    )
    adapter._app = MagicMock()
    adapter._bot = AsyncMock()
    return adapter
|
||||
|
||||
|
||||
def _clear_clarify_state():
    """Empty the module-level registries of ``tools.clarify_gateway``.

    The three containers are cleared while holding the module's ``_lock``.
    """
    from tools import clarify_gateway as cm

    with cm._lock:
        for registry in (cm._entries, cm._session_index, cm._notify_cbs):
            registry.clear()
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# send_clarify — render
|
||||
# ===========================================================================
|
||||
|
||||
class TestTelegramSendClarify:
    """Verify the rendered prompt has buttons or none, and stores state."""

    def setup_method(self):
        # Start every test from an empty clarify registry.
        _clear_clarify_state()

    @staticmethod
    def _adapter_replying_with(message_id):
        """Return an adapter whose ``send_message`` yields a message with *message_id*."""
        adapter = _make_adapter()
        adapter._bot.send_message = AsyncMock(
            return_value=MagicMock(message_id=message_id)
        )
        return adapter

    @pytest.mark.asyncio
    async def test_multi_choice_renders_buttons_and_other(self):
        """Multiple choices send a message with a keyboard and record the session key."""
        adapter = self._adapter_replying_with(100)

        result = await adapter.send_clarify(
            chat_id="12345",
            question="Which option?",
            choices=["alpha", "beta", "gamma"],
            clarify_id="cid1",
            session_key="sk1",
        )

        assert result.success is True
        assert result.message_id == "100"

        kwargs = adapter._bot.send_message.call_args.kwargs
        assert kwargs["chat_id"] == 12345
        assert "Which option?" in kwargs["text"]
        # The keyboard class is itself a mock here, so only its presence
        # is checked; the stored state is the meaningful assertion.
        assert kwargs["reply_markup"] is not None
        assert "cid1" in adapter._clarify_state
        assert adapter._clarify_state["cid1"] == "sk1"

    @pytest.mark.asyncio
    async def test_open_ended_no_keyboard(self):
        """With ``choices=None`` no ``reply_markup`` keyword is sent at all."""
        adapter = self._adapter_replying_with(101)

        result = await adapter.send_clarify(
            chat_id="12345",
            question="What is your name?",
            choices=None,
            clarify_id="cid2",
            session_key="sk2",
        )

        assert result.success is True
        kwargs = adapter._bot.send_message.call_args.kwargs
        assert "reply_markup" not in kwargs
        assert "What is your name?" in kwargs["text"]
        assert adapter._clarify_state["cid2"] == "sk2"

    @pytest.mark.asyncio
    async def test_not_connected(self):
        """Without a bot, ``send_clarify`` reports failure."""
        adapter = _make_adapter()
        adapter._bot = None

        result = await adapter.send_clarify(
            chat_id="12345",
            question="?",
            choices=["a"],
            clarify_id="cid3",
            session_key="sk3",
        )

        assert result.success is False

    @pytest.mark.asyncio
    async def test_truncates_long_choice_label(self):
        """A 200-character choice label must not make the call fail."""
        adapter = self._adapter_replying_with(102)
        long_choice = "x" * 200  # > 60 char cap

        result = await adapter.send_clarify(
            chat_id="12345",
            question="?",
            choices=[long_choice],
            clarify_id="cid4",
            session_key="sk4",
        )

        # Button labels live inside auto-generated mocks, so the shortened
        # label is not inspected; only success on oversized input is checked.
        assert result.success is True

    @pytest.mark.asyncio
    async def test_html_escapes_question(self):
        """Markup in the question is sent HTML-escaped, never raw."""
        import html

        adapter = self._adapter_replying_with(103)

        await adapter.send_clarify(
            chat_id="12345",
            question="<script>alert(1)</script>",
            choices=["x"],
            clarify_id="cid5",
            session_key="sk5",
        )

        text = adapter._bot.send_message.call_args.kwargs["text"]
        # The raw tag must be gone ...
        assert "<script>" not in text
        # ... and its escaped form must be present.  The expected value is
        # computed with html.escape so the check cannot collide with the
        # raw-tag assertion above.
        assert html.escape("<script>") in text
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Callback dispatch — _handle_callback_query routing for cl:* prefixes
|
||||
# ===========================================================================
|
||||
|
||||
class TestTelegramClarifyCallback:
    """Verify clicking a button resolves the clarify primitive."""

    def setup_method(self):
        # Start every test from an empty clarify registry.
        _clear_clarify_state()

    @staticmethod
    def _button_press(data, *, user_id="777", first_name="Tester", **message_attrs):
        """Build a mocked callback query carrying *data*.

        ``message_attrs`` are set on the mocked message in addition to
        ``chat_id=12345``.
        """
        message = MagicMock(chat_id=12345, **message_attrs)
        sender = MagicMock(id=user_id, first_name=first_name)
        return AsyncMock(data=data, message=message, from_user=sender, answer=AsyncMock())

    @staticmethod
    def _update_for(query):
        """Wrap *query* in a mocked update object."""
        return MagicMock(callback_query=query)

    @pytest.mark.asyncio
    async def test_numeric_choice_resolves_with_choice_text(self):
        """A numeric token resolves the entry with the matching choice text."""
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        # Registered up front so the handler can map the index to its text.
        cm.register("cidA", "sk-cb", "Pick", ["red", "green", "blue"])
        adapter._clarify_state["cidA"] = "sk-cb"

        query = self._button_press("cl:cidA:1", text="Pick")  # index 1 -> green
        query.edit_message_text = AsyncMock()

        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
            await adapter._handle_callback_query(self._update_for(query), MagicMock())

        # Adapter-side state is dropped once the answer is in.
        assert "cidA" not in adapter._clarify_state
        # Nothing waited on the entry, so it is still registered and can
        # be inspected directly.
        with cm._lock:
            entry = cm._entries.get("cidA")
        assert entry is not None
        assert entry.response == "green"
        assert entry.event.is_set()
        query.answer.assert_called_once()
        query.edit_message_text.assert_called_once()

    @pytest.mark.asyncio
    async def test_other_button_flips_to_text_mode(self):
        """The ``other`` token switches the entry to text capture without resolving it."""
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        cm.register("cidB", "sk-cb-other", "Pick", ["x", "y"])
        adapter._clarify_state["cidB"] = "sk-cb-other"

        query = self._button_press("cl:cidB:other", text="Pick")
        query.edit_message_text = AsyncMock()

        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
            await adapter._handle_callback_query(self._update_for(query), MagicMock())

        pending = cm.get_pending_for_session("sk-cb-other")
        assert pending is not None
        assert pending.clarify_id == "cidB"
        assert pending.awaiting_text is True
        # The typed answer is still outstanding, so the state stays.
        assert "cidB" in adapter._clarify_state
        with cm._lock:
            entry = cm._entries.get("cidB")
        assert entry is not None
        assert not entry.event.is_set()

    @pytest.mark.asyncio
    async def test_already_resolved(self):
        """A press for an unknown clarify id is answered with an 'already' notice."""
        adapter = _make_adapter()
        # Deliberately no state registered for cidGone.
        query = self._button_press("cl:cidGone:0")

        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
            await adapter._handle_callback_query(self._update_for(query), MagicMock())

        query.answer.assert_called_once()
        assert "already" in query.answer.call_args[1]["text"].lower()

    @pytest.mark.asyncio
    async def test_unauthorized_user_rejected(self):
        """A press from a user the runner rejects leaves the entry unresolved."""
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        cm.register("cidC", "sk-auth", "Pick", ["a", "b"])
        adapter._clarify_state["cidC"] = "sk-auth"

        class _DenyRunner:
            """Runner stand-in whose authorization check always refuses."""

            async def _handle_message(self, event):
                return None

            def _is_user_authorized(self, source):
                return False

        # The handler is installed as a bound method of the denying runner.
        adapter._message_handler = _DenyRunner()._handle_message

        query = self._button_press(
            "cl:cidC:0", user_id="999", first_name="Mallory", text="Pick"
        )
        query.message.chat.type = "private"
        query.edit_message_text = AsyncMock()

        await adapter._handle_callback_query(self._update_for(query), MagicMock())

        with cm._lock:
            entry = cm._entries.get("cidC")
        assert entry is not None
        assert not entry.event.is_set()
        query.answer.assert_called_once()
        assert "not authorized" in query.answer.call_args[1]["text"].lower()
        # The rejected press must not consume the pending state.
        assert adapter._clarify_state["cidC"] == "sk-auth"

    @pytest.mark.asyncio
    async def test_invalid_choice_token(self):
        """A non-numeric, non-``other`` token is answered as invalid and resolves nothing."""
        from tools import clarify_gateway as cm

        adapter = _make_adapter()
        cm.register("cidD", "sk-inv", "Q?", ["a"])
        adapter._clarify_state["cidD"] = "sk-inv"

        query = self._button_press("cl:cidD:not-a-number", text="Q?")

        with patch.dict(os.environ, {"TELEGRAM_ALLOWED_USERS": "*"}, clear=False):
            await adapter._handle_callback_query(self._update_for(query), MagicMock())

        with cm._lock:
            entry = cm._entries.get("cidD")
        assert entry is not None
        assert not entry.event.is_set()
        query.answer.assert_called_once()
        assert "invalid" in query.answer.call_args[1]["text"].lower()
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Base adapter fallback render — text numbered list
|
||||
# ===========================================================================
|
||||
|
||||
class TestBaseAdapterClarifyFallback:
    """Adapters without button overrides should render numbered text."""

    @pytest.mark.asyncio
    async def test_numbered_text_fallback(self):
        """Choices are delivered through ``send`` as a numbered list under the question."""
        from gateway.platforms.base import BasePlatformAdapter, SendResult

        class _Stub(BasePlatformAdapter):
            """Smallest subclass that can be instantiated; records every ``send``."""

            name = "stub"

            def __init__(self):
                # The base initializer is intentionally not called; only
                # the recording list is needed.
                self.sent: list = []

            async def connect(self):
                pass

            async def disconnect(self):
                pass

            async def send(self, chat_id, content, **kw):
                self.sent.append({"chat_id": chat_id, "content": content})
                return SendResult(success=True, message_id="1")

            async def edit(self, *a, **k):
                return SendResult(success=False)

            async def get_history(self, *a, **k):
                return []

            async def get_chat_info(self, *a, **k):
                return {}

        adapter = _Stub()

        result = await adapter.send_clarify(
            chat_id="c",
            question="Pick a fruit",
            choices=["apple", "banana"],
            clarify_id="x",
            session_key="s",
        )

        assert result.success is True
        assert len(adapter.sent) == 1
        rendered = adapter.sent[0]["content"]
        assert "Pick a fruit" in rendered
        assert "1." in rendered and "apple" in rendered
        assert "2." in rendered and "banana" in rendered

    @pytest.mark.asyncio
    async def test_open_ended_fallback_renders_question_only(self):
        """With ``choices=None`` the text holds the question and no numbering."""
        from gateway.platforms.base import BasePlatformAdapter, SendResult

        class _Stub(BasePlatformAdapter):
            """Smallest subclass that can be instantiated; records sent content."""

            name = "stub"

            def __init__(self):
                self.sent: list = []

            async def connect(self):
                pass

            async def disconnect(self):
                pass

            async def send(self, chat_id, content, **kw):
                self.sent.append(content)
                return SendResult(success=True, message_id="1")

            async def edit(self, *a, **k):
                return SendResult(success=False)

            async def get_history(self, *a, **k):
                return []

            async def get_chat_info(self, *a, **k):
                return {}

        adapter = _Stub()

        await adapter.send_clarify(
            chat_id="c",
            question="Free form?",
            choices=None,
            clarify_id="x",
            session_key="s",
        )

        first_message = adapter.sent[0]
        assert "Free form?" in first_message
        # No choices were supplied, so no list numbering may appear.
        assert "1." not in first_message
|
||||
|
|
@ -218,17 +218,62 @@ async def test_on_processing_complete_skipped_when_disabled(monkeypatch):
|
|||
|
||||
|
||||
@pytest.mark.asyncio
async def test_on_processing_complete_cancelled_clears_reaction(monkeypatch):
    """Cancelled processing should clear the in-progress reaction.

    Without this clear, the 👀 reaction lingers on the user's message
    indefinitely (until another agent run swaps it for 👍/👎). On a
    ``/stop`` that ends a session, that reaction never gets cleaned up.
    """
    monkeypatch.setenv("TELEGRAM_REACTIONS", "true")
    adapter = _make_adapter()
    event = _make_event()

    await adapter.on_processing_complete(event, ProcessingOutcome.CANCELLED)

    # set_message_reaction with reaction=None clears all reactions on the
    # message (Bot API documented semantics; equivalent to Bot API 10.0's
    # deleteMessageReaction but works on PTB 22.6 already).
    adapter._bot.set_message_reaction.assert_awaited_once_with(
        chat_id=123,
        message_id=456,
        reaction=None,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_on_processing_complete_cancelled_skipped_when_disabled(monkeypatch):
    """With ``TELEGRAM_REACTIONS`` unset, a cancelled outcome makes no reaction call."""
    monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False)
    adapter = _make_adapter()

    await adapter.on_processing_complete(_make_event(), ProcessingOutcome.CANCELLED)

    reaction_call = adapter._bot.set_message_reaction
    reaction_call.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_clear_reactions_handles_api_error_gracefully(monkeypatch):
    """An exception from the reaction call is swallowed and reported as ``False``."""
    monkeypatch.setenv("TELEGRAM_REACTIONS", "true")
    adapter = _make_adapter()
    failing_call = AsyncMock(side_effect=RuntimeError("no perms"))
    adapter._bot.set_message_reaction = failing_call

    outcome = await adapter._clear_reactions("123", "456")

    assert outcome is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_clear_reactions_returns_false_without_bot(monkeypatch):
    """``_clear_reactions`` yields ``False`` when the adapter has no bot."""
    adapter = _make_adapter()
    adapter._bot = None

    outcome = await adapter._clear_reactions("123", "456")

    assert outcome is False
|
||||
|
||||
|
||||
# ── config.py bridging ───────────────────────────────────────────────
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ only renders as a voice bubble when explicitly flagged) and via
|
|||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -106,6 +106,16 @@ async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_
|
|||
adapter.send_document.assert_not_awaited()
|
||||
|
||||
|
||||
def _fake_runner(thread_meta):
    """Build a fake GatewayRunner-like object for ``_deliver_media_from_response``.

    The returned namespace exposes only the two helpers that call needs:

    * ``_thread_metadata_for_source(source, anchor=None)`` always returns
      the very ``thread_meta`` object passed in here.
    * ``_reply_anchor_for_event(event)`` always returns ``None``.
    """
    return SimpleNamespace(
        _thread_metadata_for_source=lambda source, anchor=None: thread_meta,
        _reply_anchor_for_event=lambda event: None,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender():
|
||||
event = _event(thread_id="topic-1")
|
||||
|
|
@ -121,7 +131,7 @@ async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sen
|
|||
)
|
||||
|
||||
await GatewayRunner._deliver_media_from_response(
|
||||
object(),
|
||||
_fake_runner({"thread_id": "topic-1"}),
|
||||
"MEDIA:/tmp/speech.flac",
|
||||
event,
|
||||
adapter,
|
||||
|
|
@ -150,7 +160,7 @@ async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_doc
|
|||
)
|
||||
|
||||
await GatewayRunner._deliver_media_from_response(
|
||||
object(),
|
||||
_fake_runner({"thread_id": "topic-1"}),
|
||||
"MEDIA:/tmp/speech.ogg",
|
||||
event,
|
||||
adapter,
|
||||
|
|
@ -181,7 +191,7 @@ async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender(
|
|||
)
|
||||
|
||||
await GatewayRunner._deliver_media_from_response(
|
||||
object(),
|
||||
_fake_runner({"thread_id": "topic-1"}),
|
||||
"MEDIA:/tmp/speech.mp3",
|
||||
event,
|
||||
adapter,
|
||||
|
|
|
|||
|
|
@ -45,6 +45,9 @@ def _make_runner(hermes_home=None):
|
|||
runner._pending_messages = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._failed_platforms = {}
|
||||
# config is accessed by _check_slash_access and quick_commands lookup;
|
||||
# None makes policy_for_source return a disabled (allow-all) policy.
|
||||
runner.config = None
|
||||
# Bypass the destructive-slash confirm gate — this test exercises
|
||||
# update-prompt interception, not the confirm prompt.
|
||||
runner._read_user_config = lambda: {
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ class TestVerboseCommand:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_defaults_to_all_when_no_tool_progress_set(self, tmp_path, monkeypatch):
|
||||
"""When tool_progress is not in config, defaults to 'all' then cycles to verbose."""
|
||||
"""When tool_progress is not in config, defaults to platform default then cycles."""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
|
|
@ -143,17 +143,17 @@ class TestVerboseCommand:
|
|||
runner = _make_runner()
|
||||
result = await runner._handle_verbose_command(_make_event())
|
||||
|
||||
# Telegram default is "all" (high tier) → cycles to verbose
|
||||
assert "VERBOSE" in result
|
||||
# Telegram platform default is "new" → cycles to "all"
|
||||
assert "ALL" in result
|
||||
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "verbose"
|
||||
assert saved["display"]["platforms"]["telegram"]["tool_progress"] == "all"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_per_platform_isolation(self, tmp_path, monkeypatch):
|
||||
"""Cycling /verbose on Telegram doesn't change Slack's setting.
|
||||
|
||||
Without a global tool_progress, each platform uses its built-in
|
||||
default: Telegram = 'all' (high tier), Slack = 'off' (quiet Slack default).
|
||||
default: Telegram = 'new' (overridden high tier), Slack = 'off' (quiet Slack default).
|
||||
"""
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
|
@ -178,8 +178,8 @@ class TestVerboseCommand:
|
|||
|
||||
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
platforms = saved["display"]["platforms"]
|
||||
# Telegram: all -> verbose (high tier default = all)
|
||||
assert platforms["telegram"]["tool_progress"] == "verbose"
|
||||
# Telegram: new -> all (platform default = new)
|
||||
assert platforms["telegram"]["tool_progress"] == "all"
|
||||
# Slack: off -> new (first /verbose cycle from quiet default)
|
||||
assert platforms["slack"]["tool_progress"] == "new"
|
||||
|
||||
|
|
|
|||
|
|
@ -170,6 +170,50 @@ def test_auth_add_nous_oauth_persists_pool_entry(tmp_path, monkeypatch):
|
|||
assert singleton["inference_base_url"] == "https://inference.example.com/v1"
|
||||
|
||||
|
||||
def test_auth_add_minimax_oauth_starts_login_and_persists_pool_entry(tmp_path, monkeypatch):
    """``auth add minimax-oauth`` stores the login result as a pool entry.

    The OAuth login is replaced by a stub returning fixed credentials; the
    test then reads ``auth.json`` and checks the entry written under
    ``credential_pool["minimax-oauth"]``.
    """
    hermes_home = tmp_path / "hermes"
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(tmp_path, {"version": 1, "providers": {}})
    token = _jwt_with_email("minimax@example.com")

    def _stub_login(**kwargs):
        # Fixed credential payload standing in for the real OAuth login.
        return {
            "provider": "minimax-oauth",
            "region": "global",
            "portal_base_url": "https://api.minimax.io",
            "inference_base_url": "https://api.minimax.io/anthropic",
            "client_id": "client-id",
            "scope": "group_id profile model.completion",
            "token_type": "Bearer",
            "access_token": token,
            "refresh_token": "refresh-token",
            "resource_url": None,
            "obtained_at": "2026-05-11T10:00:00+00:00",
            "expires_at": "2026-05-14T10:00:00+00:00",
            "expires_in": 259200,
        }

    monkeypatch.setattr("hermes_cli.auth._minimax_oauth_login", _stub_login)

    from hermes_cli.auth_commands import auth_add_command

    class _Args:
        """Argument holder for the command; no label and no API key supplied."""

        provider = "minimax-oauth"
        auth_type = "oauth"
        api_key = None
        label = None
        no_browser = True
        timeout = None

    auth_add_command(_Args())

    stored = json.loads((hermes_home / "auth.json").read_text())
    pool = stored["credential_pool"]["minimax-oauth"]
    entry = next(candidate for candidate in pool if candidate["source"] == "manual:minimax_oauth")
    assert entry["label"] == "minimax@example.com"
    assert entry["access_token"] == token
    assert entry["refresh_token"] == "refresh-token"
    assert entry["base_url"] == "https://api.minimax.io/anthropic"
|
||||
|
||||
|
||||
def test_auth_add_nous_oauth_honors_custom_label(tmp_path, monkeypatch):
|
||||
"""`hermes auth add nous --type oauth --label <name>` must preserve the
|
||||
custom label end-to-end — it was silently dropped in the first cut of the
|
||||
|
|
|
|||
|
|
@ -242,12 +242,14 @@ class TestTelegramBotCommands:
|
|||
tg_name = cmd.name.replace("-", "_")
|
||||
assert tg_name not in names
|
||||
|
||||
def test_excludes_commands_with_required_args(self):
|
||||
def test_includes_builtin_commands_with_required_args(self):
|
||||
"""Built-in arg-taking commands (e.g. /queue, /steer, /background)
|
||||
are now included because their handlers return usage text when
|
||||
invoked without arguments — issue #24312."""
|
||||
names = {name for name, _ in telegram_bot_commands()}
|
||||
assert "background" not in names
|
||||
assert "queue" not in names
|
||||
assert "steer" not in names
|
||||
assert "background" in GATEWAY_KNOWN_COMMANDS
|
||||
assert "background" in names
|
||||
assert "queue" in names
|
||||
assert "steer" in names
|
||||
|
||||
|
||||
class TestSlackSubcommandMap:
|
||||
|
|
|
|||
|
|
@ -2,10 +2,11 @@
|
|||
from pathlib import Path
|
||||
|
||||
|
||||
def test_profiles_nav_label_uses_short_multi_agents_copy():
|
||||
def test_profiles_nav_label_uses_short_copy():
|
||||
en_i18n = Path(__file__).resolve().parents[2] / "web" / "src" / "i18n" / "en.ts"
|
||||
|
||||
content = en_i18n.read_text(encoding="utf-8")
|
||||
|
||||
assert 'profiles: "profiles : multi agents"' in content
|
||||
assert "Profiles: Running Multiple Agents" not in content
|
||||
# Nav label should be the clean short form, not the old verbose string
|
||||
assert 'profiles: "Profiles"' in content
|
||||
assert "profiles : multi agents" not in content
|
||||
|
|
|
|||
61
tests/hermes_cli/test_gateway_platform_gating.py
Normal file
61
tests/hermes_cli/test_gateway_platform_gating.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
"""Host-specific gating in ``hermes_cli.gateway._all_platforms()``.
|
||||
|
||||
Some messaging platforms can't function on every host. The gate lives
|
||||
in one place — ``_all_platforms()`` — so the setup wizard, the curses
|
||||
gateway-config menu, and any future picker all see the same filtered
|
||||
list.
|
||||
|
||||
Currently:
|
||||
- Matrix is hidden on Windows. The ``[matrix]`` extra pulls
|
||||
``mautrix[encryption]`` -> ``python-olm``, which has no Windows wheel
|
||||
and needs ``make`` + libolm to build from sdist. There's no native
|
||||
Windows path that works.
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
class TestMatrixHiddenOnWindows:
    """Matrix must be offered on Linux/macOS and filtered out on Windows."""

    @staticmethod
    def _picker_keys(monkeypatch, host_platform):
        """Return the platform keys ``_all_platforms()`` yields for a faked ``sys.platform``."""
        import hermes_cli.gateway as gateway_mod

        monkeypatch.setattr(gateway_mod.sys, "platform", host_platform)
        return {entry["key"] for entry in gateway_mod._all_platforms()}

    def test_matrix_present_on_linux(self, monkeypatch):
        """Sanity: matrix is still in the picker on Linux/macOS."""
        keys = self._picker_keys(monkeypatch, "linux")
        assert "matrix" in keys, "matrix must be available on Linux"

    def test_matrix_present_on_macos(self, monkeypatch):
        keys = self._picker_keys(monkeypatch, "darwin")
        assert "matrix" in keys, "matrix must be available on macOS"

    def test_matrix_hidden_on_windows(self, monkeypatch):
        """The actual gate: matrix must NOT appear on Windows."""
        keys = self._picker_keys(monkeypatch, "win32")
        assert "matrix" not in keys, (
            "matrix must be hidden on Windows — python-olm has no "
            "Windows wheel and no native build path"
        )

    def test_other_platforms_unaffected_on_windows(self, monkeypatch):
        """Gating must only drop matrix, not collateral damage."""
        keys = self._picker_keys(monkeypatch, "win32")
        # Representative platforms with no Windows blockers; the picker
        # should keep showing each of them.
        for must_have in ("telegram", "discord", "slack", "mattermost"):
            assert must_have in keys, (
                f"{must_have} disappeared from Windows picker — gate is "
                "over-filtering"
            )
|
||||
|
|
@ -7,6 +7,7 @@ from hermes_cli.models import (
|
|||
is_nous_free_tier, partition_nous_models_by_tier,
|
||||
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
|
||||
union_with_portal_free_recommendations,
|
||||
union_with_portal_paid_recommendations,
|
||||
)
|
||||
import hermes_cli.models as _models_mod
|
||||
|
||||
|
|
@ -506,6 +507,147 @@ class TestUnionWithPortalFreeRecommendations:
|
|||
assert p["qwen/qwen3.6-plus"] == self._FREE
|
||||
|
||||
|
||||
class TestUnionWithPortalPaidRecommendations:
    """Behaviour of ``union_with_portal_paid_recommendations``.

    Counterpart of TestUnionWithPortalFreeRecommendations. The Portal's
    paidRecommendedModels endpoint is treated as the authority on which paid
    models are blessed *right now*; the in-repo curated list and the
    docs-hosted manifest may lag behind it. The helper lets newly launched
    paid models show up in the picker for paid-tier users without waiting
    for a CLI release.
    """

    _PAID = {"prompt": "0.000003", "completion": "0.000015"}
    _FREE = {"prompt": "0", "completion": "0"}

    def _payload(self, paid_models: list[str]) -> dict:
        """Build a Portal response advertising ``paid_models``."""
        entries = [{"modelName": mid, "displayName": mid} for mid in paid_models]
        return {"paidRecommendedModels": entries}

    @staticmethod
    def _union(curated, pricing, **fetch_behaviour):
        """Run the helper with ``fetch_nous_recommended_models`` mocked.

        ``fetch_behaviour`` is forwarded to ``patch`` unchanged, so callers
        pass either ``return_value=...`` or ``side_effect=...``.
        """
        with patch(
            "hermes_cli.models.fetch_nous_recommended_models", **fetch_behaviour
        ):
            return union_with_portal_paid_recommendations(curated, pricing, "")

    def test_adds_portal_paid_model_missing_from_curated(self):
        """A Portal-advertised paid model not in curated is prepended."""
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}
        ids, p = self._union(
            curated, pricing, return_value=self._payload(["openai/gpt-5.4"])
        )

        assert ids[0] == "openai/gpt-5.4"  # prepended
        assert "anthropic/claude-opus-4.6" in ids
        # Existing pricing untouched
        assert p["anthropic/claude-opus-4.6"] == self._PAID

    def test_does_not_synthesize_pricing_for_paid_models(self):
        """Paid recommendations missing from live pricing get no synthetic entry.

        A synthetic zero price (what the free helper does) would make
        :func:`partition_nous_models_by_tier` treat the model as free, and a
        made-up non-zero price would lie to the user. Pricing is therefore
        left absent, so the picker shows a blank column until the live
        pricing endpoint catches up.
        """
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}
        _, p = self._union(
            curated, pricing, return_value=self._payload(["openai/gpt-5.4"])
        )

        assert "openai/gpt-5.4" not in p
        assert p["anthropic/claude-opus-4.6"] == self._PAID

    def test_does_not_duplicate_curated_entries(self):
        """A Portal paid model already in curated is not duplicated."""
        curated = ["openai/gpt-5.4", "anthropic/claude-opus-4.6"]
        pricing = {
            "openai/gpt-5.4": self._PAID,
            "anthropic/claude-opus-4.6": self._PAID,
        }
        ids, p = self._union(
            curated, pricing, return_value=self._payload(["openai/gpt-5.4"])
        )

        assert ids == curated
        assert p == pricing

    def test_empty_payload_returns_inputs_unchanged(self):
        """Empty Portal response leaves curated + pricing untouched."""
        curated = ["a", "b"]
        pricing = {"a": self._PAID}
        ids, p = self._union(curated, pricing, return_value={})
        assert ids == curated
        assert p == pricing

    def test_missing_paidRecommendedModels_key(self):
        """Portal payload without paidRecommendedModels degrades gracefully."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        ids, p = self._union(
            curated,
            pricing,
            return_value={"freeRecommendedModels": [{"modelName": "x"}]},
        )
        assert ids == curated
        assert p == pricing

    def test_fetch_failure_returns_inputs(self):
        """Network failures don't blow up the picker."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        ids, p = self._union(
            curated, pricing, side_effect=RuntimeError("network down")
        )
        assert ids == curated
        assert p == pricing

    def test_invalid_entries_skipped(self):
        """Non-dict / missing-modelName entries are filtered out."""
        curated = ["a"]
        pricing = {"a": self._PAID}
        portal_response = {
            "paidRecommendedModels": [
                "not-a-dict",
                {"displayName": "no-modelName"},
                {"modelName": ""},
                {"modelName": "openai/gpt-5.4"},
            ]
        }
        ids, p = self._union(curated, pricing, return_value=portal_response)
        assert ids == ["openai/gpt-5.4", "a"]
        # No synthetic entry — pricing is untouched.
        assert "openai/gpt-5.4" not in p

    def test_preserves_relative_order_of_new_paid_models(self):
        """Multiple new paid models are prepended in payload order."""
        curated = ["anthropic/claude-opus-4.6"]
        pricing = {"anthropic/claude-opus-4.6": self._PAID}
        ids, _ = self._union(
            curated,
            pricing,
            return_value=self._payload(["openai/gpt-5.4", "openai/gpt-5.5"]),
        )
        assert ids == [
            "openai/gpt-5.4",
            "openai/gpt-5.5",
            "anthropic/claude-opus-4.6",
        ]
|
||||
|
||||
|
||||
class TestCheckNousFreeTierCache:
|
||||
"""Tests for the TTL cache on check_nous_free_tier()."""
|
||||
|
||||
|
|
|
|||
|
|
@ -2285,3 +2285,39 @@ def test_minimax_oauth_runtime_uses_inference_base_url(monkeypatch):
|
|||
resolved = rp.resolve_runtime_provider(requested="minimax-oauth")
|
||||
|
||||
assert MINIMAX_OAUTH_CN_INFERENCE.rstrip("/") in resolved["base_url"]
|
||||
|
||||
|
||||
def test_minimax_oauth_pool_forces_anthropic_messages_despite_stale_config(monkeypatch):
    """A pooled MiniMax OAuth token must not inherit stale chat_completions config."""

    class _Entry:
        access_token = "oauth-token"
        source = "manual:minimax_oauth"
        base_url = "https://api.minimax.io/anthropic"

    class _Pool:
        def has_credentials(self):
            return True

        def select(self):
            return _Entry()

    # Model config that still carries the outdated api_mode.
    stale_model_config = {
        "provider": "minimax-oauth",
        "default": "MiniMax-M2.7",
        "api_mode": "chat_completions",
    }

    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax-oauth")
    monkeypatch.setattr(rp, "_get_model_config", lambda: stale_model_config)
    monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool())
    # Both alternative resolvers are stubbed to return None.
    for resolver_name in ("_resolve_named_custom_runtime", "_resolve_explicit_runtime"):
        monkeypatch.setattr(rp, resolver_name, lambda **k: None)

    resolved = rp.resolve_runtime_provider(requested="minimax-oauth")

    assert resolved["provider"] == "minimax-oauth"
    assert resolved["api_mode"] == "anthropic_messages"
    assert resolved["base_url"] == "https://api.minimax.io/anthropic"
|
||||
|
|
|
|||
330
tests/hermes_cli/test_security_advisories.py
Normal file
330
tests/hermes_cli/test_security_advisories.py
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
"""Tests for hermes_cli.security_advisories.
|
||||
|
||||
The advisory module is the user-facing detection / remediation surface
|
||||
for supply-chain attacks (e.g. the Mini Shai-Hulud worm of May 2026 that
|
||||
poisoned mistralai 2.4.6 on PyPI). These tests exercise the public API in
|
||||
isolation — no real package metadata, no real config, no real cache.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
import pytest
|
||||
|
||||
import hermes_cli.security_advisories as adv
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def fake_advisory() -> adv.Advisory:
    """Provide one standalone Advisory shared by the tests in this module."""
    compromised_releases = (
        ("fake-malicious-pkg", frozenset({"6.6.6"})),
    )
    remediation_steps = (
        "pip uninstall -y fake-malicious-pkg",
        "Rotate any credentials that may have been exposed.",
    )
    return adv.Advisory(
        id="test-advisory-2026-99",
        title="Test advisory",
        summary="Pretend this package has been compromised.",
        url="https://example.com/advisory",
        compromised=compromised_releases,
        remediation=remediation_steps,
        published="2026-01-01",
        severity="critical",
    )
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point HERMES_HOME (and ``Path.home``) at a throwaway directory tree.

    Creates ``<tmp_path>/.hermes`` and its ``cache`` subdirectory, then
    returns the ``.hermes`` path.
    """
    home = tmp_path / ".hermes"
    for directory in (home, home / "cache"):
        directory.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(home))
    return home
|
||||
|
||||
|
||||
@pytest.fixture
def patched_version(monkeypatch: pytest.MonkeyPatch) -> Iterator[dict[str, str]]:
    """Replace ``_installed_version`` with a lookup in a dict the test can fill.

    Packages missing from the dict resolve to ``None``.
    """
    table: dict[str, str] = {}

    def _lookup(pkg):
        return table.get(pkg)

    monkeypatch.setattr(adv, "_installed_version", _lookup)
    yield table
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# detect_compromised
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDetectCompromised:
    """``detect_compromised`` evaluated against a stubbed installed-version table."""

    def test_no_match_returns_empty_list(self, fake_advisory, patched_version):
        # The lookup table stays empty, so no package resolves to a version.
        assert adv.detect_compromised(advisories=[fake_advisory]) == []

    def test_exact_version_match(self, fake_advisory, patched_version):
        patched_version["fake-malicious-pkg"] = "6.6.6"
        hits = adv.detect_compromised(advisories=[fake_advisory])
        assert len(hits) == 1
        found = hits[0]
        assert found.advisory.id == fake_advisory.id
        assert found.package == "fake-malicious-pkg"
        assert found.installed_version == "6.6.6"

    def test_safe_version_does_not_match(self, fake_advisory, patched_version):
        # Installed, but at a version outside the compromised set.
        patched_version["fake-malicious-pkg"] = "6.6.5"
        assert adv.detect_compromised(advisories=[fake_advisory]) == []

    def test_empty_compromised_set_matches_any_version(
        self, patched_version
    ):
        # An empty version set acts as an "any version is suspect" wildcard,
        # used when an entire maintainer namespace is owned.
        wildcard = adv.Advisory(
            id="wildcard",
            title="Whole namespace owned",
            summary="x",
            url="x",
            compromised=(("evil-namespace", frozenset()),),
            remediation=("uninstall it",),
        )
        patched_version["evil-namespace"] = "0.0.1"
        hits = adv.detect_compromised(advisories=[wildcard])
        assert len(hits) == 1
        assert hits[0].installed_version == "0.0.1"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Acknowledgement persistence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAck:
    """Reading, filtering on, and persisting acknowledged advisory ids."""

    @staticmethod
    def _hit(advisory):
        """Build the AdvisoryHit used by the filter tests."""
        return adv.AdvisoryHit(
            advisory=advisory,
            package="fake-malicious-pkg",
            installed_version="6.6.6",
        )

    def test_get_acked_ids_empty_when_no_config(self, monkeypatch):
        # A failing load_config must yield an empty set rather than crash.
        def _failing_load():
            raise RuntimeError("boom")

        monkeypatch.setattr("hermes_cli.config.load_config", _failing_load)
        assert adv.get_acked_ids() == set()

    def test_filter_unacked_strips_dismissed(self, fake_advisory, monkeypatch):
        hit = self._hit(fake_advisory)
        monkeypatch.setattr(adv, "get_acked_ids", lambda: {fake_advisory.id})
        assert adv.filter_unacked([hit]) == []

    def test_filter_unacked_passes_through_unknown(
        self, fake_advisory, monkeypatch
    ):
        hit = self._hit(fake_advisory)
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        assert adv.filter_unacked([hit]) == [hit]

    def test_ack_advisory_persists_id(self, isolated_home, monkeypatch):
        # The config layer is replaced by a small in-memory dict, so the
        # full hermes_cli.config bootstrap is not needed.
        store: dict = {"security": {}}

        def _save(cfg):
            store.update(cfg)

        monkeypatch.setattr("hermes_cli.config.load_config", lambda: store)
        monkeypatch.setattr("hermes_cli.config.save_config", _save)

        advisory_id = "test-advisory-2026-99"
        assert adv.ack_advisory(advisory_id) is True
        assert advisory_id in store["security"]["acked_advisories"]
        # Idempotent.
        adv.ack_advisory(advisory_id)
        occurrences = store["security"]["acked_advisories"].count(advisory_id)
        assert occurrences == 1

    def test_ack_advisory_rejects_blank(self, isolated_home):
        assert adv.ack_advisory("") is False
        assert adv.ack_advisory(" ") is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Banner cache rate limiting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBannerCache:
    """Rate limiting applied by ``hits_due_for_banner``."""

    @staticmethod
    def _hit(advisory):
        """Build the AdvisoryHit every test in this class feeds to the banner check."""
        return adv.AdvisoryHit(
            advisory=advisory,
            package="fake-malicious-pkg",
            installed_version="6.6.6",
        )

    def test_first_call_returns_due_hits(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        hit = self._hit(fake_advisory)
        assert adv.hits_due_for_banner([hit]) == [hit]

    def test_second_call_within_window_suppresses(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        hit = self._hit(fake_advisory)
        adv.hits_due_for_banner([hit])
        # Same banner inside repeat window → suppressed.
        assert adv.hits_due_for_banner([hit]) == []

    def test_call_after_window_re_banners(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        hit = self._hit(fake_advisory)
        adv.hits_due_for_banner([hit])
        # Rewrite the cache timestamps to 48h in the past, well beyond the
        # 24h window, so the banner is due again.
        cache_path = adv._banner_cache_path()
        assert cache_path is not None
        split_lines = (
            line.split(None, 1)
            for line in cache_path.read_text(encoding="utf-8").splitlines()
        )
        backdated = [
            f"{parts[0]} {time.time() - 48 * 3600}"
            for parts in split_lines
            if len(parts) == 2
        ]
        cache_path.write_text("\n".join(backdated) + "\n", encoding="utf-8")
        assert adv.hits_due_for_banner([hit]) == [hit]

    def test_acked_hits_never_banner(
        self, fake_advisory, isolated_home, monkeypatch
    ):
        monkeypatch.setattr(adv, "get_acked_ids", lambda: {fake_advisory.id})
        hit = self._hit(fake_advisory)
        assert adv.hits_due_for_banner([hit]) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRendering:
    """Text produced by the banner, remediation, doctor and gateway-log renderers."""

    @staticmethod
    def _hit(advisory):
        """Build the AdvisoryHit rendered throughout this class."""
        return adv.AdvisoryHit(
            advisory=advisory,
            package="fake-malicious-pkg",
            installed_version="6.6.6",
        )

    def test_short_banner_lines_includes_id_and_version(self, fake_advisory):
        banner = "\n".join(adv.short_banner_lines([self._hit(fake_advisory)]))
        assert fake_advisory.id in banner
        assert fake_advisory.title in banner
        assert "fake-malicious-pkg==6.6.6" in banner
        assert "hermes doctor" in banner

    def test_full_remediation_text_contains_all_steps(self, fake_advisory):
        body = "\n".join(adv.full_remediation_text(self._hit(fake_advisory)))
        # All remediation steps must be present.
        for step in fake_advisory.remediation:
            assert step in body
        assert fake_advisory.url in body
        assert fake_advisory.summary in body

    def test_render_doctor_section_clean_state(self):
        # With no hits: a success message and has_problems=False.
        has_problems, lines = adv.render_doctor_section([])
        assert has_problems is False
        assert any("No active security advisories" in line for line in lines)

    def test_render_doctor_section_with_unacked_hit(
        self, fake_advisory, monkeypatch
    ):
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        has_problems, lines = adv.render_doctor_section([self._hit(fake_advisory)])
        assert has_problems is True
        assert fake_advisory.title in "\n".join(lines)

    def test_gateway_log_message_singular(self, fake_advisory, monkeypatch):
        monkeypatch.setattr(adv, "get_acked_ids", lambda: set())
        msg = adv.gateway_log_message([self._hit(fake_advisory)])
        assert msg is not None
        assert fake_advisory.id in msg
        assert "fake-malicious-pkg==6.6.6" in msg

    def test_gateway_log_message_returns_none_for_no_hits(self):
        assert adv.gateway_log_message([]) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Real catalog smoke test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRealCatalog:
    """Smoke check over the advisories actually shipped in ``adv.ADVISORIES``."""

    def test_advisories_well_formed(self):
        """Every shipped advisory must be self-consistent.

        Guards against data-entry slips (empty IDs, missing remediation,
        malformed compromised tuples) before they ship.
        """
        seen_ids: set[str] = set()
        for advisory in adv.ADVISORIES:
            assert advisory.id, "advisory has empty id"
            assert advisory.id not in seen_ids, f"duplicate id {advisory.id}"
            seen_ids.add(advisory.id)

            assert advisory.title, f"{advisory.id}: empty title"
            assert advisory.summary, f"{advisory.id}: empty summary"
            assert advisory.remediation, f"{advisory.id}: empty remediation"
            assert advisory.url.startswith("http"), (
                f"{advisory.id}: bad url {advisory.url!r}"
            )
            assert advisory.compromised, (
                f"{advisory.id}: empty compromised tuple"
            )

            for pkg, versions in advisory.compromised:
                assert pkg, f"{advisory.id}: empty package name"
                assert isinstance(versions, frozenset), (
                    f"{advisory.id}: versions must be frozenset"
                )
|
||||
|
|
@ -6,6 +6,7 @@ rather than leaving zombie processes or telling users to manually restart
|
|||
when launchd will auto-respawn.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
|
@ -1068,13 +1069,18 @@ class TestFindGatewayPidsExclude:
|
|||
|
||||
def test_excludes_specified_pids(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
|
||||
# Bypass /proc scan so the subprocess (ps) fallback is used
|
||||
_real_isdir = os.path.isdir
|
||||
monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
|
||||
monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
|
||||
monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(
|
||||
cmd, 0,
|
||||
stdout=(
|
||||
"user 100 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
||||
"user 200 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
||||
"100 python gateway/run.py\n"
|
||||
"200 python gateway/run.py\n"
|
||||
),
|
||||
stderr="",
|
||||
)
|
||||
|
|
@ -1082,19 +1088,24 @@ class TestFindGatewayPidsExclude:
|
|||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
||||
monkeypatch.setattr("os.getpid", lambda: 999)
|
||||
|
||||
pids = gateway_cli.find_gateway_pids(exclude_pids={100})
|
||||
pids = gateway_cli.find_gateway_pids(exclude_pids={100}, all_profiles=True)
|
||||
assert 100 not in pids
|
||||
assert 200 in pids
|
||||
|
||||
def test_no_exclude_returns_all(self, monkeypatch):
|
||||
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
|
||||
# Bypass /proc scan so the subprocess (ps) fallback is used
|
||||
_real_isdir = os.path.isdir
|
||||
monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
|
||||
monkeypatch.setattr(gateway_cli, "_get_service_pids", lambda: set())
|
||||
monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(
|
||||
cmd, 0,
|
||||
stdout=(
|
||||
"user 100 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
||||
"user 200 0.0 0.0 0 0 ? S 00:00 0:00 python gateway/run.py\n"
|
||||
"100 python gateway/run.py\n"
|
||||
"200 python gateway/run.py\n"
|
||||
),
|
||||
stderr="",
|
||||
)
|
||||
|
|
@ -1102,7 +1113,7 @@ class TestFindGatewayPidsExclude:
|
|||
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
|
||||
monkeypatch.setattr("os.getpid", lambda: 999)
|
||||
|
||||
pids = gateway_cli.find_gateway_pids()
|
||||
pids = gateway_cli.find_gateway_pids(all_profiles=True)
|
||||
assert 100 in pids
|
||||
assert 200 in pids
|
||||
|
||||
|
|
@ -1111,6 +1122,10 @@ class TestFindGatewayPidsExclude:
|
|||
profile_dir.mkdir(parents=True)
|
||||
monkeypatch.setattr(gateway_cli, "is_windows", lambda: False)
|
||||
monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir)
|
||||
# Bypass /proc scan so the subprocess (ps) fallback is used
|
||||
_real_isdir = os.path.isdir
|
||||
monkeypatch.setattr("os.path.isdir", lambda p: False if p == "/proc" else _real_isdir(p))
|
||||
monkeypatch.setattr(gateway_cli, "_get_ancestor_pids", lambda: {999})
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
return subprocess.CompletedProcess(
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ The fix:
|
|||
|
||||
These tests pin the corrected behavior.
|
||||
"""
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
|
@ -67,6 +69,53 @@ def test_minimax_login_does_not_launch_anthropic_flow():
|
|||
assert body["expires_in"] == 600
|
||||
|
||||
|
||||
def test_minimax_dashboard_poller_accepts_absolute_ms_expired_in():
    """Dashboard MiniMax completion must accept unix-ms token expiry values."""
    from hermes_cli import web_server as ws

    now = datetime.now(timezone.utc)
    # Expiry given as an absolute unix timestamp in milliseconds, 1800 s ahead.
    abs_ms = int((now.timestamp() + 1800) * 1000)
    session_id = "minimax-absolute-ms-test"

    pending_session = {
        "session_id": session_id,
        "provider": "minimax-oauth",
        "flow": "device_code",
        "created_at": time.time(),
        "status": "pending",
        "error_message": None,
        "portal_base_url": "https://api.minimax.io",
        "client_id": "client-id",
        "user_code": "ABCD-1234",
        "code_verifier": "verifier",
        "interval_ms": 2000,
        "expired_in_raw": abs_ms,
        "region": "global",
    }
    token_response = {
        "status": "success",
        "access_token": "access",
        "refresh_token": "refresh",
        "expired_in": abs_ms,
        "token_type": "Bearer",
    }
    captured_state = {}

    def _capture(state):
        captured_state.update(state)

    ws._oauth_sessions[session_id] = pending_session
    try:
        with patch(
            "hermes_cli.auth._minimax_poll_token",
            return_value=token_response,
        ), patch(
            "hermes_cli.auth._minimax_save_auth_state",
            side_effect=_capture,
        ):
            ws._minimax_poller(session_id)
    finally:
        # Always drop the injected session from the module-level registry.
        ws._oauth_sessions.pop(session_id, None)

    assert captured_state["access_token"] == "access"
    assert 1790 <= captured_state["expires_in"] <= 1810
    assert datetime.fromisoformat(captured_state["expires_at"]).year < 9999
|
||||
|
||||
|
||||
def test_anthropic_pkce_branch_still_works():
|
||||
"""Sanity: the dispatcher tightening doesn't break the legitimate Anthropic PKCE path."""
|
||||
fake_anthropic_response = {
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ class TestClientCacheBoundedGrowth:
|
|||
_get_cached_client,
|
||||
)
|
||||
|
||||
key = ("test_replace", True, "", "", "", (), False)
|
||||
key = ("test_replace", True, "", "", "", (), False, "")
|
||||
|
||||
# Simulate a stale entry from a closed loop
|
||||
old_loop = asyncio.new_event_loop()
|
||||
|
|
|
|||
308
tests/run_agent/test_file_mutation_verifier.py
Normal file
308
tests/run_agent/test_file_mutation_verifier.py
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
"""Tests for the per-turn file-mutation verifier footer.
|
||||
|
||||
Covers the three moving pieces:
|
||||
|
||||
1. ``_extract_file_mutation_targets`` — pulls file paths from write_file /
|
||||
patch (replace + V4A) tool-call argument dicts.
|
||||
2. ``AIAgent._record_file_mutation_result`` — builds the per-turn state
|
||||
dict, removing entries when a later success supersedes an earlier
|
||||
failure for the same path.
|
||||
3. ``AIAgent._format_file_mutation_failure_footer`` — renders the dict
|
||||
as a user-visible advisory.
|
||||
|
||||
Regression target: the "Ben Eng llm-wiki" session where grok-4.1-fast
|
||||
batched parallel patches, half failed, and the model summarised the
|
||||
turn claiming every file was edited. This verifier makes over-claiming
|
||||
structurally impossible past the model: the user always sees the real
|
||||
list of files that did NOT change.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from run_agent import (
|
||||
AIAgent,
|
||||
_FILE_MUTATING_TOOLS,
|
||||
_extract_error_preview,
|
||||
_extract_file_mutation_targets,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_file_mutation_targets
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExtractFileMutationTargets:
    """Checks which file paths ``_extract_file_mutation_targets`` reports.

    Covers non-mutating tools, ``write_file``, ``patch`` in replace mode
    (explicit and defaulted), and ``patch`` in V4A mode with one or several
    file sections.
    """

    def test_non_mutating_tool_returns_empty(self):
        # ``read_file`` and ``terminal`` calls must yield no targets.
        ignored_calls = (
            ("read_file", {"path": "/x"}),
            ("terminal", {"command": "ls"}),
        )
        for tool_name, tool_args in ignored_calls:
            assert _extract_file_mutation_targets(tool_name, tool_args) == []

    def test_write_file_returns_single_path(self):
        write_args = {"path": "/tmp/a.md", "content": "x"}
        targets = _extract_file_mutation_targets("write_file", write_args)
        assert targets == ["/tmp/a.md"]

    def test_write_file_missing_path_returns_empty(self):
        targets = _extract_file_mutation_targets("write_file", {"content": "x"})
        assert targets == []

    def test_patch_replace_mode_returns_path(self):
        replace_args = dict(
            mode="replace",
            path="/tmp/a.md",
            old_string="x",
            new_string="y",
        )
        assert _extract_file_mutation_targets("patch", replace_args) == ["/tmp/a.md"]

    def test_patch_default_mode_is_replace(self):
        # No ``mode`` key at all: the schema default is ``replace``.
        modeless_args = dict(path="/tmp/a.md", old_string="x", new_string="y")
        assert _extract_file_mutation_targets("patch", modeless_args) == ["/tmp/a.md"]

    def test_patch_v4a_single_file(self):
        patch_text = "".join((
            "*** Begin Patch\n",
            "*** Update File: /tmp/a.md\n",
            "@@ ctx @@\n",
            " line1\n",
            "-bad\n",
            "+good\n",
            "*** End Patch\n",
        ))
        targets = _extract_file_mutation_targets(
            "patch", {"mode": "patch", "patch": patch_text},
        )
        assert targets == ["/tmp/a.md"]

    def test_patch_v4a_multi_file(self):
        patch_text = "".join((
            "*** Begin Patch\n",
            "*** Update File: /tmp/a.md\n",
            "@@ @@\n-a\n+b\n",
            "*** Add File: /tmp/new.md\n",
            "+fresh\n",
            "*** Delete File: /tmp/old.md\n",
            "*** End Patch\n",
        ))
        targets = _extract_file_mutation_targets(
            "patch", {"mode": "patch", "patch": patch_text},
        )
        # Update, Add and Delete sections are all reported, in patch order.
        assert targets == ["/tmp/a.md", "/tmp/new.md", "/tmp/old.md"]

    def test_patch_v4a_missing_body_returns_empty(self):
        # An absent body and an empty body both yield no targets.
        for bodyless_args in ({"mode": "patch"}, {"mode": "patch", "patch": ""}):
            assert _extract_file_mutation_targets("patch", bodyless_args) == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_error_preview
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestExtractErrorPreview:
    """Checks how ``_extract_error_preview`` condenses a raw tool result."""

    def test_json_error_field_preferred(self):
        message = "Could not find old_string in /tmp/x"
        payload = json.dumps({"success": False, "error": message})
        # The ``error`` field of a JSON payload is returned on its own.
        assert _extract_error_preview(payload) == message

    def test_plain_string_falls_through(self):
        text = "Error executing tool: boom"
        assert _extract_error_preview(text) == text

    def test_long_preview_truncated(self):
        preview = _extract_error_preview("x" * 500, max_len=50)
        assert len(preview) <= 50
        assert preview.endswith("…")

    def test_none_returns_empty(self):
        preview = _extract_error_preview(None)
        assert preview == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _record_file_mutation_result — state transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _bare_agent() -> AIAgent:
    """Return an ``AIAgent`` built without running ``__init__``.

    ``AIAgent.__init__`` takes ~60 parameters and touches network, auth, and
    the filesystem, none of which these tests need. They exercise only
    ``_record_file_mutation_result`` and
    ``_format_file_mutation_failure_footer``, so the instance is allocated
    with ``object.__new__`` (the same pattern the gateway tests use, per the
    agent pitfalls list) and given just an empty per-turn state dict.
    """
    stub = object.__new__(AIAgent)
    stub._turn_failed_file_mutations = {}
    return stub
|
||||
|
||||
|
||||
class TestRecordFileMutationResult:
    """State transitions of ``AIAgent._record_file_mutation_result``.

    Each test feeds tool results into a bare agent and inspects the
    ``_turn_failed_file_mutations`` dict afterwards.
    """

    def test_non_mutating_tool_ignored(self):
        subject = _bare_agent()
        subject._record_file_mutation_result(
            "read_file",
            {"path": "/tmp/x"},
            "{}",
            is_error=True,
        )
        assert subject._turn_failed_file_mutations == {}

    def test_failure_recorded(self):
        subject = _bare_agent()
        patch_args = {
            "mode": "replace",
            "path": "/tmp/a.md",
            "old_string": "x",
            "new_string": "y",
        }
        failure = json.dumps({"success": False, "error": "Could not find old_string"})
        subject._record_file_mutation_result("patch", patch_args, failure, is_error=True)

        failures = subject._turn_failed_file_mutations
        assert "/tmp/a.md" in failures
        assert failures["/tmp/a.md"]["tool"] == "patch"
        assert "Could not find old_string" in failures["/tmp/a.md"]["error_preview"]

    def test_success_removes_prior_failure(self):
        subject = _bare_agent()

        # Attempt one: the patch fails and the path is recorded.
        failing_args = {
            "mode": "replace",
            "path": "/tmp/a.md",
            "old_string": "x",
            "new_string": "y",
        }
        subject._record_file_mutation_result(
            "patch", failing_args, json.dumps({"error": "not found"}), is_error=True,
        )
        assert "/tmp/a.md" in subject._turn_failed_file_mutations

        # Attempt two: a corrected old_string succeeds and clears the entry.
        passing_args = {
            "mode": "replace",
            "path": "/tmp/a.md",
            "old_string": "real",
            "new_string": "fixed",
        }
        subject._record_file_mutation_result(
            "patch",
            passing_args,
            json.dumps({"success": True, "diff": "..."}),
            is_error=False,
        )
        assert subject._turn_failed_file_mutations == {}

    def test_repeated_failure_keeps_first_error(self):
        subject = _bare_agent()
        attempts = (("v1", "first error"), ("v2", "second error"))
        for old_string, error_text in attempts:
            subject._record_file_mutation_result(
                "patch",
                {
                    "mode": "replace",
                    "path": "/tmp/a.md",
                    "old_string": old_string,
                    "new_string": "y",
                },
                json.dumps({"error": error_text}),
                is_error=True,
            )
        # The earliest error must survive: replacing it with the latest one
        # would hide the initial root cause.
        recorded = subject._turn_failed_file_mutations["/tmp/a.md"]["error_preview"]
        assert "first error" in recorded

    def test_v4a_multi_file_all_tracked(self):
        subject = _bare_agent()
        patch_text = "".join((
            "*** Begin Patch\n",
            "*** Update File: /tmp/a.md\n@@ @@\n-a\n+b\n",
            "*** Update File: /tmp/b.md\n@@ @@\n-a\n+b\n",
            "*** End Patch\n",
        ))
        subject._record_file_mutation_result(
            "patch",
            {"mode": "patch", "patch": patch_text},
            json.dumps({"error": "parse failure"}),
            is_error=True,
        )
        tracked_paths = set(subject._turn_failed_file_mutations)
        assert tracked_paths == {"/tmp/a.md", "/tmp/b.md"}

    def test_no_state_dict_silent_noop(self):
        """Recording must not raise when the per-turn state dict is missing.

        Outside ``run_conversation`` the dict is never attached. A tool
        dispatched from, say, a direct ``chat()`` call should not blow up
        its call site just because the verifier state is uninitialised.
        """
        stateless = object.__new__(AIAgent)  # deliberately no state dict
        # Passing means the call returns without raising.
        stateless._record_file_mutation_result(
            "patch",
            {"mode": "replace", "path": "/tmp/a.md"},
            json.dumps({"error": "x"}),
            is_error=True,
        )

    def test_missing_path_arg_recorded_nowhere(self):
        subject = _bare_agent()
        pathless_args = {"mode": "replace"}  # no "path" key
        subject._record_file_mutation_result(
            "patch",
            pathless_args,
            json.dumps({"error": "path required"}),
            is_error=True,
        )
        # The per-turn state is keyed by file path, not by tool-call ID, so
        # a call without a path leaves it empty.
        assert subject._turn_failed_file_mutations == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _format_file_mutation_failure_footer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFormatFooter:
    """Rendering checks for ``AIAgent._format_file_mutation_failure_footer``."""

    def test_empty_returns_empty_string(self):
        footer = AIAgent._format_file_mutation_failure_footer({})
        assert footer == ""

    def test_single_failure(self):
        failures = {
            "/tmp/a.md": {"tool": "patch", "error_preview": "Could not find old_string"},
        }
        footer = AIAgent._format_file_mutation_failure_footer(failures)
        expected_fragments = (
            "1 file(s) were NOT modified",
            "/tmp/a.md",
            "Could not find old_string",
            "git status",  # user-actionable hint
        )
        for fragment in expected_fragments:
            assert fragment in footer

    def test_truncation_at_10_entries(self):
        failures = {}
        for index in range(15):
            failures[f"/tmp/f{index}.md"] = {"tool": "patch", "error_preview": "err"}

        footer = AIAgent._format_file_mutation_failure_footer(failures)
        assert "15 file(s) were NOT modified" in footer
        assert "… and 5 more" in footer

        # Expect ten per-file bullets plus the "and X more" summary bullet.
        bullet_count = sum(
            1 for row in footer.split("\n") if row.lstrip().startswith("•")
        )
        assert bullet_count == 11  # 10 shown + 1 summary
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _file_mutation_verifier_enabled — env + config precedence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestVerifierEnabled:
    """Env-var and config handling of ``_file_mutation_verifier_enabled``."""

    def test_default_is_enabled(self, monkeypatch):
        import hermes_cli.config as config_module

        monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False)
        subject = _bare_agent()
        # With neither env nor config set, the safe default is True.
        # ``load_config`` is stubbed because it may otherwise surface a
        # user's config.yaml in some environments.
        monkeypatch.setattr(config_module, "load_config", lambda: {})
        assert subject._file_mutation_verifier_enabled() is True

    @pytest.mark.parametrize("value", ["0", "false", "FALSE", "no", "off"])
    def test_env_disables(self, monkeypatch, value):
        monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", value)
        enabled = _bare_agent()._file_mutation_verifier_enabled()
        assert enabled is False

    def test_env_enables_over_config(self, monkeypatch):
        import hermes_cli.config as config_module

        # Env says on, config says off: the env value is expected to win.
        monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", "1")
        monkeypatch.setattr(
            config_module,
            "load_config",
            lambda: {"display": {"file_mutation_verifier": False}},
        )
        enabled = _bare_agent()._file_mutation_verifier_enabled()
        assert enabled is True

    def test_config_disables_when_no_env(self, monkeypatch):
        import hermes_cli.config as config_module

        # No env override, so the config's False is expected to apply.
        monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False)
        monkeypatch.setattr(
            config_module,
            "load_config",
            lambda: {"display": {"file_mutation_verifier": False}},
        )
        enabled = _bare_agent()._file_mutation_verifier_enabled()
        assert enabled is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level invariants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_file_mutating_tools_set_shape():
    """Pin the verifier's tracked tool set to ``write_file`` and ``patch``.

    This is a guard rail: anyone adding a third file-mutating tool (e.g. a
    new ``append_file``) should also audit whether the verifier needs to
    track it. A unilateral addition makes this test fail loudly.
    """
    expected_tools = frozenset(("write_file", "patch"))
    assert _FILE_MUTATING_TOOLS == expected_tools
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue