From c9a3f36f5656f1a3d543e5b6be1fd05b98783c53 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Mon, 4 May 2026 00:04:36 +0530 Subject: [PATCH 001/171] feat: add video_analyze tool for native video understanding (#19301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add video_analyze tool for native video understanding Adds a video_analyze tool that sends video files to multimodal LLMs (e.g. Gemini) for analysis via the OpenRouter-compatible video_url content type. Mirrors vision_analyze in structure, error handling, and registration pattern. Key design: - Base64 encodes entire video (no frame extraction, no ffmpeg dep) - Uses 'video_url' content block type (OpenRouter standard) - Supports mp4, webm, mov, avi, mkv, mpeg formats - 50 MB hard cap, 20 MB warning threshold - 180s minimum timeout (videos take longer than images) - AUXILIARY_VIDEO_MODEL env override, falls back to AUXILIARY_VISION_MODEL - Same SSRF protection, retry logic, and cleanup as vision_analyze Default disabled: registered in 'video' toolset (not in _HERMES_CORE_TOOLS). Users opt in via: hermes tools enable video, or enabled_toolsets=['video']. * feat(video): add models.dev capability pre-check + CONFIGURABLE_TOOLSETS entry - Pre-checks model video capability via models.dev modalities.input before expensive base64 encoding. Fails early with helpful message suggesting video-capable alternatives (gemini, mimo-v2.5-pro). - Passes optimistically if model unknown or lookup fails. - Adds ModelInfo.supports_video_input() helper. - Adds 'video' to CONFIGURABLE_TOOLSETS and _DEFAULT_OFF_TOOLSETS so 'hermes tools enable video' works from CLI. - 8 new tests for the capability check (37 total). * refactor(video): remove models.dev capability pre-check Removes _check_video_model_capability and ModelInfo.supports_video_input. The vision_analyze tool doesn't pre-check image capability either — both tools rely on the same pattern: send request, handle API errors gracefully with categorized user-facing messages. The pre-check was inconsistent (only worked for some providers/models) so drop it for parity. * cleanup: compress comments, fix fragile timeout coupling - Replace _VISION_DOWNLOAD_TIMEOUT * 2 with hardcoded 60s (no silent breakage if vision timeout changes independently) - Strip verbose comments and redundant log lines throughout - No behavioral changes --- hermes_cli/tools_config.py | 3 +- tests/tools/test_video_analyze.py | 337 ++++++++++++++++++++++++++++ tools/vision_tools.py | 361 ++++++++++++++++++++++++++++++ toolsets.py | 6 + 4 files changed, 706 insertions(+), 1 deletion(-) create mode 100644 tests/tools/test_video_analyze.py diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index b3df18d9321..21439a28c13 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -56,6 +56,7 @@ CONFIGURABLE_TOOLSETS = [ ("file", "📁 File Operations", "read, write, patch, search"), ("code_execution", "⚡ Code Execution", "execute_code"), ("vision", "👁️ Vision / Image Analysis", "vision_analyze"), + ("video", "🎬 Video Analysis", "video_analyze (requires video-capable model)"), ("image_gen", "🎨 Image Generation", "image_generate"), ("moa", "🧠 Mixture of Agents", "mixture_of_agents"), ("tts", "🔊 Text-to-Speech", "text_to_speech"), @@ -78,7 +79,7 @@ CONFIGURABLE_TOOLSETS = [ # Toolsets that are OFF by default for new installs. # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled), # but the setup checklist won't pre-select them for first-time users. -_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"} +_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin", "video"} # Platform-scoped toolsets: only appear in the `hermes tools` checklist for # these platforms, and only resolve/save for these platforms. A toolset diff --git a/tests/tools/test_video_analyze.py b/tests/tools/test_video_analyze.py new file mode 100644 index 00000000000..62987d96b20 --- /dev/null +++ b/tests/tools/test_video_analyze.py @@ -0,0 +1,337 @@ +"""Tests for video_analyze tool in tools/vision_tools.py.""" + +import asyncio +import json +import os +from pathlib import Path +from typing import Awaitable +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from tools.vision_tools import ( + _detect_video_mime_type, + _video_to_base64_data_url, + _handle_video_analyze, + _MAX_VIDEO_BASE64_BYTES, + _VIDEO_MIME_TYPES, + _VIDEO_SIZE_WARN_BYTES, + video_analyze_tool, + VIDEO_ANALYZE_SCHEMA, +) + + +# --------------------------------------------------------------------------- +# _detect_video_mime_type +# --------------------------------------------------------------------------- + + +class TestDetectVideoMimeType: + """Extension-based MIME detection for video files.""" + + def test_mp4(self, tmp_path): + p = tmp_path / "clip.mp4" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + def test_webm(self, tmp_path): + p = tmp_path / "clip.webm" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/webm" + + def test_mov(self, tmp_path): + p = tmp_path / "clip.mov" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mov" + + def test_avi_fallback_mp4(self, tmp_path): + p = tmp_path / "clip.avi" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + def test_mkv_fallback_mp4(self, tmp_path): + p = tmp_path / "clip.mkv" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + def test_mpeg(self, tmp_path): + p = tmp_path / "clip.mpeg" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mpeg" + + def test_mpg(self, tmp_path): + p = tmp_path / "clip.mpg" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mpeg" + + def test_unsupported_extension(self, tmp_path): + p = tmp_path / "clip.flv" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) is None + + def test_case_insensitive(self, tmp_path): + p = tmp_path / "clip.MP4" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + +# --------------------------------------------------------------------------- +# _video_to_base64_data_url +# --------------------------------------------------------------------------- + + +class TestVideoToBase64DataUrl: + """Base64 encoding of video files.""" + + def test_produces_data_url(self, tmp_path): + p = tmp_path / "test.mp4" + p.write_bytes(b"\x00\x01\x02\x03") + result = _video_to_base64_data_url(p) + assert result.startswith("data:video/mp4;base64,") + + def test_custom_mime_type(self, tmp_path): + p = tmp_path / "test.webm" + p.write_bytes(b"\x00\x01\x02\x03") + result = _video_to_base64_data_url(p, mime_type="video/webm") + assert result.startswith("data:video/webm;base64,") + + def test_default_mime_for_unknown_ext(self, tmp_path): + p = tmp_path / "test.xyz" + p.write_bytes(b"\x00\x01\x02\x03") + result = _video_to_base64_data_url(p) + # Falls back to video/mp4 + assert result.startswith("data:video/mp4;base64,") + + +# --------------------------------------------------------------------------- +# Schema validation +# --------------------------------------------------------------------------- + + +class TestVideoAnalyzeSchema: + """Schema structure is correct.""" + + def test_schema_name(self): + assert VIDEO_ANALYZE_SCHEMA["name"] == "video_analyze" + + def test_schema_has_required_fields(self): + params = VIDEO_ANALYZE_SCHEMA["parameters"] + assert "video_url" in params["properties"] + assert "question" in params["properties"] + assert params["required"] == ["video_url", "question"] + + def test_schema_description_mentions_video(self): + assert "video" in VIDEO_ANALYZE_SCHEMA["description"].lower() + + +# --------------------------------------------------------------------------- +# _handle_video_analyze handler +# --------------------------------------------------------------------------- + + +class TestHandleVideoAnalyze: + """Tests for the registry handler wrapper.""" + + def test_returns_awaitable(self, tmp_path, monkeypatch): + video_file = tmp_path / "test.mp4" + video_file.write_bytes(b"\x00" * 100) + monkeypatch.setenv("AUXILIARY_VIDEO_MODEL", "") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "") + + with patch("tools.vision_tools.video_analyze_tool", new_callable=AsyncMock) as mock_tool: + mock_tool.return_value = json.dumps({"success": True, "analysis": "test"}) + result = _handle_video_analyze({"video_url": str(video_file), "question": "what is this?"}) + # Should return an awaitable (coroutine) + assert asyncio.iscoroutine(result) + # Clean up the unawaited coroutine + result.close() + + def test_uses_auxiliary_video_model_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("AUXILIARY_VIDEO_MODEL", "google/gemini-2.5-flash") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "other-model") + + with patch("tools.vision_tools.video_analyze_tool", new_callable=AsyncMock) as mock_tool: + mock_tool.return_value = json.dumps({"success": True, "analysis": "ok"}) + asyncio.get_event_loop().run_until_complete( + _handle_video_analyze({"video_url": "/tmp/test.mp4", "question": "test"}) + ) + args = mock_tool.call_args[0] + assert args[2] == "google/gemini-2.5-flash" + + def test_falls_back_to_vision_model_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("AUXILIARY_VIDEO_MODEL", "") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "google/gemini-flash") + + with patch("tools.vision_tools.video_analyze_tool", new_callable=AsyncMock) as mock_tool: + mock_tool.return_value = json.dumps({"success": True, "analysis": "ok"}) + asyncio.get_event_loop().run_until_complete( + _handle_video_analyze({"video_url": "/tmp/test.mp4", "question": "test"}) + ) + args = mock_tool.call_args[0] + assert args[2] == "google/gemini-flash" + + +# --------------------------------------------------------------------------- +# video_analyze_tool — integration-style tests with mocked LLM +# --------------------------------------------------------------------------- + + +class TestVideoAnalyzeTool: + """Core video analysis function tests.""" + + def _run(self, coro): + return asyncio.get_event_loop().run_until_complete(coro) + + def test_local_file_success(self, tmp_path, monkeypatch): + """Analyze a local video file — happy path.""" + video = tmp_path / "demo.mp4" + video.write_bytes(b"\x00" * 1024) + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "A short video showing a demo." + + with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock, return_value=mock_response): + with patch("tools.vision_tools.extract_content_or_reasoning", return_value="A short video showing a demo."): + result = self._run(video_analyze_tool(str(video), "What is this?")) + + data = json.loads(result) + assert data["success"] is True + assert "demo" in data["analysis"].lower() + + def test_local_file_not_found(self, tmp_path): + """Non-existent file raises appropriate error.""" + result = self._run(video_analyze_tool("/nonexistent/video.mp4", "What?")) + data = json.loads(result) + assert data["success"] is False + assert "invalid video source" in data["analysis"].lower() + + def test_unsupported_format(self, tmp_path): + """Unsupported extension raises error.""" + video = tmp_path / "clip.flv" + video.write_bytes(b"\x00" * 100) + + result = self._run(video_analyze_tool(str(video), "What is this?")) + data = json.loads(result) + assert data["success"] is False + assert "unsupported video format" in data["analysis"].lower() + + def test_video_too_large(self, tmp_path, monkeypatch): + """Video exceeding max size is rejected.""" + video = tmp_path / "huge.mp4" + # Don't actually write 50MB — mock the stat + video.write_bytes(b"\x00" * 100) + + # Patch the base64 encoding to return something huge + with patch("tools.vision_tools._video_to_base64_data_url") as mock_encode: + mock_encode.return_value = "data:video/mp4;base64," + "A" * (_MAX_VIDEO_BASE64_BYTES + 1) + result = self._run(video_analyze_tool(str(video), "What?")) + + data = json.loads(result) + assert data["success"] is False + assert "too large" in data["analysis"].lower() + + def test_interrupt_check(self, tmp_path): + """Tool respects interrupt flag.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + with patch("tools.interrupt.is_interrupted", return_value=True): + result = self._run(video_analyze_tool(str(video), "What?")) + + data = json.loads(result) + assert data["success"] is False + + def test_empty_response_retries(self, tmp_path): + """Retries once on empty model response.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + call_count = 0 + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "Video analysis result." + + async def fake_llm(**kwargs): + nonlocal call_count + call_count += 1 + return mock_response + + with patch("tools.vision_tools.async_call_llm", side_effect=fake_llm): + with patch("tools.vision_tools.extract_content_or_reasoning", side_effect=["", "Video analysis result."]): + result = self._run(video_analyze_tool(str(video), "What?")) + + data = json.loads(result) + assert data["success"] is True + assert call_count == 2 # Initial call + retry + + def test_file_scheme_stripped(self, tmp_path): + """file:// prefix is stripped correctly.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "OK" + + with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock, return_value=mock_response): + with patch("tools.vision_tools.extract_content_or_reasoning", return_value="OK"): + result = self._run(video_analyze_tool(f"file://{video}", "What?")) + + data = json.loads(result) + assert data["success"] is True + + def test_api_message_format(self, tmp_path): + """Verify the message sent to LLM uses video_url content type.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + captured_kwargs = {} + + async def capture_llm(**kwargs): + captured_kwargs.update(kwargs) + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "OK" + return mock_response + + with patch("tools.vision_tools.async_call_llm", side_effect=capture_llm): + with patch("tools.vision_tools.extract_content_or_reasoning", return_value="OK"): + self._run(video_analyze_tool(str(video), "Describe this")) + + messages = captured_kwargs["messages"] + assert len(messages) == 1 + content = messages[0]["content"] + assert len(content) == 2 + assert content[0]["type"] == "text" + assert content[1]["type"] == "video_url" + assert "video_url" in content[1] + assert content[1]["video_url"]["url"].startswith("data:video/mp4;base64,") + + +# --------------------------------------------------------------------------- +# Toolset registration +# --------------------------------------------------------------------------- + + +class TestVideoToolsetRegistration: + """Verify the tool is registered correctly.""" + + def test_registered_in_video_toolset(self): + from tools.registry import registry + entry = registry.get_entry("video_analyze") + assert entry is not None + assert entry.toolset == "video" + assert entry.is_async is True + assert entry.emoji == "🎬" + + def test_not_in_core_tools(self): + """video_analyze should NOT be in _HERMES_CORE_TOOLS (default disabled).""" + from toolsets import _HERMES_CORE_TOOLS + assert "video_analyze" not in _HERMES_CORE_TOOLS + + def test_in_video_toolset_definition(self): + """Toolset 'video' should contain video_analyze.""" + from toolsets import TOOLSETS + assert "video" in TOOLSETS + assert "video_analyze" in TOOLSETS["video"]["tools"] diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 233b737272b..e7389e3efa1 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -801,3 +801,364 @@ registry.register( is_async=True, emoji="👁️", ) + + +# --------------------------------------------------------------------------- +# Video Analysis Tool +# --------------------------------------------------------------------------- + +# Extension → MIME. avi/mkv fall back to mp4. +_VIDEO_MIME_TYPES = { + ".mp4": "video/mp4", + ".webm": "video/webm", + ".mov": "video/mov", + ".avi": "video/mp4", + ".mkv": "video/mp4", + ".mpeg": "video/mpeg", + ".mpg": "video/mpeg", +} + +_MAX_VIDEO_BASE64_BYTES = 50 * 1024 * 1024 # 50 MB hard cap +_VIDEO_SIZE_WARN_BYTES = 20 * 1024 * 1024 + + +def _detect_video_mime_type(video_path: Path) -> Optional[str]: + """Return a video MIME type based on file extension, or None if unsupported.""" + ext = video_path.suffix.lower() + return _VIDEO_MIME_TYPES.get(ext) + + +def _video_to_base64_data_url(video_path: Path, mime_type: Optional[str] = None) -> str: + """Convert a video file to a base64-encoded data URL.""" + data = video_path.read_bytes() + encoded = base64.b64encode(data).decode("ascii") + mime = mime_type or _VIDEO_MIME_TYPES.get(video_path.suffix.lower(), "video/mp4") + return f"data:{mime};base64,{encoded}" + + +async def _download_video(video_url: str, destination: Path, max_retries: int = 3) -> Path: + """Download video from URL with SSRF protection and retry.""" + import asyncio + + destination.parent.mkdir(parents=True, exist_ok=True) + + async def _ssrf_redirect_guard(response): + if response.is_redirect and response.next_request: + redirect_url = str(response.next_request.url) + from tools.url_safety import is_safe_url + if not is_safe_url(redirect_url): + raise ValueError( + f"Blocked redirect to private/internal address: {redirect_url}" + ) + + last_error = None + for attempt in range(max_retries): + try: + blocked = check_website_access(video_url) + if blocked: + raise PermissionError(blocked["message"]) + + async with httpx.AsyncClient( + timeout=60.0, + follow_redirects=True, + event_hooks={"response": [_ssrf_redirect_guard]}, + ) as client: + response = await client.get( + video_url, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "video/*,*/*;q=0.8", + }, + ) + response.raise_for_status() + + cl = response.headers.get("content-length") + if cl and int(cl) > _MAX_VIDEO_BASE64_BYTES: + raise ValueError( + f"Video too large ({int(cl)} bytes, max {_MAX_VIDEO_BASE64_BYTES})" + ) + + final_url = str(response.url) + blocked = check_website_access(final_url) + if blocked: + raise PermissionError(blocked["message"]) + + body = response.content + if len(body) > _MAX_VIDEO_BASE64_BYTES: + raise ValueError( + f"Video too large ({len(body)} bytes, max {_MAX_VIDEO_BASE64_BYTES})" + ) + destination.write_bytes(body) + + return destination + except Exception as e: + last_error = e + if attempt < max_retries - 1: + wait_time = 2 ** (attempt + 1) + logger.warning("Video download failed (attempt %s/%s): %s", attempt + 1, max_retries, str(e)[:50]) + await asyncio.sleep(wait_time) + else: + logger.error( + "Video download failed after %s attempts: %s", + max_retries, str(e)[:100], exc_info=True, + ) + + if last_error is None: + raise RuntimeError( + f"_download_video exited retry loop without attempting (max_retries={max_retries})" + ) + raise last_error + + +async def video_analyze_tool( + video_url: str, + user_prompt: str, + model: str = None, +) -> str: + """Analyze a video via multimodal LLM. Returns JSON {success, analysis}.""" + debug_call_data = { + "parameters": { + "video_url": video_url, + "user_prompt": user_prompt[:200] + "..." if len(user_prompt) > 200 else user_prompt, + "model": model, + }, + "error": None, + "success": False, + "analysis_length": 0, + "model_used": model, + "video_size_bytes": 0, + } + + temp_video_path = None + should_cleanup = True + + try: + from tools.interrupt import is_interrupted + if is_interrupted(): + return tool_error("Interrupted", success=False) + + logger.info("Analyzing video: %s", video_url[:60]) + logger.info("User prompt: %s", user_prompt[:100]) + + # Resolve local path vs remote URL + resolved_url = video_url + if resolved_url.startswith("file://"): + resolved_url = resolved_url[len("file://"):] + local_path = Path(os.path.expanduser(resolved_url)) + + if local_path.is_file(): + logger.info("Using local video file: %s", video_url) + temp_video_path = local_path + should_cleanup = False + elif _validate_image_url(video_url): + blocked = check_website_access(video_url) + if blocked: + raise PermissionError(blocked["message"]) + temp_dir = get_hermes_dir("cache/video", "temp_video_files") + temp_video_path = temp_dir / f"temp_video_{uuid.uuid4()}.mp4" + await _download_video(video_url, temp_video_path) + should_cleanup = True + else: + raise ValueError( + "Invalid video source. Provide an HTTP/HTTPS URL or a valid local file path." + ) + + video_size_bytes = temp_video_path.stat().st_size + video_size_mb = video_size_bytes / (1024 * 1024) + logger.info("Video ready (%.1f MB)", video_size_mb) + + detected_mime = _detect_video_mime_type(temp_video_path) + if not detected_mime: + raise ValueError( + f"Unsupported video format: '{temp_video_path.suffix}'. " + f"Supported: {', '.join(sorted(_VIDEO_MIME_TYPES.keys()))}" + ) + + if video_size_bytes > _VIDEO_SIZE_WARN_BYTES: + logger.warning("Video is %.1f MB — may be slow or rejected", video_size_mb) + + video_data_url = _video_to_base64_data_url(temp_video_path, mime_type=detected_mime) + data_size_mb = len(video_data_url) / (1024 * 1024) + + if len(video_data_url) > _MAX_VIDEO_BASE64_BYTES: + raise ValueError( + f"Video too large for API: base64 payload is {data_size_mb:.1f} MB " + f"(limit {_MAX_VIDEO_BASE64_BYTES / (1024 * 1024):.0f} MB). " + f"Compress or trim the video and retry." + ) + + debug_call_data["video_size_bytes"] = video_size_bytes + + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": user_prompt, + }, + { + "type": "video_url", + "video_url": { + "url": video_data_url, + }, + }, + ], + } + ] + + vision_timeout = 180.0 + vision_temperature = 0.1 + try: + from hermes_cli.config import cfg_get, load_config + _cfg = load_config() + _vision_cfg = cfg_get(_cfg, "auxiliary", "vision", default={}) + _vt = _vision_cfg.get("timeout") + if _vt is not None: + vision_timeout = max(float(_vt), 180.0) + _vtemp = _vision_cfg.get("temperature") + if _vtemp is not None: + vision_temperature = float(_vtemp) + except Exception: + pass + + call_kwargs = { + "task": "vision", + "messages": messages, + "temperature": vision_temperature, + "max_tokens": 4000, + "timeout": vision_timeout, + } + if model: + call_kwargs["model"] = model + + response = await async_call_llm(**call_kwargs) + analysis = extract_content_or_reasoning(response) + + if not analysis: + logger.warning("Empty video response, retrying once") + response = await async_call_llm(**call_kwargs) + analysis = extract_content_or_reasoning(response) + + analysis_length = len(analysis) if analysis else 0 + logger.info("Video analysis completed (%s characters)", analysis_length) + + result = { + "success": True, + "analysis": analysis or "There was a problem with the request and the video could not be analyzed.", + } + + debug_call_data["success"] = True + debug_call_data["analysis_length"] = analysis_length + _debug.log_call("video_analyze_tool", debug_call_data) + _debug.save() + + return json.dumps(result, indent=2, ensure_ascii=False) + + except Exception as e: + error_msg = f"Error analyzing video: {str(e)}" + logger.error("%s", error_msg, exc_info=True) + + err_str = str(e).lower() + if any(hint in err_str for hint in ( + "402", "insufficient", "payment required", "credits", "billing", + )): + analysis = ( + "Insufficient credits or payment required. Please top up your " + f"API provider account and try again. Error: {e}" + ) + elif any(hint in err_str for hint in ( + "does not support", "not support video", + "content_policy", "multimodal", + "unrecognized request argument", "video input", + "video_url", + )): + analysis = ( + f"The model does not support video analysis or the request was " + f"rejected. Ensure you're using a video-capable model " + f"(e.g. google/gemini-2.5-flash). Error: {e}" + ) + elif any(hint in err_str for hint in ( + "too large", "payload", "413", "content_too_large", + "request_too_large", "exceeds", "size limit", + )): + analysis = ( + "The video is too large for the API. Try compressing or trimming " + f"the video (max ~50 MB). Error: {e}" + ) + else: + analysis = ( + "There was a problem with the request and the video could not " + f"be analyzed. Error: {e}" + ) + + result = { + "success": False, + "error": error_msg, + "analysis": analysis, + } + + debug_call_data["error"] = error_msg + _debug.log_call("video_analyze_tool", debug_call_data) + _debug.save() + + return json.dumps(result, indent=2, ensure_ascii=False) + + finally: + if should_cleanup and temp_video_path and temp_video_path.exists(): + try: + temp_video_path.unlink() + logger.debug("Cleaned up temporary video file") + except Exception as cleanup_error: + logger.warning( + "Could not delete temporary file: %s", cleanup_error, exc_info=True + ) + + +VIDEO_ANALYZE_SCHEMA = { + "name": "video_analyze", + "description": ( + "Analyze a video from a URL or local file path using a multimodal AI model. " + "Sends the video to a video-capable model (e.g. Gemini) for understanding. " + "Use this for video files — for images, use vision_analyze instead. " + "Supports mp4, webm, mov, avi, mkv, mpeg formats. " + "Note: large videos (>20 MB) may be slow; max ~50 MB." + ), + "parameters": { + "type": "object", + "properties": { + "video_url": { + "type": "string", + "description": "Video URL (http/https) or local file path to analyze.", + }, + "question": { + "type": "string", + "description": "Your specific question about the video. The AI will describe what happens in the video and answer your question.", + }, + }, + "required": ["video_url", "question"], + }, +} + + +def _handle_video_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]: + video_url = args.get("video_url", "") + question = args.get("question", "") + full_prompt = ( + "Fully describe and explain everything happening in this video, " + "including visual content, motion, audio cues, text overlays, and scene " + f"transitions. Then answer the following question:\n\n{question}" + ) + model = os.getenv("AUXILIARY_VIDEO_MODEL", "").strip() or os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None + return video_analyze_tool(video_url, full_prompt, model) + + +registry.register( + name="video_analyze", + toolset="video", + schema=VIDEO_ANALYZE_SCHEMA, + handler=_handle_video_analyze, + check_fn=check_vision_requirements, + is_async=True, + emoji="🎬", +) diff --git a/toolsets.py b/toolsets.py index 57e226d3c08..2a77f615cef 100644 --- a/toolsets.py +++ b/toolsets.py @@ -89,6 +89,12 @@ TOOLSETS = { "tools": ["vision_analyze"], "includes": [] }, + + "video": { + "description": "Video analysis and understanding tools (opt-in, not in default toolset)", + "tools": ["video_analyze"], + "includes": [] + }, "image_gen": { "description": "Creative generation tools (images)", From b8ae8cc801df3bb440d86b826795fcbceffa9372 Mon Sep 17 00:00:00 2001 From: GodsBoy Date: Sun, 3 May 2026 19:58:44 +0200 Subject: [PATCH 002/171] fix(debug): redact log content at upload time in hermes debug share Apply agent.redact.redact_sensitive_text with force=True to log content captured by _capture_log_snapshot before it reaches upload_to_pastebin. On-disk logs are untouched. Compatible with the off-by-default local redaction policy from #16794: this is upload-time-only and applies regardless of security.redact_secrets because the public paste service is the leak surface. A visible banner is prepended to each uploaded log paste so reviewers know redaction was applied. --no-redact preserves deliberate unredacted sharing for maintainer-coordinated cases. The bug-report, setup-help, and feature-request issue templates direct users to run hermes debug share and paste the resulting public URLs. With redaction off by default per #16794, those uploads have been carrying credentials onto paste.rs and dpaste.com. force=True is non-negotiable: without it, redact_sensitive_text short-circuits at agent/redact.py:322 when the env var is unset, so the fix would silently be a no-op for its target audience. A regression test pins this down. Fixes #19316 --- hermes_cli/debug.py | 89 ++++++++++++-- hermes_cli/main.py | 11 ++ scripts/release.py | 2 + tests/hermes_cli/test_debug.py | 213 +++++++++++++++++++++++++++++++++ 4 files changed, 308 insertions(+), 7 deletions(-) diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py index 06be05a3551..a7338e4ba82 100644 --- a/hermes_cli/debug.py +++ b/hermes_cli/debug.py @@ -1,12 +1,19 @@ -"""``hermes debug`` — debug tools for Hermes Agent. +"""``hermes debug`` debug tools for Hermes Agent. Currently supports: hermes debug share Upload debug report (system info + logs) to a paste service and print a shareable URL. + By default, log content is run through + ``agent.redact.redact_sensitive_text`` with + ``force=True`` before upload so credentials in + ``~/.hermes/logs/*.log`` are not leaked into + the public paste service. Pass ``--no-redact`` + to disable. """ import io import json +import logging import sys import time import urllib.error @@ -19,6 +26,16 @@ from typing import Optional from hermes_constants import get_hermes_home from utils import atomic_replace +logger = logging.getLogger(__name__) + +# Banner prepended to upload-bound log content when redaction is enabled. +# Visible in the public paste so reviewers know the content was sanitized. +# Kept short; the trailing newline guarantees the banner sits on its own line. +_REDACTION_BANNER = ( + "[hermes debug share: log content redacted at upload time. " + "run with --no-redact to disable]\n" +) + # --------------------------------------------------------------------------- # Paste services — try paste.rs first, dpaste.com as fallback. @@ -368,17 +385,40 @@ def _resolve_log_path(log_name: str) -> Optional[Path]: return None +def _redact_log_text(text: str) -> str: + """Run ``redact_sensitive_text`` with ``force=True`` over upload-bound text. + + Uses ``force=True`` so redaction fires regardless of the operator's + ``security.redact_secrets`` setting. The local on-disk log file is + not modified; only the in-memory copy headed for the public paste + service is sanitized. Returns the redacted text (or the original + when empty / non-string). + """ + if not text: + return text + from agent.redact import redact_sensitive_text + + return redact_sensitive_text(text, force=True) + + def _capture_log_snapshot( log_name: str, *, tail_lines: int, max_bytes: int = _MAX_LOG_BYTES, + redact: bool = True, ) -> LogSnapshot: """Capture a log once and derive summary/full-log views from it. The report tail and standalone log upload must come from the same file snapshot. Otherwise a rotation/truncate between reads can make the report look newer than the uploaded ``agent.log`` paste. + + When ``redact`` is True (the default), both ``tail_text`` and + ``full_text`` are run through ``_redact_log_text`` so the snapshot + returned is upload-safe. The on-disk log file is never modified. + Pass ``redact=False`` to capture original log content (used by + ``hermes debug share --no-redact``). """ log_path = _resolve_log_path(log_name) if log_path is None: @@ -438,18 +478,34 @@ def _capture_log_snapshot( if truncated: full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}" + if redact: + tail_text = _redact_log_text(tail_text) + full_text = _redact_log_text(full_text) + return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text) except Exception as exc: return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None) -def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]: - """Capture all logs used by debug-share exactly once.""" +def _capture_default_log_snapshots( + log_lines: int, *, redact: bool = True +) -> dict[str, LogSnapshot]: + """Capture all logs used by debug-share exactly once. + + ``redact`` is forwarded to each ``_capture_log_snapshot`` call so all + captured logs share the same redaction policy for a given run. + """ errors_lines = min(log_lines, 100) return { - "agent": _capture_log_snapshot("agent", tail_lines=log_lines), - "errors": _capture_log_snapshot("errors", tail_lines=errors_lines), - "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines), + "agent": _capture_log_snapshot( + "agent", tail_lines=log_lines, redact=redact + ), + "errors": _capture_log_snapshot( + "errors", tail_lines=errors_lines, redact=redact + ), + "gateway": _capture_log_snapshot( + "gateway", tail_lines=errors_lines, redact=redact + ), } @@ -532,6 +588,7 @@ def run_debug_share(args): log_lines = getattr(args, "lines", 200) expiry = getattr(args, "expire", 7) local_only = getattr(args, "local", False) + redact = not getattr(args, "no_redact", False) if not local_only: print(_PRIVACY_NOTICE) @@ -539,8 +596,16 @@ def run_debug_share(args): print("Collecting debug report...") # Capture dump once — prepended to every paste for context. + # The dump is already redacted at extract time via dump.py:_redact; + # log_snapshots are redacted by _capture_default_log_snapshots when + # redact=True so credentials never reach the public paste service. dump_text = _capture_dump() - log_snapshots = _capture_default_log_snapshots(log_lines) + log_snapshots = _capture_default_log_snapshots(log_lines, redact=redact) + + if redact: + logger.info( + "hermes debug share: applied force-mode redaction to log snapshots before upload" + ) report = collect_debug_report( log_lines=log_lines, @@ -556,6 +621,15 @@ def run_debug_share(args): if gateway_log: gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log + # Visible banner so reviewers reading the public paste know redaction + # was applied at upload time. Banner is omitted under --no-redact. + if redact: + report = _REDACTION_BANNER + report + if agent_log: + agent_log = _REDACTION_BANNER + agent_log + if gateway_log: + gateway_log = _REDACTION_BANNER + gateway_log + if local_only: print(report) if agent_log: @@ -666,6 +740,7 @@ def run_debug(args): print(" --lines N Number of log lines to include (default: 200)") print(" --expire N Paste expiry in days (default: 7)") print(" --local Print report locally instead of uploading") + print(" --no-redact Disable upload-time secret redaction (default: redact)") print() print("Options (delete):") print(" ... One or more paste URLs to delete") diff --git a/hermes_cli/main.py b/hermes_cli/main.py index ed8c24c8fa7..d80e31f6901 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -8891,6 +8891,7 @@ Examples: hermes debug share --lines 500 Include more log lines hermes debug share --expire 30 Keep paste for 30 days hermes debug share --local Print report locally (no upload) + hermes debug share --no-redact Disable upload-time secret redaction hermes debug delete Delete a previously uploaded paste """, ) @@ -8916,6 +8917,16 @@ Examples: action="store_true", help="Print the report locally instead of uploading", ) + share_parser.add_argument( + "--no-redact", + action="store_true", + help=( + "Disable upload-time secret redaction (default: redact). Logs " + "are normally run through agent.redact.redact_sensitive_text " + "with force=True before upload so credentials are not leaked " + "into the public paste service." + ), + ) delete_parser = debug_sub.add_parser( "delete", help="Delete a paste uploaded by 'hermes debug share'", diff --git a/scripts/release.py b/scripts/release.py index a752ffb98eb..c1988049d46 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -679,6 +679,8 @@ AUTHOR_MAP = { "ztzheng@163.com": "chengoak", # PR #17467 "24110240104@m.fudan.edu.cn": "YuShu", # co-author only "charliekerfoot@gmail.com": "CharlieKerfoot", # PR #18951 + # Debug share upload-time redaction (May 2026) + "dhuysamen@gmail.com": "GodsBoy", # PR #19318 } diff --git a/tests/hermes_cli/test_debug.py b/tests/hermes_cli/test_debug.py index 4bba56867e2..b83023a76a4 100644 --- a/tests/hermes_cli/test_debug.py +++ b/tests/hermes_cli/test_debug.py @@ -273,6 +273,101 @@ class TestCaptureLogSnapshot: assert "rotated agent data" in snap.full_text +# --------------------------------------------------------------------------- +# Capture log redaction (force=True applies regardless of HERMES_REDACT_SECRETS) +# --------------------------------------------------------------------------- + +# A vendor-prefixed token used across redaction tests. Long enough to clear +# the redactor's `floor` parameter so it actually masks rather than fully blanks. +_REDACT_FIXTURE_TOKEN = "sk-proj-A1B2C3D4E5F6G7H8I9J0aA" + + +class TestCaptureLogSnapshotRedaction: + """Pin upload-time redaction at the _capture_log_snapshot boundary.""" + + @pytest.fixture + def hermes_home_with_secret(self, tmp_path, monkeypatch): + """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + # Critical: ensure the user has NOT opted in to redaction. The whole + # point of this PR is that share-time redaction works for users who + # never set this env var. + monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + + logs_dir = home / "logs" + logs_dir.mkdir() + (logs_dir / "agent.log").write_text( + f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n" + ) + (logs_dir / "errors.log").write_text("") + (logs_dir / "gateway.log").write_text("") + return home + + def test_default_redacts_tail_and_full_text(self, hermes_home_with_secret): + from hermes_cli.debug import _capture_log_snapshot + + snap = _capture_log_snapshot("agent", tail_lines=10) + + # Both views the upload uses must be sanitized. + assert _REDACT_FIXTURE_TOKEN not in snap.tail_text + assert snap.full_text is not None + assert _REDACT_FIXTURE_TOKEN not in snap.full_text + + def test_redact_false_passes_through(self, hermes_home_with_secret): + from hermes_cli.debug import _capture_log_snapshot + + snap = _capture_log_snapshot("agent", tail_lines=10, redact=False) + + # Original token survives when the caller opts out. + assert _REDACT_FIXTURE_TOKEN in snap.tail_text + assert _REDACT_FIXTURE_TOKEN in (snap.full_text or "") + + def test_force_true_overrides_unset_env_var(self, hermes_home_with_secret): + """Regression test: redact_sensitive_text short-circuits without force=True. + + If a future refactor drops `force=True` from `_redact_log_text`, this + test fails immediately. Without `force=True`, the redactor returns the + input unchanged when HERMES_REDACT_SECRETS is unset, and the feature + ships silently broken for its target audience. + """ + import os + + from hermes_cli.debug import _capture_log_snapshot + + # Belt-and-suspenders: confirm the env var is genuinely unset for this + # test so we know we're exercising the force=True path. + assert os.environ.get("HERMES_REDACT_SECRETS", "") == "" + + snap = _capture_log_snapshot("agent", tail_lines=10) + + assert _REDACT_FIXTURE_TOKEN not in snap.tail_text + assert snap.full_text is not None + assert _REDACT_FIXTURE_TOKEN not in snap.full_text + + def test_capture_default_log_snapshots_threads_redact( + self, hermes_home_with_secret + ): + from hermes_cli.debug import _capture_default_log_snapshots + + snaps = _capture_default_log_snapshots(50) + + # Default threads redact=True to all three captured logs. + assert _REDACT_FIXTURE_TOKEN not in snaps["agent"].tail_text + assert _REDACT_FIXTURE_TOKEN not in (snaps["agent"].full_text or "") + + def test_capture_default_log_snapshots_no_redact_passes_through( + self, hermes_home_with_secret + ): + from hermes_cli.debug import _capture_default_log_snapshots + + snaps = _capture_default_log_snapshots(50, redact=False) + + assert _REDACT_FIXTURE_TOKEN in snaps["agent"].tail_text + assert _REDACT_FIXTURE_TOKEN in (snaps["agent"].full_text or "") + + # --------------------------------------------------------------------------- # Debug report collection # --------------------------------------------------------------------------- @@ -556,6 +651,124 @@ class TestRunDebugShare: assert "all failed" in out.err +# --------------------------------------------------------------------------- +# Share-time redaction wiring + visible banner +# --------------------------------------------------------------------------- + +class TestRunDebugShareRedaction: + """End-to-end: --no-redact flag, banner injection, default behavior.""" + + @pytest.fixture + def hermes_home_with_secret(self, tmp_path, monkeypatch): + """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + + logs_dir = home / "logs" + logs_dir.mkdir() + (logs_dir / "agent.log").write_text( + f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n" + ) + (logs_dir / "errors.log").write_text("") + (logs_dir / "gateway.log").write_text( + f"2026-04-12 17:00:01 INFO gateway.run: token {_REDACT_FIXTURE_TOKEN}\n" + ) + return home + + def test_default_share_redacts_uploaded_content( + self, hermes_home_with_secret, capsys + ): + """The uploaded report and full-log pastes do not contain the raw token.""" + from hermes_cli.debug import run_debug_share + + args = MagicMock() + args.lines = 50 + args.expire = 7 + args.local = False + args.no_redact = False + + captured: list[str] = [] + + def fake_upload(content, expiry_days=7): + captured.append(content) + return f"https://paste.rs/{len(captured)}" + + with patch("hermes_cli.dump.run_dump"), \ + patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \ + patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload): + run_debug_share(args) + + # At least the report plus one full log paste reached the upload path. + assert len(captured) >= 2 + for content in captured: + assert _REDACT_FIXTURE_TOKEN not in content, ( + "raw token leaked into upload-bound content" + ) + + def test_default_share_includes_redaction_banner( + self, hermes_home_with_secret, capsys + ): + """Each upload-bound paste carries the visible redaction banner.""" + from hermes_cli.debug import run_debug_share + + args = MagicMock() + args.lines = 50 + args.expire = 7 + args.local = False + args.no_redact = False + + captured: list[str] = [] + + def fake_upload(content, expiry_days=7): + captured.append(content) + return f"https://paste.rs/{len(captured)}" + + with patch("hermes_cli.dump.run_dump"), \ + patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \ + patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload): + run_debug_share(args) + + for content in captured: + assert "redacted at upload time" in content, ( + "redaction banner missing from upload-bound content" + ) + + def test_no_redact_flag_disables_redaction_and_banner( + self, hermes_home_with_secret, capsys + ): + """--no-redact preserves original log content and omits the banner.""" + from hermes_cli.debug import run_debug_share + + args = MagicMock() + args.lines = 50 + args.expire = 7 + args.local = False + args.no_redact = True + + captured: list[str] = [] + + def fake_upload(content, expiry_days=7): + captured.append(content) + return f"https://paste.rs/{len(captured)}" + + with patch("hermes_cli.dump.run_dump"), \ + patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \ + patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload): + run_debug_share(args) + + # The agent.log paste should now contain the raw token. + assert any(_REDACT_FIXTURE_TOKEN in c for c in captured), ( + "expected raw token in --no-redact upload" + ) + # No banner anywhere when redaction is disabled. + for content in captured: + assert "redacted at upload time" not in content, ( + "banner present with --no-redact" + ) + + # --------------------------------------------------------------------------- # run_debug router # --------------------------------------------------------------------------- From 9eaddfafa30018b1d4eb3e5e72bbe2d242f8e50e Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Mon, 4 May 2026 00:14:36 +0530 Subject: [PATCH 003/171] fix(cli): CLI/TUI on local backend always uses launch directory, ignores terminal.cwd (#19242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLI/TUI sessions on the local backend now unconditionally use os.getcwd() as the working directory. The terminal.cwd config value is only consumed by gateway/cron/delegation modes (where there's no shell to cd from). Previously, 'hermes setup' would write an absolute path (e.g. $HOME) into terminal.cwd which then pinned the CLI to that directory regardless of where the user launched hermes from. This was a silent foot-gun — the user's 'cd' was being ignored. Changes: 1. cli.py: Restructured CWD resolution — if TERMINAL_CWD is not already set by the gateway, and the backend is local, always use os.getcwd(). Config terminal.cwd is irrelevant for interactive CLI/TUI sessions. 2. setup.py: Moved the cwd prompt from setup_terminal_backend() to setup_gateway(). It now only appears when configuring messaging platforms and is labeled 'Gateway working directory'. 3. Tests: Rewrote test_cwd_env_respect.py to validate the new behavior: explicit config paths are ignored for CLI, gateway pre-set values are preserved, non-local backends keep their config paths. 4. Docs: Updated configuration.md, profiles.md, and environment-variables.md to clarify that terminal.cwd only affects gateway/cron mode on local backend. Closes #19214 --- cli.py | 48 ++++---- hermes_cli/setup.py | 27 +++-- tests/cli/test_cwd_env_respect.py | 106 +++++++++++++----- .../docs/reference/environment-variables.md | 2 +- website/docs/user-guide/configuration.md | 2 +- website/docs/user-guide/profiles.md | 9 +- 6 files changed, 122 insertions(+), 72 deletions(-) diff --git a/cli.py b/cli.py index da917ae1906..ef745ae67bf 100644 --- a/cli.py +++ b/cli.py @@ -459,32 +459,30 @@ def load_cli_config() -> Dict[str, Any]: if "backend" in terminal_config: terminal_config["env_type"] = terminal_config["backend"] - # Handle special cwd values: "." or "auto" means use current working directory. - # Only resolve to the host's CWD for the local backend where the host - # filesystem is directly accessible. For ALL remote/container backends - # (ssh, docker, modal, singularity), the host path doesn't exist on the - # target -- remove the key so terminal_tool.py uses its per-backend default. - # - # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the - # gateway's config bridge earlier in the process), don't clobber it. - # This prevents a lazy import of cli.py during gateway runtime from - # rewriting TERMINAL_CWD to the service's working directory. - # See issue #10817. + # CWD resolution: CLI/TUI on local backend always uses os.getcwd(); + # gateway/cron uses terminal.cwd from config. Detection: gateway's config + # bridge (gateway/run.py) sets TERMINAL_CWD before this runs. + # See #19214, #4672, #10225, #10817. _CWD_PLACEHOLDERS = (".", "auto", "cwd") - if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - _existing_cwd = os.environ.get("TERMINAL_CWD", "") - if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): - # Gateway (or earlier startup) already resolved a real path — keep it - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - else: - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - else: - # Remove so TERMINAL_CWD stays unset → tool picks backend default - terminal_config.pop("cwd", None) + _existing_cwd = os.environ.get("TERMINAL_CWD", "") + _is_gateway_import = ( + _existing_cwd + and _existing_cwd not in _CWD_PLACEHOLDERS + and os.path.isabs(_existing_cwd) + ) + effective_backend = terminal_config.get("env_type", "local") + + if _is_gateway_import: + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd + elif effective_backend == "local": + # CLI/TUI: user's `cd` is the config — ignore terminal.cwd. + terminal_config["cwd"] = os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + # Non-local backend with placeholder — let terminal_tool use its default. + terminal_config.pop("cwd", None) + # else: non-local backend with explicit path — keep as-is env_mappings = { "env_type": "TERMINAL_ENV", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 31cb8460122..88c297c1b45 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1327,18 +1327,7 @@ def setup_terminal_backend(config: dict): if selected_backend == "local": print_success("Terminal backend: Local") print_info("Commands run directly on this machine.") - - # CWD for messaging - print() - print_info("Working directory for messaging sessions:") - print_info(" When using Hermes via Telegram/Discord, this is where") - print_info( - " the agent starts. CLI mode always starts in the current directory." - ) - current_cwd = cfg_get(config, "terminal", "cwd", default="") - cwd = prompt(" Messaging working directory", current_cwd or str(Path.home())) - if cwd: - config["terminal"]["cwd"] = cwd + print_info(" CLI/TUI always uses your launch directory (wherever you run 'hermes').") # Sudo support print() @@ -2390,6 +2379,20 @@ def setup_gateway(config: dict): print_info("━" * 50) print_success("Messaging platforms configured!") + # Gateway working directory — where the agent starts when you chat + # via Telegram/Discord/etc. CLI/TUI ignores this (uses launch dir). + print() + print_info("Gateway working directory:") + print_info(" When using Hermes via messaging platforms, this is where") + print_info(" the agent's terminal commands start.") + print_info(" (CLI/TUI always uses wherever you launched 'hermes' from.)") + current_cwd = cfg_get(config, "terminal", "cwd", default="") + if current_cwd in (".", "auto", "cwd", ""): + current_cwd = "" + cwd = prompt(" Gateway working directory", current_cwd or str(Path.home())) + if cwd: + config.setdefault("terminal", {})["cwd"] = cwd + # Check if any home channels are missing missing_home = [] if get_env_value("TELEGRAM_BOT_TOKEN") and not get_env_value( diff --git a/tests/cli/test_cwd_env_respect.py b/tests/cli/test_cwd_env_respect.py index e9f3341d2ae..7cf592db0c0 100644 --- a/tests/cli/test_cwd_env_respect.py +++ b/tests/cli/test_cwd_env_respect.py @@ -1,12 +1,12 @@ -"""Tests that load_cli_config() guards against lazy-import TERMINAL_CWD clobbering. +"""Tests that load_cli_config() CWD resolution works correctly. -When the gateway resolves TERMINAL_CWD at startup and cli.py is later -imported lazily (via delegate_tool → CLI_CONFIG), load_cli_config() must -not overwrite the already-resolved value with os.getcwd(). +The rule: +- CLI/TUI on local backend: ALWAYS use os.getcwd() (config ignored). +- Gateway (TERMINAL_CWD pre-set to absolute path): respect it. +- Non-local backends with placeholder: pop cwd for backend default. +- Non-local backends with explicit path: keep it. -config.yaml terminal.cwd is the canonical source of truth. -.env TERMINAL_CWD and MESSAGING_CWD are deprecated. -See issue #10817. +See issues #19214, #4672, #10225, #10817. """ import os @@ -20,21 +20,29 @@ _CWD_PLACEHOLDERS = (".", "auto", "cwd") def _resolve_terminal_cwd(terminal_config: dict, defaults: dict, env: dict): """Simulate the CWD resolution logic from load_cli_config(). - This mirrors the code in cli.py that checks for a pre-resolved - TERMINAL_CWD before falling back to os.getcwd(). + This mirrors the code in cli.py that handles the CWD resolution + based on mode (CLI vs gateway) and backend type. """ - if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - _existing_cwd = env.get("TERMINAL_CWD", "") - if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - else: - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - else: - terminal_config.pop("cwd", None) + _existing_cwd = env.get("TERMINAL_CWD", "") + _is_gateway_import = ( + _existing_cwd + and _existing_cwd not in _CWD_PLACEHOLDERS + and os.path.isabs(_existing_cwd) + ) + effective_backend = terminal_config.get("env_type", "local") + + if _is_gateway_import: + # Gateway already resolved a real path — keep it. + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd + elif effective_backend == "local": + # CLI/TUI on local backend: always use launch directory. + terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + # Non-local backend with placeholder — pop for backend default. + terminal_config.pop("cwd", None) + # else: non-local backend with explicit path — keep as-is # Simulate the bridging loop: write terminal_config["cwd"] to env _file_has_terminal = defaults.get("_file_has_terminal", False) @@ -66,18 +74,36 @@ class TestLazyImportGuard: result = _resolve_terminal_cwd(terminal_config, defaults, env) assert result == "/home/user/workspace" + def test_gateway_resolved_cwd_survives_even_with_explicit_config(self): + """Gateway pre-set TERMINAL_CWD wins even when config has explicit path. -class TestConfigCwdResolution: - """config.yaml terminal.cwd is the canonical source of truth.""" + This is the key scenario: config.yaml has terminal.cwd: /home/user + (from hermes setup), but the gateway already resolved TERMINAL_CWD. + The gateway's value must win. + """ + env = {"TERMINAL_CWD": "/home/user/workspace"} + terminal_config = {"cwd": "/home/user", "env_type": "local"} + defaults = {"terminal": {"cwd": "/home/user"}, "_file_has_terminal": True} - def test_explicit_config_cwd_wins(self): - """terminal.cwd: /explicit/path always wins.""" - env = {"TERMINAL_CWD": "/old/gateway/value"} - terminal_config = {"cwd": "/explicit/path"} + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/home/user/workspace" + + +class TestCliAlwaysUsesGetcwd: + """CLI/TUI on local backend always uses os.getcwd(), ignoring config.""" + + def test_explicit_config_cwd_ignored_on_local_cli(self): + """terminal.cwd: /explicit/path is IGNORED for CLI on local backend. + + This is the #19214 fix — 'hermes setup' may have written an absolute + path, but CLI always uses os.getcwd() (the user's launch directory). + """ + env = {} # No pre-set TERMINAL_CWD = CLI mode + terminal_config = {"cwd": "/explicit/path", "env_type": "local"} defaults = {"terminal": {"cwd": "/explicit/path"}, "_file_has_terminal": True} result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/explicit/path" + assert result == "/fake/getcwd" # os.getcwd(), NOT /explicit/path def test_dot_cwd_resolves_to_getcwd_when_no_prior(self): """With no pre-set TERMINAL_CWD, "." resolves to os.getcwd().""" @@ -88,7 +114,20 @@ class TestConfigCwdResolution: result = _resolve_terminal_cwd(terminal_config, defaults, env) assert result == "/fake/getcwd" - def test_remote_backend_pops_cwd(self): + def test_home_dir_config_ignored_on_local_cli(self): + """terminal.cwd: ~ (home dir from setup) is ignored for CLI.""" + env = {} + terminal_config = {"cwd": "/home/daimon", "env_type": "local"} + defaults = {"terminal": {"cwd": "/home/daimon"}, "_file_has_terminal": True} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/fake/getcwd" + + +class TestNonLocalBackends: + """Non-local backends use config or per-backend defaults.""" + + def test_remote_backend_pops_placeholder_cwd(self): """Remote backend + placeholder cwd → popped for backend default.""" env = {} terminal_config = {"cwd": ".", "env_type": "docker"} @@ -97,6 +136,15 @@ class TestConfigCwdResolution: result = _resolve_terminal_cwd(terminal_config, defaults, env) assert result == "" # cwd popped, no env var set + def test_remote_backend_keeps_explicit_path(self): + """Remote backend + explicit path → kept (e.g. SSH cwd: /srv/app).""" + env = {} + terminal_config = {"cwd": "/srv/myproject", "env_type": "ssh"} + defaults = {"terminal": {"cwd": "/srv/myproject"}, "_file_has_terminal": True} + + result = _resolve_terminal_cwd(terminal_config, defaults, env) + assert result == "/srv/myproject" + def test_remote_backend_with_prior_cwd_preserves(self): """Remote backend + pre-resolved TERMINAL_CWD → adopted.""" env = {"TERMINAL_CWD": "/project"} diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 955f4600146..b0c7e73d447 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -184,7 +184,7 @@ These variables configure the [Tool Gateway](/docs/user-guide/features/tool-gate | `TERMINAL_VERCEL_RUNTIME` | Vercel Sandbox runtime (`node24`, `node22`, `python3.13`) | | `TERMINAL_TIMEOUT` | Command timeout in seconds | | `TERMINAL_LIFETIME_SECONDS` | Max lifetime for terminal sessions in seconds | -| `TERMINAL_CWD` | Working directory for all terminal sessions | +| `TERMINAL_CWD` | Working directory for gateway/cron terminal sessions (CLI/TUI on local backend ignores this — always uses launch directory) | | `SUDO_PASSWORD` | Enable sudo without interactive prompt | For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME_SECONDS` controls when Hermes cleans up an idle terminal session, and later resumes may recreate the sandbox rather than keep the same live processes running. diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 18c96b8b184..57d1f608684 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -88,7 +88,7 @@ Hermes supports seven terminal backends. Each determines where the agent's shell ```yaml terminal: backend: local # local | docker | ssh | modal | daytona | vercel_sandbox | singularity - cwd: "." # Working directory ("." = current dir for local, "/root" for containers) + cwd: "." # Gateway/cron working directory. CLI/TUI on local backend always uses your launch directory. timeout: 180 # Per-command timeout in seconds env_passthrough: [] # Env var names to forward to sandboxed execution (terminal + execute_code) singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20" # Container image for Singularity backend diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md index 0dcc35db0a0..6527fc1167d 100644 --- a/website/docs/user-guide/profiles.md +++ b/website/docs/user-guide/profiles.md @@ -109,12 +109,12 @@ The CLI always shows which profile is active: Profiles are often confused with workspaces or sandboxes, but they are different things: - A **profile** gives Hermes its own state directory: `config.yaml`, `.env`, `SOUL.md`, sessions, memory, logs, cron jobs, and gateway state. -- A **workspace** or **working directory** is where terminal commands start. That is controlled separately by `terminal.cwd`. +- A **workspace** or **working directory** is where terminal commands start. For CLI/TUI on local backend, this is always your launch directory. For gateway mode, it's controlled by `terminal.cwd` in config. - A **sandbox** is what limits filesystem access. Profiles do **not** sandbox the agent. On the default `local` terminal backend, the agent still has the same filesystem access as your user account. A profile does not stop it from accessing folders outside the profile directory. -If you want a profile to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`: +If you want a profile's **gateway** to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`: ```yaml terminal: @@ -122,13 +122,14 @@ terminal: cwd: /absolute/path/to/project ``` -Using `cwd: "."` on the local backend means "the directory Hermes was launched from", not "the profile directory". +:::note +This only affects gateway/cron mode. If you run `hermes -p myprofile` from CLI, the agent uses your shell's current directory regardless of `terminal.cwd`. The `terminal.cwd` config is for headless modes (gateway, cron) where there's no shell to `cd` from. +::: Also note: - `SOUL.md` can guide the model, but it does not enforce a workspace boundary. - Changes to `SOUL.md` take effect cleanly on a new session. Existing sessions may still be using the old prompt state. -- Asking the model "what directory are you in?" is not a reliable isolation test. If you need a predictable starting directory for tools, set `terminal.cwd` explicitly. ## Running gateways From 167b5648ea609aafa85f56c5714f7abda5091ed6 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Mon, 4 May 2026 00:43:58 +0530 Subject: [PATCH 004/171] Revert "fix(cli): CLI/TUI on local backend always uses launch directory, ignores terminal.cwd (#19242)" (#19329) This reverts commit 9eaddfafa30018b1d4eb3e5e72bbe2d242f8e50e. --- cli.py | 48 ++++---- hermes_cli/setup.py | 27 ++--- tests/cli/test_cwd_env_respect.py | 106 +++++------------- .../docs/reference/environment-variables.md | 2 +- website/docs/user-guide/configuration.md | 2 +- website/docs/user-guide/profiles.md | 9 +- 6 files changed, 72 insertions(+), 122 deletions(-) diff --git a/cli.py b/cli.py index ef745ae67bf..da917ae1906 100644 --- a/cli.py +++ b/cli.py @@ -459,30 +459,32 @@ def load_cli_config() -> Dict[str, Any]: if "backend" in terminal_config: terminal_config["env_type"] = terminal_config["backend"] - # CWD resolution: CLI/TUI on local backend always uses os.getcwd(); - # gateway/cron uses terminal.cwd from config. Detection: gateway's config - # bridge (gateway/run.py) sets TERMINAL_CWD before this runs. - # See #19214, #4672, #10225, #10817. + # Handle special cwd values: "." or "auto" means use current working directory. + # Only resolve to the host's CWD for the local backend where the host + # filesystem is directly accessible. For ALL remote/container backends + # (ssh, docker, modal, singularity), the host path doesn't exist on the + # target -- remove the key so terminal_tool.py uses its per-backend default. + # + # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the + # gateway's config bridge earlier in the process), don't clobber it. + # This prevents a lazy import of cli.py during gateway runtime from + # rewriting TERMINAL_CWD to the service's working directory. + # See issue #10817. _CWD_PLACEHOLDERS = (".", "auto", "cwd") - _existing_cwd = os.environ.get("TERMINAL_CWD", "") - _is_gateway_import = ( - _existing_cwd - and _existing_cwd not in _CWD_PLACEHOLDERS - and os.path.isabs(_existing_cwd) - ) - effective_backend = terminal_config.get("env_type", "local") - - if _is_gateway_import: - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - elif effective_backend == "local": - # CLI/TUI: user's `cd` is the config — ignore terminal.cwd. - terminal_config["cwd"] = os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - # Non-local backend with placeholder — let terminal_tool use its default. - terminal_config.pop("cwd", None) - # else: non-local backend with explicit path — keep as-is + if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + _existing_cwd = os.environ.get("TERMINAL_CWD", "") + if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): + # Gateway (or earlier startup) already resolved a real path — keep it + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd + else: + effective_backend = terminal_config.get("env_type", "local") + if effective_backend == "local": + terminal_config["cwd"] = os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + else: + # Remove so TERMINAL_CWD stays unset → tool picks backend default + terminal_config.pop("cwd", None) env_mappings = { "env_type": "TERMINAL_ENV", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 88c297c1b45..31cb8460122 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1327,7 +1327,18 @@ def setup_terminal_backend(config: dict): if selected_backend == "local": print_success("Terminal backend: Local") print_info("Commands run directly on this machine.") - print_info(" CLI/TUI always uses your launch directory (wherever you run 'hermes').") + + # CWD for messaging + print() + print_info("Working directory for messaging sessions:") + print_info(" When using Hermes via Telegram/Discord, this is where") + print_info( + " the agent starts. CLI mode always starts in the current directory." + ) + current_cwd = cfg_get(config, "terminal", "cwd", default="") + cwd = prompt(" Messaging working directory", current_cwd or str(Path.home())) + if cwd: + config["terminal"]["cwd"] = cwd # Sudo support print() @@ -2379,20 +2390,6 @@ def setup_gateway(config: dict): print_info("━" * 50) print_success("Messaging platforms configured!") - # Gateway working directory — where the agent starts when you chat - # via Telegram/Discord/etc. CLI/TUI ignores this (uses launch dir). - print() - print_info("Gateway working directory:") - print_info(" When using Hermes via messaging platforms, this is where") - print_info(" the agent's terminal commands start.") - print_info(" (CLI/TUI always uses wherever you launched 'hermes' from.)") - current_cwd = cfg_get(config, "terminal", "cwd", default="") - if current_cwd in (".", "auto", "cwd", ""): - current_cwd = "" - cwd = prompt(" Gateway working directory", current_cwd or str(Path.home())) - if cwd: - config.setdefault("terminal", {})["cwd"] = cwd - # Check if any home channels are missing missing_home = [] if get_env_value("TELEGRAM_BOT_TOKEN") and not get_env_value( diff --git a/tests/cli/test_cwd_env_respect.py b/tests/cli/test_cwd_env_respect.py index 7cf592db0c0..e9f3341d2ae 100644 --- a/tests/cli/test_cwd_env_respect.py +++ b/tests/cli/test_cwd_env_respect.py @@ -1,12 +1,12 @@ -"""Tests that load_cli_config() CWD resolution works correctly. +"""Tests that load_cli_config() guards against lazy-import TERMINAL_CWD clobbering. -The rule: -- CLI/TUI on local backend: ALWAYS use os.getcwd() (config ignored). -- Gateway (TERMINAL_CWD pre-set to absolute path): respect it. -- Non-local backends with placeholder: pop cwd for backend default. -- Non-local backends with explicit path: keep it. +When the gateway resolves TERMINAL_CWD at startup and cli.py is later +imported lazily (via delegate_tool → CLI_CONFIG), load_cli_config() must +not overwrite the already-resolved value with os.getcwd(). -See issues #19214, #4672, #10225, #10817. +config.yaml terminal.cwd is the canonical source of truth. +.env TERMINAL_CWD and MESSAGING_CWD are deprecated. +See issue #10817. """ import os @@ -20,29 +20,21 @@ _CWD_PLACEHOLDERS = (".", "auto", "cwd") def _resolve_terminal_cwd(terminal_config: dict, defaults: dict, env: dict): """Simulate the CWD resolution logic from load_cli_config(). - This mirrors the code in cli.py that handles the CWD resolution - based on mode (CLI vs gateway) and backend type. + This mirrors the code in cli.py that checks for a pre-resolved + TERMINAL_CWD before falling back to os.getcwd(). """ - _existing_cwd = env.get("TERMINAL_CWD", "") - _is_gateway_import = ( - _existing_cwd - and _existing_cwd not in _CWD_PLACEHOLDERS - and os.path.isabs(_existing_cwd) - ) - effective_backend = terminal_config.get("env_type", "local") - - if _is_gateway_import: - # Gateway already resolved a real path — keep it. - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - elif effective_backend == "local": - # CLI/TUI on local backend: always use launch directory. - terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - # Non-local backend with placeholder — pop for backend default. - terminal_config.pop("cwd", None) - # else: non-local backend with explicit path — keep as-is + if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + _existing_cwd = env.get("TERMINAL_CWD", "") + if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): + terminal_config["cwd"] = _existing_cwd + defaults["terminal"]["cwd"] = _existing_cwd + else: + effective_backend = terminal_config.get("env_type", "local") + if effective_backend == "local": + terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + else: + terminal_config.pop("cwd", None) # Simulate the bridging loop: write terminal_config["cwd"] to env _file_has_terminal = defaults.get("_file_has_terminal", False) @@ -74,36 +66,18 @@ class TestLazyImportGuard: result = _resolve_terminal_cwd(terminal_config, defaults, env) assert result == "/home/user/workspace" - def test_gateway_resolved_cwd_survives_even_with_explicit_config(self): - """Gateway pre-set TERMINAL_CWD wins even when config has explicit path. - This is the key scenario: config.yaml has terminal.cwd: /home/user - (from hermes setup), but the gateway already resolved TERMINAL_CWD. - The gateway's value must win. - """ - env = {"TERMINAL_CWD": "/home/user/workspace"} - terminal_config = {"cwd": "/home/user", "env_type": "local"} - defaults = {"terminal": {"cwd": "/home/user"}, "_file_has_terminal": True} +class TestConfigCwdResolution: + """config.yaml terminal.cwd is the canonical source of truth.""" - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/home/user/workspace" - - -class TestCliAlwaysUsesGetcwd: - """CLI/TUI on local backend always uses os.getcwd(), ignoring config.""" - - def test_explicit_config_cwd_ignored_on_local_cli(self): - """terminal.cwd: /explicit/path is IGNORED for CLI on local backend. - - This is the #19214 fix — 'hermes setup' may have written an absolute - path, but CLI always uses os.getcwd() (the user's launch directory). - """ - env = {} # No pre-set TERMINAL_CWD = CLI mode - terminal_config = {"cwd": "/explicit/path", "env_type": "local"} + def test_explicit_config_cwd_wins(self): + """terminal.cwd: /explicit/path always wins.""" + env = {"TERMINAL_CWD": "/old/gateway/value"} + terminal_config = {"cwd": "/explicit/path"} defaults = {"terminal": {"cwd": "/explicit/path"}, "_file_has_terminal": True} result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/fake/getcwd" # os.getcwd(), NOT /explicit/path + assert result == "/explicit/path" def test_dot_cwd_resolves_to_getcwd_when_no_prior(self): """With no pre-set TERMINAL_CWD, "." resolves to os.getcwd().""" @@ -114,20 +88,7 @@ class TestCliAlwaysUsesGetcwd: result = _resolve_terminal_cwd(terminal_config, defaults, env) assert result == "/fake/getcwd" - def test_home_dir_config_ignored_on_local_cli(self): - """terminal.cwd: ~ (home dir from setup) is ignored for CLI.""" - env = {} - terminal_config = {"cwd": "/home/daimon", "env_type": "local"} - defaults = {"terminal": {"cwd": "/home/daimon"}, "_file_has_terminal": True} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/fake/getcwd" - - -class TestNonLocalBackends: - """Non-local backends use config or per-backend defaults.""" - - def test_remote_backend_pops_placeholder_cwd(self): + def test_remote_backend_pops_cwd(self): """Remote backend + placeholder cwd → popped for backend default.""" env = {} terminal_config = {"cwd": ".", "env_type": "docker"} @@ -136,15 +97,6 @@ class TestNonLocalBackends: result = _resolve_terminal_cwd(terminal_config, defaults, env) assert result == "" # cwd popped, no env var set - def test_remote_backend_keeps_explicit_path(self): - """Remote backend + explicit path → kept (e.g. SSH cwd: /srv/app).""" - env = {} - terminal_config = {"cwd": "/srv/myproject", "env_type": "ssh"} - defaults = {"terminal": {"cwd": "/srv/myproject"}, "_file_has_terminal": True} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/srv/myproject" - def test_remote_backend_with_prior_cwd_preserves(self): """Remote backend + pre-resolved TERMINAL_CWD → adopted.""" env = {"TERMINAL_CWD": "/project"} diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index b0c7e73d447..955f4600146 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -184,7 +184,7 @@ These variables configure the [Tool Gateway](/docs/user-guide/features/tool-gate | `TERMINAL_VERCEL_RUNTIME` | Vercel Sandbox runtime (`node24`, `node22`, `python3.13`) | | `TERMINAL_TIMEOUT` | Command timeout in seconds | | `TERMINAL_LIFETIME_SECONDS` | Max lifetime for terminal sessions in seconds | -| `TERMINAL_CWD` | Working directory for gateway/cron terminal sessions (CLI/TUI on local backend ignores this — always uses launch directory) | +| `TERMINAL_CWD` | Working directory for all terminal sessions | | `SUDO_PASSWORD` | Enable sudo without interactive prompt | For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME_SECONDS` controls when Hermes cleans up an idle terminal session, and later resumes may recreate the sandbox rather than keep the same live processes running. diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 57d1f608684..18c96b8b184 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -88,7 +88,7 @@ Hermes supports seven terminal backends. Each determines where the agent's shell ```yaml terminal: backend: local # local | docker | ssh | modal | daytona | vercel_sandbox | singularity - cwd: "." # Gateway/cron working directory. CLI/TUI on local backend always uses your launch directory. + cwd: "." # Working directory ("." = current dir for local, "/root" for containers) timeout: 180 # Per-command timeout in seconds env_passthrough: [] # Env var names to forward to sandboxed execution (terminal + execute_code) singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20" # Container image for Singularity backend diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md index 6527fc1167d..0dcc35db0a0 100644 --- a/website/docs/user-guide/profiles.md +++ b/website/docs/user-guide/profiles.md @@ -109,12 +109,12 @@ The CLI always shows which profile is active: Profiles are often confused with workspaces or sandboxes, but they are different things: - A **profile** gives Hermes its own state directory: `config.yaml`, `.env`, `SOUL.md`, sessions, memory, logs, cron jobs, and gateway state. -- A **workspace** or **working directory** is where terminal commands start. For CLI/TUI on local backend, this is always your launch directory. For gateway mode, it's controlled by `terminal.cwd` in config. +- A **workspace** or **working directory** is where terminal commands start. That is controlled separately by `terminal.cwd`. - A **sandbox** is what limits filesystem access. Profiles do **not** sandbox the agent. On the default `local` terminal backend, the agent still has the same filesystem access as your user account. A profile does not stop it from accessing folders outside the profile directory. -If you want a profile's **gateway** to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`: +If you want a profile to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`: ```yaml terminal: @@ -122,14 +122,13 @@ terminal: cwd: /absolute/path/to/project ``` -:::note -This only affects gateway/cron mode. If you run `hermes -p myprofile` from CLI, the agent uses your shell's current directory regardless of `terminal.cwd`. The `terminal.cwd` config is for headless modes (gateway, cron) where there's no shell to `cd` from. -::: +Using `cwd: "."` on the local backend means "the directory Hermes was launched from", not "the profile directory". Also note: - `SOUL.md` can guide the model, but it does not enforce a workspace boundary. - Changes to `SOUL.md` take effect cleanly on a new session. Existing sessions may still be using the old prompt state. +- Asking the model "what directory are you in?" is not a reliable isolation test. If you need a predictable starting directory for tools, set `terminal.cwd` explicitly. ## Running gateways From 7e780f4832ed8c34a23dd292b522df3e9705bd0a Mon Sep 17 00:00:00 2001 From: asheriif Date: Sun, 3 May 2026 14:50:00 +0000 Subject: [PATCH 005/171] fix(tui): run plugin slash commands live --- tests/tui_gateway/test_protocol.py | 30 ++++++++++++++++++++++++++++++ tui_gateway/server.py | 24 +++++++++++++++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 2e54bb93eac..96df9823a61 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -391,6 +391,36 @@ def test_slash_exec_rejects_skill_commands(server): assert "skill command" in resp["error"]["message"] +def test_slash_exec_handles_plugin_commands_in_live_gateway(server): + """Plugin slash commands return normal slash.exec output without using the worker.""" + sid = "test-session" + + class Worker: + def __init__(self): + self.calls = [] + + def run(self, cmd): + self.calls.append(cmd) + return f"worker:{cmd}" + + worker = Worker() + server._sessions[sid] = {"session_key": sid, "agent": None, "slash_worker": worker} + + with patch( + "hermes_cli.plugins.get_plugin_command_handler", + lambda name: (lambda arg: f"plugin:{arg}") if name == "plugin-cmd" else None, + ): + resp = server.handle_request({ + "id": "r-plugin-slash", + "method": "slash.exec", + "params": {"command": "plugin-cmd hello", "session_id": sid}, + }) + + assert "error" not in resp + assert resp["result"] == {"output": "plugin:hello"} + assert worker.calls == [] + + @pytest.mark.parametrize("cmd", ["retry", "queue hello", "q hello", "steer fix the test", "plan"]) def test_slash_exec_rejects_pending_input_commands(server, cmd): """slash.exec must reject commands that use _pending_input in the CLI.""" diff --git a/tui_gateway/server.py b/tui_gateway/server.py index fe66d3798df..c59d358d748 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -5165,9 +5165,13 @@ def _(rid, params: dict) -> dict: return _err(rid, 4004, "empty command") # Skill slash commands and _pending_input commands must NOT go through the - # slash worker — see _PENDING_INPUT_COMMANDS definition above. - _cmd_parts = cmd.split() if not cmd.startswith("/") else cmd.lstrip("/").split() - _cmd_base = _cmd_parts[0] if _cmd_parts else "" + # slash worker — see _PENDING_INPUT_COMMANDS definition above. Plugin + # commands must also avoid the worker, but unlike skills/pending-input they + # still return normal slash.exec output so the TUI keeps the pager path. + _cmd_text = cmd.lstrip("/") if cmd.startswith("/") else cmd + _cmd_parts = _cmd_text.split(maxsplit=1) + _cmd_base = (_cmd_parts[0] if _cmd_parts else "").lower() + _cmd_arg = _cmd_parts[1] if len(_cmd_parts) > 1 else "" if _cmd_base in _PENDING_INPUT_COMMANDS: return _err( @@ -5185,6 +5189,20 @@ def _(rid, params: dict) -> dict: except Exception: pass + try: + from hermes_cli.plugins import ( + get_plugin_command_handler, + resolve_plugin_command_result, + ) + + if _cmd_base: + plugin_handler = get_plugin_command_handler(_cmd_base) + if plugin_handler: + result = resolve_plugin_command_result(plugin_handler(_cmd_arg)) + return _ok(rid, {"output": str(result or "(no output)")}) + except Exception as e: + return _err(rid, 4018, f"plugin command error: {e}") + worker = session.get("slash_worker") if not worker: try: From f5bd77b3e16d86e3cbd75a9d6bd719f28dd8dbb9 Mon Sep 17 00:00:00 2001 From: GodsBoy Date: Sun, 3 May 2026 22:33:11 +0200 Subject: [PATCH 006/171] fix(kanban): anchor board, workspaces, and worker logs at the shared Hermes root The Kanban board is documented as shared across all Hermes profiles, but `kanban_db_path()` and `workspaces_root()` resolved through `get_hermes_home()`, which returns the active profile's HERMES_HOME. When the dispatcher spawned a worker with `hermes -p --skills kanban-worker chat -q "work kanban task "`, the worker rewrote HERMES_HOME to the profile subdirectory before kanban_db.py imported, opening a profile-local `kanban.db` that did not contain the dispatcher's task. `kanban_show` and `kanban_complete` failed; the dispatcher's row stayed `running` and was retried/crashed. The same defect applied to `_default_spawn`'s log directory and `worker_log_path`, so `hermes kanban tail` did not see the worker's output. Add `kanban_home()` in `hermes_cli/kanban_db.py` that resolves through `HERMES_KANBAN_HOME` (explicit override) then `get_default_hermes_root()`, which already understands the `/profiles/` and Docker / custom HERMES_HOME shapes. Reroute `kanban_db_path`, `workspaces_root`, the `_default_spawn` log directory, `gc_worker_logs`, and `worker_log_path` through it. Profile-specific config, `.env`, memory, and sessions stay isolated as before; only the kanban surface is shared. Add a `TestSharedBoardPaths` regression class to `tests/hermes_cli/test_kanban_db.py` covering: default install, profile-worker convergence, Docker custom HERMES_HOME, Docker profile layout, explicit `HERMES_KANBAN_HOME` override, and a real SQLite round-trip across dispatcher and worker HERMES_HOME perspectives. The dispatcher/worker convergence tests fail on origin/main and pass after the fix. Update the `kanban.md` user-guide page and the misleading docstrings in `kanban_db.py` to describe the shared-root behavior. Fixes #19348 --- hermes_cli/kanban.py | 2 +- hermes_cli/kanban_db.py | 76 ++++++++++--- tests/hermes_cli/test_kanban_db.py | 171 +++++++++++++++++++++++++++++ 3 files changed, 230 insertions(+), 19 deletions(-) diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py index e23a4923f6d..46ec6c32ab4 100644 --- a/hermes_cli/kanban.py +++ b/hermes_cli/kanban.py @@ -366,7 +366,7 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu # --- log --- p_log = sub.add_parser( "log", - help="Print the worker log for a task (from $HERMES_HOME/kanban/logs/)", + help="Print the worker log for a task (from /kanban/logs/)", ) p_log.add_argument("task_id") p_log.add_argument("--tail", type=int, default=None, diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 1e8be214fb3..19311bcb697 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -1,8 +1,16 @@ """SQLite-backed Kanban board for multi-profile collaboration. -The board lives at ``$HERMES_HOME/kanban.db`` (profile-agnostic on purpose: -multiple profiles on the same machine all see the same board, which IS the -coordination primitive). +The board lives at ``/kanban.db`` where ```` is the **shared +Hermes root** (the parent of any active profile). Profiles intentionally +collapse onto a single board: it IS the cross-profile coordination +primitive. A worker spawned with ``hermes -p `` joins the same +board as the dispatcher that claimed the task. The same applies to +``/kanban/workspaces/`` and ``/kanban/logs/``. + +In standard installs ```` is ``~/.hermes``. In Docker / custom +deployments where ``HERMES_HOME`` points outside ``~/.hermes`` (e.g. +``/opt/hermes``), ```` is ``HERMES_HOME``. Set ``HERMES_KANBAN_HOME`` +to override the resolution explicitly (tests, unusual deployments). Schema is intentionally small: tasks, task_links, task_comments, task_events. The ``workspace_kind`` field decouples coordination from git @@ -61,16 +69,46 @@ _CTX_MAX_COMMENT_BYTES = 2 * 1024 # 2 KB per comment # Paths # --------------------------------------------------------------------------- +def kanban_home() -> Path: + """Return the shared Hermes root that anchors the kanban board. + + Resolution order: + + 1. ``HERMES_KANBAN_HOME`` env var when set and non-empty (explicit + override for tests and unusual deployments). + 2. ``get_default_hermes_root()``, which already returns ```` + when ``HERMES_HOME`` is ``/profiles/``, and returns + ``HERMES_HOME`` directly for Docker / custom deployments. + + The kanban board is shared across profiles **by design** (see the + module docstring). Resolving the kanban paths through the active + profile's ``HERMES_HOME`` would silently fork the board per profile, + which breaks the dispatcher / worker handoff. + """ + override = os.environ.get("HERMES_KANBAN_HOME", "").strip() + if override: + return Path(override) + from hermes_constants import get_default_hermes_root + return get_default_hermes_root() + + def kanban_db_path() -> Path: - """Return the path to ``kanban.db`` inside the active HERMES_HOME.""" - from hermes_constants import get_hermes_home - return get_hermes_home() / "kanban.db" + """Return the path to the shared ``kanban.db``. + + Anchored at :func:`kanban_home`, not the active profile's + ``HERMES_HOME``, so profile workers and the dispatcher converge on + the same board. + """ + return kanban_home() / "kanban.db" def workspaces_root() -> Path: - """Return the directory under which ``scratch`` workspaces are created.""" - from hermes_constants import get_hermes_home - return get_hermes_home() / "kanban" / "workspaces" + """Return the directory under which ``scratch`` workspaces are created. + + Anchored at :func:`kanban_home` so workspace paths are stable across + profile workers spawned by the dispatcher. + """ + return kanban_home() / "kanban" / "workspaces" # --------------------------------------------------------------------------- @@ -1516,12 +1554,15 @@ def archive_task(conn: sqlite3.Connection, task_id: str) -> bool: def resolve_workspace(task: Task) -> Path: """Resolve (and create if needed) the workspace for a task. - - ``scratch``: a fresh dir under ``$HERMES_HOME/kanban/workspaces//``. + - ``scratch``: a fresh dir under ``/kanban/workspaces//``, + where ```` is the shared Hermes root (see + :func:`kanban_home`). The path is the same for the dispatcher and + every profile worker, so handoff is path-stable. - ``dir:``: the path stored in ``workspace_path``. Created if missing. MUST be absolute — relative paths are rejected to prevent confused-deputy traversal where ``../../../tmp/attacker`` resolves against the dispatcher's CWD instead of a meaningful - root. Users who want a HERMES_HOME-relative workspace should + root. Users who want a kanban-root-relative workspace should compute the absolute path themselves. - ``worktree``: a git worktree at ``workspace_path``. Not created automatically in v1 -- the kanban-worker skill documents @@ -2104,9 +2145,10 @@ def _default_spawn(task: Task, workspace: str) -> Optional[int]: "chat", "-q", prompt, ]) - # Redirect output to a per-task log under HERMES_HOME/kanban/logs/. - from hermes_constants import get_hermes_home - log_dir = get_hermes_home() / "kanban" / "logs" + # Redirect output to a per-task log under /kanban/logs/. + # Anchored at the shared kanban root, not the worker's profile home, + # so `hermes kanban tail` reads the same file the worker writes to. + log_dir = kanban_home() / "kanban" / "logs" log_dir.mkdir(parents=True, exist_ok=True) log_path = log_dir / f"{task.id}.log" _rotate_worker_log(log_path, DEFAULT_LOG_ROTATE_BYTES) @@ -2591,8 +2633,7 @@ def gc_worker_logs( """Delete worker log files older than ``older_than_seconds``. Returns the number of files removed. Kept separate from ``gc_events`` because log files live on disk, not in SQLite.""" - from hermes_constants import get_hermes_home - log_dir = get_hermes_home() / "kanban" / "logs" + log_dir = kanban_home() / "kanban" / "logs" if not log_dir.exists(): return 0 cutoff = time.time() - older_than_seconds @@ -2614,8 +2655,7 @@ def gc_worker_logs( def worker_log_path(task_id: str) -> Path: """Return the path to a worker's log file. The file may not exist (task never spawned, or log already GC'd).""" - from hermes_constants import get_hermes_home - return get_hermes_home() / "kanban" / "logs" / f"{task_id}.log" + return kanban_home() / "kanban" / "logs" / f"{task_id}.log" def read_worker_log( diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index fcc6396be40..6214ab758a3 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -436,3 +436,174 @@ def test_tenant_propagates_to_events(kanban_home): # The "created" event should have tenant in its payload. created = [e for e in events if e.kind == "created"] assert created and created[0].payload.get("tenant") == "biz-a" + + +# --------------------------------------------------------------------------- +# Shared-board path resolution (issue #19348) +# +# The kanban board is a cross-profile coordination primitive: a worker +# spawned with `hermes -p ` must read/write the same kanban.db +# as the dispatcher that claimed the task. These tests exercise the +# path-resolution layer directly and would have caught the regression +# where `kanban_db_path()` resolved to the active profile's HERMES_HOME. +# --------------------------------------------------------------------------- + +class TestSharedBoardPaths: + """`kanban_home`/`kanban_db_path`/`workspaces_root`/`worker_log_path` + must anchor at the **shared root**, not the active profile's HERMES_HOME.""" + + def _set_home(self, monkeypatch, tmp_path, hermes_home): + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("HERMES_KANBAN_HOME", raising=False) + + def test_default_install_anchors_at_home_dot_hermes( + self, tmp_path, monkeypatch + ): + # Standard install: HERMES_HOME == ~/.hermes, no profile active. + default_home = tmp_path / ".hermes" + default_home.mkdir() + self._set_home(monkeypatch, tmp_path, default_home) + + assert kb.kanban_home() == default_home + assert kb.kanban_db_path() == default_home / "kanban.db" + assert kb.workspaces_root() == default_home / "kanban" / "workspaces" + assert ( + kb.worker_log_path("t_demo") + == default_home / "kanban" / "logs" / "t_demo.log" + ) + + def test_profile_worker_resolves_to_shared_root( + self, tmp_path, monkeypatch + ): + # Reproduces the bug: dispatcher uses ~/.hermes/kanban.db, + # worker spawned with -p previously resolved to + # ~/.hermes/profiles//kanban.db. After the fix both + # converge on ~/.hermes/kanban.db. + default_home = tmp_path / ".hermes" + default_home.mkdir() + profile_home = default_home / "profiles" / "nehemiahkanban" + profile_home.mkdir(parents=True) + self._set_home(monkeypatch, tmp_path, profile_home) + + # All four resolvers must anchor at the shared root, not the + # profile-local HERMES_HOME. + assert kb.kanban_home() == default_home + assert kb.kanban_db_path() == default_home / "kanban.db" + assert kb.workspaces_root() == default_home / "kanban" / "workspaces" + assert ( + kb.worker_log_path("t_0d214f19") + == default_home / "kanban" / "logs" / "t_0d214f19.log" + ) + + # Sanity: the profile-local path that used to be returned is + # explicitly NOT what we resolve to anymore. + assert kb.kanban_db_path() != profile_home / "kanban.db" + + def test_dispatcher_and_profile_worker_converge( + self, tmp_path, monkeypatch + ): + # End-to-end convergence: resolve the path under each side's + # HERMES_HOME and confirm equality. This is the property the + # dispatcher/worker handoff actually depends on. + default_home = tmp_path / ".hermes" + default_home.mkdir() + profile_home = default_home / "profiles" / "coder" + profile_home.mkdir(parents=True) + + # Dispatcher's perspective. + self._set_home(monkeypatch, tmp_path, default_home) + dispatcher_db = kb.kanban_db_path() + dispatcher_ws = kb.workspaces_root() + dispatcher_log = kb.worker_log_path("t_handoff") + + # Worker's perspective (profile activated by `hermes -p coder`). + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + worker_db = kb.kanban_db_path() + worker_ws = kb.workspaces_root() + worker_log = kb.worker_log_path("t_handoff") + + assert dispatcher_db == worker_db + assert dispatcher_ws == worker_ws + assert dispatcher_log == worker_log + + def test_docker_custom_hermes_home_uses_env_path_directly( + self, tmp_path, monkeypatch + ): + # Docker / custom deployment: HERMES_HOME points outside ~/.hermes. + # `get_default_hermes_root()` returns env_home directly when it + # is not a `/profiles/` shape and not under + # `Path.home() / ".hermes"`. + custom_root = tmp_path / "opt" / "hermes" + custom_root.mkdir(parents=True) + self._set_home(monkeypatch, tmp_path, custom_root) + + assert kb.kanban_home() == custom_root + assert kb.kanban_db_path() == custom_root / "kanban.db" + + def test_docker_profile_layout_uses_grandparent( + self, tmp_path, monkeypatch + ): + # Docker profile shape: HERMES_HOME=/opt/hermes/profiles/coder; + # `get_default_hermes_root()` walks up to /opt/hermes because + # the immediate parent dir is named "profiles". + custom_root = tmp_path / "opt" / "hermes" + profile = custom_root / "profiles" / "coder" + profile.mkdir(parents=True) + self._set_home(monkeypatch, tmp_path, profile) + + assert kb.kanban_home() == custom_root + assert kb.kanban_db_path() == custom_root / "kanban.db" + + def test_explicit_override_via_hermes_kanban_home( + self, tmp_path, monkeypatch + ): + # Explicit override: HERMES_KANBAN_HOME beats every other + # resolution rule. + default_home = tmp_path / ".hermes" + profile_home = default_home / "profiles" / "any" + profile_home.mkdir(parents=True) + override = tmp_path / "shared-board" + override.mkdir() + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", str(override)) + + assert kb.kanban_home() == override + assert kb.kanban_db_path() == override / "kanban.db" + assert kb.workspaces_root() == override / "kanban" / "workspaces" + + def test_empty_override_falls_through(self, tmp_path, monkeypatch): + # Empty/whitespace override is treated as unset. + default_home = tmp_path / ".hermes" + default_home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", " ") + + assert kb.kanban_home() == default_home + + def test_dispatcher_and_worker_share_a_real_database( + self, tmp_path, monkeypatch + ): + # Belt-and-suspenders: round-trip a task across the two + # HERMES_HOME perspectives via a real SQLite file. Without the + # fix the worker would open a different file and see no rows. + default_home = tmp_path / ".hermes" + default_home.mkdir() + profile_home = default_home / "profiles" / "nehemiahkanban" + profile_home.mkdir(parents=True) + + # Dispatcher creates the board and a task. + self._set_home(monkeypatch, tmp_path, default_home) + kb.init_db() + with kb.connect() as conn: + task_id = kb.create_task(conn, title="cross-profile") + + # Worker switches to the profile HERMES_HOME and reads. + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + with kb.connect() as conn: + task = kb.get_task(conn, task_id) + assert task is not None + assert task.title == "cross-profile" From 2658494e815b4644cb2ed47dc6cb6623b6ecf112 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 3 May 2026 15:05:28 -0700 Subject: [PATCH 007/171] fix(kanban): add per-path env overrides + dispatcher env injection Layers defense-in-depth on top of the shared-root anchoring (base commit). Changes in hermes_cli/kanban_db.py: - kanban_db_path() now honours HERMES_KANBAN_DB first, then falls through to kanban_home()/kanban.db. - workspaces_root() now honours HERMES_KANBAN_WORKSPACES_ROOT first, then falls through to kanban_home()/kanban/workspaces. - All three overrides (HERMES_KANBAN_HOME, HERMES_KANBAN_DB, HERMES_KANBAN_WORKSPACES_ROOT) now call .expanduser() for consistency. - _default_spawn() injects HERMES_KANBAN_DB and HERMES_KANBAN_WORKSPACES_ROOT into the worker subprocess env. Even when the worker's get_default_hermes_root() resolution somehow disagrees with the dispatcher's (symlinks, unusual Docker layouts), the two processes still open the same SQLite file. Module docstring updated to describe all three overrides and the dispatcher env-injection contract. Tests (tests/hermes_cli/test_kanban_db.py, TestSharedBoardPaths): - test_hermes_kanban_db_pin_beats_kanban_home - test_hermes_kanban_workspaces_root_pin_beats_kanban_home - test_empty_per_path_overrides_fall_through - test_dispatcher_spawn_injects_kanban_db_and_workspaces_root (monkeypatches subprocess.Popen, asserts both env vars reach the child even after HERMES_HOME is rewritten by `hermes -p `.) Docs: website/docs/reference/environment-variables.md gets entries for the three kanban env vars. This fusion is built on the cleanest of the seven competing PRs that targeted issue #18442: * Base commit (from PR #19350 by @GodsBoy): add `kanban_home()` helper anchored at `get_default_hermes_root()`, reroute all 5 kanban path sites through it (including the 3 sibling log-dir sites that the other six PRs missed), 8-test regression class. * Dispatcher env-var injection approach drawn from PRs #18300 (@quocanh261997) and #19100 (@cg2aigc). * Per-path env overrides drawn from PR #19100 (@cg2aigc). * get_default_hermes_root() resolution direction first proposed in PR #18503 (@beibi9966) and PR #18985 (@Gosuj). Closes the duplicate/competing PRs: #18300, #18503, #18670, #18985, #19037, #19056, #19100. Fixes #18442 and #19348. Co-authored-by: quocanh261997 <17986614+quocanh261997@users.noreply.github.com> Co-authored-by: cg2aigc <232694053+cg2aigc@users.noreply.github.com> Co-authored-by: beibi9966 Co-authored-by: Gosuj <123411271+Gosuj@users.noreply.github.com> Co-authored-by: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com> --- hermes_cli/kanban_db.py | 39 ++++++- tests/hermes_cli/test_kanban_db.py | 105 ++++++++++++++++++ .../docs/reference/environment-variables.md | 3 + 3 files changed, 143 insertions(+), 4 deletions(-) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 19311bcb697..d8e2e861bac 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -9,8 +9,20 @@ board as the dispatcher that claimed the task. The same applies to In standard installs ```` is ``~/.hermes``. In Docker / custom deployments where ``HERMES_HOME`` points outside ``~/.hermes`` (e.g. -``/opt/hermes``), ```` is ``HERMES_HOME``. Set ``HERMES_KANBAN_HOME`` -to override the resolution explicitly (tests, unusual deployments). +``/opt/hermes``), ```` is ``HERMES_HOME``. Three env-var overrides +are available (highest precedence first, all optional): + +* ``HERMES_KANBAN_DB`` — pin the database file path directly. +* ``HERMES_KANBAN_WORKSPACES_ROOT`` — pin the workspaces root directly. +* ``HERMES_KANBAN_HOME`` — pin the umbrella root that anchors all three + kanban paths (db + workspaces + logs). Useful for tests and unusual + deployments where a single override is enough. + +The dispatcher injects ``HERMES_KANBAN_DB`` and +``HERMES_KANBAN_WORKSPACES_ROOT`` into the worker subprocess env as a +defense-in-depth measure: even if the worker's ``get_default_hermes_root()`` +resolution somehow disagrees with the dispatcher's (unusual symlink or +Docker layout), the two processes still converge on the same files. Schema is intentionally small: tasks, task_links, task_comments, task_events. The ``workspace_kind`` field decouples coordination from git @@ -87,7 +99,7 @@ def kanban_home() -> Path: """ override = os.environ.get("HERMES_KANBAN_HOME", "").strip() if override: - return Path(override) + return Path(override).expanduser() from hermes_constants import get_default_hermes_root return get_default_hermes_root() @@ -97,8 +109,13 @@ def kanban_db_path() -> Path: Anchored at :func:`kanban_home`, not the active profile's ``HERMES_HOME``, so profile workers and the dispatcher converge on - the same board. + the same board. ``HERMES_KANBAN_DB`` pins the path directly (highest + precedence) — the dispatcher injects this into worker subprocess env + as defense-in-depth. """ + override = os.environ.get("HERMES_KANBAN_DB", "").strip() + if override: + return Path(override).expanduser() return kanban_home() / "kanban.db" @@ -107,7 +124,13 @@ def workspaces_root() -> Path: Anchored at :func:`kanban_home` so workspace paths are stable across profile workers spawned by the dispatcher. + ``HERMES_KANBAN_WORKSPACES_ROOT`` pins the path directly (highest + precedence) — the dispatcher injects this into worker subprocess env + as defense-in-depth. """ + override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip() + if override: + return Path(override).expanduser() return kanban_home() / "kanban" / "workspaces" @@ -2111,6 +2134,14 @@ def _default_spawn(task: Task, workspace: str) -> Optional[int]: env["HERMES_TENANT"] = task.tenant env["HERMES_KANBAN_TASK"] = task.id env["HERMES_KANBAN_WORKSPACE"] = workspace + # Pin the shared board + workspaces root the dispatcher resolved, so + # that even when the worker activates a profile (`hermes -p ` + # rewrites HERMES_HOME), its kanban paths still match the + # dispatcher's. Belt-and-braces with the `get_default_hermes_root()` + # resolution in `kanban_home()` — symmetric resolution is the norm, + # but unusual symlink / Docker layouts are caught here too. + env["HERMES_KANBAN_DB"] = str(kanban_db_path()) + env["HERMES_KANBAN_WORKSPACES_ROOT"] = str(workspaces_root()) # HERMES_PROFILE is the author the kanban_comment tool defaults to. # `hermes -p ` activates the profile, but the env var is # what the tool reads — set it explicitly here so comments are diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index 6214ab758a3..66992a721c5 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -607,3 +607,108 @@ class TestSharedBoardPaths: task = kb.get_task(conn, task_id) assert task is not None assert task.title == "cross-profile" + + def test_hermes_kanban_db_pin_beats_kanban_home( + self, tmp_path, monkeypatch + ): + # HERMES_KANBAN_DB pins the file path directly and beats both + # HERMES_KANBAN_HOME and the `get_default_hermes_root()` path. + # This is the env the dispatcher injects into workers. + default_home = tmp_path / ".hermes" + default_home.mkdir() + umbrella = tmp_path / "umbrella" + umbrella.mkdir() + pinned_db = tmp_path / "pinned" / "board.db" + pinned_db.parent.mkdir() + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", str(umbrella)) + monkeypatch.setenv("HERMES_KANBAN_DB", str(pinned_db)) + + assert kb.kanban_db_path() == pinned_db + # workspaces_root still follows HERMES_KANBAN_HOME -- the pins + # are independent. + assert kb.workspaces_root() == umbrella / "kanban" / "workspaces" + + def test_hermes_kanban_workspaces_root_pin_beats_kanban_home( + self, tmp_path, monkeypatch + ): + # HERMES_KANBAN_WORKSPACES_ROOT pins the workspaces root directly. + default_home = tmp_path / ".hermes" + default_home.mkdir() + umbrella = tmp_path / "umbrella" + umbrella.mkdir() + pinned_ws = tmp_path / "pinned-workspaces" + pinned_ws.mkdir() + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", str(umbrella)) + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(pinned_ws)) + + assert kb.workspaces_root() == pinned_ws + # kanban_db_path still follows HERMES_KANBAN_HOME. + assert kb.kanban_db_path() == umbrella / "kanban.db" + + def test_empty_per_path_overrides_fall_through( + self, tmp_path, monkeypatch + ): + # Empty/whitespace pins are treated as unset, same as + # HERMES_KANBAN_HOME. + default_home = tmp_path / ".hermes" + default_home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_DB", " ") + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", "") + + assert kb.kanban_db_path() == default_home / "kanban.db" + assert kb.workspaces_root() == default_home / "kanban" / "workspaces" + + def test_dispatcher_spawn_injects_kanban_db_and_workspaces_root( + self, tmp_path, monkeypatch + ): + # The dispatcher's `_default_spawn` must inject HERMES_KANBAN_DB + # and HERMES_KANBAN_WORKSPACES_ROOT into the worker env so the + # worker converges on the dispatcher's paths even when the + # `-p ` flag rewrites HERMES_HOME. + default_home = tmp_path / ".hermes" + default_home.mkdir() + self._set_home(monkeypatch, tmp_path, default_home) + + captured = {} + + class _FakePopen: + def __init__(self, cmd, **kwargs): + captured["cmd"] = cmd + captured["env"] = kwargs.get("env", {}) + self.pid = 4242 + + monkeypatch.setattr("subprocess.Popen", _FakePopen) + + task = kb.Task( + id="t_dispatch_env", + title="x", + body=None, + assignee="coder", + status="ready", + priority=0, + created_by=None, + created_at=0, + started_at=None, + completed_at=None, + workspace_kind="scratch", + workspace_path=None, + claim_lock=None, + claim_expires=None, + tenant=None, + ) + kb._default_spawn(task, str(tmp_path / "ws")) + + env = captured["env"] + assert env["HERMES_KANBAN_DB"] == str(default_home / "kanban.db") + assert env["HERMES_KANBAN_WORKSPACES_ROOT"] == str( + default_home / "kanban" / "workspaces" + ) + assert env["HERMES_KANBAN_TASK"] == "t_dispatch_env" diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 955f4600146..aa971c71037 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -88,6 +88,9 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `HERMES_LOCAL_STT_COMMAND` | Optional local speech-to-text command template. Supports `{input_path}`, `{output_dir}`, `{language}`, and `{model}` placeholders | | `HERMES_LOCAL_STT_LANGUAGE` | Default language passed to `HERMES_LOCAL_STT_COMMAND` or auto-detected local `whisper` CLI fallback (default: `en`) | | `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`). Also scopes the gateway PID file and systemd service name, so multiple installations can run concurrently | +| `HERMES_KANBAN_HOME` | Override the shared Hermes root that anchors the kanban board (db + workspaces + worker logs). Falls back to `get_default_hermes_root()` (the parent of any active profile). Useful for tests and unusual deployments | +| `HERMES_KANBAN_DB` | Pin the kanban database file path directly (highest precedence; beats `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env so profile workers converge on the dispatcher's board | +| `HERMES_KANBAN_WORKSPACES_ROOT` | Pin the kanban workspaces root directly (highest precedence for workspaces; beats `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env | ## Provider Auth (OAuth) From 4a2f822137bf69728bd594613002671c93d2a64d Mon Sep 17 00:00:00 2001 From: LeonSGP43 Date: Sun, 3 May 2026 21:33:52 +0800 Subject: [PATCH 008/171] fix(mcp): reconnect on terminated sessions --- tests/tools/test_mcp_tool_session_expired.py | 7 +++++++ tools/mcp_tool.py | 1 + 2 files changed, 8 insertions(+) diff --git a/tests/tools/test_mcp_tool_session_expired.py b/tests/tools/test_mcp_tool_session_expired.py index 67e6e587413..4533282e708 100644 --- a/tests/tools/test_mcp_tool_session_expired.py +++ b/tests/tools/test_mcp_tool_session_expired.py @@ -46,6 +46,13 @@ def test_is_session_expired_detects_session_not_found(): assert _is_session_expired_error(RuntimeError("Unknown session: abc123")) is True +def test_is_session_expired_detects_session_terminated(): + """Remote Playwright MCP reports transport loss as ``Session terminated``.""" + from tools.mcp_tool import _is_session_expired_error + + assert _is_session_expired_error(RuntimeError("Session terminated")) is True + + def test_is_session_expired_is_case_insensitive(): """Match uses lower-cased comparison so servers that emit the message in different cases (SDK formatter quirks) still trigger.""" diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 2a0115ec858..21e935a12ff 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1667,6 +1667,7 @@ _SESSION_EXPIRED_MARKERS: tuple = ( "session expired", "session not found", "unknown session", + "session terminated", ) From dfdd7b6e6fc3ec3b637d200d95e36adc7c6a49bb Mon Sep 17 00:00:00 2001 From: Zyproth Date: Sun, 3 May 2026 16:27:25 +0300 Subject: [PATCH 009/171] fix(codex-transport): preserve request override headers for xai responses --- agent/transports/codex.py | 13 ++++++++++++- scripts/release.py | 1 + tests/agent/transports/test_codex_transport.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/agent/transports/codex.py b/agent/transports/codex.py index 7d6bed46def..2ebc396fbb1 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -143,7 +143,18 @@ class ResponsesApiTransport(ProviderTransport): kwargs["max_output_tokens"] = max_tokens if is_xai_responses and session_id: - kwargs["extra_headers"] = {"x-grok-conv-id": session_id} + existing_extra_headers = kwargs.get("extra_headers") + merged_extra_headers: Dict[str, str] = {} + if isinstance(existing_extra_headers, dict): + merged_extra_headers.update( + { + str(key): str(value) + for key, value in existing_extra_headers.items() + if key and value is not None + } + ) + merged_extra_headers["x-grok-conv-id"] = session_id + kwargs["extra_headers"] = merged_extra_headers return kwargs diff --git a/scripts/release.py b/scripts/release.py index c1988049d46..6871f58ce19 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -621,6 +621,7 @@ AUTHOR_MAP = { "2114364329@qq.com": "cuyua9", "2557058999@qq.com": "Disaster-Terminator", "cine.dreamer.one@gmail.com": "LeonSGP43", + "zyprothh@gmail.com": "Zyproth", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index d9db3be7c34..26145660cca 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -126,6 +126,20 @@ class TestCodexBuildKwargs: ) assert kw.get("extra_headers", {}).get("x-grok-conv-id") == "conv-123" + def test_xai_headers_preserve_request_override_headers(self, transport): + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-3", messages=messages, tools=[], + session_id="conv-123", + is_xai_responses=True, + request_overrides={"extra_headers": {"X-Test": "1", "X-Trace": "abc"}}, + ) + assert kw.get("extra_headers") == { + "X-Test": "1", + "X-Trace": "abc", + "x-grok-conv-id": "conv-123", + } + def test_minimal_effort_clamped(self, transport): messages = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( From 65bebb9b802616122c30fd8fea6c4c47514b45e4 Mon Sep 17 00:00:00 2001 From: Amit Gaur Date: Sun, 3 May 2026 12:45:23 -0700 Subject: [PATCH 010/171] fix(cli): follow 307 redirects in MiniMax OAuth httpx clients The MiniMax OAuth API endpoints have moved from api.minimax.io to account.minimax.io and the old paths now respond with HTTP 307. httpx defaults to follow_redirects=False (unlike requests), so the device-code and token-refresh flows fail with "Temporary Redirect". Adds follow_redirects=True to the two httpx.Client instances in hermes_cli/auth.py used by the MiniMax OAuth flow. This is forward- compatible -- if endpoints move again, the redirect chain is followed automatically. Repro before patch: curl -i -X POST https://api.minimax.io/oauth/code # -> 307 curl -i -X POST https://api.minimax.io/oauth/token # -> 307 Verified end-to-end against a real MiniMax Plus account on macOS; the existing tests/test_minimax_oauth.py suite (15 tests) still passes. --- hermes_cli/auth.py | 6 ++++-- scripts/release.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 1d77fffa92f..56e72d5eb07 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -4283,7 +4283,8 @@ def _minimax_oauth_login( print(f"Portal: {portal_base_url}") with httpx.Client(timeout=httpx.Timeout(timeout_seconds), - headers={"Accept": "application/json"}) as client: + headers={"Accept": "application/json"}, + follow_redirects=True) as client: code_data = _minimax_request_user_code( client, portal_base_url=portal_base_url, client_id=pconfig.client_id, @@ -4360,7 +4361,8 @@ def _refresh_minimax_oauth_state( return state portal_base_url = state["portal_base_url"] - with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client: + with httpx.Client(timeout=httpx.Timeout(timeout_seconds), + follow_redirects=True) as client: response = client.post( f"{portal_base_url}/oauth/token", data={ diff --git a/scripts/release.py b/scripts/release.py index 6871f58ce19..8f3e094ca0c 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -622,6 +622,7 @@ AUTHOR_MAP = { "2557058999@qq.com": "Disaster-Terminator", "cine.dreamer.one@gmail.com": "LeonSGP43", "zyprothh@gmail.com": "Zyproth", + "amitgaur@gmail.com": "amitgaur", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From a5cae1649675947d04034010f1fa22d15b2c6c4c Mon Sep 17 00:00:00 2001 From: Zyproth Date: Mon, 4 May 2026 00:03:36 +0300 Subject: [PATCH 011/171] fix(api_server): fall back to default port on malformed API_SERVER_PORT --- gateway/platforms/api_server.py | 13 ++++++++++++- tests/gateway/test_api_server.py | 6 ++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index dc608874594..f1def35d9c7 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -62,6 +62,14 @@ MAX_NORMALIZED_TEXT_LENGTH = 65_536 # 64 KB cap for normalized content parts MAX_CONTENT_LIST_SIZE = 1_000 # Max items when content is an array +def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int: + """Parse a listen port without letting malformed env/config values crash startup.""" + try: + return int(value) + except (TypeError, ValueError): + return default + + def _normalize_chat_content( content: Any, *, _max_depth: int = 10, _depth: int = 0, ) -> str: @@ -573,7 +581,10 @@ class APIServerAdapter(BasePlatformAdapter): super().__init__(config, Platform.API_SERVER) extra = config.extra or {} self._host: str = extra.get("host", os.getenv("API_SERVER_HOST", DEFAULT_HOST)) - self._port: int = int(extra.get("port", os.getenv("API_SERVER_PORT", str(DEFAULT_PORT)))) + raw_port = extra.get("port") + if raw_port is None: + raw_port = os.getenv("API_SERVER_PORT", str(DEFAULT_PORT)) + self._port: int = _coerce_port(raw_port, DEFAULT_PORT) self._api_key: str = extra.get("key", os.getenv("API_SERVER_KEY", "")) self._cors_origins: tuple[str, ...] = self._parse_cors_origins( extra.get("cors_origins", os.getenv("API_SERVER_CORS_ORIGINS", "")), diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 74a30541dc7..0bc2d043e35 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -240,6 +240,12 @@ class TestAdapterInit: "http://127.0.0.1:3000", ) + def test_invalid_port_from_env_falls_back_to_default(self, monkeypatch): + monkeypatch.setenv("API_SERVER_PORT", "not-a-port") + config = PlatformConfig(enabled=True) + adapter = APIServerAdapter(config) + assert adapter._port == 8642 + # --------------------------------------------------------------------------- # Auth checking From 6c4aca7adca44e67e45f85b143a46ed3d88a7328 Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Mon, 4 May 2026 00:32:53 +0300 Subject: [PATCH 012/171] fix(vision): guard user_prompt type before debug_call_data construction --- tools/vision_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/vision_tools.py b/tools/vision_tools.py index e7389e3efa1..dd3771578bc 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -440,6 +440,8 @@ async def vision_analyze_tool( - For local file paths, the file is used directly and NOT deleted - Supports common image formats (JPEG, PNG, GIF, WebP, etc.) """ + if not isinstance(user_prompt, str): + user_prompt = str(user_prompt) if user_prompt is not None else "" debug_call_data = { "parameters": { "image_url": image_url, From 5bd937533c9cef3646d1e464f9a9a3aabec7b774 Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Mon, 4 May 2026 00:44:47 +0300 Subject: [PATCH 013/171] fix(vision): guard user_prompt type in video_analyze_tool before debug_call_data construction --- tools/vision_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/vision_tools.py b/tools/vision_tools.py index dd3771578bc..611e6bcef60 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -918,6 +918,8 @@ async def video_analyze_tool( model: str = None, ) -> str: """Analyze a video via multimodal LLM. Returns JSON {success, analysis}.""" + if not isinstance(user_prompt, str): + user_prompt = str(user_prompt) if user_prompt is not None else "" debug_call_data = { "parameters": { "video_url": video_url, From 408dd8aa28cb959f1a1e869929651c181de63e1e Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Mon, 4 May 2026 00:57:36 +0300 Subject: [PATCH 014/171] fix(compressor): skip non-string tool content in dedup pass to prevent AttributeError --- agent/context_compressor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 21f07df491f..45c26b11b0a 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -569,6 +569,8 @@ class ContextCompressor(ContextEngine): # Skip multimodal content (list of content blocks) if isinstance(content, list): continue + if not isinstance(content, str): + continue if len(content) < 200: continue h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12] From 86e64c1d3bc0324452202cf8e26703dbef7839b3 Mon Sep 17 00:00:00 2001 From: MrBob Date: Sun, 3 May 2026 16:49:37 -0300 Subject: [PATCH 015/171] fix(gateway): hide required-arg commands from Telegram menu --- hermes_cli/commands.py | 15 +++++++++++++-- scripts/release.py | 1 + tests/hermes_cli/test_commands.py | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 07e7273bf72..681f228ae93 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -399,6 +399,11 @@ def _is_gateway_available(cmd: CommandDef, config_overrides: set[str] | None = N return False +def _requires_argument(args_hint: str) -> bool: + """Return True when selecting a command without text would be incomplete.""" + return args_hint.strip().startswith("<") + + def gateway_help_lines() -> list[str]: """Generate gateway help text lines from the registry.""" overrides = _resolve_config_gates() @@ -455,7 +460,9 @@ def telegram_bot_commands() -> list[tuple[str, str]]: Telegram command names cannot contain hyphens, so they are replaced with underscores. Aliases are skipped -- Telegram shows one menu entry per - canonical command. + canonical command. Commands that require arguments are skipped because + selecting a Telegram BotCommand sends only ``/command`` and would execute + an incomplete command. Plugin-registered slash commands are included so plugins get native autocomplete in Telegram without touching core code. @@ -465,10 +472,14 @@ def telegram_bot_commands() -> list[tuple[str, str]]: for cmd in COMMAND_REGISTRY: if not _is_gateway_available(cmd, overrides): continue + if _requires_argument(cmd.args_hint): + continue tg_name = _sanitize_telegram_name(cmd.name) if tg_name: result.append((tg_name, cmd.description)) - for name, description, _args_hint in _iter_plugin_command_entries(): + for name, description, args_hint in _iter_plugin_command_entries(): + if _requires_argument(args_hint): + continue tg_name = _sanitize_telegram_name(name) if tg_name: result.append((tg_name, description)) diff --git a/scripts/release.py b/scripts/release.py index 8f3e094ca0c..b8fec8e95f6 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -623,6 +623,7 @@ AUTHOR_MAP = { "cine.dreamer.one@gmail.com": "LeonSGP43", "zyprothh@gmail.com": "Zyproth", "amitgaur@gmail.com": "amitgaur", + "albuquerque.abner@gmail.com": "mrbob-git", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index d505c8a1a7b..620611ad42c 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -236,6 +236,13 @@ class TestTelegramBotCommands: tg_name = cmd.name.replace("-", "_") assert tg_name not in names + def test_excludes_commands_with_required_args(self): + names = {name for name, _ in telegram_bot_commands()} + assert "background" not in names + assert "queue" not in names + assert "steer" not in names + assert "background" in GATEWAY_KNOWN_COMMANDS + class TestSlackSubcommandMap: def test_returns_dict(self): @@ -1661,6 +1668,19 @@ class TestPluginCommandEnumeration: names = {name for name, _desc in telegram_bot_commands()} assert "metricas" in names + def test_plugin_command_with_required_args_excluded_from_telegram_menu(self, monkeypatch): + """Telegram BotCommand selections cannot supply required arguments.""" + self._patch_plugin_commands(monkeypatch, { + "background-job": { + "handler": lambda _a: "ok", + "description": "Run a background job", + "args_hint": "", + "plugin": "jobs-plugin", + } + }) + names = {name for name, _desc in telegram_bot_commands()} + assert "background_job" not in names + def test_plugin_command_appears_in_slack_subcommand_map(self, monkeypatch): """/hermes metricas must route through the Slack subcommand map.""" self._patch_plugin_commands(monkeypatch, { From 3792b77bd11dcccab3b0994bd31086969fb9f5fb Mon Sep 17 00:00:00 2001 From: Kiala Date: Sun, 3 May 2026 21:26:24 +0800 Subject: [PATCH 016/171] fix(send_message): support QQBot C2C and group chats The _send_qqbot function was hardcoded to use the guild channel endpoint (/channels/{id}/messages), which fails for C2C private chats and QQ groups with 'channel does not exist' (code 11263). This change tries the appropriate endpoints in order: 1. /channels/{id}/messages (guild channels) 2. /v2/users/{id}/messages (C2C private chats) 3. /v2/groups/{id}/messages (QQ groups) Fixes active sending to QQBot C2C and group recipients. --- scripts/release.py | 7 +++++++ tools/send_message_tool.py | 30 +++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/scripts/release.py b/scripts/release.py index b8fec8e95f6..377d355f666 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -624,6 +624,13 @@ AUTHOR_MAP = { "zyprothh@gmail.com": "Zyproth", "amitgaur@gmail.com": "amitgaur", "albuquerque.abner@gmail.com": "mrbob-git", + "kiala@users.noreply.github.com": "kiala9", + "alanxchen@gmail.com": "alanxchen85", + "clawbot@clawbots-Mac-mini.local": "John-tip", + "der@konsi.org": "konsisumer", + "cirwel@The-CIRWEL-Group.local": "CIRWEL", + "molvikar8@gmail.com": "molvikar", + "nftpoetrist@gmail.com": "nftpoetrist", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 62712e4581f..0ad30d6dcbd 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -1652,8 +1652,8 @@ async def _send_qqbot(pconfig, chat_id, message): """Send via QQBot using the REST API directly (no WebSocket needed). Uses the QQ Bot Open Platform REST endpoints to get an access token - and post a message. Works for guild channels without requiring - a running gateway adapter. + and post a message. Supports guild channels, C2C (private) chats, + and group chats by trying the appropriate endpoints. """ try: import httpx @@ -1682,20 +1682,40 @@ async def _send_qqbot(pconfig, chat_id, message): return _error(f"QQBot: no access_token in response") # Step 2: Send message via REST + # QQ Bot API has separate endpoints for channels, C2C, and groups. + # We try them in order: channel first, then fallback to C2C. headers = { "Authorization": f"QQBot {access_token}", "Content-Type": "application/json", } - url = f"https://api.sgroup.qq.com/channels/{chat_id}/messages" payload = {"content": message[:4000], "msg_type": 0} + # Try channel endpoint first (works for guild channels) + url = f"https://api.sgroup.qq.com/channels/{chat_id}/messages" resp = await client.post(url, json=payload, headers=headers) if resp.status_code in (200, 201): data = resp.json() return {"success": True, "platform": "qqbot", "chat_id": chat_id, "message_id": data.get("id")} - else: - return _error(f"QQBot send failed: {resp.status_code} {resp.text}") + + # If channel endpoint failed (likely "频道不存在"), try C2C endpoint + url_c2c = f"https://api.sgroup.qq.com/v2/users/{chat_id}/messages" + resp_c2c = await client.post(url_c2c, json=payload, headers=headers) + if resp_c2c.status_code in (200, 201): + data = resp_c2c.json() + return {"success": True, "platform": "qqbot", "chat_id": chat_id, + "message_id": data.get("id")} + + # If C2C also failed, try group endpoint + url_group = f"https://api.sgroup.qq.com/v2/groups/{chat_id}/messages" + resp_group = await client.post(url_group, json=payload, headers=headers) + if resp_group.status_code in (200, 201): + data = resp_group.json() + return {"success": True, "platform": "qqbot", "chat_id": chat_id, + "message_id": data.get("id")} + + # All endpoints failed — return the most informative error + return _error(f"QQBot send failed: channel={resp.status_code} c2c={resp_c2c.status_code} group={resp_group.status_code}") except Exception as e: return _error(f"QQBot send failed: {e}") From 3c420245395e3e5e074949c4bfdfb25d3156cb98 Mon Sep 17 00:00:00 2001 From: 0xKingBack <133716830+0xKingBack@users.noreply.github.com> Date: Sat, 2 May 2026 01:13:17 +0800 Subject: [PATCH 017/171] fix(curator): pass auxiliary curator api_key/base_url into runtime resolution Curator review fork now forwards per-slot credentials from auxiliary.curator and legacy curator.auxiliary to resolve_runtime_provider, matching the canonical aux task schema. Add regression tests for binding and main fallback. --- agent/curator.py | 104 +++++++++++++++++++++++++----------- tests/agent/test_curator.py | 80 +++++++++++++++++++++++++++ 2 files changed, 153 insertions(+), 31 deletions(-) diff --git a/agent/curator.py b/agent/curator.py index cce3d8c103d..8dee0acbbac 100644 --- a/agent/curator.py +++ b/agent/curator.py @@ -28,7 +28,7 @@ import tempfile import threading from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set +from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set from hermes_constants import get_hermes_home from tools import skill_usage @@ -36,6 +36,22 @@ from tools import skill_usage logger = logging.getLogger(__name__) +def _strip_aux_credential(value: Any) -> Optional[str]: + if value is None: + return None + text = str(value).strip() + return text or None + + +class _ReviewRuntimeBinding(NamedTuple): + """Provider/model for the curator review fork plus optional per-slot overrides.""" + + provider: str + model: str + explicit_api_key: Optional[str] + explicit_base_url: Optional[str] + + DEFAULT_INTERVAL_HOURS = 24 * 7 # 7 days DEFAULT_MIN_IDLE_HOURS = 2 DEFAULT_STALE_AFTER_DAYS = 30 @@ -1398,6 +1414,52 @@ def run_curator_review( } +def _resolve_review_runtime(cfg: Dict[str, Any]) -> _ReviewRuntimeBinding: + """Resolve provider/model and per-slot credentials for the curator review fork. + + Same precedence as `_resolve_review_model()`. Non-empty ``api_key`` / + ``base_url`` from the active slot are returned as explicit overrides so + ``resolve_runtime_provider`` does not silently reuse the main chat + credential chain for a routed auxiliary model. + """ + _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {} + _main_provider = _main.get("provider") or "auto" + _main_model = _main.get("default") or _main.get("model") or "" + + # 1. Canonical aux task slot + _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} + _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {} + _task_provider = (_cur_task.get("provider") or "").strip() or None + _task_model = (_cur_task.get("model") or "").strip() or None + if _task_provider and _task_provider != "auto" and _task_model: + return _ReviewRuntimeBinding( + _task_provider, + _task_model, + _strip_aux_credential(_cur_task.get("api_key")), + _strip_aux_credential(_cur_task.get("base_url")), + ) + + # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification) + _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {} + _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {} + _legacy_provider = _legacy.get("provider") or None + _legacy_model = _legacy.get("model") or None + if _legacy_provider and _legacy_model: + logger.info( + "curator: using deprecated curator.auxiliary.{provider,model} " + "config — please migrate to auxiliary.curator.{provider,model}" + ) + return _ReviewRuntimeBinding( + str(_legacy_provider), + str(_legacy_model), + _strip_aux_credential(_legacy.get("api_key")), + _strip_aux_credential(_legacy.get("base_url")), + ) + + # 3. Fall through to the main chat model + return _ReviewRuntimeBinding(_main_provider, _main_model, None, None) + + def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]: """Pick (provider, model) for the curator review fork. @@ -1413,32 +1475,8 @@ def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]: 2. Legacy ``curator.auxiliary.{provider,model}`` when both are set 3. Main ``model.{provider,default/model}`` pair """ - _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {} - _main_provider = _main.get("provider") or "auto" - _main_model = _main.get("default") or _main.get("model") or "" - - # 1. Canonical aux task slot - _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} - _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {} - _task_provider = (_cur_task.get("provider") or "").strip() or None - _task_model = (_cur_task.get("model") or "").strip() or None - if _task_provider and _task_provider != "auto" and _task_model: - return _task_provider, _task_model - - # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification) - _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {} - _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {} - _legacy_provider = _legacy.get("provider") or None - _legacy_model = _legacy.get("model") or None - if _legacy_provider and _legacy_model: - logger.info( - "curator: using deprecated curator.auxiliary.{provider,model} " - "config — please migrate to auxiliary.curator.{provider,model}" - ) - return _legacy_provider, _legacy_model - - # 3. Fall through to the main chat model - return _main_provider, _main_model + b = _resolve_review_runtime(cfg) + return b.provider, b.model def _run_llm_review(prompt: str) -> Dict[str, Any]: @@ -1477,10 +1515,10 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]: # arguments hits an auto-resolution path that fails for OAuth-only # providers and for pool-backed credentials. # - # `_resolve_review_model()` honors `auxiliary.curator.{provider,model}` + # `_resolve_review_runtime()` honors `auxiliary.curator.{provider,model,...}` # (canonical aux-task slot, wired through `hermes model` → auxiliary # picker and the dashboard Models tab), with a legacy fallback to - # `curator.auxiliary.{provider,model}`. See docs/user-guide/features/curator.md. + # `curator.auxiliary.{provider,model,...}`. See docs/user-guide/features/curator.md. _api_key = None _base_url = None _api_mode = None @@ -1490,9 +1528,13 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]: from hermes_cli.config import load_config from hermes_cli.runtime_provider import resolve_runtime_provider _cfg = load_config() - _provider, _model_name = _resolve_review_model(_cfg) + _binding = _resolve_review_runtime(_cfg) + _provider, _model_name = _binding.provider, _binding.model _rp = resolve_runtime_provider( - requested=_provider, target_model=_model_name + requested=_provider, + target_model=_model_name, + explicit_api_key=_binding.explicit_api_key, + explicit_base_url=_binding.explicit_base_url, ) _api_key = _rp.get("api_key") _base_url = _rp.get("base_url") diff --git a/tests/agent/test_curator.py b/tests/agent/test_curator.py index aba866445c9..45b96994562 100644 --- a/tests/agent/test_curator.py +++ b/tests/agent/test_curator.py @@ -645,6 +645,86 @@ def test_review_model_honors_auxiliary_curator_slot(curator_env): ) +def test_review_runtime_passes_auxiliary_curator_credentials(curator_env): + """Per-slot api_key/base_url must ride into resolve_runtime_provider (not main-only creds).""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "custom", + "model": "local-mini", + "api_key": "sk-curator-only", + "base_url": "http://localhost:11434/v1", + }, + }, + } + binding = curator._resolve_review_runtime(cfg) + assert binding.provider == "custom" + assert binding.model == "local-mini" + assert binding.explicit_api_key == "sk-curator-only" + assert binding.explicit_base_url == "http://localhost:11434/v1" + + +def test_review_runtime_strips_blank_aux_credentials(curator_env): + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "openrouter", + "model": "x/y", + "api_key": " ", + "base_url": "", + }, + }, + } + binding = curator._resolve_review_runtime(cfg) + assert binding.explicit_api_key is None + assert binding.explicit_base_url is None + + +def test_review_runtime_ignores_auxiliary_credentials_when_using_main(curator_env): + """Falling through to main model must not pick up stray auxiliary.curator secrets.""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "auto", + "model": "", + "api_key": "must-not-leak", + "base_url": "http://curator-slot-ignored/", + }, + }, + } + binding = curator._resolve_review_runtime(cfg) + assert (binding.provider, binding.model) == ("openrouter", "openai/gpt-5.5") + assert binding.explicit_api_key is None + assert binding.explicit_base_url is None + + +def test_review_runtime_legacy_auxiliary_carry_credentials(curator_env, caplog): + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "curator": { + "auxiliary": { + "provider": "custom", + "model": "m", + "api_key": "legacy-key", + "base_url": "http://legacy/v1", + }, + }, + } + import logging + with caplog.at_level(logging.INFO, logger="agent.curator"): + binding = curator._resolve_review_runtime(cfg) + assert binding.explicit_api_key == "legacy-key" + assert binding.explicit_base_url == "http://legacy/v1" + assert any("deprecated curator.auxiliary" in rec.message for rec in caplog.records) + + def test_review_model_auxiliary_curator_partial_override_falls_back(curator_env): """Only one of slot provider/model set → fall back to the main pair. From 2ababfe6edf815248daa1d123bd6568e04cfd7f4 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 3 May 2026 16:54:06 -0700 Subject: [PATCH 018/171] chore(release): map 0xKingBack noreply email --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 377d355f666..b0f925d4133 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -67,6 +67,7 @@ AUTHOR_MAP = { "nbot@liizfq.top": "liizfq", "274096618+hermes-agent-dhabibi@users.noreply.github.com": "dhabibi", "dejie.guo@gmail.com": "JayGwod", + "133716830+0xKingBack@users.noreply.github.com": "0xKingBack", "maxence@groine.fr": "MaxyMoos", "61830395+leprincep35700@users.noreply.github.com": "leprincep35700", # OpenViking viking_read salvage (April 2026) From 2d7543c61f1334bdcfa741776a357c274830de8b Mon Sep 17 00:00:00 2001 From: Alan Chen Date: Sun, 3 May 2026 22:40:34 +0800 Subject: [PATCH 019/171] fix(windows): enforce UTF-8 stdout/stderr to prevent UnicodeEncodeError crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, services and terminals default to cp1252 encoding. The CLI uses box-drawing characters (┌│├└─) in banners, doctor output, and status displays. When print() tries to encode these under cp1252, an unhandled UnicodeEncodeError crashes the gateway on startup. This fix adds early UTF-8 enforcement in hermes_cli/__init__.py: - Sets PYTHONUTF8=1 and PYTHONIOENCODING=utf-8 - Re-opens stdout/stderr with UTF-8 encoding if not already UTF-8 Runs at import time so it protects all CLI subcommands. No effect on Unix (gated on sys.platform == "win32"). Backwards-compatible: on systems already using UTF-8, the function is a no-op. Fixes #10956 --- hermes_cli/__init__.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index b3482b1e68a..9141ea93e79 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -5,11 +5,43 @@ Provides subcommands for: - hermes chat - Interactive chat (same as ./hermes) - hermes gateway - Run gateway in foreground - hermes gateway start - Start gateway service -- hermes gateway stop - Stop gateway service +- hermes gateway stop - Stop gateway service - hermes setup - Interactive setup wizard - hermes status - Show status of all components - hermes cron - Manage cron jobs """ +import os +import sys + __version__ = "0.12.0" __release_date__ = "2026.4.30" + + +def _ensure_utf8(): + """Force UTF-8 stdout/stderr on Windows to prevent UnicodeEncodeError. + + Windows services and terminals default to cp1252, which cannot encode + box-drawing characters used in CLI output. This causes unhandled + UnicodeEncodeError crashes on gateway startup. + """ + if sys.platform != "win32": + return + os.environ.setdefault("PYTHONUTF8", "1") + os.environ.setdefault("PYTHONIOENCODING", "utf-8") + for stream_name in ("stdout", "stderr"): + stream = getattr(sys, stream_name, None) + if stream is None: + continue + try: + if getattr(stream, "encoding", "").lower().replace("-", "") != "utf8": + new_stream = open( + stream.fileno(), "w", encoding="utf-8", + buffering=1, closefd=False, + ) + setattr(sys, stream_name, new_stream) + except (AttributeError, OSError): + pass + + +_ensure_utf8() From 6713274a4297ab1cf601d93655b458ab3e66d083 Mon Sep 17 00:00:00 2001 From: LeonSGP43 Date: Sun, 3 May 2026 22:59:04 +0800 Subject: [PATCH 020/171] fix(file): strip leaked terminal fences from reads --- tests/tools/test_file_operations.py | 52 +++++++++++++++++++++++++++++ tools/file_operations.py | 44 ++++++++++++++++++++---- 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/tests/tools/test_file_operations.py b/tests/tools/test_file_operations.py index dfd54ba634c..500cd6141aa 100644 --- a/tests/tools/test_file_operations.py +++ b/tests/tools/test_file_operations.py @@ -271,6 +271,58 @@ class TestShellFileOpsHelpers: ops = ShellFileOperations(env) assert ops.cwd == "/" + def test_read_file_strips_leaked_terminal_fence_markers(self, mock_env): + leaked = ( + "'\x07__HERMES_FENCE_a9f7b3__\x1b]0;cat " + "'/tmp/test/a.py' 2> /dev/null\x07\n" + "print('ok')\n" + "__HERMES_FENCE_a9f7b3__\x07'\n" + ) + + def side_effect(command, **kwargs): + if command.startswith("wc -c"): + return {"output": "12\n", "returncode": 0} + if command.startswith("head -c"): + return {"output": "print('ok')\n", "returncode": 0} + if command.startswith("sed -n"): + return {"output": leaked, "returncode": 0} + if command.startswith("wc -l"): + return {"output": "1\n", "returncode": 0} + return {"output": "", "returncode": 0} + + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.read_file("/tmp/test/a.py") + + assert result.error is None + assert "HERMES_FENCE" not in result.content + assert "\x1b]" not in result.content + assert "\x07" not in result.content + assert " 1|print('ok')" in result.content + + def test_read_file_raw_strips_leaked_terminal_fence_markers(self, mock_env): + leaked = ( + "__HERMES_FENCE_a9f7b3__\x07'\n" + "alpha\n" + "\x1b]0;cat '/tmp/test/a.txt'\x07__HERMES_FENCE_a9f7b3__\n" + ) + + def side_effect(command, **kwargs): + if command.startswith("wc -c"): + return {"output": "6\n", "returncode": 0} + if command.startswith("head -c"): + return {"output": "alpha\n", "returncode": 0} + if command.startswith("cat "): + return {"output": leaked, "returncode": 0} + return {"output": "", "returncode": 0} + + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.read_file_raw("/tmp/test/a.txt") + + assert result.error is None + assert result.content == "alpha\n" + class TestSearchPathValidation: """Test that search() returns an error for non-existent paths.""" diff --git a/tools/file_operations.py b/tools/file_operations.py index aa7a4825093..73e739e730a 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -53,6 +53,27 @@ WRITE_DENIED_PATHS = build_write_denied_paths(_HOME) WRITE_DENIED_PREFIXES = build_write_denied_prefixes(_HOME) +_OSC_SEQUENCE_RE = re.compile(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)") +_FENCE_MARKER_RE = re.compile(r"'?\x07?__HERMES_FENCE_[A-Za-z0-9]+__\x07?'?") + + +def _strip_terminal_fence_leaks(text: str) -> str: + """Strip leaked terminal fence wrappers from file read output.""" + if not text: + return text + + cleaned_lines: List[str] = [] + for line in text.splitlines(keepends=True): + had_terminal_wrapper = "__HERMES_FENCE_" in line or "\x1b]" in line + cleaned = _OSC_SEQUENCE_RE.sub("", line) + cleaned = _FENCE_MARKER_RE.sub("", cleaned) + cleaned = cleaned.replace("\x07", "") + if had_terminal_wrapper and cleaned.strip("'\r\n\t ") == "": + continue + cleaned_lines.append(cleaned) + return "".join(cleaned_lines) + + def _get_safe_write_root() -> Optional[str]: """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset. @@ -511,8 +532,9 @@ class ShellFileOperations(FileOperations): # File not found - try to suggest similar files return self._suggest_similar_files(path) + stat_output = _strip_terminal_fence_leaks(stat_result.stdout) try: - file_size = int(stat_result.stdout.strip()) + file_size = int(stat_output.strip()) except ValueError: file_size = 0 @@ -536,8 +558,9 @@ class ShellFileOperations(FileOperations): # Read a sample to check for binary content sample_cmd = f"head -c 1000 {self._escape_shell_arg(path)} 2>/dev/null" sample_result = self._exec(sample_cmd) + sample_output = _strip_terminal_fence_leaks(sample_result.stdout) - if self._is_likely_binary(path, sample_result.stdout): + if self._is_likely_binary(path, sample_output): return ReadResult( is_binary=True, file_size=file_size, @@ -551,12 +574,14 @@ class ShellFileOperations(FileOperations): if read_result.exit_code != 0: return ReadResult(error=f"Failed to read file: {read_result.stdout}") + read_output = _strip_terminal_fence_leaks(read_result.stdout) # Get total line count wc_cmd = f"wc -l < {self._escape_shell_arg(path)}" wc_result = self._exec(wc_cmd) + wc_output = _strip_terminal_fence_leaks(wc_result.stdout) try: - total_lines = int(wc_result.stdout.strip()) + total_lines = int(wc_output.strip()) except ValueError: total_lines = 0 @@ -567,7 +592,7 @@ class ShellFileOperations(FileOperations): hint = f"Use offset={end_line + 1} to continue reading (showing {offset}-{end_line} of {total_lines} lines)" return ReadResult( - content=self._add_line_numbers(read_result.stdout, offset), + content=self._add_line_numbers(read_output, offset), total_lines=total_lines, file_size=file_size, truncated=truncated, @@ -637,14 +662,16 @@ class ShellFileOperations(FileOperations): stat_result = self._exec(stat_cmd) if stat_result.exit_code != 0: return self._suggest_similar_files(path) + stat_output = _strip_terminal_fence_leaks(stat_result.stdout) try: - file_size = int(stat_result.stdout.strip()) + file_size = int(stat_output.strip()) except ValueError: file_size = 0 if self._is_image(path): return ReadResult(is_image=True, is_binary=True, file_size=file_size) sample_result = self._exec(f"head -c 1000 {self._escape_shell_arg(path)} 2>/dev/null") - if self._is_likely_binary(path, sample_result.stdout): + sample_output = _strip_terminal_fence_leaks(sample_result.stdout) + if self._is_likely_binary(path, sample_output): return ReadResult( is_binary=True, file_size=file_size, error="Binary file — cannot display as text." @@ -652,7 +679,10 @@ class ShellFileOperations(FileOperations): cat_result = self._exec(f"cat {self._escape_shell_arg(path)}") if cat_result.exit_code != 0: return ReadResult(error=f"Failed to read file: {cat_result.stdout}") - return ReadResult(content=cat_result.stdout, file_size=file_size) + return ReadResult( + content=_strip_terminal_fence_leaks(cat_result.stdout), + file_size=file_size, + ) def delete_file(self, path: str) -> WriteResult: """Delete a file via rm.""" From b58db237e4e1943daeab34fcc38a25e034bafb30 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 3 May 2026 16:59:00 -0700 Subject: [PATCH 021/171] fix(kanban): drop worker identity claim from KANBAN_GUIDANCE (#19427) KANBAN_GUIDANCE layer 3 of the system prompt started with 'You are a Kanban worker', overriding the profile's SOUL.md identity at layer 1. Profiles with strict role boundaries (e.g. a reviewer profile that never writes code) still executed implementation tasks because the kanban identity claim diluted SOUL's. Drop the identity line. Layer 3 now describes the task-execution protocol only; SOUL.md remains the sole identity slot. Fixes #19351 --- agent/prompt_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index a9556e20468..8494a70eef2 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -183,8 +183,8 @@ SKILLS_GUIDANCE = ( ) KANBAN_GUIDANCE = ( - "# You are a Kanban worker\n" - "You were spawned by the Hermes Kanban dispatcher to execute ONE task from " + "# Kanban task execution protocol\n" + "You have been assigned ONE task from " "the shared board at `~/.hermes/kanban.db`. Your task id is in " "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. " "The `kanban_*` tools in your schema are your primary coordination surface — " From 1bd975c0ba87c644d560ca7bd62cc47274a8a919 Mon Sep 17 00:00:00 2001 From: clawbot Date: Sun, 3 May 2026 09:24:08 -0600 Subject: [PATCH 022/171] fix(gateway): suppress duplicate voice transcripts Deduplicate exact and near-exact Discord voice STT transcripts per guild/user over a short window to avoid duplicate delayed agent replies. Adds regression tests for exact and near-duplicate voice transcript suppression. --- gateway/run.py | 54 +++++++++++++++++++++++++++++ tests/gateway/test_voice_command.py | 40 +++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index d604947e996..1ba1984bacd 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1161,6 +1161,10 @@ class GatewayRunner: # Per-chat voice reply mode: "off" | "voice_only" | "all" self._voice_mode: Dict[str, str] = self._load_voice_modes() + # Recent voice transcripts per (guild,user) for duplicate suppression. + # Protects against the same utterance being emitted twice by the voice + # capture / STT pipeline, which otherwise produces a second delayed reply. + self._recent_voice_transcripts: Dict[tuple[int, int], List[tuple[float, str]]] = {} # Track background tasks to prevent garbage collection mid-execution self._background_tasks: set = set() @@ -8261,6 +8265,47 @@ class GatewayRunner: adapter = self.adapters.get(Platform.DISCORD) self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) + def _is_duplicate_voice_transcript(self, guild_id: int, user_id: int, transcript: str) -> bool: + """Suppress repeated STT outputs for the same recent utterance. + + Voice capture can occasionally emit the same utterance twice a few + seconds apart, which creates a second queued agent run and overlapping + spoken replies. Dedup exact and near-exact repeats per guild/user over a + short window while allowing genuinely new turns through. + """ + from difflib import SequenceMatcher + + normalized = re.sub(r"\s+", " ", transcript).strip().lower() + normalized = re.sub(r"[^\w\s]", "", normalized) + if not normalized: + return False + + now = time.monotonic() + window_seconds = 12.0 + key = (guild_id, user_id) + recent_store = getattr(self, "_recent_voice_transcripts", None) + if not isinstance(recent_store, dict): + recent_store = {} + self._recent_voice_transcripts = recent_store + recent = [ + (ts, txt) + for ts, txt in recent_store.get(key, []) + if now - ts <= window_seconds + ] + + for _, prior in recent: + if prior == normalized: + recent_store[key] = recent + return True + if len(prior) >= 16 and len(normalized) >= 16: + if SequenceMatcher(None, prior, normalized).ratio() >= 0.95: + recent_store[key] = recent + return True + + recent.append((now, normalized)) + recent_store[key] = recent[-5:] + return False + async def _handle_voice_channel_input( self, guild_id: int, user_id: int, transcript: str ): @@ -8298,6 +8343,15 @@ class GatewayRunner: logger.debug("Unauthorized voice input from user %d, ignoring", user_id) return + if self._is_duplicate_voice_transcript(guild_id, user_id, transcript): + logger.info( + "Suppressing duplicate voice transcript for guild=%s user=%s: %s", + guild_id, + user_id, + transcript[:100], + ) + return + # Show transcript in text channel (after auth, with mention sanitization) try: channel = adapter._client.get_channel(text_ch_id) diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py index 2e9c54608a0..947d4904aa8 100644 --- a/tests/gateway/test_voice_command.py +++ b/tests/gateway/test_voice_command.py @@ -954,6 +954,46 @@ class TestVoiceChannelCommands: assert "Test transcript" in msg assert "42" in msg # user_id in mention + @pytest.mark.asyncio + async def test_input_suppresses_duplicate_transcript(self, runner): + """Near-immediate duplicate STT output should not dispatch twice.""" + from gateway.config import Platform + + mock_adapter = AsyncMock() + mock_adapter._voice_text_channels = {111: 123} + mock_adapter._voice_sources = {} + mock_channel = AsyncMock() + mock_adapter._client = MagicMock() + mock_adapter._client.get_channel = MagicMock(return_value=mock_channel) + mock_adapter.handle_message = AsyncMock() + runner.adapters[Platform.DISCORD] = mock_adapter + + await runner._handle_voice_channel_input(111, 42, "Hello from VC") + await runner._handle_voice_channel_input(111, 42, "Hello from VC") + + mock_adapter.handle_message.assert_called_once() + mock_channel.send.assert_called_once() + + @pytest.mark.asyncio + async def test_input_suppresses_near_duplicate_transcript(self, runner): + """Small STT wording drift should still be treated as the same utterance.""" + from gateway.config import Platform + + mock_adapter = AsyncMock() + mock_adapter._voice_text_channels = {111: 123} + mock_adapter._voice_sources = {} + mock_channel = AsyncMock() + mock_adapter._client = MagicMock() + mock_adapter._client.get_channel = MagicMock(return_value=mock_channel) + mock_adapter.handle_message = AsyncMock() + runner.adapters[Platform.DISCORD] = mock_adapter + + await runner._handle_voice_channel_input(111, 42, "This is a test of the voice system") + await runner._handle_voice_channel_input(111, 42, "This is a test for the voice system") + + mock_adapter.handle_message.assert_called_once() + mock_channel.send.assert_called_once() + # -- _get_guild_id -- def test_get_guild_id_from_guild(self, runner): From 6fda92aa7f044ce684f6ac11e3f8871a1a70decc Mon Sep 17 00:00:00 2001 From: konsisumer Date: Sun, 3 May 2026 19:58:04 +0200 Subject: [PATCH 023/171] fix(gateway): bridge top-level require_mention to Telegram config Users commonly place `require_mention: true` at the top level of config.yaml alongside `group_sessions_per_user`, expecting it to gate Telegram group messages. The key was silently ignored because the config loader only checked `yaml_cfg["telegram"]["require_mention"]`. When `require_mention` is found at the top level and no telegram-specific value is set, the fix now: - adds it to platforms_data["telegram"]["extra"] so _telegram_require_mention() picks it up via the primary config.extra path - sets TELEGRAM_REQUIRE_MENTION env var for the secondary fallback path A telegram-specific value (telegram.require_mention) still takes precedence over the top-level shorthand. Also corrects telegram.md: bare /cmd without @botname is rejected when require_mention is enabled; only /cmd@botname (bot-menu form) passes. Fixes #3979 --- gateway/config.py | 18 ++++++- tests/gateway/test_telegram_group_gating.py | 51 +++++++++++++++++++ website/docs/user-guide/messaging/telegram.md | 2 +- 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/gateway/config.py b/gateway/config.py index 6527accec46..fa64b9046d4 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -846,11 +846,25 @@ def load_gateway_config() -> GatewayConfig: if yaml_key in allow_mentions_cfg and not os.getenv(env_key): os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower() + # Bridge top-level require_mention to Telegram when the telegram: section + # does not already provide one. Users often write "require_mention: true" + # at the top level alongside group_sessions_per_user, expecting it to work + # the same way (#3979). + _tl_require_mention = yaml_cfg.get("require_mention") + if _tl_require_mention is not None: + _tg_section = yaml_cfg.get("telegram") or {} + if "require_mention" not in _tg_section: + _tg_plat = platforms_data.setdefault(Platform.TELEGRAM.value, {}) + _tg_extra = _tg_plat.setdefault("extra", {}) + _tg_extra.setdefault("require_mention", _tl_require_mention) + # Telegram settings → env vars (env vars take precedence) telegram_cfg = yaml_cfg.get("telegram", {}) if isinstance(telegram_cfg, dict): - if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"): - os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower() + # Prefer telegram.require_mention; fall back to the top-level shorthand. + _effective_rm = telegram_cfg.get("require_mention", yaml_cfg.get("require_mention")) + if _effective_rm is not None and not os.getenv("TELEGRAM_REQUIRE_MENTION"): + os.environ["TELEGRAM_REQUIRE_MENTION"] = str(_effective_rm).lower() if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"): os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"]) frc = telegram_cfg.get("free_response_chats") diff --git a/tests/gateway/test_telegram_group_gating.py b/tests/gateway/test_telegram_group_gating.py index a560d6cdd6e..52e4a5e6d3d 100644 --- a/tests/gateway/test_telegram_group_gating.py +++ b/tests/gateway/test_telegram_group_gating.py @@ -261,6 +261,57 @@ def test_group_allow_from_is_enforced_by_gateway_authorization_not_trigger_gate( assert adapter._should_process_message(_group_message("hello", from_user_id=333)) is True +def test_top_level_require_mention_bridges_to_telegram(monkeypatch, tmp_path): + """require_mention at the config.yaml top level (alongside group_sessions_per_user) + must behave identically to telegram.require_mention: true (#3979). + """ + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + # Intentionally no "telegram:" section — keys are at the top level. + (hermes_home / "config.yaml").write_text( + "require_mention: true\n" + "group_sessions_per_user: true\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("TELEGRAM_REQUIRE_MENTION", raising=False) + + config = load_gateway_config() + + assert config is not None + assert __import__("os").environ.get("TELEGRAM_REQUIRE_MENTION") == "true" + + # The adapter's extra dict must also carry the setting so that + # _telegram_require_mention() works even without the env var. + tg_cfg = config.platforms.get(__import__("gateway.config", fromlist=["Platform"]).Platform.TELEGRAM) + if tg_cfg is not None: + assert tg_cfg.extra.get("require_mention") is True + + +def test_top_level_require_mention_does_not_override_telegram_section(monkeypatch, tmp_path): + """When telegram.require_mention is explicitly set, top-level require_mention + must not override it (platform-specific config takes precedence). + """ + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text( + "require_mention: true\n" + "telegram:\n" + " require_mention: false\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("TELEGRAM_REQUIRE_MENTION", raising=False) + + config = load_gateway_config() + + assert config is not None + # The telegram-specific "false" must win over the top-level "true". + assert __import__("os").environ.get("TELEGRAM_REQUIRE_MENTION") == "false" + + def test_config_bridges_telegram_ignored_threads(monkeypatch, tmp_path): hermes_home = tmp_path / ".hermes" hermes_home.mkdir() diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index 5873303a04f..dd933aa2fdc 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -293,9 +293,9 @@ Hermes Agent works in Telegram group chats with a few considerations: - `TELEGRAM_ALLOWED_USERS` still applies — only authorized users can trigger the bot, even in groups - You can keep the bot from responding to ordinary group chatter with `telegram.require_mention: true` - With `telegram.require_mention: true`, group messages are accepted when they are: - - slash commands - replies to one of the bot's messages - `@botusername` mentions + - `/command@botusername` (Telegram's bot-menu command form that includes the bot name) - matches for one of your configured regex wake words in `telegram.mention_patterns` - Use `telegram.ignored_threads` to keep Hermes silent in specific Telegram forum topics, even when the group would otherwise allow free responses or mention-triggered replies - If `telegram.require_mention` is left unset or false, Hermes keeps the previous open-group behavior and responds to normal group messages it can see From 222767e5e81696fc2b184d4a806a07e05ce969d7 Mon Sep 17 00:00:00 2001 From: Kenny Wang Date: Sun, 3 May 2026 15:45:56 -0600 Subject: [PATCH 024/171] fix: sanitize Telegram help command mentions --- gateway/run.py | 33 ++++++++- tests/gateway/test_gateway_command_help.py | 78 ++++++++++++++++++++++ 2 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 tests/gateway/test_gateway_command_help.py diff --git a/gateway/run.py b/gateway/run.py index 1ba1984bacd..bbee14b4bbd 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -49,6 +49,29 @@ from hermes_cli.config import cfg_get _AGENT_CACHE_MAX_SIZE = 128 _AGENT_CACHE_IDLE_TTL_SECS = 3600.0 # evict agents idle for >1h _PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT = 30.0 +_TELEGRAM_COMMAND_MENTION_RE = re.compile(r"(? str: + """Rewrite slash-command mentions to Telegram-valid command names. + + Telegram Bot API command names allow only lowercase letters, digits, and + underscores. Keep other platform renderings unchanged, but normalize + Telegram help text so command mentions remain clickable/valid there. + """ + platform_value = getattr(platform, "value", platform) + if platform_value != "telegram": + return text + + from hermes_cli.commands import _sanitize_telegram_name + + def _replace(match: re.Match[str]) -> str: + sanitized = _sanitize_telegram_name(match.group(1)) + return f"/{sanitized}" if sanitized else match.group(0) + + return _TELEGRAM_COMMAND_MENTION_RE.sub(_replace, text) + + # Only auto-continue interrupted gateway turns while the interruption is fresh. # Stale tool-tail/resume markers can otherwise revive an unrelated old task # after a gateway restart when the user's next message starts new work. @@ -7302,7 +7325,10 @@ class GatewayRunner: lines.append(f"\n... and {len(sorted_cmds) - 10} more. Use `/commands` for the full paginated list.") except Exception: pass - return "\n".join(lines) + return _telegramize_command_mentions( + "\n".join(lines), + getattr(getattr(event, "source", None), "platform", None), + ) async def _handle_commands_command(self, event: MessageEvent) -> str: """Handle /commands [page] - paginated list of all commands and skills.""" @@ -7355,7 +7381,10 @@ class GatewayRunner: lines.extend(["", " | ".join(nav_parts)]) if page != requested_page: lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_") - return "\n".join(lines) + return _telegramize_command_mentions( + "\n".join(lines), + getattr(getattr(event, "source", None), "platform", None), + ) async def _handle_model_command(self, event: MessageEvent) -> Optional[str]: """Handle /model command — switch model for this session. diff --git a/tests/gateway/test_gateway_command_help.py b/tests/gateway/test_gateway_command_help.py new file mode 100644 index 00000000000..61d5d73de0d --- /dev/null +++ b/tests/gateway/test_gateway_command_help.py @@ -0,0 +1,78 @@ +"""Gateway command help rendering tests.""" + +import pytest + +from gateway.config import Platform +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _make_event(text: str, platform: Platform) -> MessageEvent: + return MessageEvent( + text=text, + source=SessionSource( + platform=platform, + chat_id="chat-1", + user_id="user-1", + user_name="tester", + chat_type="dm", + ), + ) + + +def _make_runner(): + from gateway.run import GatewayRunner + + return object.__new__(GatewayRunner) + + +@pytest.mark.asyncio +async def test_help_sanitizes_slash_command_mentions_for_telegram(monkeypatch): + """Telegram help output must not expose invalid uppercase/hyphenated slashes.""" + monkeypatch.setattr( + "agent.skill_commands.get_skill_commands", + lambda: { + "/Linear": {"description": "Open Linear"}, + "/Custom-Thing": {"description": "Run a custom thing"}, + }, + ) + + result = await _make_runner()._handle_help_command( + _make_event("/help", Platform.TELEGRAM) + ) + + assert "`/linear`" in result + assert "`/custom_thing`" in result + assert "`/Linear`" not in result + assert "`/Custom-Thing`" not in result + + +@pytest.mark.asyncio +async def test_commands_sanitizes_slash_command_mentions_for_telegram(monkeypatch): + """Paginated Telegram /commands output uses Telegram-valid slash mentions.""" + monkeypatch.setattr( + "agent.skill_commands.get_skill_commands", + lambda: {"/Linear": {"description": "Open Linear"}}, + ) + + result = await _make_runner()._handle_commands_command( + _make_event("/commands 999", Platform.TELEGRAM) + ) + + assert "`/linear`" in result + assert "`/Linear`" not in result + + +@pytest.mark.asyncio +async def test_help_keeps_non_telegram_slash_command_mentions_unchanged(monkeypatch): + """Only Telegram needs slash mentions rewritten to Telegram command names.""" + monkeypatch.setattr( + "agent.skill_commands.get_skill_commands", + lambda: {"/Linear": {"description": "Open Linear"}}, + ) + + result = await _make_runner()._handle_help_command( + _make_event("/help", Platform.DISCORD) + ) + + assert "`/Linear`" in result From 74636f9c4aa3e9dbeaff64dc3b540aef9482c375 Mon Sep 17 00:00:00 2001 From: molvikar Date: Mon, 4 May 2026 00:55:12 +0300 Subject: [PATCH 025/171] fix(gateway): clear queued reload-skills notes on new/resume/branch --- gateway/run.py | 6 +++++ .../test_session_boundary_security_state.py | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index bbee14b4bbd..78716862568 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -11394,6 +11394,12 @@ class GatewayRunner: if not session_key: return + pending_skills_reload_notes = getattr( + self, "_pending_skills_reload_notes", None + ) + if isinstance(pending_skills_reload_notes, dict): + pending_skills_reload_notes.pop(session_key, None) + pending_approvals = getattr(self, "_pending_approvals", None) if isinstance(pending_approvals, dict): pending_approvals.pop(session_key, None) diff --git a/tests/gateway/test_session_boundary_security_state.py b/tests/gateway/test_session_boundary_security_state.py index 47cf4752755..57b58550700 100644 --- a/tests/gateway/test_session_boundary_security_state.py +++ b/tests/gateway/test_session_boundary_security_state.py @@ -124,6 +124,10 @@ async def test_resume_clears_session_scoped_approval_and_yolo_state(): runner, session_key = _make_resume_runner() other_key = "agent:main:telegram:dm:other-chat" + runner._pending_skills_reload_notes = { + session_key: "[USER INITIATED SKILLS RELOAD: target]", + other_key: "[USER INITIATED SKILLS RELOAD: other]", + } approve_session(session_key, "recursive delete") approve_session(other_key, "recursive delete") enable_session_yolo(session_key) @@ -140,10 +144,12 @@ async def test_resume_clears_session_scoped_approval_and_yolo_state(): assert is_session_yolo_enabled(session_key) is False assert session_key not in runner._pending_approvals assert session_key not in runner._update_prompt_pending + assert session_key not in runner._pending_skills_reload_notes assert is_approved(other_key, "recursive delete") is True assert is_session_yolo_enabled(other_key) is True assert other_key in runner._pending_approvals assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes @pytest.mark.asyncio @@ -151,6 +157,10 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state(): runner, session_key = _make_branch_runner() other_key = "agent:main:telegram:dm:other-chat" + runner._pending_skills_reload_notes = { + session_key: "[USER INITIATED SKILLS RELOAD: target]", + other_key: "[USER INITIATED SKILLS RELOAD: other]", + } approve_session(session_key, "recursive delete") approve_session(other_key, "recursive delete") enable_session_yolo(session_key) @@ -167,10 +177,12 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state(): assert is_session_yolo_enabled(session_key) is False assert session_key not in runner._pending_approvals assert session_key not in runner._update_prompt_pending + assert session_key not in runner._pending_skills_reload_notes assert is_approved(other_key, "recursive delete") is True assert is_session_yolo_enabled(other_key) is True assert other_key in runner._pending_approvals assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes @pytest.mark.asyncio @@ -216,6 +228,7 @@ def test_clear_session_boundary_security_state_is_scoped(): runner = object.__new__(GatewayRunner) runner._pending_approvals = {} runner._update_prompt_pending = {} + runner._pending_skills_reload_notes = {} source = _make_source() session_key = build_session_key(source) @@ -229,6 +242,12 @@ def test_clear_session_boundary_security_state_is_scoped(): runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"} runner._update_prompt_pending[session_key] = True runner._update_prompt_pending[other_key] = True + runner._pending_skills_reload_notes[session_key] = ( + "[USER INITIATED SKILLS RELOAD: target]" + ) + runner._pending_skills_reload_notes[other_key] = ( + "[USER INITIATED SKILLS RELOAD: other]" + ) runner._clear_session_boundary_security_state(session_key) @@ -237,16 +256,19 @@ def test_clear_session_boundary_security_state_is_scoped(): assert is_session_yolo_enabled(session_key) is False assert session_key not in runner._pending_approvals assert session_key not in runner._update_prompt_pending + assert session_key not in runner._pending_skills_reload_notes # Other session untouched assert is_approved(other_key, "recursive delete") is True assert is_session_yolo_enabled(other_key) is True assert other_key in runner._pending_approvals assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes # Empty session_key is a no-op runner._clear_session_boundary_security_state("") assert is_approved(other_key, "recursive delete") is True assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes def test_clear_session_boundary_security_state_wakes_blocked_approvals(): From 808fee151d42b77a763ea4a8ec711d7b501cece6 Mon Sep 17 00:00:00 2001 From: nftpoetrist Date: Mon, 4 May 2026 00:12:58 +0300 Subject: [PATCH 026/171] fix(auxiliary): propagate explicit_api_key to _try_anthropic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _try_anthropic() lacked the explicit_api_key parameter added to _try_openrouter() in #18768. When resolve_provider_client() is called with provider="anthropic" and an explicit key (e.g. from a fallback_model entry with api_key set), the key was silently ignored — _try_anthropic() always fell back to resolve_anthropic_token(), so the fallback returned None,None for users without a default Anthropic credential configured. Fix: add explicit_api_key: str = None to _try_anthropic() and use explicit_api_key or in both the pool-present and no-pool paths. Pass explicit_api_key=explicit_api_key at the call site in resolve_provider_client(). Symmetric with the _try_openrouter() fix. No behavior change when explicit_api_key is None. --- agent/auxiliary_client.py | 8 ++--- tests/agent/test_auxiliary_client.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index b86f78f8ec8..0c688d23dca 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1529,7 +1529,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]: return CodexAuxiliaryClient(real_client, model), model -def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: +def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]: try: from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token except ImportError: @@ -1539,10 +1539,10 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: if pool_present: if entry is None: return None, None - token = _pool_runtime_api_key(entry) + token = explicit_api_key or _pool_runtime_api_key(entry) else: entry = None - token = resolve_anthropic_token() + token = explicit_api_key or resolve_anthropic_token() if not token: return None, None @@ -2336,7 +2336,7 @@ def resolve_provider_client( if pconfig.auth_type == "api_key": if provider == "anthropic": - client, default_model = _try_anthropic() + client, default_model = _try_anthropic(explicit_api_key=explicit_api_key) if client is None: logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found") return None, None diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index c57a0b63720..43125554dfa 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1893,3 +1893,53 @@ class TestOpenRouterExplicitApiKey: assert call_kwargs["api_key"] == "env-fallback-key", ( f"Expected env fallback key to be used when explicit_api_key is None, got: {call_kwargs['api_key']}" ) + + +class TestAnthropicExplicitApiKey: + """Test that explicit_api_key is correctly propagated to _try_anthropic(). + + Parity with the OpenRouter fix in #18768: resolve_provider_client() passes + explicit_api_key to _try_openrouter(), but the anthropic branch was not + updated — _try_anthropic() always fell back to resolve_anthropic_token() + even when an explicit key was supplied (e.g. from a fallback_model entry). + """ + + def test_try_anthropic_uses_explicit_api_key_over_env(self): + """_try_anthropic(explicit_api_key) must use the supplied key, not the env fallback.""" + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-fallback-key"), \ + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + mock_build.return_value = MagicMock() + from agent.auxiliary_client import _try_anthropic + client, model = _try_anthropic("explicit-pool-key") + assert client is not None + assert mock_build.call_args.args[0] == "explicit-pool-key", ( + f"Expected explicit_api_key to be passed, got: {mock_build.call_args.args[0]}" + ) + assert mock_build.call_args.args[0] != "env-fallback-key" + + def test_try_anthropic_without_explicit_key_falls_back_to_resolve(self): + """Without explicit_api_key, _try_anthropic falls back to resolve_anthropic_token.""" + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-fallback-key"), \ + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + mock_build.return_value = MagicMock() + from agent.auxiliary_client import _try_anthropic + client, model = _try_anthropic() + assert client is not None + assert mock_build.call_args.args[0] == "env-fallback-key" + + def test_resolve_provider_client_passes_explicit_api_key_to_anthropic(self): + """resolve_provider_client(provider='anthropic', explicit_api_key=...) must propagate the key.""" + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-key"), \ + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + mock_build.return_value = MagicMock() + client, model = resolve_provider_client( + provider="anthropic", + explicit_api_key="explicit-fallback-key", + ) + assert client is not None + assert mock_build.call_args.args[0] == "explicit-fallback-key", ( + "resolve_provider_client must forward explicit_api_key to _try_anthropic()" + ) From 363cc936746c3f2964427b635f80f57df528da54 Mon Sep 17 00:00:00 2001 From: Chris Danis Date: Sat, 2 May 2026 07:51:22 -0400 Subject: [PATCH 027/171] fix(cron): bump skill usage when cron jobs load skills Cron jobs that reference skills via their skills: config never bumped the usage counters in .usage.json, so the curator could auto-archive skills actively used by cron jobs based on stale timestamps. Now _build_job_prompt() calls bump_use(skill_name) for each successfully loaded skill so the curator sees them as active. --- cron/scheduler.py | 7 ++++++ tests/cron/test_scheduler.py | 48 ++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/cron/scheduler.py b/cron/scheduler.py index 2cb1547ad32..f8aaf6e3ca2 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -782,6 +782,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: return prompt from tools.skills_tool import skill_view + from tools.skill_usage import bump_use parts = [] skipped: list[str] = [] @@ -793,6 +794,12 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: skipped.append(skill_name) continue + # Bump usage so the curator sees this skill as actively used. + try: + bump_use(skill_name) + except Exception: + logger.debug("Cron job: failed to bump skill usage for '%s'", skill_name, exc_info=True) + content = str(loaded.get("content") or "").strip() if parts: parts.append("") diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index b12bb578a3f..66df251a454 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1857,6 +1857,54 @@ class TestBuildJobPromptMissingSkill: assert "go" in result +class TestBuildJobPromptBumpUse: + """Verify that cron jobs bump skill usage counters so the curator sees them as active.""" + + def test_bump_use_called_for_loaded_skill(self): + """bump_use is called for each successfully loaded skill.""" + + def _skill_view(name: str) -> str: + return json.dumps({"success": True, "content": f"Content for {name}."}) + + with patch("tools.skills_tool.skill_view", side_effect=_skill_view), \ + patch("tools.skill_usage.bump_use") as mock_bump: + _build_job_prompt({"skills": ["alpha", "beta"], "prompt": "go"}) + + assert mock_bump.call_count == 2 + calls = [c[0][0] for c in mock_bump.call_args_list] + assert "alpha" in calls + assert "beta" in calls + + def test_bump_use_not_called_for_missing_skill(self): + """bump_use is NOT called when a skill fails to load.""" + + def _missing_view(name: str) -> str: + return json.dumps({"success": False, "error": "not found"}) + + with patch("tools.skills_tool.skill_view", side_effect=_missing_view), \ + patch("tools.skill_usage.bump_use") as mock_bump: + _build_job_prompt({"skills": ["ghost"], "prompt": "go"}) + + assert mock_bump.call_count == 0 + + def test_bump_failure_does_not_break_prompt(self, caplog): + """If bump_use raises, the prompt still builds — error is logged at DEBUG.""" + + def _skill_view(name: str) -> str: + return json.dumps({"success": True, "content": "Works."}) + + with patch("tools.skills_tool.skill_view", side_effect=_skill_view), \ + patch("tools.skill_usage.bump_use", side_effect=RuntimeError("boom")), \ + caplog.at_level(logging.DEBUG, logger="cron.scheduler"): + result = _build_job_prompt({"skills": ["good-skill"], "prompt": "go"}) + + # Prompt should still contain the skill content and original instruction + assert "Works." in result + assert "go" in result + # The error should be logged at DEBUG level, not crash + assert any("failed to bump" in r.message for r in caplog.records) + + class TestSendMediaViaAdapter: """Unit tests for _send_media_via_adapter — routes files to typed adapter methods.""" From 2f2998bb1b0d6a1b1c2c14b66b72982595b91506 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 4 May 2026 14:13:38 +1000 Subject: [PATCH 028/171] =?UTF-8?q?fix(tui):=20tolerate=20npm's=20peer-fla?= =?UTF-8?q?g=20drop=20in=20lockfile=20comparison=20`=5Ftui=5Fneed=5Fnpm=5F?= =?UTF-8?q?install()`=20compares=20the=20canonical=20`package-lock.json`?= =?UTF-8?q?=20against=20the=20hidden=20`node=5Fmodules/.package-lock.json`?= =?UTF-8?q?=20to=20decide=20whether=20`npm=20install`=20needs=20to=20re-ru?= =?UTF-8?q?n.=20npm=209=20drops=20the=20`"peer":=20true`=20field=20from=20?= =?UTF-8?q?the=20hidden=20lock=20on=20dev-deps=20that=20are=20*also*=20dec?= =?UTF-8?q?lared=20as=20peers=20(the=20canonical=20lock=20preserves=20the?= =?UTF-8?q?=20dual=20annotation).=20That=20made=20the=20check=20flag=2016?= =?UTF-8?q?=20packages=20(`@babel/core`,=20`@types/node`,=20`@types/react`?= =?UTF-8?q?,=20`@typescript-eslint/*`,=20`react`,=20`vite`,=20`tsx`,=20`ty?= =?UTF-8?q?pescript`,=20=E2=80=A6)=20as=20mismatched=20on=20every=20launch?= =?UTF-8?q?,=20triggering=20a=20runtime=20`npm=20install`.=20Inside=20the?= =?UTF-8?q?=20Docker=20image,=20that=20runtime=20install=20then=20fails=20?= =?UTF-8?q?with=20EACCES=20because=20`/opt/hermes/ui-tui/node=5Fmodules/`?= =?UTF-8?q?=20is=20root-owned=20from=20build=20time,=20so=20`docker=20run?= =?UTF-8?q?=20=E2=80=A6=20hermes-agent=20--tui`=20prints:=20=20=20=20=20In?= =?UTF-8?q?stalling=20TUI=20dependencies=E2=80=A6=20=20=20=20=20npm=20inst?= =?UTF-8?q?all=20failed.=20=E2=80=A6and=20exits=201,=20with=20no=20preview?= =?UTF-8?q?.=20The=20empty=20preview=20is=20a=20second=20bug:=20the=20laun?= =?UTF-8?q?cher=20captured=20only=20stderr,=20but=20npm=209=20writes=20EAC?= =?UTF-8?q?CES=20to=20stdout,=20which=20was=20DEVNULL'd.=20Fixes:=20=20-?= =?UTF-8?q?=20Add=20`"peer"`=20to=20`=5FNPM=5FLOCK=5FRUNTIME=5FKEYS`=20so?= =?UTF-8?q?=20the=20comparison=20ignores=20the=20=20=20=20non-deterministi?= =?UTF-8?q?c=20field,=20alongside=20the=20existing=20`"ideallyInert"`.=20?= =?UTF-8?q?=20-=20Capture=20stdout=20as=20well=20as=20stderr=20in=20the=20?= =?UTF-8?q?install=20subprocess=20so=20future=20=20=20=20failures=20surfac?= =?UTF-8?q?e=20a=20useful=20preview=20instead=20of=20a=20bare=20"failed."?= =?UTF-8?q?=20line.=20Regression=20tests:=20=20-=20`test=5Fno=5Finstall=5F?= =?UTF-8?q?when=5Fonly=5Fpeer=5Fannotation=5Fdiffers`=20=E2=80=94=20the=20?= =?UTF-8?q?exact=20scenario=20=20-=20`test=5Finstall=5Fwhen=5Fversion=5Fdi?= =?UTF-8?q?ffers=5Feven=5Fwith=5Fpeer=5Fdrop`=20=E2=80=94=20guards=20again?= =?UTF-8?q?st=20=20=20=20the=20peer-drop=20tolerance=20masking=20a=20real?= =?UTF-8?q?=20version=20skew=20On-host=20impact:=20the=20same=20false-posi?= =?UTF-8?q?tive=20was=20firing=20on=20every=20`hermes=20--tui`=20invocatio?= =?UTF-8?q?n=20from=20a=20normal=20checkout,=20silently=20running=20a=20no?= =?UTF-8?q?-op=20`npm=20install`=20each=20time=20(it=20converged=20because?= =?UTF-8?q?=20the=20host's=20`node=5Fmodules/`=20is=20writable).=20Startup?= =?UTF-8?q?=20time=20on=20the=20TUI=20should=20drop=20noticeably.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- hermes_cli/main.py | 22 +++++++++++++--- tests/hermes_cli/test_tui_npm_install.py | 33 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index d80e31f6901..4fe5ff3508b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -837,7 +837,17 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti ) -_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"}) +_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert", "peer"}) +"""Lockfile fields npm writes non-deterministically at install time. + +``ideallyInert`` is npm's runtime annotation for packages it skipped installing +(per-platform opt-outs). ``peer`` is dropped from the hidden ``.package-lock.json`` +on dev-dependencies that are *also* declared as peers — the canonical +``package-lock.json`` records the dual role, but npm 9's actualized tree strips +it. Neither key represents a real skew between what was declared and what was +installed, so we exclude them from the comparison in :func:`_tui_need_npm_install` +to avoid false-positive reinstalls on every launch. +""" def _tui_need_npm_install(root: Path) -> bool: @@ -1042,17 +1052,21 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: if _tui_need_npm_install(tui_dir): if not os.environ.get("HERMES_QUIET"): print("Installing TUI dependencies…") + # Capture stdout as well as stderr — some npm errors (notably EACCES on a + # root-owned node_modules in containers) are emitted on stdout, and a + # bare "npm install failed." with no preview defeats debugging. We keep + # the failure-only print path so a successful install stays silent. result = subprocess.run( [npm, "install", "--silent", "--no-fund", "--no-audit", "--progress=false"], cwd=str(tui_dir), - stdout=subprocess.DEVNULL, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env={**os.environ, "CI": "1"}, ) if result.returncode != 0: - err = (result.stderr or "").strip() - preview = "\n".join(err.splitlines()[-30:]) + combined = f"{result.stdout or ''}\n{result.stderr or ''}".strip() + preview = "\n".join(combined.splitlines()[-30:]) print("npm install failed.") if preview: print(preview) diff --git a/tests/hermes_cli/test_tui_npm_install.py b/tests/hermes_cli/test_tui_npm_install.py index e56196e07ed..1dec6257165 100644 --- a/tests/hermes_cli/test_tui_npm_install.py +++ b/tests/hermes_cli/test_tui_npm_install.py @@ -69,6 +69,39 @@ def test_no_install_when_only_optional_peer_package_missing_from_hidden_lock(tmp assert main_mod._tui_need_npm_install(tmp_path) is False +def test_no_install_when_only_peer_annotation_differs(tmp_path: Path, main_mod) -> None: + """npm 9 drops the ``peer`` flag from the hidden lock on dev-deps that are + *also* declared as peers. That's a cosmetic difference — the package is + installed at the requested version — so it must not trigger a reinstall. + Regression for the TUI-in-Docker failure where 16 such mismatches caused + `Installing TUI dependencies…` → EACCES on every launch. + """ + _touch_ink(tmp_path) + (tmp_path / "package-lock.json").write_text( + '{"packages":{' + '"node_modules/foo":{"version":"1.0.0","dev":true,"peer":true,"resolved":"https://x/foo.tgz"}' + '}}' + ) + (tmp_path / "node_modules" / ".package-lock.json").write_text( + '{"packages":{' + '"node_modules/foo":{"version":"1.0.0","dev":true,"resolved":"https://x/foo.tgz"}' + '}}' + ) + assert main_mod._tui_need_npm_install(tmp_path) is False + + +def test_install_when_version_differs_even_with_peer_drop(tmp_path: Path, main_mod) -> None: + """The peer-drop tolerance must not mask a real version skew.""" + _touch_ink(tmp_path) + (tmp_path / "package-lock.json").write_text( + '{"packages":{"node_modules/foo":{"version":"2.0.0","dev":true,"peer":true}}}' + ) + (tmp_path / "node_modules" / ".package-lock.json").write_text( + '{"packages":{"node_modules/foo":{"version":"1.0.0","dev":true}}}' + ) + assert main_mod._tui_need_npm_install(tmp_path) is True + + def test_no_install_when_lock_older_than_marker(tmp_path: Path, main_mod) -> None: _touch_ink(tmp_path) (tmp_path / "package-lock.json").write_text("{}") From 5671059f62ab28fa118b15fa148d5ae9a4200574 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 4 May 2026 15:37:27 +1000 Subject: [PATCH 029/171] =?UTF-8?q?feat(docker):=20launch=20dashboard=20as?= =?UTF-8?q?=20side-process=20via=20HERMES=5FDASHBOARD=3D1=20Adds=20an=20op?= =?UTF-8?q?tional=20dashboard=20side-process=20to=20the=20container=20entr?= =?UTF-8?q?ypoint,=20toggled=20by=20`HERMES=5FDASHBOARD=3D1`=20(also=20acc?= =?UTF-8?q?epts=20`true`=20/=20`yes`).=20=20When=20set,=20the=20entrypoint?= =?UTF-8?q?=20backgrounds=20`hermes=20dashboard`=20before=20`exec`-ing=20t?= =?UTF-8?q?he=20main=20command=20so=20the=20user's=20chosen=20foreground?= =?UTF-8?q?=20process=20(gateway,=20chat,=20`sleep=20infinity`,=20?= =?UTF-8?q?=E2=80=A6)=20remains=20PID-of-interest=20for=20the=20container?= =?UTF-8?q?=20runtime.=20=20=20docker=20run=20-d=20\=20=20=20=20=20-v=20~/?= =?UTF-8?q?.hermes:/opt/data=20\=20=20=20=20=20-p=208642:8642=20-p=209119:?= =?UTF-8?q?9119=20\=20=20=20=20=20-e=20HERMES=5FDASHBOARD=3D1=20\=20=20=20?= =?UTF-8?q?=20=20nousresearch/hermes-agent=20gateway=20run=20Defaults=20ch?= =?UTF-8?q?osen=20for=20the=20container=20case:=20=20-=20Host:=200.0.0.0?= =?UTF-8?q?=20(reachable=20through=20published=20port;=20can=20override=20?= =?UTF-8?q?to=20=20=20=20127.0.0.1=20via=20HERMES=5FDASHBOARD=5FHOST=20for?= =?UTF-8?q?=20sidecar/reverse-proxy=20setups)=20=20-=20Port:=209119=20(mat?= =?UTF-8?q?ches=20`hermes=20dashboard`)=20=20-=20Auto-adds=20`--insecure`?= =?UTF-8?q?=20when=20binding=20to=20non-localhost,=20matching=20the=20=20?= =?UTF-8?q?=20=20dashboard's=20own=20safety=20gate=20for=20exposing=20API?= =?UTF-8?q?=20keys=20=20-=20HERMES=5FDASHBOARD=5FTUI=20is=20read=20by=20`h?= =?UTF-8?q?ermes=20dashboard`=20directly=20=E2=80=94=20no=20=20=20=20entry?= =?UTF-8?q?point=20plumbing=20needed=20Dashboard=20output=20is=20prefixed?= =?UTF-8?q?=20with=20`[dashboard]`=20via=20`stdbuf`+`sed=20-u`=20so=20it's?= =?UTF-8?q?=20easy=20to=20separate=20from=20gateway=20logs=20in=20`docker?= =?UTF-8?q?=20logs`.=20=20No=20supervision:=20if=20the=20dashboard=20crash?= =?UTF-8?q?es=20it=20stays=20down=20until=20the=20container=20restarts=20(?= =?UTF-8?q?documented=20in=20the=20`:::note`=20panel).=20Other=20changes?= =?UTF-8?q?=20bundled=20in:=20=20-=20Deprecate=20GATEWAY=5FHEALTH=5FURL=20?= =?UTF-8?q?/=20GATEWAY=5FHEALTH=5FTIMEOUT=20env=20vars=20in=20=20=20=20her?= =?UTF-8?q?mes=5Fcli/web=5Fserver.py=20with=20a=20DEPRECATED=20block=20com?= =?UTF-8?q?ment=20and=20a=20=20=20=20`..=20deprecated::`=20note=20on=20=5F?= =?UTF-8?q?probe=5Fgateway=5Fhealth.=20=20The=20feature=20still=20=20=20?= =?UTF-8?q?=20works=20for=20this=20release;=20it'll=20be=20removed=20along?= =?UTF-8?q?side=20the=20move=20to=20a=20=20=20=20first-class=20dashboard?= =?UTF-8?q?=20config=20key.=20=20-=20Rewrite=20the=20"Running=20the=20dash?= =?UTF-8?q?board"=20doc=20section=20around=20the=20new=20=20=20=20single-c?= =?UTF-8?q?ontainer=20pattern.=20=20Drops=20the=20previously-documented=20?= =?UTF-8?q?=20=20=20dashboard-as-its-own-container=20setup=20=E2=80=94=20t?= =?UTF-8?q?hat=20pattern=20relied=20on=20the=20=20=20=20deprecated=20env?= =?UTF-8?q?=20vars=20for=20cross-container=20gateway-liveness=20detection,?= =?UTF-8?q?=20=20=20=20and=20without=20them=20the=20dashboard=20would=20pe?= =?UTF-8?q?rmanently=20report=20the=20gateway=20=20=20=20as=20"not=20runni?= =?UTF-8?q?ng".=20=20-=20Collapse=20the=20two-service=20Compose=20example?= =?UTF-8?q?=20(gateway=20+=20dashboard=20=20=20=20container)=20into=20a=20?= =?UTF-8?q?single=20service=20with=20HERMES=5FDASHBOARD=3D1.=20=20Removes?= =?UTF-8?q?=20=20=20=20the=20now-unnecessary=20bridge=20network=20and=20`d?= =?UTF-8?q?epends=5Fon`.=20=20-=20Drop=20the=20":::warning"=20caveat=20abo?= =?UTF-8?q?ut=20"Running=20a=20dashboard=20container=20=20=20=20alongside?= =?UTF-8?q?=20the=20gateway=20is=20safe"=20=E2=80=94=20that=20case=20no=20?= =?UTF-8?q?longer=20exists.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/entrypoint.sh | 35 +++++++++++++++ hermes_cli/web_server.py | 13 ++++++ website/docs/user-guide/docker.md | 71 +++++++++++-------------------- 3 files changed, 74 insertions(+), 45 deletions(-) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 299aab97a22..65386e53dd5 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -86,6 +86,41 @@ if [ -d "$INSTALL_DIR/skills" ]; then python3 "$INSTALL_DIR/tools/skills_sync.py" fi +# Optionally start `hermes dashboard` as a side-process. +# +# Toggled by HERMES_DASHBOARD=1 (also accepts "true"/"yes", case-insensitive). +# Host/port/TUI can be overridden via: +# HERMES_DASHBOARD_HOST (default 0.0.0.0 — exposed outside the container) +# HERMES_DASHBOARD_PORT (default 9119, matches `hermes dashboard` default) +# HERMES_DASHBOARD_TUI (already honored by `hermes dashboard` itself) +# +# The dashboard is a long-lived server. We background it *before* the final +# `exec hermes "$@"` so the user's chosen foreground command (chat, gateway, +# sleep infinity, …) remains PID-of-interest for the container runtime. When +# the container stops the whole process tree is torn down, so no explicit +# cleanup is needed. +case "${HERMES_DASHBOARD:-}" in + 1|true|TRUE|True|yes|YES|Yes) + dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}" + dash_port="${HERMES_DASHBOARD_PORT:-9119}" + dash_args=(--host "$dash_host" --port "$dash_port" --no-open) + # Binding to anything other than localhost requires --insecure — the + # dashboard refuses otherwise because it exposes API keys. Inside a + # container this is the expected deployment (host reaches it via + # published port), so opt in automatically. + if [ "$dash_host" != "127.0.0.1" ] && [ "$dash_host" != "localhost" ]; then + dash_args+=(--insecure) + fi + echo "Starting hermes dashboard on ${dash_host}:${dash_port} (background)" + # Prefix dashboard output so it's distinguishable from the main + # process in `docker logs`. stdbuf keeps the pipe line-buffered. + ( + stdbuf -oL -eL hermes dashboard "${dash_args[@]}" 2>&1 \ + | sed -u 's/^/[dashboard] /' + ) & + ;; +esac + # Final exec: two supported invocation patterns. # # docker run -> exec `hermes` with no args (legacy default) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 014a938e070..97ebf9e29d6 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -470,10 +470,23 @@ except (ValueError, TypeError): ) _GATEWAY_HEALTH_TIMEOUT = 3.0 +# DEPRECATED (scheduled for removal): GATEWAY_HEALTH_URL / GATEWAY_HEALTH_TIMEOUT. +# Cross-container / cross-host gateway liveness detection will be folded into a +# first-class dashboard config key so it's no longer Docker-adjacent lore buried +# in env vars. The env vars still work for now so existing Compose deployments +# don't break. Do not add new callers — wire new uses through the planned +# config surface. + def _probe_gateway_health() -> tuple[bool, dict | None]: """Probe the gateway via its HTTP health endpoint (cross-container). + .. deprecated:: + Driven by the deprecated ``GATEWAY_HEALTH_URL`` / + ``GATEWAY_HEALTH_TIMEOUT`` env vars. Scheduled for removal alongside + a move to a first-class dashboard config key. See + :data:`_GATEWAY_HEALTH_URL` for context. + Uses ``/health/detailed`` first (returns full state), falling back to the simpler ``/health`` endpoint. Returns ``(is_alive, body_dict)``. diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index 21f8246ace3..2a13fe6662a 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -45,28 +45,33 @@ Opening any port on an internet facing machine is a security risk. You should no ## Running the dashboard -The built-in web dashboard can run alongside the gateway as a separate container. - -To run the dashboard as its own container, point it at the gateway's health endpoint so it can detect gateway status across containers: +The built-in web dashboard runs as an optional side-process inside the same container as the gateway. Set `HERMES_DASHBOARD=1` and expose port `9119` alongside the gateway's `8642`: ```sh docker run -d \ - --name hermes-dashboard \ + --name hermes \ --restart unless-stopped \ -v ~/.hermes:/opt/data \ + -p 8642:8642 \ -p 9119:9119 \ - -e GATEWAY_HEALTH_URL=http://$HOST_IP:8642 \ - nousresearch/hermes-agent dashboard + -e HERMES_DASHBOARD=1 \ + nousresearch/hermes-agent gateway run ``` -Replace `$HOST_IP` with the IP address of the machine running the gateway container (e.g. `192.168.1.100`), or use a Docker network hostname if both containers share a network (see the [Compose example](#docker-compose-example) below). +The entrypoint starts `hermes dashboard` in the background (running as the non-root `hermes` user) before `exec`-ing the main command. Dashboard output is prefixed with `[dashboard]` in `docker logs` so it's easy to separate from gateway logs. | Environment variable | Description | Default | |---------------------|-------------|---------| -| `GATEWAY_HEALTH_URL` | Base URL of the gateway's API server, e.g. `http://gateway:8642` | *(unset — local PID check only)* | -| `GATEWAY_HEALTH_TIMEOUT` | Health probe timeout in seconds | `3` | +| `HERMES_DASHBOARD` | Set to `1` (or `true` / `yes`) to launch the dashboard alongside the main command | *(unset — dashboard not started)* | +| `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `0.0.0.0` | +| `HERMES_DASHBOARD_PORT` | Port for the dashboard HTTP server | `9119` | +| `HERMES_DASHBOARD_TUI` | Set to `1` to expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket) | *(unset)* | -Without `GATEWAY_HEALTH_URL`, the dashboard falls back to local process detection — which only works when the gateway runs in the same container or on the same host. +The default `HERMES_DASHBOARD_HOST=0.0.0.0` is required for the host to reach the dashboard through the published port; the entrypoint automatically passes `--insecure` to `hermes dashboard` in that case. Override to `127.0.0.1` if you want to restrict the dashboard to in-container access only (e.g. behind a reverse proxy in a sidecar). + +:::note +The dashboard side-process is **not supervised** — if it crashes, it stays down until the container restarts. Running it as a separate container is not supported: the dashboard's gateway-liveness detection requires a shared PID namespace with the gateway process. +::: ## Running interactively (CLI chat) @@ -102,7 +107,7 @@ The `/opt/data` volume is the single source of truth for all Hermes state. It ma | `skins/` | Custom CLI skins | :::warning -Never run two Hermes **gateway** containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent write access. Running a dashboard container alongside the gateway is safe since the dashboard only reads data. +Never run two Hermes **gateway** containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent write access. ::: ## Multi-profile support @@ -188,49 +193,24 @@ services: restart: unless-stopped command: gateway run ports: - - "8642:8642" + - "8642:8642" # gateway API + - "9119:9119" # dashboard (only reached when HERMES_DASHBOARD=1) volumes: - ~/.hermes:/opt/data - networks: - - hermes-net - # Uncomment to forward specific env vars instead of using .env file: - # environment: - # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} - # - OPENAI_API_KEY=${OPENAI_API_KEY} - # - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} + environment: + - HERMES_DASHBOARD=1 + # Uncomment to forward specific env vars instead of using .env file: + # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + # - OPENAI_API_KEY=${OPENAI_API_KEY} + # - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} deploy: resources: limits: memory: 4G cpus: "2.0" - - dashboard: - image: nousresearch/hermes-agent:latest - container_name: hermes-dashboard - restart: unless-stopped - command: dashboard --host 0.0.0.0 --insecure - ports: - - "9119:9119" - volumes: - - ~/.hermes:/opt/data - environment: - - GATEWAY_HEALTH_URL=http://hermes:8642 - networks: - - hermes-net - depends_on: - - hermes - deploy: - resources: - limits: - memory: 512M - cpus: "0.5" - -networks: - hermes-net: - driver: bridge ``` -Start with `docker compose up -d` and view logs with `docker compose logs -f`. +Start with `docker compose up -d` and view logs with `docker compose logs -f`. Dashboard output is prefixed with `[dashboard]` so it's easy to filter from gateway logs. ## Resource limits @@ -273,6 +253,7 @@ The entrypoint script (`docker/entrypoint.sh`) bootstraps the data volume on fir - Copies default `config.yaml` if missing - Copies default `SOUL.md` if missing - Syncs bundled skills using a manifest-based approach (preserves user edits) +- Optionally launches `hermes dashboard` as a background side-process when `HERMES_DASHBOARD=1` (see [Running the dashboard](#running-the-dashboard)) - Then runs `hermes` with whatever arguments you pass ## Upgrading From a11aed1accc735ae0d7af80d626b33870d4b696c Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Mon, 4 May 2026 11:36:19 +0530 Subject: [PATCH 030/171] fix(cli): local backend CLI always uses launch directory, stops .env sync of TERMINAL_CWD (#19334) The old CWD heuristic was fooled by: 1. TERMINAL_CWD persisted to .env by `hermes config set terminal.cwd` 2. Inherited TERMINAL_CWD from parent hermes processes 3. Only resolved when config had a placeholder value (not explicit paths) Fix: - load_cli_config() unconditionally uses os.getcwd() for local backend - TERMINAL_CWD always force-exported in CLI mode (overrides stale values) - Gateway sets _HERMES_GATEWAY=1 marker so lazy cli.py imports don't clobber - Remove terminal.cwd from config-set .env sync map (prevents re-poisoning) - Clarify setup wizard label as 'Gateway working directory' Closes #19214 --- cli.py | 52 +++--- gateway/run.py | 4 + hermes_cli/config.py | 4 +- hermes_cli/setup.py | 12 +- tests/cli/test_cwd_env_respect.py | 166 +++++++++--------- .../docs/reference/environment-variables.md | 2 +- website/docs/user-guide/configuration.md | 2 +- 7 files changed, 116 insertions(+), 126 deletions(-) diff --git a/cli.py b/cli.py index da917ae1906..472218271fc 100644 --- a/cli.py +++ b/cli.py @@ -459,32 +459,19 @@ def load_cli_config() -> Dict[str, Any]: if "backend" in terminal_config: terminal_config["env_type"] = terminal_config["backend"] - # Handle special cwd values: "." or "auto" means use current working directory. - # Only resolve to the host's CWD for the local backend where the host - # filesystem is directly accessible. For ALL remote/container backends - # (ssh, docker, modal, singularity), the host path doesn't exist on the - # target -- remove the key so terminal_tool.py uses its per-backend default. - # - # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the - # gateway's config bridge earlier in the process), don't clobber it. - # This prevents a lazy import of cli.py during gateway runtime from - # rewriting TERMINAL_CWD to the service's working directory. - # See issue #10817. + # CWD resolution for CLI/TUI. The gateway has its own config bridge in + # gateway/run.py but may lazily import cli.py (triggering this code). + # Local backend: always os.getcwd(). Use `cd /dir && hermes` to control it. + # Non-local with placeholder: pop so terminal_tool uses its per-backend default. + # Non-local with explicit path: keep as-is. _CWD_PLACEHOLDERS = (".", "auto", "cwd") - if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - _existing_cwd = os.environ.get("TERMINAL_CWD", "") - if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): - # Gateway (or earlier startup) already resolved a real path — keep it - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - else: - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - else: - # Remove so TERMINAL_CWD stays unset → tool picks backend default - terminal_config.pop("cwd", None) + effective_backend = terminal_config.get("env_type", "local") + + if effective_backend == "local": + terminal_config["cwd"] = os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + terminal_config.pop("cwd", None) env_mappings = { "env_type": "TERMINAL_ENV", @@ -517,13 +504,18 @@ def load_cli_config() -> Dict[str, Any]: "sudo_password": "SUDO_PASSWORD", } - # Apply config values to env vars so terminal_tool picks them up. - # If the config file explicitly has a [terminal] section, those values are - # authoritative and override any .env settings. When using defaults only - # (no config file or no terminal section), don't overwrite env vars that - # were already set by .env -- the user's .env is the fallback source. + # Bridge config → env vars for terminal_tool. TERMINAL_CWD is force-exported + # UNLESS we're inside a gateway process (detected by _HERMES_GATEWAY marker) + # where it was already set correctly by gateway/run.py's config bridge. + _is_gateway = os.environ.get("_HERMES_GATEWAY") == "1" for config_key, env_var in env_mappings.items(): if config_key in terminal_config: + if env_var == "TERMINAL_CWD": + if _is_gateway: + continue + # CLI: always export (overrides stale .env or inherited values) + os.environ[env_var] = str(terminal_config[config_key]) + continue if _file_has_terminal_config or env_var not in os.environ: val = terminal_config[config_key] if isinstance(val, list): diff --git a/gateway/run.py b/gateway/run.py index 78716862568..d4f2ba8d253 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -316,6 +316,10 @@ def _restart_notification_pending() -> bool: return (_hermes_home / ".restart_notify.json").exists() +# Mark this process as a gateway so cli.py's module-level load_cli_config() +# knows not to clobber TERMINAL_CWD if lazily imported. +os.environ["_HERMES_GATEWAY"] = "1" + _ensure_ssl_certs() # Add parent directory to path diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 25df4b3e2f3..98317a9043f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -4675,7 +4675,9 @@ def set_config_value(key: str, value: str): "terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME", "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", - "terminal.cwd": "TERMINAL_CWD", + # terminal.cwd intentionally excluded — CLI resolves at runtime, + # gateway bridges it in gateway/run.py. Persisting to .env causes + # stale values to poison child processes. "terminal.timeout": "TERMINAL_TIMEOUT", "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR", "terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 31cb8460122..9ca29968fd7 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1328,15 +1328,13 @@ def setup_terminal_backend(config: dict): print_success("Terminal backend: Local") print_info("Commands run directly on this machine.") - # CWD for messaging + # Gateway/cron working directory print() - print_info("Working directory for messaging sessions:") - print_info(" When using Hermes via Telegram/Discord, this is where") - print_info( - " the agent starts. CLI mode always starts in the current directory." - ) + print_info("Gateway working directory:") + print_info(" Used by Telegram/Discord/cron sessions.") + print_info(" CLI/TUI always uses your launch directory instead.") current_cwd = cfg_get(config, "terminal", "cwd", default="") - cwd = prompt(" Messaging working directory", current_cwd or str(Path.home())) + cwd = prompt(" Gateway working directory", current_cwd or str(Path.home())) if cwd: config["terminal"]["cwd"] = cwd diff --git a/tests/cli/test_cwd_env_respect.py b/tests/cli/test_cwd_env_respect.py index e9f3341d2ae..04e62cc12f8 100644 --- a/tests/cli/test_cwd_env_respect.py +++ b/tests/cli/test_cwd_env_respect.py @@ -1,107 +1,101 @@ -"""Tests that load_cli_config() guards against lazy-import TERMINAL_CWD clobbering. +"""Tests for CLI/TUI CWD resolution in load_cli_config(). -When the gateway resolves TERMINAL_CWD at startup and cli.py is later -imported lazily (via delegate_tool → CLI_CONFIG), load_cli_config() must -not overwrite the already-resolved value with os.getcwd(). - -config.yaml terminal.cwd is the canonical source of truth. -.env TERMINAL_CWD and MESSAGING_CWD are deprecated. -See issue #10817. +Rules: +- Local backend CLI/TUI: always os.getcwd(), ignoring config and inherited env. +- Non-local with placeholder: pop cwd for backend default. +- Non-local with explicit path: keep as-is. """ import os import pytest - -# The sentinel values that mean "resolve at runtime" _CWD_PLACEHOLDERS = (".", "auto", "cwd") -def _resolve_terminal_cwd(terminal_config: dict, defaults: dict, env: dict): - """Simulate the CWD resolution logic from load_cli_config(). +def _resolve_cwd(terminal_config: dict, defaults: dict, env: dict): + """Mirror the CWD resolution logic from cli.py load_cli_config().""" + effective_backend = terminal_config.get("env_type", "local") - This mirrors the code in cli.py that checks for a pre-resolved - TERMINAL_CWD before falling back to os.getcwd(). - """ - if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - _existing_cwd = env.get("TERMINAL_CWD", "") - if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - else: - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - else: - terminal_config.pop("cwd", None) + if effective_backend == "local": + terminal_config["cwd"] = "/fake/getcwd" + defaults["terminal"]["cwd"] = terminal_config["cwd"] + elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + terminal_config.pop("cwd", None) - # Simulate the bridging loop: write terminal_config["cwd"] to env - _file_has_terminal = defaults.get("_file_has_terminal", False) + # Bridge: TERMINAL_CWD always exported in CLI, skipped in gateway + _is_gateway = env.get("_HERMES_GATEWAY") == "1" if "cwd" in terminal_config: - if _file_has_terminal or "TERMINAL_CWD" not in env: + if _is_gateway: + pass # don't touch env + else: env["TERMINAL_CWD"] = str(terminal_config["cwd"]) return env.get("TERMINAL_CWD", "") -class TestLazyImportGuard: - """TERMINAL_CWD resolved by gateway must survive a lazy cli.py import.""" +class TestLocalBackendCli: + """Local backend always uses os.getcwd().""" - def test_gateway_resolved_cwd_survives(self): - """Gateway set TERMINAL_CWD → lazy cli import must not clobber.""" - env = {"TERMINAL_CWD": "/home/user/workspace"} - terminal_config = {"cwd": ".", "env_type": "local"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/home/user/workspace" - - def test_gateway_resolved_cwd_survives_with_file_terminal(self): - """Even when config.yaml has a terminal: section, resolved CWD survives.""" - env = {"TERMINAL_CWD": "/home/user/workspace"} - terminal_config = {"cwd": ".", "env_type": "local"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": True} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/home/user/workspace" - - -class TestConfigCwdResolution: - """config.yaml terminal.cwd is the canonical source of truth.""" - - def test_explicit_config_cwd_wins(self): - """terminal.cwd: /explicit/path always wins.""" - env = {"TERMINAL_CWD": "/old/gateway/value"} - terminal_config = {"cwd": "/explicit/path"} - defaults = {"terminal": {"cwd": "/explicit/path"}, "_file_has_terminal": True} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/explicit/path" - - def test_dot_cwd_resolves_to_getcwd_when_no_prior(self): - """With no pre-set TERMINAL_CWD, "." resolves to os.getcwd().""" + def test_explicit_config_ignored(self): env = {} - terminal_config = {"cwd": "."} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} + tc = {"cwd": "/explicit/path", "env_type": "local"} + d = {"terminal": {"cwd": "/explicit/path"}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" - result = _resolve_terminal_cwd(terminal_config, defaults, env) + def test_inherited_env_overwritten(self): + env = {"TERMINAL_CWD": "/parent/hermes"} + tc = {"cwd": "/home/user", "env_type": "local"} + d = {"terminal": {"cwd": "/home/user"}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" + + def test_placeholder_resolved(self): + env = {} + tc = {"cwd": "."} + d = {"terminal": {"cwd": "."}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" + + def test_env_and_no_config_file(self): + env = {"TERMINAL_CWD": "/stale/value"} + tc = {"cwd": ".", "env_type": "local"} + d = {"terminal": {"cwd": "."}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" + + +class TestNonLocalBackends: + """Non-local backends use config or per-backend defaults.""" + + def test_placeholder_popped(self): + env = {} + tc = {"cwd": ".", "env_type": "docker"} + d = {"terminal": {"cwd": "."}} + assert _resolve_cwd(tc, d, env) == "" + + def test_explicit_path_kept(self): + env = {} + tc = {"cwd": "/srv/app", "env_type": "ssh"} + d = {"terminal": {"cwd": "/srv/app"}} + assert _resolve_cwd(tc, d, env) == "/srv/app" + + def test_auto_placeholder_popped(self): + env = {} + tc = {"cwd": "auto", "env_type": "modal"} + d = {"terminal": {"cwd": "auto"}} + assert _resolve_cwd(tc, d, env) == "" + + +class TestGatewayLazyImport: + """Gateway lazy import of cli.py must not clobber TERMINAL_CWD.""" + + def test_gateway_cwd_preserved(self): + env = {"_HERMES_GATEWAY": "1", "TERMINAL_CWD": "/home/user/project"} + tc = {"cwd": "/home/user", "env_type": "local"} + d = {"terminal": {"cwd": "/home/user"}} + result = _resolve_cwd(tc, d, env) + assert result == "/home/user/project" + + def test_cli_overwrites_stale_env(self): + env = {"TERMINAL_CWD": "/stale/from/dotenv"} + tc = {"cwd": "/home/user", "env_type": "local"} + d = {"terminal": {"cwd": "/home/user"}} + result = _resolve_cwd(tc, d, env) assert result == "/fake/getcwd" - - def test_remote_backend_pops_cwd(self): - """Remote backend + placeholder cwd → popped for backend default.""" - env = {} - terminal_config = {"cwd": ".", "env_type": "docker"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "" # cwd popped, no env var set - - def test_remote_backend_with_prior_cwd_preserves(self): - """Remote backend + pre-resolved TERMINAL_CWD → adopted.""" - env = {"TERMINAL_CWD": "/project"} - terminal_config = {"cwd": ".", "env_type": "docker"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/project" diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index aa971c71037..ec2c5ec0e8c 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -187,7 +187,7 @@ These variables configure the [Tool Gateway](/docs/user-guide/features/tool-gate | `TERMINAL_VERCEL_RUNTIME` | Vercel Sandbox runtime (`node24`, `node22`, `python3.13`) | | `TERMINAL_TIMEOUT` | Command timeout in seconds | | `TERMINAL_LIFETIME_SECONDS` | Max lifetime for terminal sessions in seconds | -| `TERMINAL_CWD` | Working directory for all terminal sessions | +| `TERMINAL_CWD` | Working directory for terminal sessions (gateway/cron only; CLI uses launch dir) | | `SUDO_PASSWORD` | Enable sudo without interactive prompt | For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME_SECONDS` controls when Hermes cleans up an idle terminal session, and later resumes may recreate the sandbox rather than keep the same live processes running. diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 18c96b8b184..517cb2e988c 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -88,7 +88,7 @@ Hermes supports seven terminal backends. Each determines where the agent's shell ```yaml terminal: backend: local # local | docker | ssh | modal | daytona | vercel_sandbox | singularity - cwd: "." # Working directory ("." = current dir for local, "/root" for containers) + cwd: "." # Gateway/cron working directory (CLI always uses launch dir) timeout: 180 # Per-command timeout in seconds env_passthrough: [] # Env var names to forward to sandboxed execution (terminal + execute_code) singularity_image: "docker://nikolaik/python-nodejs:python3.11-nodejs20" # Container image for Singularity backend From 8163d371922768c32f43eb6036d7d36e56775605 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Mon, 4 May 2026 12:54:50 +0530 Subject: [PATCH 031/171] fix(skill): reference built-in video_analyze/vision_analyze tools in kanban-video-orchestrator (#19562) The tool-matrix.md had a vague 'Gemini multimodal / Claude vision' entry in the external tools table that didn't point to the actual built-in Hermes tools. Now that video_analyze exists (merged in #19301), update the skill to reference it properly: - Add 'Built-in Hermes tools for media review' section with proper toolset names, enablement instructions, and capability details - Add video + vision toolsets to cinematographer, editor, and reviewer profile configs - Update role-archetypes.md to reference tools by name - Update API key table to explain video_analyze routing --- .../references/role-archetypes.md | 12 +++++----- .../references/tool-matrix.md | 24 ++++++++++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md b/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md index 5a4cb207a28..95eaeb33b66 100644 --- a/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md +++ b/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md @@ -82,14 +82,14 @@ film and music video. Often pairs with a diagramming tool. Designs the visual language: framing, color, motion, transitions. Reviews generator output for visual consistency. Hands off per-scene `VISUAL_SPEC.md`. -- **Toolsets:** kanban, terminal, file +- **Toolsets:** kanban, terminal, file, video, vision - **Skills:** `kanban-worker` plus the visual skill that matches the project (e.g., `ascii-video` for ASCII work, `manim-video` for explainers, `touchdesigner-mcp` for real-time visuals, etc.) - **Outputs:** `scenes/scene-NN/VISUAL_SPEC.md`, review comments on renderer tasks -- **Reviews via:** any media-analysis approach (Gemini multimodal, manual - inspection of clip thumbnails, ffprobe summaries) +- **Reviews via:** `video_analyze` (sends full clip to multimodal LLM for + native review), `vision_analyze` for spot-checking frames, ffprobe summaries ## Production roles @@ -247,10 +247,10 @@ specifically on what's off (pacing, sync, brand alignment, technical quality). Distinct from the cinematographer (who reviews visuals during production) and the editor (who reviews for assembly). -- **Toolsets:** kanban, terminal, file +- **Toolsets:** kanban, terminal, file, video, vision - **Skills:** `kanban-worker` -- **External tools:** any media-analysis approach (Gemini multimodal, - ffprobe, manual frame extraction) +- **Review tools:** `video_analyze` (native clip review via multimodal LLM), + `vision_analyze` (frame/thumbnail review), ffprobe - **Outputs:** `review-notes.md`, comments on tasks ### brand-cop diff --git a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md index 5c78c4ff3df..5a52d15ddd0 100644 --- a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md +++ b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md @@ -81,7 +81,16 @@ them directly. | Remotion CLI (`npx remotion render`) | React-based motion graphics | renderer-motion-graphics | | Manim CE (`manim`) | Math animation render (driven by `manim-video` skill's recipes) | renderer-manim | | Blender (`blender -b`) | 3D rendering (alternative to `blender-mcp`) | renderer-3d | -| Gemini multimodal / Claude vision | AI review of clips | reviewer, cinematographer, editor | + +## Built-in Hermes tools for media review + +These are native Hermes tools — not invoked via terminal but through their own +toolsets. Enable them per-profile by adding the toolset to the profile config. + +| Tool | Toolset | What it does | Profile that uses it | +|------|---------|--------------|----------------------| +| `video_analyze` | `video` (opt-in — `hermes tools enable video`) | Native video understanding — sends full clip to a multimodal LLM (Gemini via OpenRouter) for review without frame extraction. Supports mp4, webm, mov, avi, mkv. 50 MB cap. Model: `AUXILIARY_VIDEO_MODEL` env → `AUXILIARY_VISION_MODEL` fallback. | reviewer, cinematographer, editor | +| `vision_analyze` | `vision` (core — enabled by default) | Image/frame analysis — review stills, thumbnails, exported frames. Already available to all profiles without opt-in. | reviewer, cinematographer, concept-artist | ## Standard toolset configurations per role @@ -156,6 +165,8 @@ toolsets: - kanban - terminal - file + - video # video_analyze — review full clips natively + - vision # vision_analyze — review stills / exported frames skills: always_load: - kanban-worker @@ -246,6 +257,8 @@ toolsets: - kanban - terminal - file + - video # video_analyze — editor reviews assembled cuts natively + - vision # vision_analyze — spot-check frames skills: always_load: - kanban-worker @@ -259,14 +272,13 @@ For captioner add Whisper invocation patterns to the SOUL.md. ```yaml toolsets: - kanban - - terminal # for media inspection + - terminal # for media inspection (ffprobe, etc.) - file + - video # video_analyze — review full clips natively + - vision # vision_analyze — review stills / exported frames skills: always_load: - kanban-worker -env_required: - - OPENROUTER_API_KEY # if using Gemini multimodal review - # or ANTHROPIC_API_KEY if using Claude vision (already required globally) ``` ## API key requirements @@ -278,7 +290,7 @@ key is present in `~/.hermes/.env` (or macOS Keychain) before firing the kanban. |---------|---------|---------| | ElevenLabs | `ELEVENLABS_API_KEY` | voice-talent | | OpenAI | `OPENAI_API_KEY` | image-generator (DALL-E), voice-talent (TTS) | -| OpenRouter | `OPENROUTER_API_KEY` | reviewer, cinematographer, editor (Gemini multimodal review) | +| OpenRouter | `OPENROUTER_API_KEY` | reviewer, cinematographer, editor (`video_analyze` routes through `AUXILIARY_VIDEO_MODEL` → OpenRouter) | | FAL | `FAL_KEY` | image-generator (FAL flux models) | | Replicate | `REPLICATE_API_TOKEN` | image-generator (alternate provider) | | Runway | `RUNWAY_API_KEY` | image-to-video-generator | From 1964b0565b96b15ac8435d522de5844aec2261a6 Mon Sep 17 00:00:00 2001 From: Clooooode Date: Sun, 3 May 2026 11:39:40 +1200 Subject: [PATCH 032/171] test(kanban): add failing test for list_profiles_on_disk with custom HERMES_HOME list_profiles_on_disk() hardcodes Path.home() / ".hermes" / "profiles", ignoring HERMES_HOME when set to a custom root (e.g. /opt/data). Add test_list_profiles_on_disk_custom_root to cover this case. Related to #18442, #18985. --- .../test_kanban_core_functionality.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py index 551480ff721..a7896bf940e 100644 --- a/tests/hermes_cli/test_kanban_core_functionality.py +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -902,12 +902,13 @@ def test_list_profiles_on_disk(tmp_path, monkeypatch): """list_profiles_on_disk returns directories under ~/.hermes/profiles/ that contain a config.yaml.""" monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.delenv("HERMES_HOME", raising=False) profiles = tmp_path / ".hermes" / "profiles" profiles.mkdir(parents=True) - (profiles / "researcher").mkdir() - (profiles / "researcher" / "config.yaml").write_text("model: {}\n") - (profiles / "writer").mkdir() - (profiles / "writer" / "config.yaml").write_text("model: {}\n") + for name in ("researcher", "writer"): + d = profiles / name + d.mkdir() + (d / "config.yaml").write_text("model: {}\n") (profiles / "empty_dir").mkdir() # A stray file; should be ignored. (profiles / "stray.txt").write_text("noise") @@ -916,6 +917,20 @@ def test_list_profiles_on_disk(tmp_path, monkeypatch): assert names == ["researcher", "writer"] +def test_list_profiles_on_disk_custom_root(tmp_path, monkeypatch): + """list_profiles_on_disk respects a custom HERMES_HOME root.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + profiles = tmp_path / "profiles" + profiles.mkdir(parents=True) + for name in ("researcher", "writer"): + d = profiles / name + d.mkdir() + (d / "config.yaml").write_text("model: {}\n") + + names = kb.list_profiles_on_disk() + assert names == ["researcher", "writer"] + + def test_known_assignees_merges_disk_and_board(tmp_path, monkeypatch): """known_assignees unions profiles on disk with currently-assigned names, and reports per-status counts.""" From c0300575c19f23aa8ed0ad067bb4faa20aa1b722 Mon Sep 17 00:00:00 2001 From: Clooooode Date: Sun, 3 May 2026 11:39:58 +1200 Subject: [PATCH 033/171] fix(kanban): use get_default_hermes_root() in list_profiles_on_disk Path.home() / ".hermes" / "profiles" breaks custom-root deployments (e.g. HERMES_HOME=/opt/data). Switch to get_default_hermes_root() so profile discovery is consistent with kanban_db_path() and workspaces_root() fixed in #18985. Fixes #19017. Related to #18442, #18985. --- hermes_cli/kanban_db.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index d8e2e861bac..98ee4828d3e 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -2732,7 +2732,8 @@ def list_profiles_on_disk() -> list[str]: ``config.yaml`` — a bare dir without config isn't a real profile. """ try: - home = Path.home() / ".hermes" / "profiles" + from hermes_constants import get_default_hermes_root + home = get_default_hermes_root() / "profiles" except Exception: return [] if not home.is_dir(): From 744079ffe604371774f454ce46779ce0fcd43f0f Mon Sep 17 00:00:00 2001 From: daixin1204 Date: Sun, 3 May 2026 21:02:03 +0800 Subject: [PATCH 034/171] fix(curator): prevent false-positive consolidation from substring matching _classify_removed_skills used naive 'in' substring matching to detect whether a removed skill's name appeared in skill_manage arguments. Short/common skill names (api, git, test, foo, etc.) matched incorrectly when they appeared as substrings of longer words in file paths (references/api-design.md) or content (latest, testing). Replace with field-aware matching: - file_path: needle must match a complete filename stem or directory name, with -/_ normalised for variant tolerance - content fields: word-boundary regex (\b) prevents embedding in longer words Also add 3 regression tests covering the false-positive scenarios. --- agent/curator.py | 41 ++++++++++-- tests/agent/test_curator_classification.py | 75 ++++++++++++++++++++++ 2 files changed, 112 insertions(+), 4 deletions(-) diff --git a/agent/curator.py b/agent/curator.py index 8dee0acbbac..a726e875b69 100644 --- a/agent/curator.py +++ b/agent/curator.py @@ -24,6 +24,7 @@ from __future__ import annotations import json import logging import os +import re import tempfile import threading from datetime import datetime, timedelta, timezone @@ -469,6 +470,24 @@ def _reports_root() -> Path: return root +def _needle_in_path_component(needle: str, path: str) -> bool: + """Check if *needle* is a complete filename stem or directory name in *path*. + + Unlike simple substring matching, this avoids false positives where short + skill names are embedded in longer filenames (e.g. "api" matching + "references/api-design.md"). Hyphens and underscores are normalised so + "open-webui-setup" matches "open_webui_setup.md". + """ + norm_needle = needle.replace("-", "_") + for part in path.replace("\\", "/").split("/"): + if not part: + continue + stem = part.rsplit(".", 1)[0] if "." in part else part + if stem.replace("-", "_") == norm_needle: + return True + return False + + def _classify_removed_skills( removed: List[str], added: List[str], @@ -547,15 +566,29 @@ def _classify_removed_skills( continue # Look for the removed skill's name in file_path / content / raw. - haystacks: List[str] = [] + # Matching strategy differs by field type: + # file_path — needle must be a complete path component + # (filename stem or directory name), so "api" does NOT + # falsely match "references/api-design.md". + # content fields — word-boundary regex so "test" does NOT + # falsely match "latest" or "testing". + haystacks: List[tuple[str, str]] = [] for key in ("file_path", "file_content", "content", "new_string", "_raw"): v = args.get(key) if isinstance(v, str): - haystacks.append(v) + haystacks.append((key, v)) hit = False - for hay in haystacks: + for key, hay in haystacks: for needle in needles: - if needle and needle in hay: + if not needle: + continue + if key == "file_path": + matched = _needle_in_path_component(needle, hay) + else: + matched = bool( + re.search(rf'\b{re.escape(needle)}\b', hay) + ) + if matched: hit = True evidence = ( f"skill_manage action={args.get('action', '?')} " diff --git a/tests/agent/test_curator_classification.py b/tests/agent/test_curator_classification.py index 031d66529bf..625776f5373 100644 --- a/tests/agent/test_curator_classification.py +++ b/tests/agent/test_curator_classification.py @@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env): assert len(result["pruned"]) == 1 +def test_classify_no_false_positive_short_name_in_file_path(curator_env): + """Short skill name that is a substring of another filename = pruned, not consolidated.""" + # e.g. "api" should NOT match "references/api-design.md" + result = curator_env._classify_removed_skills( + removed=["api"], + added=[], + after_names={"conventions"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "write_file", + "name": "conventions", + "file_path": "references/api-design.md", + "file_content": "# API Design\n...", + }), + }, + ], + ) + assert result["consolidated"] == [], ( + f"Short name 'api' should NOT match file_path 'references/api-design.md'" + ) + assert len(result["pruned"]) == 1 + assert result["pruned"][0]["name"] == "api" + + +def test_classify_no_false_positive_short_name_in_content(curator_env): + """Short skill name embedded in longer word in content = pruned, not consolidated.""" + # e.g. "test" should NOT match content "running latest tests" + result = curator_env._classify_removed_skills( + removed=["test"], + added=[], + after_names={"umbrella"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "patch", + "name": "umbrella", + "old_string": "old", + "new_string": "running latest tests with pytest", + }), + }, + ], + ) + assert result["consolidated"] == [], ( + f"Short name 'test' should NOT match 'latest' via word boundary" + ) + assert len(result["pruned"]) == 1 + + +def test_classify_still_matches_exact_word_in_content(curator_env): + """Word-boundary match still works for exact word occurrences.""" + # "api" SHOULD match content "use the api gateway" + result = curator_env._classify_removed_skills( + removed=["api"], + added=[], + after_names={"gateway"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "edit", + "name": "gateway", + "content": "# Gateway\n\nUse the api gateway for all requests.\n", + }), + }, + ], + ) + assert len(result["consolidated"]) == 1, ( + f"'api' should match as a standalone word in content" + ) + assert result["consolidated"][0]["into"] == "gateway" + + def test_report_md_splits_consolidated_and_pruned_sections(curator_env): """End-to-end: REPORT.md shows both sections distinctly.""" curator = curator_env From c90f25dd1f86f3fb015a4c8c63645a61db5ac8b5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 3 May 2026 17:09:33 -0700 Subject: [PATCH 035/171] chore(release): map daixin1204@gmail.com to @SimbaKingjoe --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index b0f925d4133..9bc2e8c4475 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -68,6 +68,7 @@ AUTHOR_MAP = { "274096618+hermes-agent-dhabibi@users.noreply.github.com": "dhabibi", "dejie.guo@gmail.com": "JayGwod", "133716830+0xKingBack@users.noreply.github.com": "0xKingBack", + "daixin1204@gmail.com": "SimbaKingjoe", "maxence@groine.fr": "MaxyMoos", "61830395+leprincep35700@users.noreply.github.com": "leprincep35700", # OpenViking viking_read salvage (April 2026) From 3072e5543ba01c23358772db5fe1a2e770ee108a Mon Sep 17 00:00:00 2001 From: dh Date: Wed, 15 Apr 2026 07:44:28 +0800 Subject: [PATCH 036/171] skills-hub: hash binary skill bundle files correctly --- tests/tools/test_skills_hub.py | 16 ++++++++++++++++ tools/skills_hub.py | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py index 8e3453c04d8..40143adc845 100644 --- a/tests/tools/test_skills_hub.py +++ b/tests/tools/test_skills_hub.py @@ -901,6 +901,22 @@ class TestCheckForSkillUpdates: assert bundle_content_hash(bundle) == content_hash(skill_dir) + def test_bundle_content_hash_accepts_binary_files(self): + bundle = SkillBundle( + name="demo-binary-skill", + files={ + "SKILL.md": "# Demo\n", + "assets/logo.png": b"\x89PNG\r\n\x1a\nbinary", + }, + source="github", + identifier="owner/repo/demo-binary-skill", + trust_level="community", + ) + + digest = bundle_content_hash(bundle) + + assert digest.startswith("sha256:") + def test_reports_update_when_remote_hash_differs(self): lock = MagicMock() lock.list_installed.return_value = [{ diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 0ce1d9b34e3..aaeabd2c289 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -2801,7 +2801,11 @@ def bundle_content_hash(bundle: SkillBundle) -> str: """Compute a deterministic hash for an in-memory skill bundle.""" h = hashlib.sha256() for rel_path in sorted(bundle.files): - h.update(bundle.files[rel_path].encode("utf-8")) + content = bundle.files[rel_path] + if isinstance(content, bytes): + h.update(content) + else: + h.update(content.encode("utf-8")) return f"sha256:{h.hexdigest()[:16]}" From 91ea3ae4b2e5214390f0865329648f9cffbbc8bc Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 3 May 2026 12:10:20 -0700 Subject: [PATCH 037/171] test(skills): add bytes-vs-str equivalence and on-disk hash parity tests Follow-up on #9925 cherry-pick adding two additional tests: - bytes content hashes identically to its str-decoded form - mixed bytes+str bundle hash equals the on-disk content_hash from skills_guard (the production invariant used to detect drift) Also map dodofun@126.com and 1615063567@qq.com in AUTHOR_MAP so the CI contributor check passes for the cherry-picked commit. Co-authored-by: LeonSGP43 Co-authored-by: zhao0112 <1615063567@qq.com> --- scripts/release.py | 2 ++ tests/tools/test_skills_hub.py | 47 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 9bc2e8c4475..d7fab88b03e 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -633,6 +633,8 @@ AUTHOR_MAP = { "cirwel@The-CIRWEL-Group.local": "CIRWEL", "molvikar8@gmail.com": "molvikar", "nftpoetrist@gmail.com": "nftpoetrist", + "dodofun@126.com": "colorcross", + "1615063567@qq.com": "zhao0112", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py index 40143adc845..19692724112 100644 --- a/tests/tools/test_skills_hub.py +++ b/tests/tools/test_skills_hub.py @@ -917,6 +917,53 @@ class TestCheckForSkillUpdates: assert digest.startswith("sha256:") + def test_bundle_content_hash_bytes_matches_str_equivalent(self): + """Bytes content must hash identically to its str-decoded form.""" + text_bundle = SkillBundle( + name="demo-skill", + files={ + "SKILL.md": "same content", + "references/checklist.md": "- [ ] security\n", + }, + source="github", + identifier="owner/repo/demo-skill", + trust_level="community", + ) + bytes_bundle = SkillBundle( + name="demo-skill", + files={ + "SKILL.md": b"same content", + "references/checklist.md": b"- [ ] security\n", + }, + source="github", + identifier="owner/repo/demo-skill", + trust_level="community", + ) + + assert bundle_content_hash(bytes_bundle) == bundle_content_hash(text_bundle) + + def test_bundle_content_hash_mixed_matches_on_disk(self, tmp_path): + """In-memory bundle hash must equal on-disk content_hash for mixed bytes+str.""" + from tools.skills_guard import content_hash + + bundle = SkillBundle( + name="demo-skill", + files={ + "SKILL.md": b"# Demo Skill\n", + "references/checklist.md": "- [ ] security\n", + }, + source="github", + identifier="owner/repo/demo-skill", + trust_level="community", + ) + skill_dir = tmp_path / "demo-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_bytes(b"# Demo Skill\n") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "checklist.md").write_text("- [ ] security\n") + + assert bundle_content_hash(bundle) == content_hash(skill_dir) + def test_reports_update_when_remote_hash_differs(self): lock = MagicMock() lock.list_installed.return_value = [{ From 78b635ee3c1d489c8dfe7e01119b774edd80e50b Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 3 May 2026 03:59:16 +0000 Subject: [PATCH 038/171] fix(cron): recover null next_run_at jobs and tolerate non-dict origin Fixes #18722 get_due_jobs() now recomputes next_run_at via compute_next_run() for cron/interval jobs that arrived with null next_run_at (e.g. via direct jobs.json edits) instead of silently skipping them. _resolve_origin() guards with isinstance(origin, dict), and _deliver_result() now routes through _resolve_origin() so string/non-dict origins no longer crash the ticker. References: references #18735 (open competing fix from automated bulk PR touching 79 files); this PR is a focused single-issue contribution and adds the missing interval-recovery test variant Co-Authored-By: Claude --- cron/jobs.py | 21 +++++++++++++++++++-- cron/scheduler.py | 2 +- scripts/release.py | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/cron/jobs.py b/cron/jobs.py index 2f572c6acbd..5e493ae3f7a 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -797,19 +797,36 @@ def get_due_jobs() -> List[Dict[str, Any]]: next_run = job.get("next_run_at") if not next_run: + schedule = job.get("schedule", {}) + kind = schedule.get("kind") + + # One-shot jobs use a small grace window via the dedicated helper. recovered_next = _recoverable_oneshot_run_at( - job.get("schedule", {}), + schedule, now, last_run_at=job.get("last_run_at"), ) + recovery_kind = "one-shot" if recovered_next else None + + # Recurring jobs reach here only when something — typically a + # direct jobs.json edit that bypassed add_job() — left + # next_run_at unset. Without this branch, such jobs are + # silently skipped forever; recompute next_run_at from the + # schedule so they pick up at their next scheduled tick. + if not recovered_next and kind in ("cron", "interval"): + recovered_next = compute_next_run(schedule, now.isoformat()) + if recovered_next: + recovery_kind = kind + if not recovered_next: continue job["next_run_at"] = recovered_next next_run = recovered_next logger.info( - "Job '%s' had no next_run_at; recovering one-shot run at %s", + "Job '%s' had no next_run_at; recovering %s run at %s", job.get("name", job["id"]), + recovery_kind, recovered_next, ) for rj in raw_jobs: diff --git a/cron/scheduler.py b/cron/scheduler.py index f8aaf6e3ca2..0be6d362392 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -417,7 +417,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option thread_id = target.get("thread_id") # Diagnostic: log thread_id for topic-aware delivery debugging - origin = job.get("origin") or {} + origin = _resolve_origin(job) or {} origin_thread = origin.get("thread_id") if origin_thread and not thread_id: logger.warning( diff --git a/scripts/release.py b/scripts/release.py index d7fab88b03e..17a48e8cff7 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -635,6 +635,7 @@ AUTHOR_MAP = { "nftpoetrist@gmail.com": "nftpoetrist", "dodofun@126.com": "colorcross", "1615063567@qq.com": "zhao0112", + "ethanguo.2003@gmail.com": "EthanGuo-coder", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From 645b99aadd11e858e7b01aafc79c26030133a55c Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 3 May 2026 04:02:33 +0000 Subject: [PATCH 039/171] test(cron): cover null next_run_at recovery and non-dict origin tolerance Adds four regression tests guarding the bugfix in the previous commit: - TestGetDueJobs::test_broken_cron_without_next_run_is_recovered exercises cron schedules whose next_run_at was lost; expects compute_next_run to repopulate it within get_due_jobs() rather than silently skipping the job. - TestGetDueJobs::test_broken_interval_without_next_run_is_recovered does the same for interval schedules. - TestResolveOrigin::test_string_origin_is_tolerated and test_non_dict_origin_is_tolerated confirm _resolve_origin() returns None for legacy/hand-edited origins (string, list, int) instead of raising. Co-Authored-By: Claude --- tests/cron/test_jobs.py | 68 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index 30bd6b41d54..b9d34e1a5c6 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -647,6 +647,74 @@ class TestGetDueJobs: assert get_due_jobs() == [] assert get_job("oneshot-stale")["next_run_at"] is None + def test_broken_cron_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch): + now = datetime(2026, 3, 18, 10, 0, 0, tzinfo=timezone.utc) + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + save_jobs( + [{ + "id": "cron-recover", + "name": "AI Daily Digest", + "prompt": "...", + "schedule": {"kind": "cron", "expr": "0 12 * * *", "display": "0 12 * * *"}, + "schedule_display": "0 12 * * *", + "repeat": {"times": None, "completed": 0}, + "enabled": True, + "state": "scheduled", + "paused_at": None, + "paused_reason": None, + "created_at": "2026-03-18T09:00:00+00:00", + "next_run_at": None, + "last_run_at": None, + "last_status": None, + "last_error": None, + "deliver": "local", + "origin": None, + }] + ) + + assert get_due_jobs() == [] + recovered = get_job("cron-recover")["next_run_at"] + assert recovered is not None + recovered_dt = datetime.fromisoformat(recovered) + if recovered_dt.tzinfo is None: + recovered_dt = recovered_dt.replace(tzinfo=timezone.utc) + assert recovered_dt > now + + def test_broken_interval_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch): + now = datetime(2026, 3, 18, 10, 0, 0, tzinfo=timezone.utc) + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + save_jobs( + [{ + "id": "interval-recover", + "name": "Hourly heartbeat", + "prompt": "...", + "schedule": {"kind": "interval", "minutes": 60, "display": "every 60m"}, + "schedule_display": "every 1h", + "repeat": {"times": None, "completed": 0}, + "enabled": True, + "state": "scheduled", + "paused_at": None, + "paused_reason": None, + "created_at": "2026-03-18T09:00:00+00:00", + "next_run_at": None, + "last_run_at": None, + "last_status": None, + "last_error": None, + "deliver": "local", + "origin": None, + }] + ) + + assert get_due_jobs() == [] + recovered = get_job("interval-recover")["next_run_at"] + assert recovered is not None + recovered_dt = datetime.fromisoformat(recovered) + if recovered_dt.tzinfo is None: + recovered_dt = recovered_dt.replace(tzinfo=timezone.utc) + assert recovered_dt > now + class TestEnabledToolsets: def test_enabled_toolsets_stored(self, tmp_cron_dir): From 314fe9f82791b0e0cbbe32f98bf068f1f84e4cdb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 01:33:54 -0700 Subject: [PATCH 040/171] chore(release): add AUTHOR_MAP entries for upcoming salvage batch Pre-adds author-email mappings for the 21 Tier 1b salvage PRs so their cherry-picked commits land with mapped GitHub logins in the release notes. --- scripts/release.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 17a48e8cff7..72abbd7ed72 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -636,6 +636,20 @@ AUTHOR_MAP = { "dodofun@126.com": "colorcross", "1615063567@qq.com": "zhao0112", "ethanguo.2003@gmail.com": "EthanGuo-coder", + "dev0jsh@gmail.com": "tmdgusya", + "leavr@163.com": "leavrcn", + "17683456+wanazhar@users.noreply.github.com": "wanazhar", + "26782336+cixuuz@users.noreply.github.com": "cixuuz", + "aleksandr.pasevin@openzeppelin.com": "pasevin", + "ubuntu@localhost.localdomain": "holynn-q", + "holynn@placeholder.local": "holynn-q", + "agent@hermes.local": "jacdevos", + "sunsky.lau@gmail.com": "liuhao1024", + "qiuqfang98@qq.com": "keepcalmqqf", + "261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026", + "yanzh.su@gmail.com": "YanzhongSu", + "wanderwang@users.noreply.github.com": "WanderWang", + "yueheime@gmail.com": "yuehei", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From a1cb811cb8cfca1d3ae902bae87a9cd7c696a0ad Mon Sep 17 00:00:00 2001 From: tmdgusya Date: Sun, 3 May 2026 14:12:28 +0900 Subject: [PATCH 041/171] fix(cli): avoid voice TTS restart race --- cli.py | 17 ++++++++++++----- tests/tools/test_voice_cli_integration.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/cli.py b/cli.py index 472218271fc..98370b83838 100644 --- a/cli.py +++ b/cli.py @@ -8375,6 +8375,17 @@ class HermesCLI: _cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}") threading.Thread(target=_restart_recording, daemon=True).start() + def _voice_speak_response_async(self, text: str) -> None: + """Schedule TTS and mark it pending before continuous recording can restart.""" + if not self._voice_tts or not text: + return + self._voice_tts_done.clear() + threading.Thread( + target=self._voice_speak_response, + args=(text,), + daemon=True, + ).start() + def _voice_speak_response(self, text: str): """Speak the agent's response aloud using TTS (runs in background thread).""" if not self._voice_tts: @@ -9535,11 +9546,7 @@ class HermesCLI: # Speak response aloud if voice TTS is enabled # Skip batch TTS when streaming TTS already handled it if self._voice_tts and response and not use_streaming_tts: - threading.Thread( - target=self._voice_speak_response, - args=(response,), - daemon=True, - ).start() + self._voice_speak_response_async(response) # Re-queue the interrupt message (and any that arrived while we were diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index e7d8811e02f..93dffa649a7 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -1040,6 +1040,25 @@ class TestDisableVoiceModeReal: class TestVoiceSpeakResponseReal: """Tests _voice_speak_response with real CLI instance.""" + def test_async_scheduling_clears_done_before_thread_start(self): + cli = _make_voice_cli(_voice_tts=True) + starts = [] + + class FakeThread: + def __init__(self, target=None, args=(), daemon=None): + self.target = target + self.args = args + self.daemon = daemon + + def start(self): + starts.append(cli._voice_tts_done.is_set()) + + with patch("cli.threading.Thread", FakeThread): + cli._voice_speak_response_async("Hello") + + assert starts == [False] + assert not cli._voice_tts_done.is_set() + @patch("cli._cprint") def test_early_return_when_tts_off(self, _cp): cli = _make_voice_cli(_voice_tts=False) From ccb5d87076b598c14691b55fb6f25168ca72cc3c Mon Sep 17 00:00:00 2001 From: leavr Date: Sun, 3 May 2026 14:05:03 +0800 Subject: [PATCH 042/171] test: cover max-iterations summary message sanitization --- tests/run_agent/test_run_agent.py | 49 +++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 55ce86e51af..a0a11b15306 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2181,6 +2181,55 @@ class TestHandleMaxIterations: kwargs = agent.client.chat.completions.create.call_args.kwargs assert "reasoning" not in kwargs.get("extra_body", {}) + def test_summary_request_removes_orphan_tool_result(self, agent): + """Regression: max-iterations summary request must NOT contain + orphan tool results (tool_call_id with no matching assistant tool_call).""" + resp = _mock_response(content="Summary of work done.") + agent.client.chat.completions.create.return_value = resp + agent._cached_system_prompt = "You are helpful." + messages = [ + {"role": "user", "content": "Analyze finance-data-router"}, + {"role": "assistant", "content": "[Session Arc Summary] ..."}, + {"role": "tool", "tool_call_id": "call_cfedFhJjGmu1RvRc1OUC38j8", "content": "file content here"}, + {"role": "assistant", "tool_calls": [{"id": "call_8fXBXsT592Vpvm7wnW4obPEu", "function": {"name": "patch", "arguments": "{}"}}]}, + {"role": "tool", "tool_call_id": "call_8fXBXsT592Vpvm7wnW4obPEu", "content": "patch result"}, + {"role": "assistant", "content": "Done."}, + ] + + result = agent._handle_max_iterations(messages, 120) + + assert result == "Summary of work done." + kwargs = agent.client.chat.completions.create.call_args.kwargs + sent_msgs = kwargs.get("messages", []) + orphan_ids = [ + m.get("tool_call_id") for m in sent_msgs + if m.get("role") == "tool" and m.get("tool_call_id") == "call_cfedFhJjGmu1RvRc1OUC38j8" + ] + assert len(orphan_ids) == 0, f"Orphan tool result still present: {orphan_ids}" + + def test_summary_request_inserts_stub_for_missing_tool_result(self, agent): + """If an assistant tool_call has no matching tool result in the + summary request, a stub must be inserted to satisfy the API contract.""" + resp = _mock_response(content="Summary") + agent.client.chat.completions.create.return_value = resp + agent._cached_system_prompt = "You are helpful." + messages = [ + {"role": "user", "content": "do stuff"}, + {"role": "assistant", "tool_calls": [{"id": "call_no_result", "function": {"name": "terminal", "arguments": "{}"}}]}, + {"role": "assistant", "content": "Continuing..."}, + ] + + result = agent._handle_max_iterations(messages, 60) + + assert result == "Summary" + kwargs = agent.client.chat.completions.create.call_args.kwargs + sent_msgs = kwargs.get("messages", []) + stub_ids = [ + m.get("tool_call_id") for m in sent_msgs + if m.get("role") == "tool" and m.get("tool_call_id") == "call_no_result" + ] + assert len(stub_ids) >= 1, f"No stub result for assistant tool_call: {stub_ids}" + def test_codex_summary_sanitizes_orphan_tool_results(self, agent): agent.api_mode = "codex_responses" agent.provider = "openai-codex" From df88375f0d8f29f75327cdbaa11670085b5711ca Mon Sep 17 00:00:00 2001 From: wanazhar <17683456+wanazhar@users.noreply.github.com> Date: Sun, 3 May 2026 06:22:47 +0000 Subject: [PATCH 043/171] fix: treat ctrl-c as curses cancel --- hermes_cli/curses_ui.py | 6 ++++++ tests/hermes_cli/test_plugins_cmd.py | 10 +++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/hermes_cli/curses_ui.py b/hermes_cli/curses_ui.py index b05295f1e61..01d759d3872 100644 --- a/hermes_cli/curses_ui.py +++ b/hermes_cli/curses_ui.py @@ -156,6 +156,8 @@ def curses_checklist( flush_stdin() return result_holder[0] if result_holder[0] is not None else cancel_returns + except KeyboardInterrupt: + return cancel_returns except Exception: return _numbered_fallback(title, items, selected, cancel_returns, status_fn) @@ -278,6 +280,8 @@ def curses_radiolist( flush_stdin() return result_holder[0] if result_holder[0] is not None else cancel_returns + except KeyboardInterrupt: + return cancel_returns except Exception: return _radio_numbered_fallback(title, items, selected, cancel_returns) @@ -401,6 +405,8 @@ def curses_single_select( return None return result_holder[0] + except KeyboardInterrupt: + return None except Exception: all_items = list(items) + [cancel_label] cancel_idx = len(items) diff --git a/tests/hermes_cli/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py index 72b9bdde2c1..11231350e10 100644 --- a/tests/hermes_cli/test_plugins_cmd.py +++ b/tests/hermes_cli/test_plugins_cmd.py @@ -508,7 +508,7 @@ class TestPromptPluginEnvVars: class TestCursesRadiolist: - """Test the curses_radiolist function (non-TTY fallback path).""" + """Test the curses_radiolist function.""" def test_non_tty_returns_default(self): from hermes_cli.curses_ui import curses_radiolist @@ -524,6 +524,14 @@ class TestCursesRadiolist: result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=1) assert result == 1 + def test_keyboard_interrupt_returns_cancel_value(self): + from hermes_cli.curses_ui import curses_radiolist + + with patch("sys.stdin") as mock_stdin, patch("curses.wrapper", side_effect=KeyboardInterrupt): + mock_stdin.isatty.return_value = True + result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=-1) + assert result == -1 + # ── Provider discovery helpers ─────────────────────────────────────────── From 81ce945450ac46480980a613b886c6e61e34149d Mon Sep 17 00:00:00 2001 From: Byrn Tong <26782336+cixuuz@users.noreply.github.com> Date: Sun, 3 May 2026 07:08:02 +0000 Subject: [PATCH 044/171] fix(gateway): show other profiles in `gateway status` to prevent confusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple gateway profiles are running (e.g. default and wx1), `hermes gateway status` can be misleading — stopping one profile's gateway and checking status may still show the other profile's process without indicating which profile it belongs to. Add `_print_other_profiles_gateway_status()` which displays running gateways from other profiles at the bottom of the status output: Other profiles: ✓ wx1 — PID 166893 This uses the existing `find_profile_gateway_processes()` and `get_active_profile_name()` — no new dependencies. Closes #19113 Related: #4402, #4587 --- hermes_cli/gateway.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index af40444922e..39fc7476bc0 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -690,6 +690,32 @@ def _print_gateway_process_mismatch(snapshot: GatewayRuntimeSnapshot) -> None: print(" can refuse to start another copy until this process stops.") +def _print_other_profiles_gateway_status() -> None: + """Print a summary of gateway status across all profiles. + + Shown at the bottom of ``hermes gateway status`` output so users with + multiple profiles can tell at a glance which gateways are running and + avoid confusing another profile's process with the current one. + """ + try: + from hermes_cli.profiles import get_active_profile_name + + current = get_active_profile_name() + other_processes = [ + p for p in find_profile_gateway_processes() + if p.profile != current + ] + if not other_processes: + return + + print() + print("Other profiles:") + for proc in other_processes: + print(f" ✓ {proc.profile:<16s} — PID {proc.pid}") + except Exception: + pass + + def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None, all_profiles: bool = False) -> int: """Kill any running gateway processes. Returns count killed. @@ -4456,6 +4482,9 @@ def _gateway_command_inner(args): print(" hermes gateway install # Install as user service") print(" sudo hermes gateway install --system # Install as boot-time system service") + # Show other profiles' gateway status for multi-profile awareness + _print_other_profiles_gateway_status() + elif subcmd == "migrate-legacy": # Stop, disable, and remove legacy Hermes gateway unit files from # pre-rename installs (e.g. hermes.service). Profile units and From e89376d66ff2f72caf069cdddd65c161bb4f7540 Mon Sep 17 00:00:00 2001 From: nftpoetrist Date: Sun, 3 May 2026 11:05:20 +0300 Subject: [PATCH 045/171] fix(setup): add missing SLACK_HOME_CHANNEL prompt to _setup_slack() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _setup_slack() was the only platform setup function that did not prompt for a home channel. All four sibling setups (_setup_telegram, _setup_discord, _setup_mattermost, _setup_bluebubbles) close with an identical home-channel block, and setup_gateway() already checks for SLACK_HOME_CHANNEL presence at the end of the wizard — but the value was never collected, leaving cron delivery and cross-platform notifications silently broken for Slack after a fresh hermes setup run. Add the standard home-channel prompt at the end of _setup_slack(), symmetric with the Discord implementation. Add two unit tests that verify the prompt is saved when provided and skipped when left blank. --- hermes_cli/setup.py | 10 ++++++++++ tests/hermes_cli/test_setup.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 9ca29968fd7..6b51f810594 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2047,6 +2047,16 @@ def _setup_slack(): print_warning("⚠️ No Slack allowlist set - unpaired users will be denied by default.") print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.") + print() + print_info("📬 Home Channel: where Hermes delivers cron job results,") + print_info(" cross-platform messages, and notifications.") + print_info(" To get a channel ID: open the channel in Slack, then right-click") + print_info(" the channel name → Copy link — the ID starts with C (e.g. C01ABC2DE3F).") + print_info(" You can also set this later by typing /set-home in a Slack channel.") + home_channel = prompt("Home channel ID (leave empty to set later with /set-home)") + if home_channel: + save_env_value("SLACK_HOME_CHANNEL", home_channel.strip()) + def _write_slack_manifest_and_instruct(): """Generate the Slack manifest, write it under HERMES_HOME, and print diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index 72adc27c0c2..f7b491ddf31 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -613,3 +613,35 @@ def test_offer_launch_chat_falls_back_to_module(monkeypatch): setup_mod._offer_launch_chat() assert exec_calls == [(sys.executable, [sys.executable, "-m", "hermes_cli.main", "chat"])] + + +def test_setup_slack_saves_home_channel(monkeypatch): + """_setup_slack() saves SLACK_HOME_CHANNEL when the user provides one.""" + saved = {} + prompts = iter(["xoxb-test-token", "xapp-test-token", "", "C01ABC2DE3F"]) + + monkeypatch.setattr(setup_mod, "get_env_value", lambda key: "") + monkeypatch.setattr(setup_mod, "save_env_value", lambda k, v: saved.update({k: v})) + monkeypatch.setattr(setup_mod, "prompt", lambda *_a, **_kw: next(prompts)) + monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *_a, **_kw: False) + monkeypatch.setattr(setup_mod, "_write_slack_manifest_and_instruct", lambda: None) + + setup_mod._setup_slack() + + assert saved.get("SLACK_HOME_CHANNEL") == "C01ABC2DE3F" + + +def test_setup_slack_home_channel_empty_not_saved(monkeypatch): + """_setup_slack() does not save SLACK_HOME_CHANNEL when left blank.""" + saved = {} + prompts = iter(["xoxb-test-token", "xapp-test-token", "", ""]) + + monkeypatch.setattr(setup_mod, "get_env_value", lambda key: "") + monkeypatch.setattr(setup_mod, "save_env_value", lambda k, v: saved.update({k: v})) + monkeypatch.setattr(setup_mod, "prompt", lambda *_a, **_kw: next(prompts)) + monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *_a, **_kw: False) + monkeypatch.setattr(setup_mod, "_write_slack_manifest_and_instruct", lambda: None) + + setup_mod._setup_slack() + + assert "SLACK_HOME_CHANNEL" not in saved From 8a4fe80f8df35fdd70ecf81284cbcd6940f2157c Mon Sep 17 00:00:00 2001 From: Aleksandr Pasevin Date: Sun, 3 May 2026 08:15:42 +0000 Subject: [PATCH 046/171] fix(signal): skip reactions for unauthorized senders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The on_processing_start hook fired a reaction emoji (👀) on every inbound Signal message before run.py's _is_user_authorized check. This meant contacts not in SIGNAL_ALLOWED_USERS would see the bot react to their messages even though Hermes silently dropped them — leaking the presence of the bot and causing confusing UX. Two changes to gateway/platforms/signal.py: 1. Read SIGNAL_ALLOWED_USERS into self.dm_allow_from in __init__ (mirrors the group_allow_from pattern already in place). 2. Add _reactions_enabled(event) — two-gate check: - SIGNAL_REACTIONS=false/0/no disables reactions globally - If SIGNAL_ALLOWED_USERS is set, only react to senders in the allowlist (skips unauthorized contacts) Both on_processing_start and on_processing_complete now call this guard before sending any reaction. Telegram already has an equivalent _reactions_enabled() guard (controlled by TELEGRAM_REACTIONS). This brings Signal to parity. --- gateway/platforms/signal.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 77d3c18cb61..a0053317f7e 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -192,6 +192,15 @@ class SignalAdapter(BasePlatformAdapter): group_allowed_str = os.getenv("SIGNAL_GROUP_ALLOWED_USERS", "") self.group_allow_from = set(_parse_comma_list(group_allowed_str)) + # DM allowlist — mirrors SIGNAL_ALLOWED_USERS checked by run.py. + # Stored here so the reaction hooks can skip unauthorized senders + # (reactions fire before run.py's auth gate, so without this check + # every inbound DM from any contact gets a 👀 reaction). + # "*" means all users allowed (open mode); empty means no restriction + # recorded at adapter level (run.py still enforces auth separately). + dm_allowed_str = os.getenv("SIGNAL_ALLOWED_USERS", "*") + self.dm_allow_from = set(_parse_comma_list(dm_allowed_str)) + # HTTP client self.client: Optional[httpx.AsyncClient] = None @@ -1430,8 +1439,28 @@ class SignalAdapter(BasePlatformAdapter): return None return (author, ts) + def _reactions_enabled(self, event: "MessageEvent" = None) -> bool: + """Check if message reactions are enabled for this event. + + Two gates: + 1. SIGNAL_REACTIONS env var — set to false/0/no to disable globally. + 2. DM allowlist — if SIGNAL_ALLOWED_USERS is set, only react to + messages from senders in that list. This prevents unauthorized + contacts from seeing the 👀 reaction (which fires before run.py's + auth gate and would otherwise reveal that a bot is listening). + """ + if os.getenv("SIGNAL_REACTIONS", "true").lower() in ("false", "0", "no"): + return False + if event is not None: + sender = getattr(getattr(event, "source", None), "user_id", None) + if sender and "*" not in self.dm_allow_from and sender not in self.dm_allow_from: + return False + return True + async def on_processing_start(self, event: MessageEvent) -> None: """React with 👀 when processing begins.""" + if not self._reactions_enabled(event): + return target = self._extract_reaction_target(event) if target: await self.send_reaction(event.source.chat_id, "👀", *target) @@ -1442,6 +1471,8 @@ class SignalAdapter(BasePlatformAdapter): On CANCELLED we leave the 👀 in place — no terminal outcome means the reaction should keep reflecting "in progress" (matches Telegram). """ + if not self._reactions_enabled(event): + return if outcome == ProcessingOutcome.CANCELLED: return target = self._extract_reaction_target(event) From e8cdcf532882edb575f3dca4b0e53adf5c416b69 Mon Sep 17 00:00:00 2001 From: Byrn Tong <26782336+cixuuz@users.noreply.github.com> Date: Sun, 3 May 2026 09:08:01 +0000 Subject: [PATCH 047/171] fix: exclude ancestor PIDs from gateway process scan (#13242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _scan_gateway_pids() uses ps-based pattern matching to find running gateways. When invoked from the CLI (e.g. `hermes gateway status`), the calling process itself matches gateway patterns, causing false positives — the CLI is mistakenly counted as a running gateway. Add _get_ancestor_pids() that walks the process tree from the current PID up to init (PID 1). Merge this set into exclude_pids at the top of _scan_gateway_pids() so the entire ancestor chain is filtered out. This complements the existing os.getpid() exclusion in _append_unique_pid() by also covering parent/grandparent processes (e.g. when hermes is invoked via a wrapper script or shell). Closes #13242 --- hermes_cli/gateway.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 39fc7476bc0..c7abea5bad4 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -237,6 +237,26 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool: return False +def _get_ancestor_pids() -> set[int]: + """Return the set of PIDs in the current process's ancestor chain. + + Walks from the current PID up to PID 1 (init) so that process-table scans + never match the calling CLI process or any of its parents. This prevents + ``hermes gateway status`` from falsely counting the ``hermes`` CLI that + invoked it as a running gateway instance (see #13242). + """ + ancestors: set[int] = set() + pid = os.getpid() + # Cap iterations to avoid infinite loops on exotic platforms. + for _ in range(64): + ancestors.add(pid) + parent = _get_parent_pid(pid) + if parent is None or parent <= 0 or parent in ancestors: + break + pid = parent + return ancestors + + def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None: if pid is None or pid <= 0: return @@ -252,6 +272,10 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li a live gateway when the PID file is stale/missing, and ``--all`` sweeps can discover gateways outside the current profile. """ + # Exclude the entire ancestor chain so the CLI process that invoked this + # scan (e.g. ``hermes gateway status``) is never mistaken for a running + # gateway. See #13242. + exclude_pids = exclude_pids | _get_ancestor_pids() pids: list[int] = [] patterns = [ "hermes_cli.main gateway", From c8575925589dac37dcfbe7e92ac87d07c3e9c3f1 Mon Sep 17 00:00:00 2001 From: holynn Date: Sun, 3 May 2026 20:19:49 +0800 Subject: [PATCH 048/171] fix(cli): allow custom:* provider slugs in model validation Two related fixes for custom_providers model switching: 1. validate_requested_model() now recognizes custom: slugs (e.g. custom:volcengine) as custom endpoints, not generic providers. Previously only the bare 'custom' slug matched the relaxed validation branch, causing model validation to fail with 'not found in provider listing' for all named custom providers. 2. switch_model() now consults the custom_providers list when deciding whether to override a validation rejection. If the requested model matches the entry's 'model' field or any key in its 'models' dict, the switch is accepted even when the remote /v1/models endpoint does not list it. Both changes are covered by existing tests (86 passed). --- hermes_cli/model_switch.py | 20 ++++++++++++++++++++ hermes_cli/models.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 4c323145da6..61693b73947 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -904,6 +904,26 @@ def switch_model( if any(m.get("name") == new_model for m in cfg_models if isinstance(m, dict)): override = True break + # Also check custom_providers list — models declared there should be accepted + # even if the remote /v1/models endpoint doesn't list them. + if not override and custom_providers and isinstance(custom_providers, list): + for entry in custom_providers: + if not isinstance(entry, dict): + continue + # Match by provider slug (custom:) or by base_url + entry_name = entry.get("name", "") + entry_slug = f"custom:{entry_name}" if entry_name else "" + entry_url = entry.get("base_url", "") + if entry_slug == target_provider or entry_url == base_url: + # Check if the requested model matches the entry's model + entry_model = entry.get("model", "") + entry_models = entry.get("models", {}) + if new_model == entry_model: + override = True + break + if isinstance(entry_models, dict) and new_model in entry_models: + override = True + break if override: validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")} else: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 755bac72e3f..d7bae9ab09e 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -3087,7 +3087,7 @@ def validate_requested_model( "message": f"Model `{requested}` was not found in LM Studio's model listing.", } - if normalized == "custom": + if normalized == "custom" or normalized.startswith("custom:"): # Try probing with correct auth for the api_mode. if api_mode == "anthropic_messages": probe = probe_api_models(api_key, base_url, api_mode=api_mode) From 74c997d9851581f169b35e33a633255f40bcbc6b Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sun, 3 May 2026 22:02:24 +1000 Subject: [PATCH 049/171] fix(gateway): move quick-command dispatch before built-in handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quick commands of type "alias" that target built-in slash commands (e.g. /h -> /model) were processed too late in _handle_message — after the if-canonical=="model" checks. This meant alias expansion never reached the target handler and fell through to the LLM as raw text. Two fixes: 1. Move the quick_commands block before built-in dispatch so alias targets (like /model) hit the correct handler after expansion. 2. Extract bare command name from target_command via .split()[0] to feed _resolve_cmd() correctly (was using the full arg-string). --- gateway/run.py | 24 +++++++++++++++++++++++- tests/e2e/test_platform_commands.py | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/gateway/run.py b/gateway/run.py index d4f2ba8d253..f023b0d3497 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5138,6 +5138,28 @@ class GatewayRunner: _cmd_def = _resolve_cmd(command) if command else None canonical = _cmd_def.name if _cmd_def else command + # Expand alias quick commands before built-in dispatch so targets like + # /model openai/gpt-5.5 --provider openrouter reach the /model handler. + # Preserve built-in precedence; aliases only need early handling when + # the typed command is not already known. + if command and _cmd_def is None: + if isinstance(self.config, dict): + quick_commands = self.config.get("quick_commands", {}) or {} + else: + quick_commands = getattr(self.config, "quick_commands", {}) or {} + if isinstance(quick_commands, dict) and command in quick_commands: + qcmd = quick_commands[command] + if qcmd.get("type") == "alias": + target = qcmd.get("target", "").strip() + if target: + target = target if target.startswith("/") else f"/{target}" + target_command = target.lstrip("/") + user_args = event.get_command_args().strip() + event.text = f"{target} {user_args}".strip() + command = target_command.split()[0] if target_command else target_command + _cmd_def = _resolve_cmd(command) if command else None + canonical = _cmd_def.name if _cmd_def else command + # Fire the ``command:`` hook for any recognized slash # command — built-in OR plugin-registered. Handlers can return a # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}`` @@ -5351,7 +5373,7 @@ class GatewayRunner: target_command = target.lstrip("/") user_args = event.get_command_args().strip() event.text = f"{target} {user_args}".strip() - command = target_command + command = target_command.split()[0] if target_command else target_command # Fall through to normal command dispatch below else: return f"Quick command '/{command}' has no target defined." diff --git a/tests/e2e/test_platform_commands.py b/tests/e2e/test_platform_commands.py index b891ea7372d..4924eed6a9e 100644 --- a/tests/e2e/test_platform_commands.py +++ b/tests/e2e/test_platform_commands.py @@ -138,6 +138,29 @@ class TestSlashCommands: response_text = send.call_args[1].get("content") or send.call_args[0][1] assert "compress" in response_text.lower() or "context" in response_text.lower() + @pytest.mark.asyncio + async def test_quick_command_alias_targets_builtin_command_with_args( + self, adapter, runner, platform + ): + """Alias targets with args must reach the built-in command handler.""" + runner.config.quick_commands = { + "s": {"type": "alias", "target": "/status extra-arg"} + } + async def _handle_status(event): + assert event.get_command_args() == "extra-arg" + return "status via alias" + + runner._handle_status_command = AsyncMock(side_effect=_handle_status) + + send = await send_and_capture(adapter, "/s", platform) + + send.assert_called_once() + response_text = send.call_args[1].get("content") or send.call_args[0][1] + assert response_text == "status via alias" + runner._handle_status_command.assert_awaited_once() + runner._handle_message_with_agent.assert_not_awaited() + + class TestSessionLifecycle: """Verify session state changes across command sequences.""" From 9c93fc5775c872d9a10b5b56c3b9fcc59946f742 Mon Sep 17 00:00:00 2001 From: liuhao1024 Date: Mon, 4 May 2026 00:04:58 +0800 Subject: [PATCH 050/171] fix(tui): call process.exit(0) after Ink exit to trigger terminal cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ink's exit() calls unmount() which resets terminal modes (kitty keyboard, mouse, etc.) but does NOT call process.exit(). The Node process stays alive because stdin is still open (Ink listens on it), so the process.on('exit') handler in entry.tsx — which sends the final resetTerminalModes() — never fires. This left kitty keyboard protocol and other terminal modes enabled in the parent shell after /quit, Ctrl+C, or Ctrl+D, breaking arrow keys and other input in subsequent programs. Add explicit process.exit(0) after exit() in die() so the process actually terminates and the exit handler runs. Fixes #19194 --- ui-tui/src/app/useMainApp.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 9ec18337bbd..17924ca4a69 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -358,6 +358,13 @@ export function useMainApp(gw: GatewayClient) { const die = useCallback(() => { gw.kill() exit() + // Ink's exit() calls unmount() which resets terminal modes but does NOT + // call process.exit(). Without an explicit exit the Node process stays + // alive (stdin listener keeps the event loop open), so the process.on('exit') + // handler in entry.tsx — which sends the final resetTerminalModes() — never + // fires. This leaves kitty keyboard protocol, mouse modes, etc. enabled + // in the parent shell. See issue #19194. + process.exit(0) }, [exit, gw]) const session = useSessionLifecycle({ From 06031229e8d5efc312be4751909a1270dd444610 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 01:40:39 -0700 Subject: [PATCH 051/171] fix(tests): tolerate ps ancestor-walk in find_gateway_pids fallback test (#19590) Follow-up to #19586 (@cixuuz salvage): _get_ancestor_pids walks ps -o ppid= up the process tree, which the pre-existing mock in test_find_gateway_pids_falls_back_to_pid_file_when_process_scan_fails didn't expect. Return empty stdout so the ancestor loop terminates cleanly and the original fallback assertion still passes. --- tests/hermes_cli/test_gateway.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index 0a44ac95326..6dfbd636f4c 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -310,6 +310,10 @@ def test_find_gateway_pids_falls_back_to_pid_file_when_process_scan_fails(monkey def fake_run(cmd, **kwargs): if cmd[:4] == ["ps", "-A", "eww", "-o"]: return SimpleNamespace(returncode=1, stdout="", stderr="ps failed") + if cmd[:3] == ["ps", "-o", "ppid="]: + # _get_ancestor_pids() walks up the tree; return "no parent" so + # the loop terminates cleanly. + return SimpleNamespace(returncode=1, stdout="", stderr="") raise AssertionError(f"Unexpected command: {cmd}") monkeypatch.setattr(gateway.subprocess, "run", fake_run) From d8be50d772f45619e1baac988c2e2b5313fb1a74 Mon Sep 17 00:00:00 2001 From: qiqufang Date: Sun, 3 May 2026 20:57:35 +0800 Subject: [PATCH 052/171] fix(web): add missing icons for config page category sidebar Add icon mappings for 9 categories that fell back to FileQuestion: - bedrock (Cloud), curator (Sparkles), kanban (LayoutDashboard) - model_catalog (BookOpen), openrouter (Route), sessions (History) - tool_loop_guardrails (Shield), tool_output (FileOutput), updates (RefreshCw) --- web/src/pages/ConfigPage.tsx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/web/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx index 1a8be51e654..6fc510cc05f 100644 --- a/web/src/pages/ConfigPage.tsx +++ b/web/src/pages/ConfigPage.tsx @@ -27,6 +27,15 @@ import { Wrench, FileQuestion, Filter, + Cloud, + Sparkles, + LayoutDashboard, + BookOpen, + Route, + History, + Shield, + FileOutput, + RefreshCw, } from "lucide-react"; import { api } from "@/lib/api"; import { getNestedValue, setNestedValue } from "@/lib/nested"; @@ -66,6 +75,15 @@ const CATEGORY_ICONS: Record< logging: ClipboardList, discord: MessageCircle, auxiliary: Wrench, + bedrock: Cloud, + curator: Sparkles, + kanban: LayoutDashboard, + model_catalog: BookOpen, + openrouter: Route, + sessions: History, + tool_loop_guardrails: Shield, + tool_output: FileOutput, + updates: RefreshCw, }; function CategoryIcon({ From 8bdec8088204a321b15dabe7c0df731ad3a66ae1 Mon Sep 17 00:00:00 2001 From: ai-ag2026 <261867348+ai-ag2026@users.noreply.github.com> Date: Sun, 3 May 2026 16:01:28 +0200 Subject: [PATCH 053/171] fix(agent): surface preflight compression status Preflight compression can run synchronously before the first model call when a loaded session exceeds the active context threshold. Gateway users saw no visible progress while the compression LLM call was in flight, which can look like a dropped message during long compactions.\n\nEmit the existing lifecycle status through _emit_status before starting preflight compression so CLI, gateway, and WebUI status callbacks all get immediate feedback.\n\nAdds a regression assertion for the preflight path. --- run_agent.py | 10 +++++----- tests/run_agent/test_413_compression.py | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/run_agent.py b/run_agent.py index cfcd325eb61..2e0baa2a473 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10623,11 +10623,11 @@ class AIAgent: self.model, f"{self.context_compressor.context_length:,}", ) - if not self.quiet_mode: - self._safe_print( - f"📦 Preflight compression: ~{_preflight_tokens:,} tokens " - f">= {self.context_compressor.threshold_tokens:,} threshold" - ) + self._emit_status( + f"📦 Preflight compression: ~{_preflight_tokens:,} tokens " + f">= {self.context_compressor.threshold_tokens:,} threshold. " + "This may take a moment." + ) # May need multiple passes for very large sessions with small # context windows (each pass summarises the middle N turns). for _pass in range(3): diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py index 8bd357d3d28..5410f196e65 100644 --- a/tests/run_agent/test_413_compression.py +++ b/tests/run_agent/test_413_compression.py @@ -432,6 +432,8 @@ class TestPreflightCompression: ok_resp = _mock_response(content="After preflight", finish_reason="stop") agent.client.chat.completions.create.side_effect = [ok_resp] + status_messages = [] + agent.status_callback = lambda ev, msg: status_messages.append((ev, msg)) with ( patch.object(agent, "_compress_context") as mock_compress, @@ -460,6 +462,10 @@ class TestPreflightCompression: ) assert result["completed"] is True assert result["final_response"] == "After preflight" + assert any( + ev == "lifecycle" and "Preflight compression" in msg + for ev, msg in status_messages + ) def test_no_preflight_when_under_threshold(self, agent): """When history fits within context, no preflight compression needed.""" From c653f5dc3f4d5602eef3bd56780fdd378fd9acca Mon Sep 17 00:00:00 2001 From: Yanzhong Su Date: Sun, 3 May 2026 17:26:52 +0100 Subject: [PATCH 054/171] Clarify session_search auxiliary model docs --- tools/session_search_tool.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index f770fe88869..c043ede6a78 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -3,7 +3,9 @@ Session Search Tool - Long-Term Conversation Recall Searches past session transcripts in SQLite via FTS5, then summarizes the top -matching sessions using a cheap/fast model (same pattern as web_extract). +matching sessions using the configured auxiliary session_search model (same +pattern as web_extract). By default, auxiliary "auto" routing uses the main +chat provider/model unless the user overrides auxiliary.session_search. Returns focused summaries of past conversations rather than raw transcripts, keeping the main model's context window clean. @@ -11,7 +13,7 @@ Flow: 1. FTS5 search finds matching messages ranked by relevance 2. Groups by session, takes the top N unique sessions (default 3) 3. Loads each session's conversation, truncates to ~100k chars centered on matches - 4. Sends to Gemini Flash with a focused summarization prompt + 4. Sends to the configured auxiliary model with a focused summarization prompt 5. Returns per-session summaries with metadata """ @@ -330,7 +332,8 @@ def session_search( """ Search past sessions and return focused summaries of matching conversations. - Uses FTS5 to find matches, then summarizes the top sessions with Gemini Flash. + Uses FTS5 to find matches, then summarizes the top sessions with the + configured auxiliary session_search model. The current session is excluded from results since the agent already has that context. """ if db is None: From 45fd45103d6cb3d8a1910aad6ec9de7e3d55e4fb Mon Sep 17 00:00:00 2001 From: WanderWang Date: Mon, 4 May 2026 00:39:21 +0800 Subject: [PATCH 055/171] fix: _chromium_installed() now checks AGENT_BROWSER_EXECUTABLE_PATH and system Chrome Before this fix, _chromium_installed() only searched Playwright-style chromium-* / chromium_headless_shell-* directories, which meant users with system Chrome or AGENT_BROWSER_EXECUTABLE_PATH configured still had all browser_* tools gated. Now checks three sources in priority order: 1. AGENT_BROWSER_EXECUTABLE_PATH env var (if set and points to a real binary) 2. System Chrome/Chromium via shutil.which() (google-chrome, chromium-browser, chrome) 3. Playwright browser cache (existing logic, kept as fallback) Closes #19294 --- tools/browser_tool.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 5cd431de317..f9ca1a0af1a 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2757,17 +2757,40 @@ def _chromium_search_roots() -> List[str]: def _chromium_installed() -> bool: """Return True when a usable Chromium (or headless-shell) build is on disk. + Checks, in order: + + 1. ``AGENT_BROWSER_EXECUTABLE_PATH`` env var — the official way to point + agent-browser at a pre-installed Chrome/Chromium. + 2. System Chrome/Chromium in PATH (``google-chrome``, ``chromium-browser``, + ``chrome``). + 3. Playwright's browser cache (current logic) — directories containing + ``chromium-*`` or ``chromium_headless_shell-*``. + agent-browser (0.26+) downloads Playwright's chromium / headless-shell - builds into ``PLAYWRIGHT_BROWSERS_PATH`` and won't start without them. - When the CLI is present but no browser build is, the first browser tool - call hangs for the full command timeout (often ~30s each) before - surfacing a useless error. Guarding the tool behind this check prevents - advertising a capability that will fail at runtime. + builds into ``PLAYWRIGHT_BROWSERS_PATH`` and won't start without at least + one of the three above being present. Without a browser binary the CLI + hangs on first use until the command timeout fires (often ~30s). Guarding + the tool behind this check prevents advertising a capability that will + fail at runtime. """ global _cached_chromium_installed if _cached_chromium_installed is not None: return _cached_chromium_installed + # 1. AGENT_BROWSER_EXECUTABLE_PATH — explicit user-configured browser + ab_path = os.environ.get("AGENT_BROWSER_EXECUTABLE_PATH", "").strip() + if ab_path: + if os.path.isfile(ab_path) or shutil.which(ab_path): + _cached_chromium_installed = True + return True + + # 2. System Chrome/Chromium in PATH (common names) + system_chrome = shutil.which("google-chrome") or shutil.which("chromium-browser") or shutil.which("chrome") + if system_chrome: + _cached_chromium_installed = True + return True + + # 3. Playwright browser cache (legacy — chromium-* / chromium_headless_shell-* dirs) for root in _chromium_search_roots(): if not root or not os.path.isdir(root): continue From cdde0c841190613564e86815d0ba84c4d3e654c5 Mon Sep 17 00:00:00 2001 From: yuehei Date: Sun, 3 May 2026 21:52:26 +0800 Subject: [PATCH 056/171] fix(feishu): enable MEDIA attachment delivery in send_message tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _send_feishu() function already supports media_files (images, video, audio, documents) via the adapter's send_image_file/send_video/send_voice /send_document methods, but _send_to_platform() never routed Feishu into the early media-handling branch — media attachments were silently dropped with a "not supported" warning. Add a Feishu-specific media branch (matching the existing Yuanbao/Signal pattern) so that MEDIA: tags in send_message calls are correctly delivered as native Feishu attachments. Also update the two error/warning message strings to include feishu in the supported platform list. Co-Authored-By: Claude Opus 4.6 (1M context) --- tools/send_message_tool.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 0ad30d6dcbd..b4de998fe5e 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -588,11 +588,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, last_result = result return last_result + # --- Feishu: native media attachment support via adapter --- + if platform == Platform.FEISHU and media_files: + last_result = None + for i, chunk in enumerate(chunks): + is_last = (i == len(chunks) - 1) + result = await _send_feishu( + pconfig, + chat_id, + chunk, + media_files=media_files if is_last else None, + thread_id=thread_id, + ) + if isinstance(result, dict) and result.get("error"): + return result + last_result = result + return last_result + # --- Non-media platforms --- if media_files and not message.strip(): return { "error": ( - f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, weixin, signal and yuanbao; " + f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao and feishu; " f"target {platform.value} had only media attachments" ) } @@ -600,7 +617,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, if media_files: warning = ( f"MEDIA attachments were omitted for {platform.value}; " - "native send_message media delivery is currently only supported for telegram, discord, matrix, weixin, signal and yuanbao" + "native send_message media delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao and feishu" ) last_result = None From 2c7d7a9b2f75593e7949a8ed64883e6425df9684 Mon Sep 17 00:00:00 2001 From: memosr Date: Mon, 4 May 2026 01:10:50 +0300 Subject: [PATCH 057/171] fix(security): bind Meet node server to localhost and restrict token file to owner read --- plugins/google_meet/node/server.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/plugins/google_meet/node/server.py b/plugins/google_meet/node/server.py index a0d802dfdc2..cff01d265ff 100644 --- a/plugins/google_meet/node/server.py +++ b/plugins/google_meet/node/server.py @@ -43,7 +43,7 @@ class NodeServer: def __init__( self, - host: str = "0.0.0.0", + host: str = "127.0.0.1", port: int = 18789, token_path: Optional[Path] = None, display_name: str = "hermes-meet-node", @@ -76,6 +76,13 @@ class NodeServer: json.dumps({"token": tok, "generated_at": time.time()}, indent=2), encoding="utf-8", ) + # Restrict to owner-read-write only — the token grants full RPC + # access to the meet bot (start, transcribe, speak in meetings). + try: + tmp.chmod(0o600) + except (OSError, NotImplementedError): + # Best-effort on non-POSIX filesystems; mode is set on POSIX. + pass tmp.replace(self.token_path) self._token = tok return tok From aede94e7573fc47cf4837f1812fe4d75a142eac3 Mon Sep 17 00:00:00 2001 From: SHL0MS Date: Mon, 30 Mar 2026 23:03:33 -0400 Subject: [PATCH 058/171] fix: back up config.yaml before hermes setup modifies it Create a timestamped backup (~/.hermes/config.yaml.bak.YYYYMMDD_HHMMSS) before the setup wizard runs any configuration sections. After setup completes, show the backup path and a restore command. This protects user-customized values (compression thresholds, provider routing, PII redaction, auxiliary model configs) from being silently overwritten by setup defaults. Addresses #3522 --- hermes_cli/setup.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 6b51f810594..e8c2b3b6fc0 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -3003,6 +3003,21 @@ def run_setup_wizard(args): config = load_config() hermes_home = get_hermes_home() + # Back up existing config before setup modifies it (#3522) + config_path = get_config_path() + if config_path.exists(): + from datetime import datetime as _dt + _backup_path = config_path.with_suffix( + f".yaml.bak.{_dt.now().strftime('%Y%m%d_%H%M%S')}" + ) + try: + import shutil + shutil.copy2(config_path, _backup_path) + except Exception: + _backup_path = None + else: + _backup_path = None + # Detect non-interactive environments (headless SSH, Docker, CI/CD) non_interactive = getattr(args, 'non_interactive', False) if not non_interactive and not is_interactive_stdin(): @@ -3172,6 +3187,10 @@ def run_setup_wizard(args): # Save and show summary save_config(config) + if _backup_path and _backup_path.exists(): + print_info(f"Previous config backed up to: {_backup_path}") + print_info("If setup changed a value you customized, restore it with:") + print_info(f" cp {_backup_path} {config_path}") _print_setup_summary(config, hermes_home) _offer_launch_chat() From 8a364df2c829a4619893195a5dde1f9f64dbe2db Mon Sep 17 00:00:00 2001 From: Asunfly Date: Sun, 3 May 2026 21:10:30 +0800 Subject: [PATCH 059/171] fix: inherit reasoning config in API server runs --- gateway/platforms/api_server.py | 5 +++-- tests/gateway/test_api_server.py | 36 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index f1def35d9c7..b4d3ccb20cd 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -738,10 +738,11 @@ class APIServerAdapter(BasePlatformAdapter): gateway platforms), falling back to the hermes-api-server default. """ from run_agent import AIAgent - from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config + from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner from hermes_cli.tools_config import _get_platform_tools runtime_kwargs = _resolve_runtime_agent_kwargs() + reasoning_config = GatewayRunner._load_reasoning_config() model = _resolve_gateway_model() user_config = _load_gateway_config() @@ -751,7 +752,6 @@ class APIServerAdapter(BasePlatformAdapter): # Load fallback provider chain so the API server platform has the # same fallback behaviour as Telegram/Discord/Slack (fixes #4954). - from gateway.run import GatewayRunner fallback_model = GatewayRunner._load_fallback_model() agent = AIAgent( @@ -770,6 +770,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_complete_callback=tool_complete_callback, session_db=self._ensure_session_db(), fallback_model=fallback_model, + reasoning_config=reasoning_config, ) return agent diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 0bc2d043e35..d519eee278c 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -246,6 +246,42 @@ class TestAdapterInit: adapter = APIServerAdapter(config) assert adapter._port == 8642 + def test_create_agent_forwards_config_reasoning_effort(self, monkeypatch): + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr("run_agent.AIAgent", FakeAgent) + monkeypatch.setattr( + "gateway.run._resolve_runtime_agent_kwargs", + lambda: { + "provider": "openai-codex", + "base_url": "https://example.test/v1", + "api_mode": "codex_responses", + }, + ) + monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "gpt-5.5") + monkeypatch.setattr( + "gateway.run._load_gateway_config", + lambda: {"agent": {"reasoning_effort": "xhigh"}}, + ) + monkeypatch.setattr( + "gateway.run.GatewayRunner._load_reasoning_config", + staticmethod(lambda: {"enabled": True, "effort": "xhigh"}), + ) + monkeypatch.setattr("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)) + monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set()) + + adapter = APIServerAdapter(PlatformConfig(enabled=True)) + monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) + + agent = adapter._create_agent(session_id="api-session") + + assert isinstance(agent, FakeAgent) + assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"} + # --------------------------------------------------------------------------- # Auth checking From cb33c73418520bf38d83995c6e914ff42277eb10 Mon Sep 17 00:00:00 2001 From: molvikar Date: Mon, 4 May 2026 01:45:52 +0300 Subject: [PATCH 060/171] fix(run_agent): gate iteration-limit provider routing to OpenRouter --- run_agent.py | 5 ++++- tests/run_agent/test_run_agent.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 2e0baa2a473..a6ea2b1e728 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10300,7 +10300,10 @@ class AIAgent: provider_preferences["order"] = self.providers_order if self.provider_sort: provider_preferences["sort"] = self.provider_sort - if provider_preferences: + if provider_preferences and ( + (self.provider or "").strip().lower() == "openrouter" + or self._is_openrouter_url() + ): summary_extra_body["provider"] = provider_preferences if summary_extra_body: diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index a0a11b15306..d663805f8f0 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2230,6 +2230,34 @@ class TestHandleMaxIterations: ] assert len(stub_ids) >= 1, f"No stub result for assistant tool_call: {stub_ids}" + def test_summary_omits_provider_preferences_for_non_openrouter(self, agent): + agent.base_url = "https://api.openai.com/v1" + agent._base_url_lower = agent.base_url.lower() + agent.provider = "openai" + agent.providers_allowed = ["Anthropic"] + agent.client.chat.completions.create.return_value = _mock_response(content="Summary") + agent._cached_system_prompt = "You are helpful." + + result = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60) + + assert result == "Summary" + kwargs = agent.client.chat.completions.create.call_args.kwargs + assert "provider" not in kwargs.get("extra_body", {}) + + def test_summary_keeps_provider_preferences_for_openrouter(self, agent): + agent.base_url = "https://openrouter.ai/api/v1" + agent._base_url_lower = agent.base_url.lower() + agent.provider = "openrouter" + agent.providers_allowed = ["Anthropic"] + agent.client.chat.completions.create.return_value = _mock_response(content="Summary") + agent._cached_system_prompt = "You are helpful." + + result = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60) + + assert result == "Summary" + kwargs = agent.client.chat.completions.create.call_args.kwargs + assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"] + def test_codex_summary_sanitizes_orphan_tool_results(self, agent): agent.api_mode = "codex_responses" agent.provider = "openai-codex" From 9faaa292b460ad8d8f46e79f907b972edeca9e50 Mon Sep 17 00:00:00 2001 From: nftpoetrist Date: Sun, 3 May 2026 11:35:12 +0300 Subject: [PATCH 061/171] fix(delegate): inherit parent fallback_chain in _build_child_agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _build_child_agent constructed child AIAgents without passing fallback_model, leaving _fallback_chain=[] for every subagent. When a subagent hit a rate-limit or credential exhaustion the runtime fallback check (run_agent.py:7486 / 12267) found an empty chain and failed immediately — even though the parent agent was configured with fallback_providers and would have recovered. The cron scheduler already propagates fallback_model correctly (scheduler.py:1038). Fix closes the parity gap by reading the parent's _fallback_chain (the normalised list form accepted by AIAgent's fallback_model parameter) and threading it through. Empty chains coerce to None so AIAgent initialises _fallback_chain=[] as usual rather than iterating an empty list. --- tests/tools/test_delegate.py | 47 ++++++++++++++++++++++++++++++++++++ tools/delegate_tool.py | 7 ++++++ 2 files changed, 54 insertions(+) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 1806a7e60fb..089c46da09b 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -2403,5 +2403,52 @@ class TestSubagentApprovalCallback(unittest.TestCase): self.assertIsNone(_get_approval_callback()) +class TestFallbackModelInheritance(unittest.TestCase): + """Subagents must inherit the parent's fallback provider chain.""" + + def test_child_inherits_fallback_chain(self): + """_build_child_agent passes parent._fallback_chain as fallback_model.""" + parent = _make_mock_parent(depth=0) + fallback_entry = {"provider": "openrouter", "model": "gpt-4o-mini", "api_key": "sk-or-x"} + parent._fallback_chain = [fallback_entry] + + with patch("run_agent.AIAgent") as MockAgent: + MockAgent.return_value = MagicMock() + _build_child_agent( + task_index=0, + goal="test fallback inheritance", + context=None, + toolsets=None, + model=None, + max_iterations=10, + parent_agent=parent, + task_count=1, + ) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["fallback_model"], [fallback_entry]) + + def test_child_gets_no_fallback_when_parent_chain_empty(self): + """When parent._fallback_chain is empty, fallback_model is None.""" + parent = _make_mock_parent(depth=0) + parent._fallback_chain = [] + + with patch("run_agent.AIAgent") as MockAgent: + MockAgent.return_value = MagicMock() + _build_child_agent( + task_index=0, + goal="test no fallback", + context=None, + toolsets=None, + model=None, + max_iterations=10, + parent_agent=parent, + task_count=1, + ) + + _, kwargs = MockAgent.call_args + self.assertIsNone(kwargs["fallback_model"]) + + if __name__ == "__main__": unittest.main() diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 844e7bdfb0e..55c8ad31c4f 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1026,6 +1026,12 @@ def _build_child_agent( except Exception as exc: logger.debug("Could not load delegation reasoning_effort: %s", exc) + # Inherit the parent's fallback provider chain so subagents can recover + # from rate-limits and credential exhaustion exactly like the top-level + # agent does. _fallback_chain is a list accepted by AIAgent's + # fallback_model parameter (which handles both list and dict forms). + parent_fallback = getattr(parent_agent, "_fallback_chain", None) or None + child = AIAgent( base_url=effective_base_url, api_key=effective_api_key, @@ -1038,6 +1044,7 @@ def _build_child_agent( max_tokens=getattr(parent_agent, "max_tokens", None), reasoning_config=child_reasoning, prefill_messages=getattr(parent_agent, "prefill_messages", None), + fallback_model=parent_fallback, enabled_toolsets=child_toolsets, quiet_mode=True, ephemeral_system_prompt=child_prompt, From deb59eab727c757d7ea9a239616582b25b531525 Mon Sep 17 00:00:00 2001 From: pdonizete <2013236+pdonizete@users.noreply.github.com> Date: Sat, 2 May 2026 17:35:24 -0300 Subject: [PATCH 062/171] fix: allow kanban tools for orchestrator profiles with kanban toolset The _check_kanban_mode() gating function only checked for HERMES_KANBAN_TASK env var, which is only set by the dispatcher when spawning workers. This prevented orchestrator profiles (like techlead) from using kanban_create, kanban_link, etc. even when they had 'kanban' explicitly in their toolsets config. Now uses load_config() from hermes_cli.config (which has mtime-based caching) to check if 'kanban' is in the profile's toolsets list. This enables orchestrators to route work via Kanban while workers continue using the dispatcher env var. Fixes #18968 --- tools/kanban_tools.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index de5d180c833..d0023a30784 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -40,13 +40,31 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- def _check_kanban_mode() -> bool: - """Tools are available iff the current process has ``HERMES_KANBAN_TASK`` - set in its env, which the dispatcher sets when spawning a worker. + """Tools are available when: - Humans running ``hermes chat`` see zero kanban tools. Workers spawned - by the kanban dispatcher (gateway-embedded by default) see all seven. + 1. ``HERMES_KANBAN_TASK`` is set (dispatcher-spawned worker), OR + 2. The current profile has ``kanban`` in its toolsets config + (orchestrator profiles like techlead that route work via Kanban). + + Humans running ``hermes chat`` without the kanban toolset see zero + kanban tools. Workers spawned by the kanban dispatcher (gateway- + embedded by default) and orchestrator profiles with the kanban + toolset enabled see all seven. """ - return bool(os.environ.get("HERMES_KANBAN_TASK")) + if os.environ.get("HERMES_KANBAN_TASK"): + return True + + # Check if the current profile has the kanban toolset enabled. + # Uses load_config() which has mtime-based caching, so this adds + # negligible overhead. The check_fn results are further TTL-cached + # (~30s) by the tool registry. + try: + from hermes_cli.config import load_config + cfg = load_config() + toolsets = cfg.get("toolsets", []) + return "kanban" in toolsets + except Exception: + return False # --------------------------------------------------------------------------- From cac4f2c0e6800628445f230195c150b5f225e945 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:00:13 -0700 Subject: [PATCH 063/171] test(kanban): update worker-prompt header assertion to match #19427 PR #19427 dropped the 'You are a Kanban worker' identity line from KANBAN_GUIDANCE so SOUL.md stays authoritative for profile identity. This test assertion was stale against that change; update it to the new protocol-only header. --- tests/tools/test_kanban_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py index a7a8fda44db..1217e7c7387 100644 --- a/tests/tools/test_kanban_tools.py +++ b/tests/tools/test_kanban_tools.py @@ -467,8 +467,8 @@ def test_kanban_guidance_in_worker_prompt(monkeypatch, tmp_path): skip_memory=True, ) prompt = a._build_system_prompt() - # Header phrase - assert "You are a Kanban worker" in prompt + # Header phrase (identity-free — SOUL.md owns identity, layer 3 is protocol) + assert "Kanban task execution protocol" in prompt # Lifecycle signals assert "kanban_show()" in prompt assert "kanban_complete" in prompt From 21c7c9f0ca5f3c2fc5e1c64d4165879c004338a4 Mon Sep 17 00:00:00 2001 From: asheriif Date: Mon, 4 May 2026 09:07:37 +0000 Subject: [PATCH 064/171] fix(tui): harden plugin slash exec errors --- tests/tui_gateway/test_protocol.py | 63 ++++++++++++++++++++++++++++++ tui_gateway/server.py | 29 ++++++++------ 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 96df9823a61..a26a360a24d 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -421,6 +421,69 @@ def test_slash_exec_handles_plugin_commands_in_live_gateway(server): assert worker.calls == [] +def test_slash_exec_plugin_lookup_failure_falls_back_to_worker(server): + """Plugin discovery failures must not break ordinary slash-worker commands.""" + sid = "test-session" + + class Worker: + def __init__(self): + self.calls = [] + + def run(self, cmd): + self.calls.append(cmd) + return f"worker:{cmd}" + + worker = Worker() + server._sessions[sid] = {"session_key": sid, "agent": None, "slash_worker": worker} + + with patch( + "hermes_cli.plugins.get_plugin_command_handler", + side_effect=RuntimeError("discovery boom"), + ): + resp = server.handle_request({ + "id": "r-plugin-lookup-failure", + "method": "slash.exec", + "params": {"command": "help", "session_id": sid}, + }) + + assert "error" not in resp + assert resp["result"] == {"output": "worker:help"} + assert worker.calls == ["help"] + + +def test_slash_exec_plugin_handler_error_returns_output(server): + """Plugin handler failures return slash output so the TUI does not redispatch.""" + sid = "test-session" + + class Worker: + def __init__(self): + self.calls = [] + + def run(self, cmd): + self.calls.append(cmd) + return f"worker:{cmd}" + + def handler(arg): + raise RuntimeError(f"handler boom: {arg}") + + worker = Worker() + server._sessions[sid] = {"session_key": sid, "agent": None, "slash_worker": worker} + + with patch( + "hermes_cli.plugins.get_plugin_command_handler", + lambda name: handler if name == "plugin-cmd" else None, + ): + resp = server.handle_request({ + "id": "r-plugin-handler-error", + "method": "slash.exec", + "params": {"command": "plugin-cmd hello", "session_id": sid}, + }) + + assert "error" not in resp + assert resp["result"] == {"output": "Plugin command error: handler boom: hello"} + assert worker.calls == [] + + @pytest.mark.parametrize("cmd", ["retry", "queue hello", "q hello", "steer fix the test", "plan"]) def test_slash_exec_rejects_pending_input_commands(server, cmd): """slash.exec must reject commands that use _pending_input in the CLI.""" diff --git a/tui_gateway/server.py b/tui_gateway/server.py index c59d358d748..fff8c8d51aa 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -5189,19 +5189,26 @@ def _(rid, params: dict) -> dict: except Exception: pass - try: - from hermes_cli.plugins import ( - get_plugin_command_handler, - resolve_plugin_command_result, - ) + plugin_handler = None + resolve_plugin_command_result = None + if _cmd_base: + try: + from hermes_cli.plugins import ( + get_plugin_command_handler, + resolve_plugin_command_result, + ) - if _cmd_base: plugin_handler = get_plugin_command_handler(_cmd_base) - if plugin_handler: - result = resolve_plugin_command_result(plugin_handler(_cmd_arg)) - return _ok(rid, {"output": str(result or "(no output)")}) - except Exception as e: - return _err(rid, 4018, f"plugin command error: {e}") + except Exception: + plugin_handler = None + resolve_plugin_command_result = None + + if plugin_handler and resolve_plugin_command_result: + try: + result = resolve_plugin_command_result(plugin_handler(_cmd_arg)) + return _ok(rid, {"output": str(result or "(no output)")}) + except Exception as e: + return _ok(rid, {"output": f"Plugin command error: {e}"}) worker = session.get("slash_worker") if not worker: From abcaf0522905ff849cc8241037f42fb669bcb664 Mon Sep 17 00:00:00 2001 From: LeonSGP43 Date: Sun, 3 May 2026 21:44:04 +0800 Subject: [PATCH 065/171] fix(skills): keep manual skills out of curator --- tests/agent/test_curator.py | 37 +++++++++++++++++++++++ tests/tools/test_skill_manager_tool.py | 3 ++ tests/tools/test_skill_usage.py | 32 +++++++++++++++----- tools/skill_manager_tool.py | 6 ++-- tools/skill_usage.py | 42 ++++++++++++++++++++------ 5 files changed, 102 insertions(+), 18 deletions(-) diff --git a/tests/agent/test_curator.py b/tests/agent/test_curator.py index 45b96994562..69dc5f85786 100644 --- a/tests/agent/test_curator.py +++ b/tests/agent/test_curator.py @@ -154,6 +154,7 @@ def test_unused_skill_transitions_to_stale(curator_env): long_ago = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat() data = u.load_usage() data["old-skill"] = u._empty_record() + data["old-skill"]["created_by"] = "agent" data["old-skill"]["last_used_at"] = long_ago data["old-skill"]["created_at"] = long_ago u.save_usage(data) @@ -172,6 +173,7 @@ def test_very_old_skill_gets_archived(curator_env): super_old = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat() data = u.load_usage() data["ancient"] = u._empty_record() + data["ancient"]["created_by"] = "agent" data["ancient"]["last_used_at"] = super_old data["ancient"]["created_at"] = super_old u.save_usage(data) @@ -192,6 +194,7 @@ def test_pinned_skill_is_never_touched(curator_env): super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat() data = u.load_usage() data["precious"] = u._empty_record() + data["precious"]["created_by"] = "agent" data["precious"]["last_used_at"] = super_old data["precious"]["created_at"] = super_old data["precious"]["pinned"] = True @@ -214,6 +217,7 @@ def test_stale_skill_reactivates_on_recent_use(curator_env): recent = datetime.now(timezone.utc).isoformat() data = u.load_usage() data["revived"] = u._empty_record() + data["revived"]["created_by"] = "agent" data["revived"]["state"] = "stale" data["revived"]["last_used_at"] = recent data["revived"]["created_at"] = recent @@ -240,6 +244,27 @@ def test_new_skill_without_last_used_not_immediately_archived(curator_env): assert (skills_dir / "fresh").exists() +def test_manual_skill_is_not_auto_archived(curator_env): + """Manual skills can have usage records, but without the agent-created + marker they must stay out of curator transitions.""" + c = curator_env["curator"] + u = curator_env["usage"] + skills_dir = curator_env["home"] / "skills" + skill_dir = _write_skill(skills_dir, "manual") + + super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat() + data = u.load_usage() + data["manual"] = u._empty_record() + data["manual"]["last_used_at"] = super_old + data["manual"]["created_at"] = super_old + u.save_usage(data) + + counts = c.apply_automatic_transitions() + assert counts["checked"] == 0 + assert counts["archived"] == 0 + assert skill_dir.exists() + + def test_bundled_skill_not_touched_by_transitions(curator_env): c = curator_env["curator"] u = curator_env["usage"] @@ -267,8 +292,10 @@ def test_bundled_skill_not_touched_by_transitions(curator_env): def test_run_review_records_state(curator_env): c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") result = c.run_curator_review(synchronous=True) assert "started_at" in result @@ -284,8 +311,10 @@ def test_dry_run_does_not_advance_state(curator_env, monkeypatch): `hermes curator status`. Fixes #18373. """ c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") # Stub the LLM so the test doesn't need a provider. monkeypatch.setattr( @@ -311,8 +340,10 @@ def test_dry_run_injects_report_only_banner(curator_env, monkeypatch): skips automatic transitions — but the LLM prompt is the only guard against the model calling skill_manage directly.""" c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") captured = {} def _stub(prompt): @@ -331,8 +362,10 @@ def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch): archives skills deterministically, and a preview must not touch the filesystem.""" c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") called = {"n": 0} def _explode(*_a, **_kw): @@ -351,8 +384,10 @@ def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch): def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch): c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") calls = [] def _stub(prompt): @@ -409,8 +444,10 @@ def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch): def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch): c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") # Seed last_run_at far in the past so the interval gate opens — the # "no state" path intentionally defers the first run now (#18373). long_ago = datetime.now(timezone.utc) - timedelta(hours=c.get_interval_hours() * 2) diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index 004924b9f42..934215d9450 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -533,8 +533,11 @@ class TestSkillManageDispatcher: def test_full_create_via_dispatcher(self, tmp_path): with _skill_dir(tmp_path): raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT) + from tools.skill_usage import load_usage + usage = load_usage() result = json.loads(raw) assert result["success"] is True + assert usage["test-skill"]["created_by"] == "agent" def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path): # Dispatcher must plumb absorbed_into through to _delete_skill so the diff --git a/tests/tools/test_skill_usage.py b/tests/tools/test_skill_usage.py index 7dd92eb18c8..b66e2bba765 100644 --- a/tests/tools/test_skill_usage.py +++ b/tests/tools/test_skill_usage.py @@ -194,10 +194,11 @@ def test_forget_removes_record(skills_home): # --------------------------------------------------------------------------- def test_agent_created_excludes_bundled(skills_home): - from tools.skill_usage import list_agent_created_skill_names + from tools.skill_usage import list_agent_created_skill_names, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "bundled-skill", category="github") _write_skill(skills_dir, "my-skill") + mark_agent_created("my-skill") # Seed a bundled manifest marking bundled-skill as upstream (skills_dir / ".bundled_manifest").write_text( "bundled-skill:abc123\n", encoding="utf-8", @@ -208,10 +209,11 @@ def test_agent_created_excludes_bundled(skills_home): def test_agent_created_excludes_hub_installed(skills_home): - from tools.skill_usage import list_agent_created_skill_names + from tools.skill_usage import list_agent_created_skill_names, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "hub-skill") _write_skill(skills_dir, "my-skill") + mark_agent_created("my-skill") hub_dir = skills_dir / ".hub" hub_dir.mkdir() (hub_dir / "lock.json").write_text( @@ -238,9 +240,10 @@ def test_is_agent_created(skills_home): def test_agent_created_skips_archive_and_hub_dirs(skills_home): - from tools.skill_usage import list_agent_created_skill_names + from tools.skill_usage import list_agent_created_skill_names, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "real-skill") + mark_agent_created("real-skill") # Dot-prefixed dirs must be ignored even if they contain SKILL.md archive = skills_dir / ".archive" / "old-skill" archive.mkdir(parents=True) @@ -368,27 +371,41 @@ def test_archive_collision_gets_suffix(skills_home): # Reporting # --------------------------------------------------------------------------- -def test_agent_created_report_includes_defaults(skills_home): - from tools.skill_usage import agent_created_report, bump_view +def test_agent_created_report_includes_marked_skills_with_defaults(skills_home): + from tools.skill_usage import agent_created_report, bump_view, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "a") _write_skill(skills_dir, "b") + mark_agent_created("a") + mark_agent_created("b") bump_view("a") rows = agent_created_report() by_name = {r["name"]: r for r in rows} assert "a" in by_name and "b" in by_name assert by_name["a"]["view_count"] == 1 - # b has no usage record yet — must still appear with defaults + # b has only the provenance marker — activity fields still default. assert by_name["b"]["view_count"] == 0 assert by_name["b"]["state"] == "active" +def test_manual_skill_with_usage_is_not_curator_managed(skills_home): + from tools.skill_usage import agent_created_report, bump_view, list_agent_created_skill_names + skills_dir = skills_home / "skills" + _write_skill(skills_dir, "manual-skill") + + bump_view("manual-skill") + + assert "manual-skill" not in list_agent_created_skill_names() + assert "manual-skill" not in {r["name"] for r in agent_created_report()} + + def test_agent_created_report_excludes_bundled_and_hub(skills_home): - from tools.skill_usage import agent_created_report + from tools.skill_usage import agent_created_report, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "mine") _write_skill(skills_dir, "bundled") _write_skill(skills_dir, "hubbed") + mark_agent_created("mine") (skills_dir / ".bundled_manifest").write_text("bundled:abc\n", encoding="utf-8") hub = skills_dir / ".hub" hub.mkdir() @@ -414,6 +431,7 @@ def test_agent_created_report_derives_activity_from_view_and_patch(skills_home, ]) monkeypatch.setattr(skill_usage, "_now_iso", lambda: next(timestamps)) + skill_usage.mark_agent_created("mine") skill_usage.bump_view("mine") skill_usage.bump_patch("mine") diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index d8d44f1a8b7..e7d264de67e 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -786,8 +786,10 @@ def skill_manage( # that mutate an existing skill's guidance), drop the record on delete. # Best-effort; telemetry failures never break the tool. try: - from tools.skill_usage import bump_patch, forget - if action in ("patch", "edit", "write_file", "remove_file"): + from tools.skill_usage import bump_patch, forget, mark_agent_created + if action == "create": + mark_agent_created(name) + elif action in ("patch", "edit", "write_file", "remove_file"): bump_patch(name) elif action == "delete": forget(name) diff --git a/tools/skill_usage.py b/tools/skill_usage.py index 8a4a1aa4252..0491f1d8b1a 100644 --- a/tools/skill_usage.py +++ b/tools/skill_usage.py @@ -11,8 +11,9 @@ Design notes: - Atomic writes via tempfile + os.replace (same pattern as .bundled_manifest). - All counter bumps are best-effort: failures log at DEBUG and return silently. A broken sidecar never breaks the underlying tool call. - - Provenance filter: "agent-created" == not in .bundled_manifest AND not in - .hub/lock.json. The curator only ever mutates agent-created skills. + - Provenance filter: curator-managed skills are explicitly marked when + created through skill_manage. Bundled / hub-installed skills stay + off-limits, and manually authored skills are not inferred from location. Lifecycle states: active -> default @@ -149,11 +150,13 @@ def _read_hub_installed_names() -> Set[str]: def list_agent_created_skill_names() -> List[str]: - """Enumerate skills that were authored by the agent (or user), NOT by a - bundled or hub-installed source. + """Enumerate skills explicitly authored by the agent. - The curator operates exclusively on this set. Bundled / hub skills are - maintained by their upstream sources and must never be pruned here. + The curator operates exclusively on this set. Skills are only eligible + after ``skill_manage(action="create")`` marks them in ``.usage.json``; + manually authored skills must not be inferred from filesystem location. + Bundled / hub skills are maintained by their upstream sources and must + never be pruned here. """ base = _skills_dir() if not base.exists(): @@ -161,6 +164,7 @@ def list_agent_created_skill_names() -> List[str]: bundled = _read_bundled_manifest_names() hub = _read_hub_installed_names() off_limits = bundled | hub + usage = load_usage() names: List[str] = [] # Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md @@ -176,6 +180,8 @@ def list_agent_created_skill_names() -> List[str]: name = _read_skill_name(skill_md, fallback=skill_md.parent.name) if name in off_limits: continue + if not _is_curator_managed_record(usage.get(name)): + continue names.append(name) return sorted(set(names)) @@ -207,12 +213,20 @@ def is_agent_created(skill_name: str) -> bool: return skill_name not in off_limits +def _is_curator_managed_record(record: Any) -> bool: + """Return True when a usage record opts a skill into curator management.""" + if not isinstance(record, dict): + return False + return record.get("created_by") == "agent" or record.get("agent_created") is True + + # --------------------------------------------------------------------------- # Sidecar I/O # --------------------------------------------------------------------------- def _empty_record() -> Dict[str, Any]: return { + "created_by": None, "use_count": 0, "view_count": 0, "last_used_at": None, @@ -287,9 +301,8 @@ def _mutate(skill_name: str, mutator) -> None: """Load, apply *mutator(record)* in place, save. Best-effort. Bundled and hub-installed skills are NEVER recorded in the sidecar. - This keeps .usage.json focused on agent-created skills (the only ones - the curator considers) and prevents stale counters from hanging around - for upstream-managed skills. + Local manual skills may still accrue usage telemetry, but they only + become curator-managed when ``created_by`` is explicitly marked. """ if not skill_name: return @@ -336,6 +349,17 @@ def bump_patch(skill_name: str) -> None: _mutate(skill_name, _apply) +def mark_agent_created(skill_name: str) -> None: + """Opt a skill created by skill_manage into curator management. + + Viewing or invoking a manually authored skill may still create telemetry, + but only this explicit marker makes it eligible for automatic curation. + """ + def _apply(rec: Dict[str, Any]) -> None: + rec["created_by"] = "agent" + _mutate(skill_name, _apply) + + def set_state(skill_name: str, state: str) -> None: """Set lifecycle state. No-op if *state* is invalid.""" if state not in _VALID_STATES: From baf834cc0fb52688908c458fcfab5a1821992b8c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:18:25 -0700 Subject: [PATCH 066/171] chore(release): map cine.dreamer.one@gmail.com to @LeonSGP43 --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 72abbd7ed72..048114428e4 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -459,6 +459,7 @@ AUTHOR_MAP = { "centripetal-star@users.noreply.github.com": "centripetal-star", "LeonSGP43@users.noreply.github.com": "LeonSGP43", "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", + "cine.dreamer.one@gmail.com": "LeonSGP43", "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", From 3e1559b91057fad4b80f03bcede39ae0afcb4b7e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:28:59 -0700 Subject: [PATCH 067/171] chore(release): AUTHOR_MAP entries for Tier 1c salvage batch Pre-adds author-email mappings for upcoming Tier 1c salvage PRs (small Apr 24-25 fixes). --- scripts/release.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 048114428e4..3bd491d27cd 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -651,6 +651,22 @@ AUTHOR_MAP = { "yanzh.su@gmail.com": "YanzhongSu", "wanderwang@users.noreply.github.com": "WanderWang", "yueheime@gmail.com": "yuehei", + "emidomh@gmail.com": "Emidomenge", + "2642448440@qq.com": "BlackJulySnow", + "4317663+helix4u@users.noreply.github.com": "helix4u", + "floptopbot33@gmail.com": "flobo3", + "dpaluy@users.noreply.github.com": "dpaluy", + "psikonetik@gmail.com": "el-analista", + "chenb19870707@gmail.com": "ms-alan", + "hex-clawd@users.noreply.github.com": "hex-clawd", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", + "barteq@hacknotes.local": "barteqpl", + "pama0227@gmail.com": "pama0227", + "52785845+ee-blog@users.noreply.github.com": "ee-blog", + "simplenamebox@gmail.com": "simplenamebox-ops", + "balyan.sid@gmail.com": "alt-glitch", + "xdord@xdorddeMac-mini.local": "foreverxdord", + "k2767567815@gmail.com": "QifengKuang", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From e2211b2683d0dacbdb39af9bc5a2b712a742597d Mon Sep 17 00:00:00 2001 From: nftpoetrist Date: Sat, 25 Apr 2026 10:52:17 +0300 Subject: [PATCH 068/171] fix(compressor): reset _summary_failure_cooldown_until in on_session_reset() on_session_reset() cleared _previous_summary, _last_summary_error, and _ineffective_compression_count but left _summary_failure_cooldown_until intact. When a transient summary error sets a 60 s cooldown (or 600 s for a missing-provider RuntimeError) and the user immediately runs /reset or /new, the cooldown carries into the new session. If the new session reaches the compression threshold before the cooldown expires, _generate_summary() returns None early, middle turns are silently dropped without a summary, and the agent continues with no indication that compaction was skipped. Fix: set _summary_failure_cooldown_until = 0.0 in on_session_reset(), matching the value assigned in __init__ and symmetric with the other per-session fields already cleared there. Fixes #15547 --- agent/context_compressor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 45c26b11b0a..6c177b90998 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -344,6 +344,7 @@ class ContextCompressor(ContextEngine): self._last_aux_model_failure_model = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 + self._summary_failure_cooldown_until = 0.0 # transient errors must not block a fresh session def update_model( self, From 83bbe9b458cc19dc7d12c5dbf4e8e68ff37659b2 Mon Sep 17 00:00:00 2001 From: Emilien Domenge Date: Fri, 24 Apr 2026 22:05:40 +0200 Subject: [PATCH 069/171] fix(delegation): pass target_model to resolve_runtime_provider in _resolve_delegation_credentials MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When delegation.model differs from model.default and the provider is opencode-go or opencode-zen, the wrong api_mode is computed because resolve_runtime_provider falls back to model_cfg.get('default') — the main model — instead of the configured delegation model. For example, with model.default=minimax-m2.7 (anthropic_messages) and delegation.model=glm-5.1 (chat_completions), subagents get anthropic_messages, which strips /v1 from the base URL and causes a 404. resolve_runtime_provider already accepts target_model for exactly this purpose; _resolve_delegation_credentials just wasn't passing it. Fixes #15319 Related: #13678 --- tools/delegate_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 55c8ad31c4f..d987385252f 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -2299,7 +2299,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: try: from hermes_cli.runtime_provider import resolve_runtime_provider - runtime = resolve_runtime_provider(requested=configured_provider) + runtime = resolve_runtime_provider(requested=configured_provider, target_model=configured_model) except Exception as exc: raise ValueError( f"Cannot resolve delegation provider '{configured_provider}': {exc}. " From dc63ad0ad2ec73014fb99f6c840a4ddea802b806 Mon Sep 17 00:00:00 2001 From: B1GGersnow <2642448440@qq.com> Date: Sat, 25 Apr 2026 00:37:54 +0800 Subject: [PATCH 070/171] fix(anthropic): cap max_tokens at 65536 for Qwen models via DashScope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DashScope's Anthropic-compatible endpoint enforces max_tokens ∈ [1, 65536]. Adding "qwen3" to _ANTHROPIC_OUTPUT_LIMITS prevents 400 errors that were misclassified as context overflow, triggering premature compression. Co-Authored-By: Claude Opus 4.7 --- agent/anthropic_adapter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 8d8334acd17..7cdac560b19 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -105,6 +105,9 @@ _ANTHROPIC_OUTPUT_LIMITS = { "claude-3-haiku": 4_096, # Third-party Anthropic-compatible providers "minimax": 131_072, + # Qwen models via DashScope Anthropic-compatible endpoint + # DashScope enforces max_tokens ∈ [1, 65536] + "qwen3": 65_536, } # For any model not in the table, assume the highest current limit. From f236cbfec36302905beeea5bcfae8e6f26578e95 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Fri, 24 Apr 2026 19:57:55 -0600 Subject: [PATCH 071/171] fix(tui): declare nanostores dependency --- ui-tui/package-lock.json | 2 +- ui-tui/package.json | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ui-tui/package-lock.json b/ui-tui/package-lock.json index 2efd64fe406..0677e8bdc10 100644 --- a/ui-tui/package-lock.json +++ b/ui-tui/package-lock.json @@ -12,6 +12,7 @@ "@nanostores/react": "^1.1.0", "ink": "^6.8.0", "ink-text-input": "^6.0.0", + "nanostores": "^1.2.0", "react": "^19.2.4", "unicode-animations": "^1.0.3" }, @@ -5303,7 +5304,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": "^20.0.0 || >=22.0.0" } diff --git a/ui-tui/package.json b/ui-tui/package.json index 061e3bc4484..2bb1616a0a2 100644 --- a/ui-tui/package.json +++ b/ui-tui/package.json @@ -21,6 +21,7 @@ "@nanostores/react": "^1.1.0", "ink": "^6.8.0", "ink-text-input": "^6.0.0", + "nanostores": "^1.2.0", "react": "^19.2.4", "unicode-animations": "^1.0.3" }, From d7663c78083aff9df70349f36c587ebfc2c3aa59 Mon Sep 17 00:00:00 2001 From: flobo3 Date: Fri, 24 Apr 2026 20:15:28 +0300 Subject: [PATCH 072/171] fix(docker): exclude compose/profile runtime state from build context --- .dockerignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.dockerignore b/.dockerignore index 41999f5ac6e..f4a02484ebf 100644 --- a/.dockerignore +++ b/.dockerignore @@ -25,3 +25,7 @@ ui-tui/packages/hermes-ink/dist/ # Runtime data (bind-mounted at /opt/data; must not leak into build context) data/ + +# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues) +hermes-config/ +runtime/ From e2248045f56430acbc5bb2759938f4fd5300cb0b Mon Sep 17 00:00:00 2001 From: dpaluy Date: Sat, 25 Apr 2026 14:52:38 -0500 Subject: [PATCH 073/171] fix(cron): drop stale env-var override of persisted provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cron jobs were passing os.getenv("HERMES_INFERENCE_PROVIDER") as the "requested" arg to resolve_runtime_provider(), which short-circuited the resolver's own precedence (explicit arg → persisted config → env) and let stale shell/.env values outrank the user's saved provider. Long-lived cron daemons inherit env from the shell that launched them, so a since-changed provider (e.g. DeepSeek) could keep firing for jobs that don't pin provider/model. Same bug class as f0b763c74 fixed for the TUI /model switch. Pass only job.get("provider") and let resolve_requested_provider fall through to persisted config and env in the documented order. Co-Authored-By: Claude Opus 4.7 (1M context) --- cron/scheduler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 0be6d362392..84c6f2fff5d 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -1004,8 +1004,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: ) from hermes_cli.auth import AuthError try: + # Do not inject HERMES_INFERENCE_PROVIDER here. resolve_runtime_provider() + # already prefers persisted config over stale shell/env overrides when + # no explicit provider is requested. Passing the env var here short- + # circuits that precedence and can resurrect old providers (for + # example DeepSeek) for cron jobs that do not pin provider/model. runtime_kwargs = { - "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"), + "requested": job.get("provider"), } if job.get("base_url"): runtime_kwargs["explicit_base_url"] = job.get("base_url") From 54cd633366cf51810e2efc31e228a5774556b3c3 Mon Sep 17 00:00:00 2001 From: hex-clawd Date: Fri, 24 Apr 2026 17:44:05 +0200 Subject: [PATCH 074/171] fix(cron): skip AI call when script produces no output When a cron job has a pre-run script that runs successfully but produces no output (e.g. email checker with no new mail), the scheduler previously injected "[Script ran successfully but produced no output.]" into the prompt and still called the AI model. This wastes tokens on every cycle. Now _build_job_prompt() returns None when script output is empty, and run_job() short-circuits with a SILENT response - zero API calls when there is nothing to report. --- cron/scheduler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 84c6f2fff5d..cee1cb40672 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -706,10 +706,8 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: f"{prompt}" ) else: - prompt = ( - "[Script ran successfully but produced no output.]\n\n" - f"{prompt}" - ) + # Script produced no output — nothing to report, skip AI call. + return None else: prompt = ( "## Script Error\n" @@ -869,6 +867,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: return True, silent_doc, SILENT_MARKER, None prompt = _build_job_prompt(job, prerun_script=prerun_script) + if prompt is None: + logger.info("Job '%s': script produced no output, skipping AI call.", job_name) + return True, "", SILENT_MARKER, None origin = _resolve_origin(job) _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" From ad4542bf6dd380c4d73ce7b0666f5e1bda1dd330 Mon Sep 17 00:00:00 2001 From: barteq Date: Sat, 25 Apr 2026 21:00:50 +0000 Subject: [PATCH 075/171] fix(gateway): allow free_response_channels to override DISCORD_IGNORE_NO_MENTION When DISCORD_IGNORE_NO_MENTION is true (default), the bot ignores messages without @mention. However, this check ran before evaluating free_response_channels, so messages in free-response channels were wrongly dropped unless they contained a mention. This change adds a carve-out: if the message lands in a channel that is configured as a free response channel (or its parent category is), the ignore-no-mention rule is skipped. Also removes the unconditional skip_thread for free response channels so that auto_thread still creates threads there unless explicitly disabled via DISCORD_NO_THREAD_CHANNELS. --- gateway/platforms/discord.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 243e81d3e88..ecfa38c7234 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -720,11 +720,22 @@ class DiscordAdapter(BasePlatformAdapter): return # If humans are mentioned but we're not → not for us # (preserves old DISCORD_IGNORE_NO_MENTION=true behavior) + # EXCEPT in free-response channels where the bot should + # answer regardless of who is mentioned. _ignore_no_mention = os.getenv( "DISCORD_IGNORE_NO_MENTION", "true" ).lower() in ("true", "1", "yes") if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned: - return + _channel_id = str(message.channel.id) + _parent_id = None + if hasattr(message.channel, "parent_id") and message.channel.parent_id: + _parent_id = str(message.channel.parent_id) + _free_channels = adapter_self._discord_free_response_channels() + _channel_ids = {_channel_id} + if _parent_id: + _channel_ids.add(_parent_id) + if "*" not in _free_channels and not (_channel_ids & _free_channels): + return await self._handle_message(message) @@ -3797,7 +3808,7 @@ class DiscordAdapter(BasePlatformAdapter): if not is_thread and not isinstance(message.channel, discord.DMChannel): no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "") no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()} - skip_thread = bool(channel_ids & no_thread_channels) or is_free_channel + skip_thread = bool(channel_ids & no_thread_channels) auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes") is_reply_message = getattr(message, "type", None) == discord.MessageType.reply if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message: From f6aa1965d79b2285bb5097979085562876f9a0c7 Mon Sep 17 00:00:00 2001 From: ee-blog <52785845+ee-blog@users.noreply.github.com> Date: Sun, 26 Apr 2026 01:09:55 +0800 Subject: [PATCH 076/171] fix(telegram): fallback to document when photo dimensions exceed limits Telegram's send_photo has dimension limits (sum of width+height <= 10000px). When sending large screenshots or tall images, the API returns 'Photo_invalid_dimensions' error. Fix: Catch this specific error in send_image_file() and automatically fallback to send_document() which has no dimension limits (only 50MB size). This is similar to the existing 5MB URL fallback (commit 542faf22) but handles local files with dimension issues instead of URL size issues. --- gateway/platforms/telegram.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 188038a1adb..247b5fbb932 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2267,6 +2267,23 @@ class TelegramAdapter(BasePlatformAdapter): ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: + error_str = str(e) + # Check for dimension-related errors - fallback to document mode + if "Photo_invalid_dimensions" in error_str or "PHOTO_INVALID_DIMENSIONS" in error_str: + logger.info( + "[%s] Image dimensions exceed Telegram photo limits, sending as document: %s", + self.name, + image_path, + ) + # Fallback to sending as document (file) - no dimension limits, only 50MB size limit + return await self.send_document( + chat_id=chat_id, + file_path=image_path, + caption=caption, + file_name=os.path.basename(image_path), + reply_to=reply_to, + metadata=metadata, + ) logger.error( "[%s] Failed to send Telegram local image, falling back to base adapter: %s", self.name, From ba8337464da1f59888645356d3fe3250f343caaa Mon Sep 17 00:00:00 2001 From: flobo3 Date: Fri, 24 Apr 2026 20:18:06 +0300 Subject: [PATCH 077/171] fix(gemini): extract usageMetadata from streaming chunks for token tracking --- agent/gemini_native_adapter.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py index 5f64636f2ff..2416a6bc891 100644 --- a/agent/gemini_native_adapter.py +++ b/agent/gemini_native_adapter.py @@ -679,7 +679,21 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: finish_reason_raw = str(cand.get("finishReason") or "") if finish_reason_raw: mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw) - chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) + finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped) + # Attach usage from this event's usageMetadata so the streaming + # loop in run_agent.py can record token counts (mirrors the + # non-streaming path in translate_gemini_response). + usage_meta = event.get("usageMetadata") or {} + if usage_meta: + finish_chunk.usage = SimpleNamespace( + prompt_tokens=int(usage_meta.get("promptTokenCount") or 0), + completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0), + total_tokens=int(usage_meta.get("totalTokenCount") or 0), + prompt_tokens_details=SimpleNamespace( + cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0), + ), + ) + chunks.append(finish_chunk) return chunks From 4e2b20b7053b4712362a107f16f56443472bedf2 Mon Sep 17 00:00:00 2001 From: nftpoetrist Date: Sat, 25 Apr 2026 00:45:31 +0300 Subject: [PATCH 078/171] fix(cli): sync use_gateway in _reconfigure_provider for tts, browser, and web MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _reconfigure_provider() updates cloud_provider/backend/tts.provider when switching tool providers via "hermes setup tools → Reconfigure", but did not update the matching use_gateway flag. _configure_provider() (the initial-setup path) sets use_gateway on all three tool categories. The omission in _reconfigure_provider leaves a stale value in config.yaml: switching from a Nous-managed provider (use_gateway=True) to a self-hosted one keeps use_gateway=True, continuing to route requests through the Nous gateway; switching the other way leaves use_gateway unset so the managed feature does not activate. Fix: mirror _configure_provider's use_gateway = bool(managed_feature) assignment in the tts, browser, and web blocks of _reconfigure_provider. Symmetric across all three tool categories. No behavior change for any provider that does not set tts_provider, browser_provider, or web_backend. Fixes #15229 --- hermes_cli/tools_config.py | 14 ++++++++++---- tests/hermes_cli/test_tools_config.py | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 21439a28c13..14d82caa653 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -1920,21 +1920,27 @@ def _reconfigure_provider(provider: dict, config: dict): return if provider.get("tts_provider"): - config.setdefault("tts", {})["provider"] = provider["tts_provider"] + tts_cfg = config.setdefault("tts", {}) + tts_cfg["provider"] = provider["tts_provider"] + tts_cfg["use_gateway"] = bool(managed_feature) _print_success(f" TTS provider set to: {provider['tts_provider']}") if "browser_provider" in provider: bp = provider["browser_provider"] + browser_cfg = config.setdefault("browser", {}) if bp == "local": - config.setdefault("browser", {})["cloud_provider"] = "local" + browser_cfg["cloud_provider"] = "local" _print_success(" Browser set to local mode") elif bp: - config.setdefault("browser", {})["cloud_provider"] = bp + browser_cfg["cloud_provider"] = bp _print_success(f" Browser cloud provider set to: {bp}") + browser_cfg["use_gateway"] = bool(managed_feature) # Set web search backend in config if applicable if provider.get("web_backend"): - config.setdefault("web", {})["backend"] = provider["web_backend"] + web_cfg = config.setdefault("web", {}) + web_cfg["backend"] = provider["web_backend"] + web_cfg["use_gateway"] = bool(managed_feature) _print_success(f" Web backend set to: {provider['web_backend']}") if managed_feature and managed_feature not in ("web", "tts", "browser"): diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index abe211f4fbd..0bde24fc74e 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -2,10 +2,13 @@ from unittest.mock import patch +import pytest + from hermes_cli.tools_config import ( _DEFAULT_OFF_TOOLSETS, _apply_toolset_change, _configure_provider, + _reconfigure_provider, _get_platform_tools, _platform_toolset_summary, _reconfigure_tool, @@ -898,3 +901,27 @@ def test_get_effective_configurable_toolsets_dedupes_bundled_plugins(): assert len(spotify_rows) == 1, spotify_rows # Built-in label wins over the plugin label. assert spotify_rows[0][1] == "🎵 Spotify" + + +@pytest.mark.parametrize("provider,config_key,expected", [ + # managed provider → use_gateway True + ({"name": "T", "tts_provider": "elevenlabs", "managed_nous_feature": "tts", "env_vars": []}, "tts", True), + ({"name": "B", "browser_provider": "browserbase", "managed_nous_feature": "browser", "env_vars": []}, "browser", True), + ({"name": "W", "web_backend": "tavily", "managed_nous_feature": "web", "env_vars": []}, "web", True), + # self-hosted provider → use_gateway False + ({"name": "T", "tts_provider": "elevenlabs", "env_vars": []}, "tts", False), + ({"name": "B", "browser_provider": "browserbase", "env_vars": []}, "browser", False), + ({"name": "W", "web_backend": "tavily", "env_vars": []}, "web", False), +]) +def test_reconfigure_provider_syncs_use_gateway(provider, config_key, expected): + config = {} + _reconfigure_provider(provider, config) + assert config[config_key]["use_gateway"] is expected + + +def test_reconfigure_browser_provider_overwrites_stale_use_gateway(): + # Switching from managed (use_gateway=True) to self-hosted must clear the stale flag. + config = {"browser": {"cloud_provider": "managed-browser", "use_gateway": True}} + provider = {"name": "Browserbase", "browser_provider": "browserbase", "env_vars": []} + _reconfigure_provider(provider, config) + assert config["browser"]["use_gateway"] is False From 6da970f15d78d81dfc6287e54788acc2f869b64c Mon Sep 17 00:00:00 2001 From: analista Date: Sun, 26 Apr 2026 00:13:21 +0900 Subject: [PATCH 079/171] fix(tui): close AIAgent on session teardown to prevent FD leak session.close only closed the slash_worker subprocess but never called agent.close() on the AIAgent instance. In the long-lived TUI gateway process, this left httpx clients for GC to finalize. When the OS recycled a closed FD number for a new active connection, the stale finalizer would close the live socket, causing intermittent [Errno 9] Bad file descriptor on subsequent LLM API calls. Call agent.close() (which properly shuts down the httpx transport pool and TCP sockets) before closing the slash_worker. --- tui_gateway/server.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index fe66d3798df..825822aad84 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2439,6 +2439,12 @@ def _(rid, params: dict) -> dict: unregister_gateway_notify(session["session_key"]) except Exception: pass + try: + agent = session.get("agent") + if agent and hasattr(agent, "close"): + agent.close() + except Exception: + pass try: worker = session.get("slash_worker") if worker: From 99faac212ed7e19276bb3766984457ad9c7c4fd3 Mon Sep 17 00:00:00 2001 From: Bart Date: Fri, 24 Apr 2026 22:19:44 +0100 Subject: [PATCH 080/171] fix(tui): prevent trailing space in picker-command completions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commands that open pickers (/model, /skin, /personality) previously received a trailing space in their completions to keep the dropdown visible in the classic CLI. However, the TUI's submit handler applies the completion when Enter is pressed and the result differs from the input — so '/model' + space became '/model ' and the command was never executed. Picker commands now omit the trailing space for exact matches, allowing Enter to submit and open the picker. Non-picker commands (/help, etc.) are unaffected. --- hermes_cli/commands.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 681f228ae93..827e7592c47 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -1126,6 +1126,12 @@ class SlashCommandCompleter(Completer): except Exception: return {} + # Commands that open pickers when run without arguments. + # These should NOT receive a trailing space in completions because: + # - The TUI's submit handler applies completions on Enter if input differs + # - Adding space makes "/model" → "/model " which blocks picker execution + _PICKER_COMMANDS = frozenset({"model", "skin", "personality"}) + @staticmethod def _completion_text(cmd_name: str, word: str) -> str: """Return replacement text for a completion. @@ -1134,8 +1140,17 @@ class SlashCommandCompleter(Completer): returning ``help`` would be a no-op and prompt_toolkit suppresses the menu. Appending a trailing space keeps the dropdown visible and makes backspacing retrigger it naturally. + + However, commands that open pickers (model, skin, personality) should + NOT get a trailing space — the TUI would apply the completion on Enter + and block the picker from opening. """ - return f"{cmd_name} " if cmd_name == word else cmd_name + if cmd_name != word: + return cmd_name + # Don't add space for picker commands — allows Enter to execute them + if cmd_name in SlashCommandCompleter._PICKER_COMMANDS: + return cmd_name + return f"{cmd_name} " @staticmethod def _extract_path_word(text: str) -> str | None: From 7d36533aeb0fe4fd5680d3285ffbeb50f8908035 Mon Sep 17 00:00:00 2001 From: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com> Date: Mon, 4 May 2026 02:38:27 -0700 Subject: [PATCH 081/171] fix(pty): default TERM for resize probes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preserve explicit caller overrides, but backfill a sensible default TERM=xterm-256color when missing or blank in the spawn env. CI often runs without TERM in the parent process, which makes terminal probes like 'tput cols' fail before winsize reads. Salvage of #15278's core code fix only — the test changes conflict with subsequent test refactors on main that now exercise TIOCGWINSZ directly instead of via 'tput'. Co-authored-by: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com> --- hermes_cli/pty_bridge.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hermes_cli/pty_bridge.py b/hermes_cli/pty_bridge.py index 9a8a73baddc..66fdb4ac720 100644 --- a/hermes_cli/pty_bridge.py +++ b/hermes_cli/pty_bridge.py @@ -108,9 +108,14 @@ class PtyBridge: "(or pip install -e '.[pty]')." ) raise PtyUnavailableError("Pseudo-terminals are unavailable.") - # Let caller-supplied env fully override inheritance; if they pass - # None we inherit the server's env (same semantics as subprocess). - spawn_env = os.environ.copy() if env is None else env + # PTY-hosted programs expect TERM to describe the terminal type. + # CI often runs without TERM in the parent process, which makes + # simple terminal probes like `tput cols` fail before winsize reads. + # Preserve explicit caller overrides, but backfill a sensible default + # when TERM is missing or blank. + spawn_env = (os.environ.copy() if env is None else env.copy()) + if not spawn_env.get("TERM"): + spawn_env["TERM"] = "xterm-256color" proc = ptyprocess.PtyProcess.spawn( # type: ignore[union-attr] list(argv), cwd=cwd, From 2a52e285685750c5f60d785c072fc394a84136a0 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Mon, 4 May 2026 02:41:25 -0700 Subject: [PATCH 082/171] fix(setup): skip AUXILIARY_VISION_MODEL write when input is blank MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guard the save_env_value('AUXILIARY_VISION_MODEL', ...) call with 'if _selected_vision_model:' so blank input at the non-OpenAI vision model prompt doesn't nuke existing values in .env. save_env_value has no internal guard against empty strings — it faithfully writes whatever it receives, including empty values that shadow the previously-configured model. Salvage of #15504 (core hunk). Contributor's test was dropped because it collided with subsequent test refactors; the fix stands on its own. Co-authored-by: alt-glitch --- hermes_cli/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index e8c2b3b6fc0..63f5267ddf2 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -964,7 +964,8 @@ def setup_model_provider(config: dict, *, quick: bool = False): ) else: _selected_vision_model = prompt(" Vision model (blank = use main/custom default)").strip() - save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model) + if _selected_vision_model: + save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model) print_success( f"Vision configured with {_base_url}" + (f" ({_selected_vision_model})" if _selected_vision_model else "") From bff484a51b8f12e8e4663b3a880709f2e8cfc1c1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:41:51 -0700 Subject: [PATCH 083/171] fix(kanban-dashboard): widen drawer, bump body fonts, fix code-block contrast (#19638) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #18576. Addresses three of four complaints from the readability report; live-verified in a dashboard against a seeded task with body, comments, and run history. - Drawer default width 480px → 640px, exposed as the CSS var `--hermes-kanban-drawer-width` so deployments / user themes can override without forking the plugin. - Bump body/meta/pre/log/run-history font sizes from the 0.65-0.75rem cluster to the 0.78-0.85rem cluster. Long paths and code snippets in task bodies, run metadata, and worker logs are legible again instead of requiring a squint. - Fix the black-text-on-dark-theme regression in fenced markdown code blocks. Root cause: themes that don't define `--color-foreground` (NERV, at least) leave `color: var(--color-foreground)` resolving empty on , which then falls back to the UA default (near-black) instead of inheriting from the drawer's . Fix: force `color: inherit` on both inline and fenced code, and give the fenced block background via `currentColor` instead of `--color-foreground` so there's a visible card even when the theme var is absent. Out of scope for this PR (comments added to #18576): - Draggable resize handle (structural JS work; plugin ships built-only, no src/ in-tree). - Live worker-log viewer for running tasks (backend WS + component). - Sibling fix: themes like NERV should define --color-foreground. The current changes make the drawer robust against that gap, but the root fix belongs in the theme layer. --- plugins/kanban/dashboard/dist/style.css | 39 ++++++++++++++++--------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css index 74876aeff5f..c638946ad27 100644 --- a/plugins/kanban/dashboard/dist/style.css +++ b/plugins/kanban/dashboard/dist/style.css @@ -268,7 +268,7 @@ } .hermes-kanban-drawer { - width: min(480px, 92vw); + width: min(var(--hermes-kanban-drawer-width, 640px), 92vw); height: 100vh; background: var(--color-card); border-left: 1px solid var(--color-border); @@ -334,7 +334,7 @@ .hermes-kanban-meta-row { display: flex; gap: 0.5rem; - font-size: 0.72rem; + font-size: 0.8rem; } .hermes-kanban-meta-label { width: 92px; @@ -367,14 +367,15 @@ .hermes-kanban-pre { margin: 0; - padding: 0.45rem 0.55rem; + padding: 0.5rem 0.6rem; white-space: pre-wrap; word-break: break-word; background: color-mix(in srgb, var(--color-foreground) 4%, transparent); border: 1px solid var(--color-border); border-radius: var(--radius-sm, 0.25rem); font-family: var(--font-mono, ui-monospace, monospace); - font-size: 0.72rem; + font-size: 0.8rem; + line-height: 1.5; color: var(--color-foreground); } @@ -605,8 +606,8 @@ /* ---- Markdown rendering -------------------------------------------- */ .hermes-kanban-md { - font-size: 0.8rem; - line-height: 1.55; + font-size: 0.85rem; + line-height: 1.6; color: var(--color-foreground); } .hermes-kanban-md p { margin: 0.25rem 0; } @@ -632,15 +633,22 @@ } .hermes-kanban-md code { font-family: var(--font-mono, ui-monospace, monospace); - font-size: 0.75rem; + font-size: 0.8rem; padding: 0.05rem 0.3rem; background: color-mix(in srgb, var(--color-foreground) 8%, transparent); border-radius: 3px; + color: inherit; } +/* Fenced code block. Set a visible background even when --color-foreground + * is empty (color-mix falls through to transparent in that case), and force + * color: inherit so the text tracks the drawer foreground rather than the + * UA default on elements — otherwise themes that don't set + * --color-foreground leave code text rendering near-black on dark themes + * (see issue #18576). */ .hermes-kanban-md-code { margin: 0.35rem 0; padding: 0.5rem 0.6rem; - background: color-mix(in srgb, var(--color-foreground) 5%, transparent); + background: color-mix(in srgb, currentColor 6%, transparent); border: 1px solid var(--color-border); border-radius: var(--radius-sm, 0.25rem); overflow-x: auto; @@ -648,8 +656,9 @@ .hermes-kanban-md-code code { background: transparent; padding: 0; - font-size: 0.75rem; + font-size: 0.8rem; white-space: pre; + color: inherit; } .hermes-kanban-md strong { font-weight: 600; } @@ -684,11 +693,11 @@ /* ---- Worker log pane ------------------------------------------------ */ .hermes-kanban-log { - max-height: 340px; + max-height: 360px; overflow: auto; white-space: pre; - font-size: 0.7rem; - line-height: 1.45; + font-size: 0.78rem; + line-height: 1.5; } @@ -739,7 +748,8 @@ color: var(--color-muted-foreground); } .hermes-kanban-run-summary { - font-size: 0.75rem; + font-size: 0.82rem; + line-height: 1.5; padding: 0.2rem 0 0; color: var(--color-foreground); } @@ -751,7 +761,8 @@ } .hermes-kanban-run-meta { display: block; - font-size: 0.65rem; + font-size: 0.72rem; + line-height: 1.5; padding: 0.15rem 0 0; color: var(--color-muted-foreground); white-space: pre-wrap; From 3c070f9f9d00a74462980862590b9703fdc002ca Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:42:16 -0700 Subject: [PATCH 084/171] fix(curator): only mark agent-created for background-review sediment (#19621) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tighten the provenance semantics added in #19618: skills a user asks a foreground agent to write via skill_manage(create) now stay invisible to the curator. Only skills the background self-improvement review fork sediments through skill_manage get the created_by=agent marker. - tools/skill_provenance.py — new ContextVar module mirroring the _approval_session_key pattern: set_current_write_origin / reset / get / is_background_review. Default origin is 'foreground'; the review fork sets 'background_review'. - run_agent.py — run_conversation() binds the ContextVar from self._memory_write_origin at the top of each call. The review fork runs on its own thread (fresh context), so foreground and review contexts never cross-contaminate. - tools/skill_manager_tool.py — skill_manage(action='create') now only calls mark_agent_created() when is_background_review(). All other cases (foreground create, patch, edit, write_file, delete) continue as before. - tests: test_skill_provenance.py (6 tests covering the ContextVar surface), split test_full_create_via_dispatcher into foreground vs. review-fork variants, curator status tests now mark-first. Why: the agent routinely edits existing user skills on the user's behalf; those writes must never flip provenance. And when a user explicitly asks the foreground agent to create a skill, that skill belongs to the user. The curator should only be cleaning up after its own autonomous sediment from the review nudge loop. --- run_agent.py | 9 +++ tests/hermes_cli/test_curator_status.py | 10 ++- tests/tools/test_skill_manager_tool.py | 30 ++++++- tests/tools/test_skill_provenance.py | 102 ++++++++++++++++++++++++ tools/skill_manager_tool.py | 9 ++- tools/skill_provenance.py | 78 ++++++++++++++++++ 6 files changed, 234 insertions(+), 4 deletions(-) create mode 100644 tests/tools/test_skill_provenance.py create mode 100644 tools/skill_provenance.py diff --git a/run_agent.py b/run_agent.py index a6ea2b1e728..720bc19dcb8 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10421,6 +10421,15 @@ class AIAgent: from hermes_logging import set_session_context set_session_context(self.session_id) + # Bind the skill write-origin ContextVar for this thread so tool + # handlers (e.g. skill_manage create) can tell whether they are + # running inside the background self-improvement review fork vs. + # a foreground user-directed turn. Set at the top of each call; + # the review fork runs on its own thread with a fresh context, + # so the foreground value here does not leak into it. + from tools.skill_provenance import set_current_write_origin + set_current_write_origin(getattr(self, "_memory_write_origin", "assistant_tool")) + # If the previous turn activated fallback, restore the primary # runtime so this turn gets a fresh attempt with the preferred model. # No-op when _fallback_activated is False (gateway, first turn, etc.). diff --git a/tests/hermes_cli/test_curator_status.py b/tests/hermes_cli/test_curator_status.py index 3be58625920..b4c3548c428 100644 --- a/tests/hermes_cli/test_curator_status.py +++ b/tests/hermes_cli/test_curator_status.py @@ -114,6 +114,12 @@ def test_status_shows_most_and_least_used_sections(curator_status_env): env["make_skill"]("top-dog") env["make_skill"]("middling") env["make_skill"]("never-used") + # Mark all three as agent-created so they enter the curator's catalog. + # Under the provenance-marker semantics, skills must be explicitly opted + # into curator management (normally via the background-review fork when + # it creates a skill through skill_manage). + for n in ("top-dog", "middling", "never-used"): + env["skill_usage"].mark_agent_created(n) # Bump use_count differentially. All three counters (use/view/patch) feed # into activity_count, so bumping use alone is enough to make activity @@ -150,7 +156,9 @@ def test_status_hides_most_active_when_all_zero(curator_status_env): env = curator_status_env env["make_skill"]("a") env["make_skill"]("b") - # No bumps. + # Mark both as agent-created so the catalog lists them. No bumps. + env["skill_usage"].mark_agent_created("a") + env["skill_usage"].mark_agent_created("b") out = _capture_status(env["curator_cli"]) diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index 934215d9450..e24e19dea1e 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -531,13 +531,41 @@ class TestSkillManageDispatcher: assert result["success"] is False def test_full_create_via_dispatcher(self, tmp_path): + """Foreground create does NOT mark the skill as agent-created. + + Skills created by user-directed foreground turns belong to the user; + only the background self-improvement review fork should mark its + own sediment as agent-created (so the curator can later consolidate + or prune it). + """ with _skill_dir(tmp_path): raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT) from tools.skill_usage import load_usage usage = load_usage() result = json.loads(raw) assert result["success"] is True - assert usage["test-skill"]["created_by"] == "agent" + # No provenance marker on a foreground create — record either missing + # entirely (telemetry best-effort) or present with created_by unset. + rec = usage.get("test-skill") or {} + assert rec.get("created_by") in (None, "", False) + + def test_create_from_background_review_marks_agent_created(self, tmp_path): + """Background-review fork creates ARE marked as agent-created.""" + from tools.skill_provenance import set_current_write_origin, BACKGROUND_REVIEW + token = set_current_write_origin(BACKGROUND_REVIEW) + try: + with _skill_dir(tmp_path): + raw = skill_manage( + action="create", name="review-sediment", content=VALID_SKILL_CONTENT + ) + from tools.skill_usage import load_usage + usage = load_usage() + finally: + from tools.skill_provenance import reset_current_write_origin + reset_current_write_origin(token) + result = json.loads(raw) + assert result["success"] is True + assert usage["review-sediment"]["created_by"] == "agent" def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path): # Dispatcher must plumb absorbed_into through to _delete_skill so the diff --git a/tests/tools/test_skill_provenance.py b/tests/tools/test_skill_provenance.py new file mode 100644 index 00000000000..77f505bb86a --- /dev/null +++ b/tests/tools/test_skill_provenance.py @@ -0,0 +1,102 @@ +"""Tests for tools/skill_provenance.py — write-origin ContextVar.""" + +import contextvars + +import pytest + + +def test_default_origin_is_foreground(): + from tools.skill_provenance import get_current_write_origin + # In a fresh ContextVar context, default kicks in. + ctx = contextvars.copy_context() + origin = ctx.run(get_current_write_origin) + assert origin == "foreground" + + +def test_set_and_get_origin(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + token = set_current_write_origin("background_review") + try: + assert get_current_write_origin() == "background_review" + finally: + reset_current_write_origin(token) + + +def test_reset_restores_prior_origin(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + outer = set_current_write_origin("assistant_tool") + try: + inner = set_current_write_origin("background_review") + try: + assert get_current_write_origin() == "background_review" + finally: + reset_current_write_origin(inner) + assert get_current_write_origin() == "assistant_tool" + finally: + reset_current_write_origin(outer) + + +def test_is_background_review_truthy_only_for_review(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + is_background_review, + BACKGROUND_REVIEW, + ) + for origin, expected in ( + ("foreground", False), + ("assistant_tool", False), + ("random_other_value", False), + (BACKGROUND_REVIEW, True), + ): + token = set_current_write_origin(origin) + try: + assert is_background_review() is expected, ( + f"is_background_review() wrong for origin={origin!r}" + ) + finally: + reset_current_write_origin(token) + + +def test_empty_origin_falls_back_to_foreground(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + token = set_current_write_origin("") + try: + # Empty is coerced to "foreground" at the set() boundary. + assert get_current_write_origin() == "foreground" + finally: + reset_current_write_origin(token) + + +def test_context_isolation_between_copies(): + """ContextVar scoping: modifications in one copy do not leak out.""" + from tools.skill_provenance import ( + set_current_write_origin, + get_current_write_origin, + BACKGROUND_REVIEW, + ) + + # Start at the module default. + original = get_current_write_origin() + + def _run_in_copy(): + set_current_write_origin(BACKGROUND_REVIEW) + return get_current_write_origin() + + ctx = contextvars.copy_context() + inside = ctx.run(_run_in_copy) + assert inside == BACKGROUND_REVIEW + # Parent context unaffected. + assert get_current_write_origin() == original diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index e7d264de67e..58c3fe3d2dc 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -784,11 +784,16 @@ def skill_manage( pass # Curator telemetry: bump patch_count on edit/patch/write_file (the actions # that mutate an existing skill's guidance), drop the record on delete. - # Best-effort; telemetry failures never break the tool. + # Only mark a skill as agent-created when the background self-improvement + # review fork creates it — foreground `skill_manage(create)` calls are + # user-directed, and those skills belong to the user (the curator must + # not touch them). Best-effort; telemetry failures never break the tool. try: from tools.skill_usage import bump_patch, forget, mark_agent_created + from tools.skill_provenance import is_background_review if action == "create": - mark_agent_created(name) + if is_background_review(): + mark_agent_created(name) elif action in ("patch", "edit", "write_file", "remove_file"): bump_patch(name) elif action == "delete": diff --git a/tools/skill_provenance.py b/tools/skill_provenance.py new file mode 100644 index 00000000000..9f43efc3fc5 --- /dev/null +++ b/tools/skill_provenance.py @@ -0,0 +1,78 @@ +"""Skill write-origin provenance — ContextVar for distinguishing agent-sediment skill writes from foreground user-directed writes. + +The curator only consolidates/prunes skills it autonomously created via the +background self-improvement review fork. Skills a user asks a foreground +agent to write belong to the user and must never be auto-curated. + +This module exposes a ContextVar that run_agent.py sets before each tool +loop so tool handlers (e.g. skill_manage create) can check whether they +are executing inside the background-review fork. + +The signal piggybacks on AIAgent._memory_write_origin, which is already +set to "background_review" for review-fork instances (see +_spawn_background_review in run_agent.py) and defaults to "assistant_tool" +for normal (foreground) agents. + +Usage: + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + + token = set_current_write_origin("background_review") + try: + ... # tool runs here + finally: + reset_current_write_origin(token) + + # inside a tool: + if get_current_write_origin() == "background_review": + mark_agent_created(skill_name) +""" + +import contextvars + + +_write_origin: contextvars.ContextVar[str] = contextvars.ContextVar( + "skill_write_origin", + default="foreground", +) + +# The sentinel value the background review fork uses; mirrors +# run_agent.py's AIAgent._memory_write_origin override in +# _spawn_background_review(). +BACKGROUND_REVIEW = "background_review" + + +def set_current_write_origin(origin: str) -> contextvars.Token[str]: + """Bind the active write origin to the current context. + + Returns a Token the caller must pass to reset_current_write_origin + in a finally block. + """ + return _write_origin.set(origin or "foreground") + + +def reset_current_write_origin(token: contextvars.Token[str]) -> None: + """Restore the prior write origin context.""" + _write_origin.reset(token) + + +def get_current_write_origin() -> str: + """Return the active write origin. + + Default: "foreground" — any tool call made by a regular (non-review) + agent, from the CLI, the gateway, cron, or a subagent. + + "background_review" — the self-improvement review fork; only skills + created under this origin should be marked agent-created for curator + management. + """ + return _write_origin.get() + + +def is_background_review() -> bool: + """Convenience: True iff the current write origin is the background + review fork.""" + return get_current_write_origin() == BACKGROUND_REVIEW From 52c539d53a2b4d457ccc5963a09c880cae49812f Mon Sep 17 00:00:00 2001 From: QifengKuang Date: Mon, 4 May 2026 02:42:56 -0700 Subject: [PATCH 085/171] fix(agent): disable SDK retries on per-request OpenAI clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-request OpenAI-wire clients (used by both non-streaming and streaming chat-completions paths in _interruptible_api_call) should not run the SDK's built-in retry loop: the agent's outer loop owns retries with credential rotation, provider fallback, and backoff that the SDK can't see. Leaving SDK retries on (default 2) compounds with our outer retries and lets a single hung provider request stretch to ~3x the per-call timeout before our stale detector reports it. Shared/primary clients and Anthropic / Bedrock paths are unaffected (they don't go through here). Salvage of #15811 core improvement — the timeout push-down in the original PR required scaffolding that has since been refactored on main, so only the max_retries=0 change is preserved. Co-authored-by: QifengKuang --- run_agent.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/run_agent.py b/run_agent.py index 720bc19dcb8..4828780199a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5816,6 +5816,17 @@ class AIAgent: return primary_client with self._openai_client_lock(): request_kwargs = dict(self._client_kwargs) + # Per-request OpenAI-wire clients (used by both the non-streaming + # chat-completions path and the streaming chat-completions path + # in `_interruptible_api_call`) should not run the SDK's built-in + # retry loop: the agent's outer loop owns retries with credential + # rotation, provider fallback, and backoff that the SDK can't + # see. Leaving SDK retries on (default 2) compounds with our outer + # retries and lets a single hung provider request stretch to ~3x + # the per-call timeout before our stale detector reports it. + # Shared/primary clients and Anthropic / Bedrock paths are + # unaffected (they don't go through here). + request_kwargs["max_retries"] = 0 if ( base_url_host_matches(str(request_kwargs.get("base_url", "")), "api.githubcopilot.com") and self._api_kwargs_have_image_parts(api_kwargs or {}) From 0df7e61d2cc1c1a723f576e701a03f16c0e9edf2 Mon Sep 17 00:00:00 2001 From: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com> Date: Sat, 25 Apr 2026 00:53:07 +0800 Subject: [PATCH 086/171] fix(cli): omit empty api_mode when probing custom models --- hermes_cli/main.py | 8 ++++---- tests/hermes_cli/test_custom_provider_model_switch.py | 10 +++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 4fe5ff3508b..6c2544e9059 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -3413,10 +3413,10 @@ def _model_flow_named_custom(config, provider_info): print() print("Fetching available models...") - models = fetch_api_models( - api_key, base_url, timeout=8.0, - api_mode=api_mode or None, - ) + fetch_kwargs = {"timeout": 8.0} + if api_mode: + fetch_kwargs["api_mode"] = api_mode + models = fetch_api_models(api_key, base_url, **fetch_kwargs) if models: default_idx = 0 diff --git a/tests/hermes_cli/test_custom_provider_model_switch.py b/tests/hermes_cli/test_custom_provider_model_switch.py index 454337592db..d123120ed83 100644 --- a/tests/hermes_cli/test_custom_provider_model_switch.py +++ b/tests/hermes_cli/test_custom_provider_model_switch.py @@ -56,7 +56,6 @@ class TestCustomProviderModelSwitch: "sk-test", "https://vllm.example.com/v1", timeout=8.0, - api_mode=None, ) def test_can_switch_to_different_model(self, config_home): @@ -141,12 +140,18 @@ class TestCustomProviderModelSwitch: "api_mode": "anthropic_messages", } - with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \ + with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]) as mock_fetch, \ patch.dict("sys.modules", {"simple_term_menu": None}), \ patch("builtins.input", return_value="1"), \ patch("builtins.print"): _model_flow_named_custom({}, provider_info) + mock_fetch.assert_called_once_with( + "***", + "https://proxy.example.com/anthropic", + timeout=8.0, + api_mode="anthropic_messages", + ) config = yaml.safe_load((config_home / "config.yaml").read_text()) or {} model = config.get("model") assert isinstance(model, dict) @@ -215,7 +220,6 @@ class TestCustomProviderModelSwitch: "sk-live-example-provider", "https://api.example-provider.test/v1", timeout=8.0, - api_mode=None, ) config = yaml.safe_load(config_path.read_text()) or {} assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}" From 51dc98d314000d7b326b341bb2b95f23ac6814d4 Mon Sep 17 00:00:00 2001 From: thchen Date: Sat, 25 Apr 2026 13:02:27 +0000 Subject: [PATCH 087/171] fix(agent): detect Qwen3/Ollama inline thinking after tool calls Ollama serves Qwen3 thinking inside the content field as ... blocks rather than in the API-level reasoning_content field. This means _has_structured was False for these responses, so an empty-looking reply after a tool call triggered the nudge instead of the prefill continuation, causing a double-response loop. Fix: detect // in final_response and: 1. Skip the nudge when thinking is present (model is still reasoning) 2. Include _has_inline_thinking in _has_structured so prefill kicks in --- run_agent.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/run_agent.py b/run_agent.py index 4828780199a..6d604f9b599 100644 --- a/run_agent.py +++ b/run_agent.py @@ -13438,9 +13438,22 @@ class AIAgent: m.get("role") == "tool" for m in messages[-5:] # check recent messages ) + # Detect Qwen3/Ollama-style in-content thinking blocks. + # Ollama puts in the content field (not in + # reasoning_content), so _has_structured below would + # miss it. We check here so thinking-only responses + # after tool calls route to prefill instead of nudge. + _has_inline_thinking = bool( + re.search( + r'||', + final_response or "", + re.IGNORECASE, + ) + ) if ( _prior_was_tool and not getattr(self, "_post_tool_empty_retried", False) + and not _has_inline_thinking # thinking model still working — let prefill handle ): self._post_tool_empty_retried = True # Clear stale narration so it doesn't resurface @@ -13480,10 +13493,13 @@ class AIAgent: # continue — the model will see its own reasoning # on the next turn and produce the text portion. # Inspired by clawdbot's "incomplete-text" recovery. + # Also covers Qwen3/Ollama in-content blocks + # (detected above as _has_inline_thinking). _has_structured = bool( getattr(assistant_message, "reasoning", None) or getattr(assistant_message, "reasoning_content", None) or getattr(assistant_message, "reasoning_details", None) + or _has_inline_thinking ) if _has_structured and self._thinking_prefill_retries < 2: self._thinking_prefill_retries += 1 From 08b8465ca9b68de4e5fde406f0e8ca94bab7ced6 Mon Sep 17 00:00:00 2001 From: ms-alan Date: Sat, 25 Apr 2026 00:08:49 +0800 Subject: [PATCH 088/171] fix(email): add required Date header to send_message_tool._send_email Adds RFC 5322 Date header to the _send_email tool path in tools/send_message_tool.py. Issue #15160 noted that both gateway/platforms/email.py and tools/send_message_tool.py construct MIMEMultipart/MIMEText messages without setting a Date header. RFC 5322 requires the Date header; mail filters reject messages that lack it. PR #15207 fixed the gateway/platforms/email.py path but did not cover tools/send_message_tool._send_email, which is used by the send_message tool for cross-channel messaging. This change adds msg["Date"] = formatdate(localtime=True) to _send_email, mirroring the fix applied to the gateway email adapter. Closes #15160 --- tools/send_message_tool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index b4de998fe5e..938cb977b6a 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -10,9 +10,10 @@ import json import logging import os import re -from typing import Dict, Optional import ssl import time +from email.utils import formatdate +from typing import Dict, Optional from agent.redact import redact_sensitive_text From c659a168992c97f0b15af2eb7e4add03c21f7a3b Mon Sep 17 00:00:00 2001 From: ms-alan Date: Sat, 25 Apr 2026 00:13:33 +0800 Subject: [PATCH 089/171] fix(cli): detect quoted relative paths in _detect_file_drop Closes #15197 --- cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cli.py b/cli.py index 98370b83838..5f76675b38d 100644 --- a/cli.py +++ b/cli.py @@ -1505,6 +1505,10 @@ def _detect_file_drop(user_input: str) -> "dict | None": or stripped.startswith('"~') or stripped.startswith("'/") or stripped.startswith("'~") + or stripped.startswith('"./') + or stripped.startswith('"../') + or stripped.startswith("'./") + or stripped.startswith("'../") or (len(stripped) >= 4 and stripped[0] in ("'", '"') and stripped[2] == ":" and stripped[3] in ("\\", "/") and stripped[1].isalpha()) ) if not starts_like_path: From 06280047099cad93aadbe528a4aa465a76a90135 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:48:30 -0700 Subject: [PATCH 090/171] docs(model-catalog): rename x-ai/grok-4.20-beta to x-ai/grok-4.20 (#19640) OpenRouter and Nous Portal dropped the -beta suffix from the Grok 4.20 slug. The OpenRouter section already used the new slug; this updates the Nous Portal section and bumps updated_at. --- website/static/api/model-catalog.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/static/api/model-catalog.json b/website/static/api/model-catalog.json index 0845f7339ac..f19beab0749 100644 --- a/website/static/api/model-catalog.json +++ b/website/static/api/model-catalog.json @@ -1,6 +1,6 @@ { "version": 1, - "updated_at": "2026-04-30T03:06:09Z", + "updated_at": "2026-05-04T09:41:25Z", "metadata": { "source": "hermes-agent repo", "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog" @@ -232,7 +232,7 @@ "id": "z-ai/glm-5-turbo" }, { - "id": "x-ai/grok-4.20-beta" + "id": "x-ai/grok-4.20" }, { "id": "nvidia/nemotron-3-super-120b-a12b" From 986ec04048b31759de6bda86ab3360e57606f8ed Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 03:05:34 -0700 Subject: [PATCH 091/171] docs: document /kanban slash command (#19584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: document /kanban slash command The kanban user guide and slash-commands reference only mentioned the /kanban slash command in passing. Add a proper section covering: - CLI and gateway both expose the full hermes kanban surface via hermes_cli.kanban.run_slash (identical argument surface) - Mid-run usage: /kanban bypasses the running-agent guard, so reads and writes land immediately while an agent is still in a turn - Auto-subscribe on /kanban create from the gateway — originating chat is subscribed to terminal events, with a worked example - Output truncation (~3800 chars) in messaging - Autocomplete hint list vs full subcommand surface Also adds /kanban rows to both slash-command tables (CLI + messaging) in reference/slash-commands.md and moves it into the 'works in both' notes bucket. * docs(kanban): frame the model's tool surface as primary, CLI as the human surface The kanban user guide and CLI reference read as if you drive the board by running `hermes kanban` commands everywhere. In practice: - **You** (human, scripts, cron, dashboard) use the `hermes kanban …` CLI, the `/kanban …` slash command, or the REST/dashboard. - **Workers** spawned by the dispatcher use a dedicated `kanban_*` toolset (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`) and never shell out to the CLI. Changes to `user-guide/features/kanban.md`: - New 'Two surfaces' intro distinguishes the two front doors up front. - Quick-start section re-labelled so each step says who is running it (you vs. orchestrator vs. worker). - 'How workers interact with the board' rewritten: - Lead with "Workers do not shell out to `hermes kanban`." - Tool table extended with required params. - Concrete worker-turn example (`kanban_show` → `kanban_heartbeat` → `kanban_complete`) and an orchestrator fan-out example (`kanban_create` x N with `parents=[...]`). - Moved 'Why tools not CLI' from a defensive aside to a clean follow-up section. - 'Worker skill' section explicitly says the lifecycle is taught in tool calls, not CLI commands. - 'Pinning extra skills' reordered — orchestrator tool form first (the usual case), human/CLI second, dashboard third. - 'Orchestrator skill' now shows a canonical `kanban_create` / `kanban_link` / `kanban_complete` tool-call sequence instead of only describing what the skill teaches. - CLI-command-reference heading now clarifies this is the human surface, with a cross-link to the tool-surface section. - 'Runs — one row per attempt' structured-handoff example replaced: the primary example is now `kanban_complete(summary=..., metadata=...)` (what a worker actually does), with the CLI form retained as "when you, the human, need to close a task a worker can't." Changes to `reference/cli-commands.md`: - `hermes kanban` intro marks itself as the human / scripting surface and links out to the worker tool surface. - Corrected `comment ` description — the next worker reads it via `kanban_show()`, not by running `hermes kanban show`. * docs(kanban-tutorial): reframe worker actions as tool calls Honest answer to Teknium's follow-up: no, the first pass missed the tutorial. The four stories all showed `hermes kanban claim / complete / block / unblock` as if the backend-dev, pm, and reviewer personas were humans running CLI commands. In a real hermes kanban run those agents are dispatcher-spawned workers driving the board through the `kanban_*` tool surface. Changes: - Setup intro now distinguishes the three surfaces up front (dashboard / CLI for you, `kanban_*` tools for workers) and establishes the convention: `bash` blocks are commands *you* run, `# worker tool calls` blocks are what the agent emits. - Story 1 (solo dev schema): 'Claim the schema task, do the work, hand off' block replaced with the dispatcher spawning the backend-dev worker and a `kanban_show → kanban_heartbeat → kanban_complete` tool-call sequence. The 'On the CLI' `hermes kanban show / runs` block re-labelled as 'you peeking at the board' to keep it correct as a human inspection step. - Story 2 (fleet farming): note about structured handoff updated from `--summary` / `--metadata` CLI flags to `kanban_complete(summary=..., metadata=...)` tool form. - Story 3 (role pipeline): the big PM/engineer/reviewer block fully rewritten as three worker tool-call sequences — PM worker completes spec, engineer worker blocks, human/reviewer `hermes kanban unblock` (or `/kanban unblock`), engineer worker respawns and completes. The respawn-as-new-run mechanic is now explicit. - Reviewer paragraph: `build_worker_context` replaced with `kanban_show()` — that's the tool that delivers the parent handoff to the model. - Structured handoff section heading and body updated: `--summary`/`--metadata` → `summary`/`metadata` (tool params), with a note that the tool surface doesn't expose a bulk variant for the same reason the CLI refuses multi-task `complete`. Story 4 (circuit breaker) unchanged — its workers fail to spawn, so there are no tool calls to show; the `hermes kanban create` and `hermes kanban runs` commands in it are correctly human-driven. --- website/docs/reference/cli-commands.md | 4 +- website/docs/reference/slash-commands.md | 4 +- .../user-guide/features/kanban-tutorial.md | 120 +++++++--- website/docs/user-guide/features/kanban.md | 225 ++++++++++++++---- 4 files changed, 271 insertions(+), 82 deletions(-) diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 862c51606e8..c8a9dd66c5a 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -345,6 +345,8 @@ hermes kanban [options] Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. A `cron`-driven dispatcher (`hermes kanban dispatch`) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace. +**This is the human / scripting surface.** Agent workers spawned by the dispatcher drive the board through a dedicated `kanban_*` [toolset](/docs/user-guide/features/kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_create`, `kanban_link`, `kanban_comment`, `kanban_heartbeat`) instead of shelling to `hermes kanban`. Both surfaces route through the same `kanban_db` layer, so state is consistent either way. + | Action | Purpose | |--------|---------| | `init` | Create `kanban.db` if missing. Idempotent. | @@ -355,7 +357,7 @@ Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode | `link ` | Add a dependency. Cycle-detected. | | `unlink ` | Remove a dependency. | | `claim ` | Atomically claim a ready task. Prints resolved workspace path. | -| `comment ""` | Append a comment. Visible to the next worker that runs the task. | +| `comment ""` | Append a comment. The next worker that claims the task reads it as part of its `kanban_show()` response. | | `complete ` | Mark task done. Flag: `--result ""` (goes into children's parent-result context). | | `block ""` | Mark task blocked. Also appends the reason as a comment. | | `unblock ` | Return a blocked task to ready. | diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index ef566cd5ba4..0e71a932772 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -70,6 +70,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/skills` | Search, install, inspect, or manage skills from online registries | | `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) | | `/curator` | Background skill maintenance — `status`, `run`, `pin`, `archive`. See [Curator](/docs/user-guide/features/curator). | +| `/kanban ` | Drive the multi-profile collaboration board without leaving chat. Full `hermes kanban` surface is available: `/kanban list`, `/kanban show t_abc`, `/kanban create "title" --assignee X`, `/kanban comment t_abc "text"`, `/kanban unblock t_abc`, `/kanban dispatch`, etc. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config.yaml | | `/reload` | Reload `.env` variables into the running session (picks up new API keys without restarting) | | `/plugins` | List installed plugins and their status | @@ -157,6 +158,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/goal ` | Set a standing goal Hermes works toward across turns — our take on the Ralph loop. A judge model checks after each turn; if not done, Hermes auto-continues until it is, you pause/clear it, or the turn budget (default 20) is hit. Subcommands: `/goal status`, `/goal pause`, `/goal resume`, `/goal clear`. Safe to run mid-agent for status/pause/clear; setting a new goal requires `/stop` first. See [Persistent Goals](/docs/user-guide/features/goals). | | `/footer [on\|off\|status]` | Toggle the runtime-metadata footer on final replies (shows model, tool counts, timing). | | `/curator [status\|run\|pin\|archive]` | Background skill maintenance controls. | +| `/kanban ` | Drive the multi-profile collaboration board from chat — identical argument surface to the CLI. Bypasses the running-agent guard, so `/kanban unblock t_abc`, `/kanban comment t_abc "…"`, `/kanban list --mine`, etc. work mid-turn. `/kanban create …` auto-subscribes the originating chat to the new task's terminal events. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config. | | `/yolo` | Toggle YOLO mode — skip all dangerous command approval prompts. | | `/commands [page]` | Browse all commands and skills (paginated). | @@ -173,5 +175,5 @@ The messaging gateway supports the following built-in commands inside Telegram, - `/skin`, `/snapshot`, `/gquota`, `/reload`, `/tools`, `/toolsets`, `/browser`, `/config`, `/cron`, `/skills`, `/platforms`, `/paste`, `/image`, `/statusbar`, `/plugins`, `/busy`, `/indicator`, `/redraw`, `/clear`, `/history`, `/save`, `/copy`, and `/quit` are **CLI-only** commands. - `/verbose` is **CLI-only by default**, but can be enabled for messaging platforms by setting `display.tool_progress_command: true` in `config.yaml`. When enabled, it cycles the `display.tool_progress` mode and saves to config. - `/sethome`, `/update`, `/restart`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands. -- `/status`, `/background`, `/queue`, `/steer`, `/voice`, `/reload-mcp`, `/rollback`, `/debug`, `/fast`, `/footer`, `/curator`, and `/yolo` work in **both** the CLI and the messaging gateway. +- `/status`, `/background`, `/queue`, `/steer`, `/voice`, `/reload-mcp`, `/rollback`, `/debug`, `/fast`, `/footer`, `/curator`, `/kanban`, and `/yolo` work in **both** the CLI and the messaging gateway. - `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord. diff --git a/website/docs/user-guide/features/kanban-tutorial.md b/website/docs/user-guide/features/kanban-tutorial.md index 89b5c1c0247..08f3d7ccc4d 100644 --- a/website/docs/user-guide/features/kanban-tutorial.md +++ b/website/docs/user-guide/features/kanban-tutorial.md @@ -10,7 +10,9 @@ hermes dashboard # opens http://127.0.0.1:9119 in your browser # click Kanban in the left nav ``` -The dashboard is the most comfortable place to learn the system. Everything you see here is also available via `hermes kanban ` on the CLI — the two surfaces share the same SQLite database at `~/.hermes/kanban.db`. +The dashboard is the most comfortable place for **you** to watch the system. Agent workers the dispatcher spawns never see the dashboard or the CLI — they drive the board through a dedicated `kanban_*` [toolset](./kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`). All three surfaces — dashboard, CLI, worker tools — route through the same `~/.hermes/kanban.db`, so the board is consistent no matter which side of the fence a change came from. + +Throughout the tutorial, **code blocks labelled `bash` are commands *you* run.** Code blocks labelled `# worker tool calls` are what the spawned worker's model emits as tool calls — shown here so you can see the loop end-to-end, not because you'd ever run them yourself. ## The board at a glance @@ -57,22 +59,32 @@ hermes kanban create "Write auth integration tests" \ Because `API` has `SCHEMA` as its parent, and `tests` has `API` as its parent, only `SCHEMA` starts in `ready`. The other two sit in `todo` until their parents complete. This is the dependency promotion engine doing its job — no other worker will pick up the test-writing until there's an API to test. -Claim the schema task, do the work, hand off: +On the next dispatcher tick (60s by default, or immediately if you hit **Nudge dispatcher**) the `backend-dev` profile spawns as a worker with `HERMES_KANBAN_TASK=$SCHEMA` in its env. Here's what the worker's tool-call loop looks like from inside the agent: -```bash -hermes kanban claim $SCHEMA +```python +# worker tool calls — NOT commands you run +kanban_show() +# → returns title, body, worker_context, parents, prior attempts, comments -# (you design the schema, commit, etc.) +# (worker reads worker_context, uses terminal/file tools to design the schema, +# write migrations, run its own checks, commit — the real work happens here) -hermes kanban complete $SCHEMA \ - --summary "users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens stored as sessions with type='refresh'" \ - --metadata '{ +kanban_heartbeat(note="schema drafted, writing migrations now") + +kanban_complete( + summary="users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); " + "refresh tokens stored as sessions with type='refresh'", + metadata={ "changed_files": ["migrations/001_users.sql", "migrations/002_sessions.sql"], - "decisions": ["bcrypt for hashing", "JWT for session tokens", "7-day refresh, 15-min access"] - }' + "decisions": ["bcrypt for hashing", "JWT for session tokens", + "7-day refresh, 15-min access"], + }, +) ``` -When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will read `SCHEMA`'s summary and metadata in its context — so it knows the schema decisions without re-reading a long design doc. +`kanban_show` defaults `task_id` to `$HERMES_KANBAN_TASK`, so the worker doesn't need to know its own id. `kanban_complete` writes the summary + metadata onto the current `task_runs` row, closes that run, and transitions the task to `done` — all in one atomic hop through `kanban_db`. + +When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will call `kanban_show()` and see `SCHEMA`'s summary and metadata attached to the parent handoff — so it knows the schema decisions without re-reading a long design doc. Click the completed schema task on the board and the drawer shows everything: @@ -80,7 +92,7 @@ Click the completed schema task on the board and the drawer shows everything: The Run History section at the bottom is the key addition. One attempt: outcome `completed`, worker `@backend-dev`, duration, timestamp, and the handoff summary in full. The metadata blob (`changed_files`, `decisions`) is stored on the run too and surfaced to any downstream worker that reads this parent. -On the CLI: +You can inspect the same data from your terminal at any time — these commands are **you** peeking at the board, not the worker: ```bash hermes kanban show $SCHEMA @@ -125,7 +137,7 @@ Now filter the board to `content-ops` (or just search for "Transcribe") and you Two transcribes done, one running, two ready waiting for the next dispatcher tick. The In Progress column is grouped by profile (the "Lanes by profile" default) so you see each worker's active task without scanning a mixed list. The dispatcher will promote the next ready task to running as soon as the current one completes. With three daemons working on three assignee pools in parallel, the whole content queue drains without further human input. -**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a call can pass `--summary "translated 4 pages, style matched existing marketing voice"` and `--metadata '{"duration_seconds": 720, "tokens_used": 2100}'` — useful for analytics and for any downstream task that depends on this one. +**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a call emits `kanban_complete(summary="translated 4 pages, style matched existing marketing voice", metadata={"duration_seconds": 720, "tokens_used": 2100})` — useful for analytics and for any downstream task that depends on this one. ## Story 3 — Role pipeline with retry @@ -137,32 +149,64 @@ The dashboard view, filtered by `auth-project`: Three-stage chain visible at once: `Spec: password reset flow` (DONE, pm), `Implement password reset flow` (DONE, backend-dev), `Review password reset PR` (READY, reviewer). Each has its parent in green at the bottom and children as dependencies. -The interesting one is the implementation task, because it was blocked and retried: +The interesting one is the implementation task, because it was blocked and retried. Here's the full three-agent choreography, shown as the tool calls each worker's model makes: -```bash -# PM completes the spec with acceptance criteria in metadata -hermes kanban complete $SPEC \ - --summary "spec approved; POST /forgot-password sends email, GET /reset/:token renders form, POST /reset applies new password" \ - --metadata '{"acceptance": [ +```python +# --- PM worker spawns on $SPEC and writes the acceptance criteria --- +# worker tool calls +kanban_show() +kanban_complete( + summary="spec approved; POST /forgot-password sends email, " + "GET /reset/:token renders form, POST /reset applies new password", + metadata={"acceptance": [ "expired token returns 410", "reused last-3 password returns 400 with message", - "successful reset invalidates all active sessions" - ]}' + "successful reset invalidates all active sessions", + ]}, +) +# → $SPEC is done; $IMPL auto-promotes from todo to ready -# Engineer claims + implements, but review blocks it for missing strength check -hermes kanban claim $IMPL -hermes kanban block $IMPL "Review: password strength check missing, reset link isn't single-use (can be replayed within 30min)" +# --- Engineer worker spawns on $IMPL (first attempt) --- +# worker tool calls +kanban_show() # reads $SPEC's summary + acceptance metadata in worker_context +# (engineer writes code, runs tests, opens PR) +# Reviewer feedback arrives — engineer decides the concerns are valid and blocks +kanban_block( + reason="Review: password strength check missing, reset link isn't " + "single-use (can be replayed within 30min)", +) +# → $IMPL transitions to blocked; run 1 closes with outcome='blocked' +``` -# Engineer iterates, resolves, completes +Now you (the human, or a separate reviewer profile) read the block reason, decide the fix direction is clear, and unblock from the dashboard's "Unblock" button — or from the CLI / slash command: + +```bash hermes kanban unblock $IMPL -hermes kanban claim $IMPL -hermes kanban complete $IMPL \ - --summary "added zxcvbn strength check, reset tokens are now single-use (stored + deleted on success)" \ - --metadata '{ - "changed_files": ["auth/reset.py", "auth/tests/test_reset.py", "migrations/003_single_use_reset_tokens.sql"], +# or from a chat: /kanban unblock $IMPL +``` + +The dispatcher promotes `$IMPL` back to `ready` and, on the next tick, respawns the `backend-dev` worker. This second spawn is a **new run** on the same task: + +```python +# --- Engineer worker spawns on $IMPL (second attempt) --- +# worker tool calls +kanban_show() +# → worker_context now includes the run 1 block reason, so this worker knows +# which two things to fix instead of re-reading the whole spec +# (engineer adds zxcvbn check, makes reset tokens single-use, re-runs tests) +kanban_complete( + summary="added zxcvbn strength check, reset tokens are now single-use " + "(stored + deleted on success)", + metadata={ + "changed_files": [ + "auth/reset.py", + "auth/tests/test_reset.py", + "migrations/003_single_use_reset_tokens.sql", + ], "tests_run": 11, - "review_iteration": 2 - }' + "review_iteration": 2, + }, +) ``` Click the implementation task. The drawer shows **two attempts**: @@ -178,7 +222,7 @@ The reviewer picks up next. When they open `Review password reset PR`, they see: ![Reviewer's drawer view of the pipeline](/img/kanban-tutorial/09-drawer-pipeline-review.png) -The parent link is the completed implementation. When the reviewer's worker calls `build_worker_context`, it pulls the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff. +The parent link is the completed implementation. When the reviewer's worker spawns on `Review password reset PR` and calls `kanban_show()`, the returned `worker_context` includes the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff. ## Story 4 — Circuit breaker and crash recovery @@ -234,18 +278,18 @@ The drawer shows the full two-attempt history: Run 1 — `crashed`, with the error `OOM kill at row 2.3M (process 99999 gone)`. Run 2 — `completed`, with `"strategy": "chunked with LIMIT + WHERE id > last_id"` in its metadata. The retrying worker saw the crash of run 1 in its context and picked a safer strategy; the metadata makes it obvious to a future observer (or postmortem writer) what changed. -## Structured handoff — why `--summary` and `--metadata` matter +## Structured handoff — why `summary` and `metadata` matter -In every story above, workers passed `--summary` and `--metadata` on completion. That's not decoration — it's the primary handoff channel between stages of a workflow. +In every story above, workers called `kanban_complete(summary=..., metadata=...)` at the end. That's not decoration — it's the primary handoff channel between stages of a workflow. -When a worker on task B reads its context, it gets: +When a worker on task B is spawned and calls `kanban_show()`, the `worker_context` it gets back includes: - B's **prior attempts** (previous runs: outcome, summary, error, metadata) so a retrying worker doesn't repeat a failed path. - **Parent task results** — for each parent, the most-recent completed run's summary and metadata — so downstream workers see why and how the upstream work was done. -This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally. An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff. +This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally in the parent handoff. An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff. -The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case. +The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` (you, from the CLI) is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case. The tool surface doesn't expose a bulk variant at all; `kanban_complete` is always single-task-at-a-time for the same reason. ## Inspecting a task currently running diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md index 7bbea348552..adf52f4a89d 100644 --- a/website/docs/user-guide/features/kanban.md +++ b/website/docs/user-guide/features/kanban.md @@ -10,6 +10,15 @@ description: "Durable SQLite-backed task board for coordinating multiple Hermes Hermes Kanban is a durable task board, shared across all your Hermes profiles, that lets multiple named agents collaborate on work without fragile in-process subagent swarms. Every task is a row in `~/.hermes/kanban.db`; every handoff is a row anyone can read and write; every worker is a full OS process with its own identity. +### Two surfaces: the model talks through tools, you talk through the CLI + +The board has two front doors, both backed by the same `~/.hermes/kanban.db`: + +- **Agents drive the board through a dedicated `kanban_*` toolset** — `kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`. The dispatcher spawns each worker with these tools already in its schema; the model reads its task and hands work off by calling them directly, *not* by shelling out to `hermes kanban`. See [How workers interact with the board](#how-workers-interact-with-the-board) below. +- **You (and scripts, and cron) drive the board through `hermes kanban …`** on the CLI, `/kanban …` as a slash command, or the dashboard. These are for humans and automation — the places without a tool-calling model behind them. + +Both surfaces route through the same `kanban_db` layer, so reads see a consistent view and writes can't drift. The rest of this page shows CLI examples because they're easy to copy-paste, but every CLI verb has a tool-call equivalent the model uses. + This is the shape that covers the workloads `delegate_task` can't: - **Research triage** — parallel researchers + analyst + writer, human-in-the-loop. @@ -57,24 +66,28 @@ They coexist: a kanban worker may call `delegate_task` internally during its run ## Quick start +The commands below are **you** (the human) setting up the board and creating tasks. Once a task is assigned, the dispatcher spawns the assigned profile as a worker, and from there **the model drives the task through `kanban_*` tool calls, not CLI commands** — see [How workers interact with the board](#how-workers-interact-with-the-board). + ```bash -# 1. Create the board +# 1. Create the board (you) hermes kanban init # 2. Start the gateway (hosts the embedded dispatcher) hermes gateway start -# 3. Create a task +# 3. Create a task (you — or an orchestrator agent via kanban_create) hermes kanban create "research AI funding landscape" --assignee researcher -# 4. Watch activity live +# 4. Watch activity live (you) hermes kanban watch -# 5. See the board +# 5. See the board (you) hermes kanban list hermes kanban stats ``` +When the dispatcher picks up `t_abcd` and spawns the `researcher` profile, the very first thing that worker's model does is call `kanban_show()` to read its task. It doesn't run `hermes kanban show t_abcd`. + ### Gateway-embedded dispatcher (default) The dispatcher runs inside the gateway process. Nothing to install, no @@ -127,22 +140,61 @@ hermes kanban block t_abc "need input" --ids t_def t_hij ## How workers interact with the board -When the dispatcher spawns a worker, it sets `HERMES_KANBAN_TASK` in the child's env. That env var is the gate for a dedicated **kanban toolset** — 7 tools that the normal agent schema never sees: +**Workers do not shell out to `hermes kanban`.** When the dispatcher spawns a worker it sets `HERMES_KANBAN_TASK=t_abcd` in the child's env, and that env var flips on a dedicated **kanban toolset** in the model's schema — seven tools that read and mutate the board directly via the Python `kanban_db` layer, same as the CLI does. A running worker calls these like any other tool; it never sees or needs the `hermes kanban` CLI. -| Tool | Purpose | -|---|---| -| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full `worker_context`). Defaults to the env's task id. | -| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. | -| `kanban_block` | Escalate for human input. | -| `kanban_heartbeat` | Signal liveness during long operations. | -| `kanban_comment` | Append to the task thread. | -| `kanban_create` | (Orchestrators) fan out into child tasks. | -| `kanban_link` | (Orchestrators) add dependency edges after the fact. | +| Tool | Purpose | Required params | +|---|---|---| +| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full pre-formatted `worker_context`). Defaults to the env's task id. | — | +| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. | at least one of `summary` / `result` | +| `kanban_block` | Escalate for human input with a `reason`. | `reason` | +| `kanban_heartbeat` | Signal liveness during long operations. Pure side-effect. | — | +| `kanban_comment` | Append a durable note to the task thread. | `task_id`, `body` | +| `kanban_create` | (Orchestrators) fan out into child tasks with an `assignee`, optional `parents`, `skills`, etc. | `title`, `assignee` | +| `kanban_link` | (Orchestrators) add a `parent_id → child_id` dependency edge after the fact. | `parent_id`, `child_id` | -**Why tools and not just shelling to `hermes kanban`?** Three reasons: +A typical worker turn looks like: -1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` inside the container where `hermes` isn't installed and the DB isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend. -2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it. +``` +# Model's tool calls, in order: +kanban_show() # no args — uses HERMES_KANBAN_TASK +# (model reads the returned worker_context, does the work via terminal/file tools) +kanban_heartbeat(note="halfway through — 4 of 8 files transformed") +# (more work) +kanban_complete( + summary="migrated limiter.py to token-bucket; added 14 tests, all pass", + metadata={"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}, +) +``` + +An **orchestrator** worker fans out instead: + +``` +kanban_show() +kanban_create( + title="research ICP funding 2024-2026", + assignee="researcher-a", + body="focus on seed + series A, North America, AI-adjacent", +) +# → returns {"task_id": "t_r1", ...} +kanban_create(title="research ICP funding — EU angle", assignee="researcher-b", body="…") +# → returns {"task_id": "t_r2", ...} +kanban_create( + title="synthesize findings into launch brief", + assignee="writer", + parents=["t_r1", "t_r2"], # promotes to ready when both complete + body="one-pager, 300 words, neutral tone", +) +kanban_complete(summary="decomposed into 2 research tasks + 1 writer; linked dependencies") +``` + +The three "(Orchestrators)" tools — `kanban_create`, `kanban_link`, and `kanban_comment` on foreign tasks — are available to every worker; the convention (enforced by the `kanban-orchestrator` skill) is that worker profiles don't fan out and orchestrator profiles don't execute. + +### Why tools instead of shelling to `hermes kanban` + +Three reasons: + +1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` *inside* the container, where `hermes` isn't installed and `~/.hermes/kanban.db` isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend. +2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it entirely. 3. **Better errors.** Tool results are structured JSON the model can reason about, not stderr strings it has to parse. **Zero schema footprint on normal sessions.** A regular `hermes chat` session has zero `kanban_*` tools in its schema. The `check_fn` on each tool only returns True when `HERMES_KANBAN_TASK` is set, which only happens when the dispatcher spawned this process. No tool bloat for users who never touch kanban. @@ -151,14 +203,14 @@ The `kanban-worker` and `kanban-orchestrator` skills teach the model which tool ### The worker skill -Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle: +Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle in **tool calls**, not CLI commands: 1. On spawn, call `kanban_show()` to read title + body + parent handoffs + prior attempts + full comment thread. -2. `cd $HERMES_KANBAN_WORKSPACE` and do the work there. +2. `cd $HERMES_KANBAN_WORKSPACE` (via the terminal tool) and do the work there. 3. Call `kanban_heartbeat(note="...")` every few minutes during long operations. 4. Complete with `kanban_complete(summary="...", metadata={...})`, or `kanban_block(reason="...")` if stuck. -Load it with: +Load it with (this one is **you**, installing into a profile — not a tool call): ```bash hermes skills install devops/kanban-worker @@ -168,22 +220,9 @@ The dispatcher also auto-passes `--skills kanban-worker` when spawning every wor ### Pinning extra skills to a specific task -Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task: +Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task. -```bash -# CLI — repeat --skill for each extra skill -hermes kanban create "translate README to Japanese" \ - --assignee linguist \ - --skill translation - -# Multiple skills -hermes kanban create "audit auth flow" \ - --assignee reviewer \ - --skill security-pr-audit \ - --skill github-code-review -``` - -From the dashboard's inline create form, type the skills comma-separated into the **skills** field. From another agent (orchestrator pattern), use `kanban_create(skills=[...])`: +**From an orchestrator agent** (the usual case — one agent routing work to another), use the `kanban_create` tool's `skills` array: ``` kanban_create( @@ -191,13 +230,53 @@ kanban_create( assignee="linguist", skills=["translation"], ) + +kanban_create( + title="audit auth flow", + assignee="reviewer", + skills=["security-pr-audit", "github-code-review"], +) ``` +**From a human (CLI / slash command)**, repeat `--skill` for each one: + +```bash +hermes kanban create "translate README to Japanese" \ + --assignee linguist \ + --skill translation + +hermes kanban create "audit auth flow" \ + --assignee reviewer \ + --skill security-pr-audit \ + --skill github-code-review +``` + +**From the dashboard**, type the skills comma-separated into the **skills** field of the inline create form. + These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills ` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install. ### The orchestrator skill -A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. The `kanban-orchestrator` skill encodes this: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook. +A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. The `kanban-orchestrator` skill encodes this as tool-call patterns: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook keyed on `kanban_create` / `kanban_link` / `kanban_comment`. + +A canonical orchestrator turn (two parallel researchers handing off to a writer): + +``` +# Goal from user: "draft a launch post on the ICP funding landscape" +kanban_create(title="research ICP funding, NA angle", assignee="researcher-a", body="…") # → t_r1 +kanban_create(title="research ICP funding, EU angle", assignee="researcher-b", body="…") # → t_r2 +kanban_create( + title="synthesize ICP funding research into launch post draft", + assignee="writer", + parents=["t_r1", "t_r2"], # promoted to 'ready' when both researchers complete + body="one-pager, neutral tone, cite sources inline", +) # → t_w1 +# Optional: add cross-cutting deps discovered later without re-creating tasks +kanban_link(parent_id="t_r1", child_id="t_followup") +kanban_complete( + summary="decomposed into 2 parallel research tasks → 1 synthesis task; writer starts when both researchers finish", +) +``` Load it into your orchestrator profile: @@ -324,6 +403,8 @@ The GUI is deliberately thin. Everything the plugin does is reachable from the C ## CLI command reference +This is the surface **you** (or scripts, cron, the dashboard) use to drive the board. Workers running inside the dispatcher use the `kanban_*` [tool surface](#how-workers-interact-with-the-board) for the same operations — the CLI here and the tools there both route through `kanban_db`, so the two surfaces agree by construction. + ``` hermes kanban init # create kanban.db + print daemon hint hermes kanban create "" [--body ...] [--assignee <profile>] @@ -369,7 +450,57 @@ hermes kanban gc [--event-retention-days N] # workspaces + old events [--log-retention-days N] ``` -All commands are also available as a slash command in the gateway (`/kanban list`, `/kanban comment t_abc "need docs"`, etc.). The slash command bypasses the running-agent guard, so you can `/kanban unblock` a stuck worker while the main agent is still chatting. +All commands are also available as a slash command in the interactive CLI and in the messaging gateway (see [`/kanban` slash command](#kanban-slash-command) below). + +## `/kanban` slash command {#kanban-slash-command} + +Every `hermes kanban <action>` verb is also reachable as `/kanban <action>` — from inside an interactive `hermes chat` session **and** from any gateway platform (Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Mattermost, email, SMS). Both surfaces call the exact same `hermes_cli.kanban.run_slash()` entry point that reuses the `hermes kanban` argparse tree, so the argument surface, flags, and output format are identical across CLI, `/kanban`, and `hermes kanban`. You don't have to leave the chat to drive the board. + +``` +/kanban list +/kanban show t_abcd +/kanban create "write launch post" --assignee writer --parent t_research +/kanban comment t_abcd "looks good, ship it" +/kanban unblock t_abcd +/kanban dispatch --max 3 +``` + +Quote multi-word arguments the same way you would on a shell — `run_slash` parses the rest of the line with `shlex.split`, so `"..."` and `'...'` both work. + +### Mid-run usage: `/kanban` bypasses the running-agent guard + +The gateway normally queues slash commands and user messages while an agent is still thinking — that's what stops you from accidentally starting a second turn while the first is in flight. **`/kanban` is explicitly exempted from this guard.** The board lives in `~/.hermes/kanban.db`, not in the running agent's state, so reads (`list`, `show`, `context`, `tail`, `watch`, `stats`, `runs`) and writes (`comment`, `unblock`, `block`, `assign`, `archive`, `create`, `link`, …) all go through immediately, even mid-turn. + +This is the whole point of the separation: + +- A worker blocks waiting on a peer → you send `/kanban unblock t_abcd` from your phone and the dispatcher picks the peer up on its next tick. The blocked worker isn't interrupted — it just stops being blocked. +- You spot a card that needs human context → `/kanban comment t_xyz "use the 2026 schema, not 2025"` lands on the task thread and the *next* run of that task will read it in `kanban_show()`. +- You want to know what your fleet is doing without stopping the orchestrator → `/kanban list --mine` or `/kanban stats` inspects the board without touching your main conversation. + +### Auto-subscribe on `/kanban create` (gateway only) + +When you create a task from the gateway with `/kanban create "…"`, the originating chat (platform + chat id + thread id) is automatically subscribed to that task's terminal events (`completed`, `blocked`, `gave_up`, `crashed`, `timed_out`). You'll get one message back per terminal event — including the first line of the worker's result summary on `completed` — without having to poll or remember the task id. + +``` +you> /kanban create "transcribe today's podcast" --assignee transcriber +bot> Created t_9fc1a3 (ready, assignee=transcriber) + (subscribed — you'll be notified when t_9fc1a3 completes or blocks) + +… ~8 minutes later … + +bot> ✓ t_9fc1a3 completed by transcriber + transcribed 42 minutes, saved to podcast/2026-05-04.md +``` + +Subscriptions auto-remove themselves once the task reaches `done` or `archived`. If you script a create with `--json` (machine output) the auto-subscribe is skipped — the assumption is that scripted callers want to manage subscriptions explicitly via `/kanban notify-subscribe`. + +### Output truncation in messaging + +Gateway platforms have practical message-length caps. If `/kanban list`, `/kanban show`, or `/kanban tail` produce more than ~3800 characters of output, the response is truncated with a `… (truncated; use \`hermes kanban …\` in your terminal for full output)` footer. The CLI surface has no such cap. + +### Autocomplete + +In the interactive CLI, typing `/kanban ` and hitting Tab cycles through the built-in subcommand list (`list`, `ls`, `show`, `create`, `assign`, `link`, `unlink`, `claim`, `comment`, `complete`, `block`, `unblock`, `archive`, `tail`, `dispatch`, `context`, `init`, `gc`). The remaining verbs listed in the CLI reference above (`watch`, `stats`, `runs`, `log`, `assignees`, `heartbeat`, `notify-subscribe`, `notify-list`, `notify-unsubscribe`, `daemon`) also work — they're just not in the autocomplete hint list yet. ## Collaboration patterns @@ -424,16 +555,26 @@ A task is a logical unit of work; a **run** is one attempt to execute it. When t Why two tables instead of just mutating the task: you need **full attempt history** for real-world postmortems ("the second reviewer attempt got to approve, the third merged"), and you need a clean place to hang per-attempt metadata — which files changed, which tests ran, which findings a reviewer noted. Those are run facts, not task facts. -Runs are also where **structured handoff** lives. When a worker completes a task it can pass: +Runs are also where **structured handoff** lives. When a worker completes a task (via `kanban_complete(...)`) it can pass: -- `--result "<short log line>"` — goes on the task row as before (for back-compat). -- `--summary "<human handoff>"` — goes on the run; downstream children see it in their `build_worker_context`. -- `--metadata '{"changed_files": [...], "tests_run": 12}'` — JSON dict on the run; children see it serialized alongside the summary. +- `summary` (tool param) / `--summary` (CLI) — human handoff; goes on the run; downstream children see it in their `build_worker_context`. +- `metadata` (tool param) / `--metadata` (CLI) — free-form JSON dict on the run; children see it serialized alongside the summary. +- `result` (tool param) / `--result` (CLI) — short log line that goes on the task row (legacy field, kept for back-compat). Downstream children read the most recent completed run's summary + metadata for each parent. Retrying workers read the prior attempts on their own task (outcome, summary, error) so they don't repeat a path that already failed. +``` +# What a worker actually does — a tool call, from inside the agent loop: +kanban_complete( + summary="implemented token bucket, keys on user_id with IP fallback, all tests pass", + metadata={"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}, + result="rate limiter shipped", +) +``` + +The same handoff is reachable from the CLI when you (the human) need to close out a task a worker can't — e.g. a task that was abandoned, or one you marked done manually from the dashboard: + ```bash -# Worker completes with a structured handoff: hermes kanban complete t_abcd \ --result "rate limiter shipped" \ --summary "implemented token bucket, keys on user_id with IP fallback, all tests pass" \ From 06a6d6967a0489293c479ea843330fc19ea82a89 Mon Sep 17 00:00:00 2001 From: taeng0204 <taeng02@icloud.com> Date: Sat, 2 May 2026 13:49:26 +0900 Subject: [PATCH 092/171] fix(dashboard): defer unknown-route redirect while dashboard plugins load --- web/src/App.tsx | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/web/src/App.tsx b/web/src/App.tsx index 813f48cc5fc..7598e169bc2 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -80,6 +80,25 @@ function RootRedirect() { return <Navigate to="/sessions" replace />; } +function UnknownRouteFallback({ pluginsLoading }: { pluginsLoading: boolean }) { + if (!pluginsLoading) { + return <Navigate to="/sessions" replace />; + } + + return ( + <div + className="flex min-h-[16rem] min-w-0 items-center justify-center" + aria-busy="true" + aria-live="polite" + > + <div className="flex items-center gap-2 text-sm text-muted-foreground"> + <Spinner /> + <span>Loading dashboard plugins…</span> + </div> + </div> + ); +} + const CHAT_NAV_ITEM: NavItem = { path: "/chat", labelKey: "chat", @@ -582,7 +601,9 @@ export default function App() { ))} <Route path="*" - element={<Navigate to="/sessions" replace />} + element={ + <UnknownRouteFallback pluginsLoading={pluginsLoading} /> + } /> </Routes> From 20a06c586f4f8eacb7a48aa25f9494aa33dfa9f1 Mon Sep 17 00:00:00 2001 From: taeng0204 <taeng02@icloud.com> Date: Mon, 4 May 2026 10:02:24 +0900 Subject: [PATCH 093/171] fix(dashboard): render null instead of flashing spinner during plugin load --- web/src/App.tsx | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/web/src/App.tsx b/web/src/App.tsx index 7598e169bc2..7e1ca19f134 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -81,22 +81,11 @@ function RootRedirect() { } function UnknownRouteFallback({ pluginsLoading }: { pluginsLoading: boolean }) { - if (!pluginsLoading) { - return <Navigate to="/sessions" replace />; + if (pluginsLoading) { + // Render nothing during the plugin-load window — a spinner here would just flash. + return null; } - - return ( - <div - className="flex min-h-[16rem] min-w-0 items-center justify-center" - aria-busy="true" - aria-live="polite" - > - <div className="flex items-center gap-2 text-sm text-muted-foreground"> - <Spinner /> - <span>Loading dashboard plugins…</span> - </div> - </div> - ); + return <Navigate to="/sessions" replace />; } const CHAT_NAV_ITEM: NavItem = { From 64b39d835edc158140323b0b2dd7007488341a90 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 03:07:18 -0700 Subject: [PATCH 094/171] chore(release): AUTHOR_MAP entries for Tier 1d salvage batch --- scripts/release.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 3bd491d27cd..245badbe6c6 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -667,6 +667,21 @@ AUTHOR_MAP = { "balyan.sid@gmail.com": "alt-glitch", "xdord@xdorddeMac-mini.local": "foreverxdord", "k2767567815@gmail.com": "QifengKuang", + "88077783+jjjojoj@users.noreply.github.com": "jjjojoj", + "valda@underscore.jp": "valda", + "lling486@163.com": "M3RCUR2Y", + "buraysandro9@gmail.com": "ygd58", + "ideathinklab01-source@users.noreply.github.com": "ideathinklab01-source", + "27987889@qq.com": "zng8418", + "daniuxie88@proton.me": "DaniuXie", + "panchanler@gmail.com": "ChanlerDev", + "252620095+briandevans@users.noreply.github.com": "briandevans", + "141889580+h0tp-ftw@users.noreply.github.com": "h0tp-ftw", + "chinadbo@foxmail.com": "chinadbo", + "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "xyywtt@gmail.com": "xyiy001", + "charliekerfoot@gmail.com": "CharlieKerfoot", + "grey0202@users.noreply.github.com": "Grey0202", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From 9c64d09610509560aee01e7c9e6efd03a3ff9e8a Mon Sep 17 00:00:00 2001 From: jjjojoj <88077783+jjjojoj@users.noreply.github.com> Date: Mon, 27 Apr 2026 00:39:52 +0800 Subject: [PATCH 095/171] fix(status): show NVIDIA NIM api key status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hermes status was missing NVIDIA API key from its API keys display. Now shows NVIDIA NIM ✓/✗ with key hash like other providers. Fixes #16082 --- hermes_cli/status.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 38b22a03eb7..3a4219fd4b5 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -140,6 +140,7 @@ def show_status(args): "WandB": "WANDB_API_KEY", "ElevenLabs": "ELEVENLABS_API_KEY", "GitHub": "GITHUB_TOKEN", + "NVIDIA NIM": "NVIDIA_API_KEY", } for name, env_var in keys.items(): From 089694438204524fb300162b08b2ed0f901235a2 Mon Sep 17 00:00:00 2001 From: YAMAGUCHI Seiji <valda@underscore.jp> Date: Sat, 25 Apr 2026 11:23:47 +0900 Subject: [PATCH 096/171] fix(cronjob): advertise 'custom:<name>' provider format in tool schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `provider` field in CRONJOB_SCHEMA only showed examples like 'openrouter' and 'anthropic', with no mention of the canonical 'custom:<name>' form required for custom_providers entries. When the user has custom providers configured, LLMs tend to write the bare type name ('custom') because the schema does not advertise the ':<name>' suffix. The bare value then serializes into jobs.json and causes the cron job to fail silently at run time — `_resolve_model_override` treats it as a user-specified provider and skips the pin-to-current fallback, but no provider ever resolves from the bare 'custom' string. Clarifying the schema so the canonical form is discoverable addresses the root cause at the tool-definition boundary. --- tools/cronjob_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 53e778a7dbf..d0ef994aeca 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -513,7 +513,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr "properties": { "provider": { "type": "string", - "description": "Provider name (e.g. 'openrouter', 'anthropic'). Omit to use and pin the current provider." + "description": "Provider name (e.g. 'openrouter', 'anthropic', or 'custom:<name>' for a provider defined in custom_providers config — always include the ':<name>' suffix, never pass the bare 'custom'). Omit to use and pin the current provider." }, "model": { "type": "string", From 145a38a875c0d352c4e14e91ce9992b330240476 Mon Sep 17 00:00:00 2001 From: LLing486 <lling486@163.com> Date: Mon, 27 Apr 2026 01:35:05 +0800 Subject: [PATCH 097/171] fix(agent): preserve dots in model names for Xiaomi MiMo provider Add 'xiaomi' to the _anthropic_preserve_dots() provider whitelist and 'xiaomimimo.com' to the URL-based fallback check. Without this, normalize_model_name() converts mimo-v2.5 to mimo-v2-5, which the Xiaomi API rejects with HTTP 400. Fixes #16156 --- run_agent.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run_agent.py b/run_agent.py index 6d604f9b599..e3823551dc1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8203,6 +8203,7 @@ class AIAgent: """True when using an anthropic-compatible endpoint that preserves dots in model names. Alibaba/DashScope keeps dots (e.g. qwen3.5-plus). MiniMax keeps dots (e.g. MiniMax-M2.7). + Xiaomi MiMo keeps dots (e.g. mimo-v2.5, mimo-v2.5-pro). OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free). ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1). AWS Bedrock uses dotted inference-profile IDs @@ -8216,6 +8217,7 @@ class AIAgent: "alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai", "bedrock", + "xiaomi", }: return True base = (getattr(self, "base_url", "") or "").lower() @@ -8225,6 +8227,7 @@ class AIAgent: or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base + or "xiaomimimo.com" in base # AWS Bedrock runtime endpoints — defense-in-depth when # ``provider`` is unset but ``base_url`` still names Bedrock. or "bedrock-runtime." in base From 2d3d1d97361371f519bb369b6618edb67f7aa87e Mon Sep 17 00:00:00 2001 From: ygd58 <buraysandro9@gmail.com> Date: Sun, 26 Apr 2026 17:45:10 +0200 Subject: [PATCH 098/171] fix(tui): use --outdir instead of --outfile in hermes-ink build script esbuild raises 'Must use outdir when there are multiple input files' on Android/Termux ARM64 with esbuild >=0.25. The build script used --outfile=dist/ink-bundle.js which is only valid for a single entry point with no code splitting. Switching to --outdir=dist fixes the error and names the output file dist/entry-exports.js (matching the input file name). Update index.js to import from the new path. Fixes #16072 --- ui-tui/packages/hermes-ink/index.js | 2 +- ui-tui/packages/hermes-ink/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ui-tui/packages/hermes-ink/index.js b/ui-tui/packages/hermes-ink/index.js index 758fef3073d..8c0fa9c5b50 100644 --- a/ui-tui/packages/hermes-ink/index.js +++ b/ui-tui/packages/hermes-ink/index.js @@ -1 +1 @@ -export * from './dist/ink-bundle.js' +export * from './dist/entry-exports.js' diff --git a/ui-tui/packages/hermes-ink/package.json b/ui-tui/packages/hermes-ink/package.json index 8e234913101..8df3c02a4a5 100644 --- a/ui-tui/packages/hermes-ink/package.json +++ b/ui-tui/packages/hermes-ink/package.json @@ -4,7 +4,7 @@ "private": true, "type": "module", "scripts": { - "build": "esbuild src/entry-exports.ts --bundle --platform=node --format=esm --packages=external --outfile=dist/ink-bundle.js" + "build": "esbuild src/entry-exports.ts --bundle --platform=node --format=esm --packages=external --outdir=dist" }, "sideEffects": true, "main": "./index.js", From d17eff29d5fe6a09e0a0cef49df999d4a70eb073 Mon Sep 17 00:00:00 2001 From: ideathinklab01-source <ideathinklab01-source@users.noreply.github.com> Date: Mon, 27 Apr 2026 11:48:01 +0800 Subject: [PATCH 099/171] fix(delegate): guard _load_config() against delegation: null in config.yaml YAML parses `delegation: null` as Python None. `dict.get(key, {})` only uses the default when the key is *missing*, not when it exists with a None value, so `cfg.get("max_concurrent_children")` crashes with `'NoneType' object has no attribute 'get'`. Same pattern as fd9b692d (fix(tui): tolerate null top-level sections). Use `dict.get(key) or {}` to handle both missing and None-valued keys. Closes: delegation null config crash (same class as #7215, #7346) --- tools/delegate_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index d987385252f..56556316625 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -2337,7 +2337,7 @@ def _load_config() -> dict: try: from cli import CLI_CONFIG - cfg = CLI_CONFIG.get("delegation", {}) + cfg = CLI_CONFIG.get("delegation") or {} if cfg: return cfg except Exception: @@ -2346,7 +2346,7 @@ def _load_config() -> dict: from hermes_cli.config import load_config full = load_config() - return full.get("delegation", {}) + return full.get("delegation") or {} except Exception: return {} From d2ea959fe9a86c0d3d90d552e35b191c67fcc8db Mon Sep 17 00:00:00 2001 From: zng8418 <27987889@qq.com> Date: Sat, 25 Apr 2026 12:38:51 +0800 Subject: [PATCH 100/171] fix(doctor): skip /models health check for MiniMax CN (returns 404) MiniMax China (api.minimaxi.com) does not expose a /v1/models endpoint. The doctor command was probing it and reporting HTTP 404 as a warning, even though the API works correctly for chat completions. Set supports_health_check=False for MiniMax CN so doctor shows "(key configured)" instead of the false 404 warning. Refs #12768, #13757 --- hermes_cli/doctor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 122ed141cc7..7189b42ef4a 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -1096,9 +1096,10 @@ def run_doctor(args): ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), - # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. + # MiniMax global: /v1 endpoint supports /models. ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), - ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True), + # MiniMax CN: /v1 endpoint does NOT support /models (returns 404). + ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", False), ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True), ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True), From a45bd28598cc4bf24be3025de6fd8814f210945d Mon Sep 17 00:00:00 2001 From: DaniuXie <daniuxie88@proton.me> Date: Sun, 26 Apr 2026 23:10:19 +0800 Subject: [PATCH 101/171] fix(wecom): set SUPPORTS_MESSAGE_EDITING=False to prevent broken streaming --- gateway/platforms/wecom.py | 1 + tests/gateway/test_wecom.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 453b95a7178..873284de796 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -142,6 +142,7 @@ class WeComAdapter(BasePlatformAdapter): """WeCom AI Bot adapter backed by a persistent WebSocket connection.""" MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH + SUPPORTS_MESSAGE_EDITING = False # Threshold for detecting WeCom client-side message splits. # When a chunk is near the 4000-char limit, a continuation is almost certain. _SPLIT_THRESHOLD = 3900 diff --git a/tests/gateway/test_wecom.py b/tests/gateway/test_wecom.py index 3c4ec357bca..18de405e393 100644 --- a/tests/gateway/test_wecom.py +++ b/tests/gateway/test_wecom.py @@ -36,6 +36,11 @@ class TestWeComRequirements: class TestWeComAdapterInit: + def test_declares_non_editable_message_capability(self): + from gateway.platforms.wecom import WeComAdapter + + assert WeComAdapter.SUPPORTS_MESSAGE_EDITING is False + def test_reads_config_from_extra(self): from gateway.platforms.wecom import WeComAdapter From 6b88f46c54ac6e43a8a1ba40b7f5175cc7be5f7f Mon Sep 17 00:00:00 2001 From: pander <> Date: Mon, 27 Apr 2026 00:03:25 +0800 Subject: [PATCH 102/171] fix(compressor): trigger fallback on timeout errors alongside model-not-found Previously only HTTP 404/503 and specific error strings triggered a fallback to the main model when the summary model was unavailable. Timeout errors (HTTP 408/429/502/504, or error strings containing 'timeout') entered a short cooldown instead, leaving context to grow unbounded for the rest of the session. Add _is_timeout detection alongside _is_model_not_found so that transient timeout errors on the summary model also trigger immediate fallback to the main model, preventing compression failure from cascading. Closes #15935 --- agent/context_compressor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 6c177b90998..44d54d530c3 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -906,15 +906,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio or "does not exist" in _err_str or "no available channel" in _err_str ) + _is_timeout = ( + _status in (408, 429, 502, 504) + or "timeout" in _err_str + ) if ( - _is_model_not_found + (_is_model_not_found or _is_timeout) and self.summary_model and self.summary_model != self.model and not getattr(self, "_summary_model_fallen_back", False) ): self._summary_model_fallen_back = True logging.warning( - "Summary model '%s' not available (%s). " + "Summary model '%s' unavailable (%s). " "Falling back to main model '%s' for compression.", self.summary_model, e, self.model, ) From cba86b7303fa8f9470dd68daa0c6126c6d8760b8 Mon Sep 17 00:00:00 2001 From: YAMAGUCHI Seiji <valda@underscore.jp> Date: Sat, 25 Apr 2026 11:50:17 +0900 Subject: [PATCH 103/171] fix(cronjob): treat bare 'custom' provider as unspecified in override MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_resolve_model_override` treated any non-empty `provider` string from the LLM as user-specified and skipped the pin-to-current-provider fallback. When the LLM wrote bare `'custom'` (instead of the canonical `'custom:<name>'` referring to a custom_providers entry), the value serialized into jobs.json as `"provider": "custom"` and the scheduler could never resolve a provider from it — the cron job failed silently at run time. Treat bare `'custom'` as "no provider supplied" so the current main provider gets pinned instead, matching behaviour for the omitted case. Defence-in-depth complement to a schema-description fix (#15477) that discourages the LLM from emitting bare `'custom'` in the first place. --- tools/cronjob_tools.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index d0ef994aeca..ec4b41b3c7c 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -128,6 +128,15 @@ def _resolve_model_override(model_obj: Optional[Dict[str, Any]]) -> tuple: return (None, None) model_name = (model_obj.get("model") or "").strip() or None provider_name = (model_obj.get("provider") or "").strip() or None + # Bare "custom" is an incomplete spec — the canonical form is + # "custom:<name>" matching a custom_providers entry. LLMs frequently + # supply the bare type because the schema does not advertise the + # ":<name>" suffix, which used to bypass the pinning path below and + # leave the job stored with an unresolvable "custom" provider. Treat + # the bare value as "no provider supplied" so the current main + # provider gets pinned instead. + if provider_name == "custom": + provider_name = None if model_name and not provider_name: # Pin to the current main provider so the job is stable try: From e3461e0b2ac7aaef91dc0ab4103e0e52341056f1 Mon Sep 17 00:00:00 2001 From: ChanlerDev <panchanler@gmail.com> Date: Mon, 27 Apr 2026 02:44:48 +0800 Subject: [PATCH 104/171] fix(cli): remove dead 'q' check from quit command resolution The 'q' alias is defined for 'queue' command in commands.py:93. The hardcoded 'q' in cli.py:5910 was dead code - resolve_command('q') returns the queue CommandDef, so canonical would never be 'q'. Removes the misleading check without changing any behavior: - /quit and /exit still exit (defined aliases) - /q still maps to queue (as intended) --- cli.py | 2 +- tests/cli/test_cli_init.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cli.py b/cli.py index 5f76675b38d..617e5c15b17 100644 --- a/cli.py +++ b/cli.py @@ -6282,7 +6282,7 @@ class HermesCLI: _cmd_def = _resolve_cmd(_base_word) canonical = _cmd_def.name if _cmd_def else _base_word - if canonical in ("quit", "exit", "q"): + if canonical in ("quit", "exit"): return False elif canonical == "help": self.show_help() diff --git a/tests/cli/test_cli_init.py b/tests/cli/test_cli_init.py index e0fa9e4c23a..d2d6398b969 100644 --- a/tests/cli/test_cli_init.py +++ b/tests/cli/test_cli_init.py @@ -123,6 +123,13 @@ class TestBusyInputMode: cli.process_command("/queue follow up") assert cli._pending_input.get_nowait() == "follow up" + def test_q_alias_queues_prompt(self): + """The /q alias should resolve to /queue, not /quit.""" + cli = _make_cli() + cli._agent_running = False + assert cli.process_command("/q follow up") is True + assert cli._pending_input.get_nowait() == "follow up" + def test_queue_mode_routes_busy_enter_to_pending(self): """In queue mode, Enter while busy should go to _pending_input, not _interrupt_queue.""" cli = _make_cli(config_overrides={"display": {"busy_input_mode": "queue"}}) From c8ecb56f27b034187ce8dd24156497997d247c76 Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:44:28 -0700 Subject: [PATCH 105/171] fix(cli): reject invalid argv values from -p/--profile before resolving MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_apply_profile_override()` scans `sys.argv` for `-p / --profile` at module import time. When `hermes_cli.main` is imported inside pytest with `-p no:xdist` on the command line, it picks up `'no:xdist'` as a profile name candidate, then passes it to `resolve_profile_env()` which raises `ValueError` (invalid format), and the function calls `sys.exit(1)` — aborting test collection with an INTERNALERROR before any test runs. The same conflict affects any tool or wrapper that uses `-p` for its own flag and then imports `hermes_cli.main`. Fix: add a format guard immediately after step 1 (explicit flag scan). If `consume == 2` (the value came from `-p <value>`, not `--profile=value`) and the candidate doesn't match the canonical profile-name pattern `[a-z0-9][a-z0-9_-]{0,63}` (mirrored from `hermes_cli.profiles._PROFILE_ID_RE`), discard it and continue as if no `-p` flag was found. The `active_profile` file-based fallback (step 2) only reads a file written by hermes itself, so it always produces valid names and needs no guard. Regression guard: with the guard reverted, importing `hermes_cli.main` with `sys.argv = ['pytest', '-p', 'no:xdist', ...]` raises `SystemExit(1)`. With the guard in place, the import succeeds and `sys.argv` is left intact for pytest. Legitimate `-p coder` still flows through to `resolve_profile_env()` unchanged. Rebased onto current `origin/main` (`e5dad4ac5`) — the prior branch base (`4fade39c9`) was 824 commits behind and the PR was DIRTY / CONFLICTING. The 1.5 HERMES_HOME-set early-return block has since landed between the original insertion point and step 2; the new guard is positioned correctly before the early return so a bogus `-p` value no longer prevents the early return from kicking in. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- hermes_cli/main.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 6c2544e9059..89cc2e40d98 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -114,6 +114,16 @@ def _apply_profile_override() -> None: consume = 1 break + # 1b. Reject values that can't be valid profile names (e.g. pytest's + # "-p no:xdist" would be misread as profile "no:xdist" otherwise). + # Mirrors hermes_cli.profiles._PROFILE_ID_RE so we never call + # resolve_profile_env() with a value it must reject + sys.exit on. + if profile_name is not None and consume == 2: + import re as _re + if not _re.match(r"^[a-z0-9][a-z0-9_-]{0,63}$", profile_name): + profile_name = None + consume = 0 + # 1.5 If HERMES_HOME is already set and no explicit flag was given, trust it. # This lets child processes (relaunch, subprocess) inherit the parent's # profile choice without having to pass --profile again. From 46072425fe286407b552ae25dd9e808bcff948ea Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 26 Apr 2026 13:48:16 +0530 Subject: [PATCH 106/171] fix(model-picker): exclude providers with empty credential pool entries The auth check in list_authenticated_providers used mere key presence in credential_pool to conclude a provider is authenticated. An empty entry (pool_store key with no actual credentials) caused providers like ollama-cloud to appear as authenticated in the model picker even when no OLLAMA_API_KEY was set. The user's picker then offered nemotron-3-super under Ollama Cloud; selecting it routed every subsequent turn to https://ollama.com/v1, which rejected the requests with HTTP 400. Fix: drop the pool_store key-existence check from both section 2 (HERMES_OVERLAYS) and section 2b (CANONICAL_PROVIDERS). The following load_pool().has_credentials() call already handles the legitimate pooled- credential case; checking for an empty key just ahead of it was redundant and actively harmful. --- hermes_cli/model_switch.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 61693b73947..c7edca0a07d 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -1264,11 +1264,7 @@ def list_authenticated_providers( from hermes_cli.auth import _load_auth_store store = _load_auth_store() providers_store = store.get("providers", {}) - pool_store = store.get("credential_pool", {}) - if store and ( - pid in providers_store or hermes_slug in providers_store - or pid in pool_store or hermes_slug in pool_store - ): + if store and (pid in providers_store or hermes_slug in providers_store): has_creds = True except Exception as exc: logger.debug("Auth store check failed for %s: %s", pid, exc) @@ -1364,11 +1360,7 @@ def list_authenticated_providers( from hermes_cli.auth import _load_auth_store _cp_store = _load_auth_store() _cp_providers_store = _cp_store.get("providers", {}) - _cp_pool_store = _cp_store.get("credential_pool", {}) - if _cp_store and ( - _cp.slug in _cp_providers_store - or _cp.slug in _cp_pool_store - ): + if _cp_store and _cp.slug in _cp_providers_store: _cp_has_creds = True except Exception: pass From e69d11d30c9d24de3d7a39551679471c40daa6be Mon Sep 17 00:00:00 2001 From: xyiy001 <xyywtt@gmail.com> Date: Sun, 26 Apr 2026 21:52:45 +0800 Subject: [PATCH 107/171] fix(browser): allow CDP override to pass requirement checks Treat explicit CDP override mode as a valid browser backend even when agent-browser is absent, and add a regression test to prevent false-negative availability gating. --- tests/tools/test_browser_homebrew_paths.py | 7 +++++++ tools/browser_tool.py | 7 ++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/tools/test_browser_homebrew_paths.py b/tests/tools/test_browser_homebrew_paths.py index eb4a699851c..221d2e6602a 100644 --- a/tests/tools/test_browser_homebrew_paths.py +++ b/tests/tools/test_browser_homebrew_paths.py @@ -209,6 +209,13 @@ class TestFindAgentBrowser: class TestBrowserRequirements: + def test_cdp_override_does_not_require_agent_browser_cli(self, monkeypatch): + monkeypatch.setenv("BROWSER_CDP_URL", "ws://127.0.0.1:9222/devtools/browser/test") + monkeypatch.setattr("tools.browser_tool._is_camofox_mode", lambda: False) + monkeypatch.setattr("tools.browser_tool._find_agent_browser", lambda: (_ for _ in ()).throw(FileNotFoundError("not found"))) + + assert check_browser_requirements() is True + def test_termux_requires_real_agent_browser_install_not_npx_fallback(self, monkeypatch): monkeypatch.setenv("TERMUX_VERSION", "0.118.3") monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") diff --git a/tools/browser_tool.py b/tools/browser_tool.py index f9ca1a0af1a..768cec7f714 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2840,7 +2840,12 @@ def check_browser_requirements() -> bool: if _is_camofox_mode(): return True - # The agent-browser CLI is always required + # CDP override mode can connect to an existing remote/local browser endpoint + # without requiring the local agent-browser binary on PATH. + if _get_cdp_override(): + return True + + # The agent-browser CLI is required for local launch and cloud-provider flows. try: browser_cmd = _find_agent_browser() except FileNotFoundError: From 055fde40e0470ead938678702135a066008f960b Mon Sep 17 00:00:00 2001 From: ms-alan <chenb19870707@gmail.com> Date: Mon, 27 Apr 2026 00:02:56 +0800 Subject: [PATCH 108/171] fix(doctor): check global agent-browser when local install not found When agent-browser is globally installed via 'npm install -g agent-browser' but not present in the local node_modules, doctor falsely warns that it's not installed. Add shutil.which('agent-browser') as a fallback check after the local path check. Closes #15951 --- hermes_cli/doctor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 7189b42ef4a..446f576a612 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -935,6 +935,8 @@ def run_doctor(args): agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser" if agent_browser_path.exists(): check_ok("agent-browser (Node.js)", "(browser automation)") + elif shutil.which("agent-browser"): + check_ok("agent-browser", "(browser automation)") else: if _is_termux(): check_info("agent-browser is not installed (expected in the tested Termux path)") From f720751d796aa0b4658a5bce49995bff2b7a9954 Mon Sep 17 00:00:00 2001 From: Exx <exx@example.com> Date: Mon, 4 May 2026 06:20:19 +0000 Subject: [PATCH 109/171] feat(cli,gateway): /new accepts optional session name argument Allow users to start a fresh session and immediately set its title by passing a name to /new (or /reset): /new Refactor auth module Changes: - hermes_cli/commands.py: add args_hint='[name]' to /new command - cli.py: parse title argument in process_command(), pass to new_session() - cli.py: new_session() accepts title=None, sets title via SessionDB - gateway/run.py: _handle_reset_command() parses title, sets on new entry - gateway/session.py: reset_session() accepts optional display_name - tests: add test_new_session_with_title, test_reset_command_with_title, test_new_command_in_help_output All 36 affected tests pass. --- cli.py | 20 ++++++- gateway/run.py | 11 ++++ gateway/session.py | 4 +- hermes_cli/commands.py | 2 +- tests/cli/test_cli_new_session.py | 21 ++++++- tests/gateway/test_title_command.py | 89 ++++++++++++++++++++++++++++- 6 files changed, 138 insertions(+), 9 deletions(-) diff --git a/cli.py b/cli.py index 617e5c15b17..f426fab2bd7 100644 --- a/cli.py +++ b/cli.py @@ -4932,7 +4932,7 @@ class HermesCLI: except Exception: pass - def new_session(self, silent=False): + def new_session(self, silent=False, title=None): """Start a fresh session with a new session ID and cleared agent state.""" if self.agent and self.conversation_history: # Trigger memory extraction on the old session before session_id rotates. @@ -4987,6 +4987,15 @@ class HermesCLI: self.agent._session_db_created = True except Exception: pass + if title and self._session_db: + try: + from hermes_state import SessionDB + sanitized = SessionDB.sanitize_title(title) + if sanitized: + self._session_db.set_session_title(self.session_id, sanitized) + self._pending_title = None + except Exception: + pass # Notify memory providers that session_id rotated to a fresh # conversation. reset=True signals providers to flush accumulated # per-session state (_session_turns, _turn_counter, _document_id). @@ -5006,7 +5015,10 @@ class HermesCLI: self._notify_session_boundary("on_session_reset") if not silent: - print("(^_^)v New session started!") + if title: + print(f"(^_^)v New session started: {title}") + else: + print("(^_^)v New session started!") def _handle_resume_command(self, cmd_original: str) -> None: """Handle /resume <session_id_or_title> — switch to a previous session mid-conversation.""" @@ -6418,7 +6430,9 @@ class HermesCLI: else: _cprint(" Session database not available.") elif canonical == "new": - self.new_session() + parts = cmd_original.split(maxsplit=1) + title = parts[1].strip() if len(parts) > 1 else None + self.new_session(title=title) elif canonical == "resume": self._handle_resume_command(cmd_original) elif canonical == "model": diff --git a/gateway/run.py b/gateway/run.py index f023b0d3497..8c7863c07a6 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6896,6 +6896,17 @@ class GatewayRunner: new_entry = self.session_store.get_or_create_session(source, force_new=True) header = "✨ New session started!" + # Set session title if provided with /new <title> + _title_arg = event.get_command_args().strip() + if _title_arg and self._session_db and new_entry: + try: + from hermes_state import SessionDB + sanitized = SessionDB.sanitize_title(_title_arg) + if sanitized: + self._session_db.set_session_title(new_entry.session_id, sanitized) + except Exception: + pass + # Fire plugin on_session_reset hook (new session guaranteed to exist) try: from hermes_cli.plugins import invoke_hook as _invoke_hook diff --git a/gateway/session.py b/gateway/session.py index 3129f7a325e..16de296e0e7 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -1121,7 +1121,7 @@ class SessionStore: self._save() return count - def reset_session(self, session_key: str) -> Optional[SessionEntry]: + def reset_session(self, session_key: str, display_name: Optional[str] = None) -> Optional[SessionEntry]: """Force reset a session, creating a new session ID.""" db_end_session_id = None db_create_kwargs = None @@ -1145,7 +1145,7 @@ class SessionStore: created_at=now, updated_at=now, origin=old_entry.origin, - display_name=old_entry.display_name, + display_name=display_name if display_name is not None else old_entry.display_name, platform=old_entry.platform, chat_type=old_entry.chat_type, is_fresh_reset=True, diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 827e7592c47..c7ddfa0fa05 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -64,7 +64,7 @@ class CommandDef: COMMAND_REGISTRY: list[CommandDef] = [ # Session CommandDef("new", "Start a new session (fresh session ID + history)", "Session", - aliases=("reset",)), + aliases=("reset",), args_hint="[name]"), CommandDef("clear", "Clear screen and start a new session", "Session", cli_only=True), CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session", diff --git a/tests/cli/test_cli_new_session.py b/tests/cli/test_cli_new_session.py index 63d07d26d22..b2763d9b4fb 100644 --- a/tests/cli/test_cli_new_session.py +++ b/tests/cli/test_cli_new_session.py @@ -5,7 +5,7 @@ from __future__ import annotations import importlib import os import sys -from datetime import timedelta +from datetime import datetime, timedelta from unittest.mock import MagicMock, patch from hermes_state import SessionDB @@ -219,3 +219,22 @@ def test_new_session_resets_token_counters(tmp_path): assert comp.last_total_tokens == 0 assert comp.compression_count == 0 assert comp._context_probed is False + + +def test_new_session_with_title(capsys): + """new_session(title=...) creates a session and sets the title.""" + cli = _make_cli() + cli._session_db = MagicMock() + cli.agent = _FakeAgent("old_session_id", datetime.now()) + cli.conversation_history = [] + + cli.new_session(title="My Test Session") + + # Assert set_session_title was called with the new session ID and sanitized title + cli._session_db.set_session_title.assert_called_once() + call_args = cli._session_db.set_session_title.call_args + assert call_args[0][0] == cli.session_id + assert call_args[0][1] == "My Test Session" + + captured = capsys.readouterr() + assert "My Test Session" in captured.out diff --git a/tests/gateway/test_title_command.py b/tests/gateway/test_title_command.py index d5bad6c57a6..4a57771e7dd 100644 --- a/tests/gateway/test_title_command.py +++ b/tests/gateway/test_title_command.py @@ -5,11 +5,12 @@ across all gateway messenger platforms. """ import os -from unittest.mock import MagicMock, patch +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch import pytest -from gateway.config import Platform +from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.platforms.base import MessageEvent from gateway.session import SessionSource @@ -206,3 +207,87 @@ class TestTitleInHelp: import inspect source = inspect.getsource(GatewayRunner._handle_message) assert '"title"' in source + + +# --------------------------------------------------------------------------- +# /new with title +# --------------------------------------------------------------------------- + + +class TestResetCommandWithTitle: + """Tests for GatewayRunner._handle_reset_command with a title argument.""" + + @pytest.mark.asyncio + async def test_reset_command_with_title(self): + """Sending /new <title> resets session and sets the title.""" + from datetime import datetime + + from gateway.run import GatewayRunner + from gateway.session import SessionEntry, SessionSource, build_session_key + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + adapter = MagicMock() + adapter.send = AsyncMock() + runner.adapters = {Platform.TELEGRAM: adapter} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) + runner._session_model_overrides = {} + runner._pending_model_notes = {} + runner._background_tasks = set() + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="12345", + chat_id="67890", + user_name="testuser", + ) + session_key = build_session_key(source) + new_session_entry = SessionEntry( + session_key=session_key, + session_id="sess-new", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = new_session_entry + runner.session_store.reset_session.return_value = new_session_entry + runner.session_store._entries = {session_key: new_session_entry} + runner.session_store._generate_session_key.return_value = session_key + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = MagicMock() + runner._agent_cache = {} + runner._agent_cache_lock = None + runner._is_user_authorized = lambda _source: True + runner._format_session_info = lambda: "" + + event = _make_event(text="/new Custom Name") + result = await runner._handle_reset_command(event) + + runner.session_store.reset_session.assert_called_once() + runner._session_db.set_session_title.assert_called_once_with( + "sess-new", "Custom Name" + ) + + +# --------------------------------------------------------------------------- +# /new in help output +# --------------------------------------------------------------------------- + + +class TestNewInHelp: + """Verify /new appears in help text with the [name] args hint.""" + + def test_new_command_in_help_output(self): + """The gateway help output includes /new with the [name] hint.""" + from hermes_cli.commands import gateway_help_lines + lines = gateway_help_lines() + new_line = next((line for line in lines if line.startswith("`/new ")), None) + assert new_line is not None + assert "[name]" in new_line From 5b6d4134765ecfd284a63f3f12d3d3eecc0beaaf Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 02:38:24 -0700 Subject: [PATCH 110/171] fix(cli,gateway): surface title errors from /new <name> The contributor's PR silently swallowed ValueError from SessionDB.set_session_title() with bare except Exception: pass. Users typing /new <title> with an already-in-use title got an untitled session and no feedback. Changes: - cli.py: catch ValueError from both sanitize_title() and set_session_title(); print the error and mark the session untitled in the banner (never echo the rejected title back). - gateway/run.py: append a warning note to the reset reply on title rejection; reflect the accepted title in the header. - Add regression tests for the duplicate-title path in CLI and gateway. Also map exx@example.com -> @exxmen in scripts/release.py. --- cli.py | 21 ++++++++-- gateway/run.py | 20 +++++++-- scripts/release.py | 1 + tests/cli/test_cli_new_session.py | 37 ++++++++++++++++ tests/gateway/test_title_command.py | 65 +++++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 8 deletions(-) diff --git a/cli.py b/cli.py index f426fab2bd7..e8c804a9e89 100644 --- a/cli.py +++ b/cli.py @@ -4988,14 +4988,27 @@ class HermesCLI: except Exception: pass if title and self._session_db: + from hermes_state import SessionDB try: - from hermes_state import SessionDB sanitized = SessionDB.sanitize_title(title) - if sanitized: + except ValueError as e: + _cprint(f" Title rejected: {e}") + sanitized = None + title = None + if sanitized: + try: self._session_db.set_session_title(self.session_id, sanitized) self._pending_title = None - except Exception: - pass + title = sanitized + except ValueError as e: + _cprint(f" {e} — session started untitled.") + title = None + except Exception: + title = None + elif title is not None: + # sanitize_title returned empty (whitespace-only / unprintable) + _cprint(" Title is empty after cleanup — session started untitled.") + title = None # Notify memory providers that session_id rotated to a fresh # conversation. reset=True signals providers to flush accumulated # per-session state (_session_turns, _turn_counter, _document_id). diff --git a/gateway/run.py b/gateway/run.py index 8c7863c07a6..4ca4711cdd0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6898,14 +6898,26 @@ class GatewayRunner: # Set session title if provided with /new <title> _title_arg = event.get_command_args().strip() + _title_note = "" if _title_arg and self._session_db and new_entry: + from hermes_state import SessionDB try: - from hermes_state import SessionDB sanitized = SessionDB.sanitize_title(_title_arg) - if sanitized: + except ValueError as e: + sanitized = None + _title_note = f"\n⚠️ Title rejected: {e}" + if sanitized: + try: self._session_db.set_session_title(new_entry.session_id, sanitized) - except Exception: - pass + header = f"✨ New session started: {sanitized}" + except ValueError as e: + _title_note = f"\n⚠️ {e} — session started untitled." + except Exception: + pass + elif not _title_note: + # sanitize_title returned empty (whitespace-only / unprintable) + _title_note = "\n⚠️ Title is empty after cleanup — session started untitled." + header = header + _title_note # Fire plugin on_session_reset hook (new session guaranteed to exist) try: diff --git a/scripts/release.py b/scripts/release.py index 245badbe6c6..2a4965f0237 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -513,6 +513,7 @@ AUTHOR_MAP = { "nftpoetrist@gmail.com": "nftpoetrist", # PR #18982 "millerc79@users.noreply.github.com": "millerc79", # PR #19033 "hermes@example.com": "shellybotmoyer", # PR #18915 (bot-committed) + "exx@example.com": "exxmen", # PR #19555 "hypnosis.mda@gmail.com": "Hypn0sis", "ywt000818@gmail.com": "OwenYWT", "dhandhalyabhavik@gmail.com": "v1k22", diff --git a/tests/cli/test_cli_new_session.py b/tests/cli/test_cli_new_session.py index b2763d9b4fb..4f453fea32a 100644 --- a/tests/cli/test_cli_new_session.py +++ b/tests/cli/test_cli_new_session.py @@ -238,3 +238,40 @@ def test_new_session_with_title(capsys): captured = capsys.readouterr() assert "My Test Session" in captured.out + + +def test_new_session_with_duplicate_title_surfaces_error(capsys): + """new_session(title=...) handles ValueError from a duplicate-title conflict. + + The session is still created; the title assignment fails; the success banner + must not claim the rejected title as the session name. + """ + cli = _make_cli() + cli._session_db = MagicMock() + cli._session_db.set_session_title.side_effect = ValueError( + "Title 'Dup' is already in use by session abc-123" + ) + cli.agent = _FakeAgent("old_session_id", datetime.now()) + cli.conversation_history = [] + + # Capture warnings printed via cli._cprint. After importlib.reload(), + # the method's __globals__ dict is the one from the live module — patch + # the exact dict the method will read. + warnings: list[str] = [] + method_globals = cli.new_session.__globals__ + original = method_globals["_cprint"] + method_globals["_cprint"] = lambda msg: warnings.append(msg) + try: + cli.new_session(title="Dup") + finally: + method_globals["_cprint"] = original + + cli._session_db.set_session_title.assert_called_once() + joined = "\n".join(warnings) + assert "already in use" in joined + assert "session started untitled" in joined + + # The success banner must NOT claim the rejected title as the session name. + captured = capsys.readouterr() + assert "New session started: Dup" not in captured.out + assert "New session started!" in captured.out diff --git a/tests/gateway/test_title_command.py b/tests/gateway/test_title_command.py index 4a57771e7dd..c09a2202f48 100644 --- a/tests/gateway/test_title_command.py +++ b/tests/gateway/test_title_command.py @@ -274,6 +274,71 @@ class TestResetCommandWithTitle: runner._session_db.set_session_title.assert_called_once_with( "sess-new", "Custom Name" ) + # Header reflects the applied title + assert "Custom Name" in str(result) + + @pytest.mark.asyncio + async def test_reset_command_duplicate_title_surfaces_warning(self): + """/new <title> with an already-in-use title returns a warning in the reply.""" + from datetime import datetime + + from gateway.run import GatewayRunner + from gateway.session import SessionEntry, SessionSource, build_session_key + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + adapter = MagicMock() + adapter.send = AsyncMock() + runner.adapters = {Platform.TELEGRAM: adapter} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) + runner._session_model_overrides = {} + runner._pending_model_notes = {} + runner._background_tasks = set() + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="12345", + chat_id="67890", + user_name="testuser", + ) + session_key = build_session_key(source) + new_session_entry = SessionEntry( + session_key=session_key, + session_id="sess-new", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = new_session_entry + runner.session_store.reset_session.return_value = new_session_entry + runner.session_store._entries = {session_key: new_session_entry} + runner.session_store._generate_session_key.return_value = session_key + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = MagicMock() + runner._session_db.set_session_title.side_effect = ValueError( + "Title 'Dup' is already in use by session abc-123" + ) + runner._agent_cache = {} + runner._agent_cache_lock = None + runner._is_user_authorized = lambda _source: True + runner._format_session_info = lambda: "" + + event = _make_event(text="/new Dup") + result = await runner._handle_reset_command(event) + + runner._session_db.set_session_title.assert_called_once() + reply = str(result) + assert "already in use" in reply + assert "session started untitled" in reply + # Header must NOT claim the rejected title as the session name + assert "New session started: Dup" not in reply # --------------------------------------------------------------------------- From e50809b771de3bf057fa494f02f78fb613d8a926 Mon Sep 17 00:00:00 2001 From: Ioodu <chinadbo@foxmail.com> Date: Mon, 27 Apr 2026 19:09:30 +0800 Subject: [PATCH 111/171] fix(file-tools): cap read_file result size to prevent context window overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set max_result_size_chars=100_000 on the read_file registry entry (was float('inf')), closing the Layer 2 defense-in-depth gap in tool_result_storage.py. The existing Layer 1 guard inside _handle_read_file already returns a JSON error for oversized reads; this aligns the registry cap with every other tool. Update test_read_file_never_persisted → test_read_file_result_size_cap to assert 100_000, and add test_read_file_registry_cap_is_100k as an explicit regression guard against re-introducing float('inf'). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- tests/tools/test_tool_result_storage.py | 17 +++++++++++++++-- tools/file_tools.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 0bbb95bbd61..3cea3b59ffa 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -516,12 +516,25 @@ class TestPerToolThresholds: except ImportError: pytest.skip("terminal_tool not importable in test env") - def test_read_file_never_persisted(self): + def test_read_file_result_size_cap(self): from tools.registry import registry try: import tools.file_tools # noqa: F401 val = registry.get_max_result_size("read_file") - assert val == float("inf") + assert val == 100_000 + except ImportError: + pytest.skip("file_tools not importable in test env") + + def test_read_file_registry_cap_is_100k(self): + """Regression test: read_file must have a 100_000 char registry cap (Layer 2 safety net).""" + from tools.registry import registry + try: + import tools.file_tools # noqa: F401 + val = registry.get_max_result_size("read_file") + assert val == 100_000, ( + f"read_file registry cap must be 100_000, got {val!r}. " + "float('inf') is not allowed — it disables the Layer 2 result-size guard." + ) except ImportError: pytest.skip("file_tools not importable in test env") diff --git a/tools/file_tools.py b/tools/file_tools.py index a4187b6aa96..6022eee9124 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -1137,7 +1137,7 @@ def _handle_search_files(args, **kw): output_mode=args.get("output_mode", "content"), context=args.get("context", 0), task_id=tid) -registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=float('inf')) +registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=100_000) registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️", max_result_size_chars=100_000) registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧", max_result_size_chars=100_000) registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=100_000) From 412f2389f14a625074fc0ae5a1bda6f97a1c6d8f Mon Sep 17 00:00:00 2001 From: charliekerfoot <charliekerfoot@gmail.com> Date: Mon, 27 Apr 2026 16:26:06 -0500 Subject: [PATCH 112/171] fix(google_oauth): close TOCTOU window when saving credentials --- agent/google_oauth.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/agent/google_oauth.py b/agent/google_oauth.py index d6b96da6e5f..ede64251e29 100644 --- a/agent/google_oauth.py +++ b/agent/google_oauth.py @@ -489,16 +489,29 @@ def save_credentials(creds: GoogleCredentials) -> Path: """Atomically write creds to disk with 0o600 permissions.""" path = _credentials_path() path.parent.mkdir(parents=True, exist_ok=True) + # Tighten parent dir to 0o700 so siblings can't traverse to the creds file. + # On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures. + try: + os.chmod(path.parent, 0o700) + except OSError: + pass payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n" with _credentials_lock(): tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}") try: - with open(tmp_path, "w", encoding="utf-8") as fh: + # Create with 0o600 atomically to close the TOCTOU window where the + # default umask (often 0o644) would briefly expose tokens to other + # local users between open() and chmod(). + fd = os.open( + str(tmp_path), + os.O_WRONLY | os.O_CREAT | os.O_EXCL, + stat.S_IRUSR | stat.S_IWUSR, + ) + with os.fdopen(fd, "w", encoding="utf-8") as fh: fh.write(payload) fh.flush() os.fsync(fh.fileno()) - os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR) atomic_replace(tmp_path, path) finally: try: From a219a0a4df2aeacd5ff7dcdbaf75e0bb6e6ef876 Mon Sep 17 00:00:00 2001 From: Grey0202 <grey0202@users.noreply.github.com> Date: Mon, 4 May 2026 03:17:12 -0700 Subject: [PATCH 113/171] fix(anthropic): strip top-level oneOf/allOf/anyOf from tool input_schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the existing _normalize_tool_input_schema to also drop top-level union keywords that Anthropic's tool schema validator rejects with HTTP 400. Several upstream and plugin tools ship schemas with a top-level oneOf/ allOf/anyOf (common for Pydantic discriminated unions). The existing strip_nullable_unions pass only handles anyOf-with-null patterns; a non-null top-level union keyword sails through and hits the API. Salvage of #16471 — approach folded into the existing normalize helper rather than introducing a parallel _sanitize_input_schema function, to avoid two schema-munging code paths running against the same input. Co-authored-by: Grey0202 <grey0202@users.noreply.github.com> --- agent/anthropic_adapter.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 7cdac560b19..8c468e8686b 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1225,6 +1225,14 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]: ``keep_nullable_hint=False`` because the Anthropic validator does not recognize the OpenAPI-style ``nullable: true`` extension and strict schema-to-grammar converters may reject unknown keywords. + + Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the + Anthropic API rejects union keywords at the schema root with a generic + HTTP 400. Several upstream and plugin tools ship schemas with one of + these keywords at the top level (commonly for Pydantic discriminated + unions). If we land here with those keywords still present after + nullable-union stripping, drop them and fall back to a plain object + schema so the tool still validates at the Anthropic boundary. """ if not schema: return {"type": "object", "properties": {}} @@ -1234,6 +1242,12 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]: normalized = strip_nullable_unions(schema, keep_nullable_hint=False) if not isinstance(normalized, dict): return {"type": "object", "properties": {}} + # Strip top-level union keywords that Anthropic's validator rejects. + banned = {"oneOf", "allOf", "anyOf"} + if banned & normalized.keys(): + normalized = {k: v for k, v in normalized.items() if k not in banned} + if "type" not in normalized: + normalized["type"] = "object" if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict): normalized = {**normalized, "properties": {}} return normalized From 33f554d83cc6a600ec87fe70449b66d40d0b7852 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 03:40:39 -0700 Subject: [PATCH 114/171] feat(kanban-dashboard): workspace kind + path inputs in inline create form (#19679) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #18718. Exposes the existing `workspace_kind` + `workspace_path` fields (already accepted by POST /api/plugins/kanban/tasks) in the dashboard's per-column inline-create form so users can create tasks targeting a git worktree or an explicit directory without dropping back to the CLI. - Add a workspace-kind Select (scratch / worktree / dir) to InlineCreate in plugins/kanban/dashboard/dist/index.js. - Conditionally render a workspace_path Input next to the select when kind != scratch; placeholder tells the user whether the path is required (dir) or optional (worktree — derived from assignee when blank). - Submit wires `workspace_kind` / `workspace_path` into the POST body only when they're non-default, keeping the request shape small and interoperable with older dispatcher versions. E2E verified in a dashboard pointed at the worktree: selecting dir + typing /tmp/test-18718 produces a POST body with {workspace_kind: 'dir', workspace_path: '/tmp/test-18718'} and the task lands in sqlite with those fields set. 42/42 kanban dashboard plugin tests pass. --- plugins/kanban/dashboard/dist/index.js | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js index 1b37ef72d47..a818514e2b6 100644 --- a/plugins/kanban/dashboard/dist/index.js +++ b/plugins/kanban/dashboard/dist/index.js @@ -919,6 +919,12 @@ const [priority, setPriority] = useState(0); const [parent, setParent] = useState(""); const [skills, setSkills] = useState(""); + // Workspace controls. `scratch` (default) ignores path; `worktree` optionally + // takes a path (dispatcher derives one from the assignee profile otherwise); + // `dir` requires a path. Backend enforces the rule — we only hide/show the + // input here to save vertical space in the common `scratch` case. + const [workspaceKind, setWorkspaceKind] = useState("scratch"); + const [workspacePath, setWorkspacePath] = useState(""); const submit = function () { const trimmed = title.trim(); @@ -938,10 +944,23 @@ .map(function (s) { return s.trim(); }) .filter(function (s) { return s.length > 0; }); if (skillList.length > 0) body.skills = skillList; + // Only send workspace_kind when it's non-default. Keeps the request + // shape small and interoperable with older dispatcher versions. + if (workspaceKind && workspaceKind !== "scratch") { + body.workspace_kind = workspaceKind; + } + const wpTrim = workspacePath.trim(); + if (wpTrim) body.workspace_path = wpTrim; props.onSubmit(body); setTitle(""); setAssignee(""); setPriority(0); setParent(""); setSkills(""); + setWorkspaceKind("scratch"); setWorkspacePath(""); }; + const showPathInput = workspaceKind !== "scratch"; + const pathPlaceholder = workspaceKind === "dir" + ? "workspace path (required, e.g. ~/projects/my-app)" + : "workspace path (optional, derived from assignee if blank)"; + return h("div", { className: "hermes-kanban-inline-create" }, h(Input, { value: title, @@ -978,6 +997,24 @@ title: "Force-load these skills into the worker (in addition to the built-in kanban-worker).", className: "h-7 text-xs", }), + h("div", { className: "flex gap-2" }, + h(Select, { + value: workspaceKind, + onChange: function (e) { setWorkspaceKind(e.target.value); }, + title: "scratch: isolated temp dir (default). worktree: git worktree on the assignee profile. dir: exact path (required below).", + className: "h-7 text-xs w-28", + }, + h(SelectOption, { value: "scratch" }, "scratch"), + h(SelectOption, { value: "worktree" }, "worktree"), + h(SelectOption, { value: "dir" }, "dir"), + ), + showPathInput ? h(Input, { + value: workspacePath, + onChange: function (e) { setWorkspacePath(e.target.value); }, + placeholder: pathPlaceholder, + className: "h-7 text-xs flex-1", + }) : null, + ), h(Select, { value: parent, onChange: function (e) { setParent(e.target.value); }, From af6f9bc2a12682b06fb3632acf5a9cbf01e74a85 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Mon, 4 May 2026 16:27:51 +0530 Subject: [PATCH 115/171] fix: refresh systemd unit on gateway boot (not just start/restart) (#19684) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resilient restart settings from PR #18639 only took effect when the gateway was started via `hermes gateway start` or `hermes gateway restart` — both of which call refresh_systemd_unit_if_needed() which writes the new unit and runs daemon-reload. However, when the gateway self-restarts via exit-code-75 (stale-code detection after `hermes update`, or the /restart command), systemd respawns the process directly without going through any CLI function. The unit file on disk stays stale, and systemd keeps using the old cached settings (StartLimitBurst=5, RestartSec=30) until someone manually runs `hermes gateway restart`. This meant that after PR #18639 was deployed, users who never ran `hermes gateway restart` manually were still vulnerable to the permanent-death-on-network-outage bug. Fix: call refresh_systemd_unit_if_needed() at the top of run_gateway() (the foreground entry point that systemd's ExecStart invokes). This ensures that on every boot — whether triggered by systemd restart, exit-75 respawn, or manual foreground run — the unit definition and daemon state are current. The call is best-effort (exceptions caught) and a no-op when the unit is already current (one stat + string compare). --- hermes_cli/gateway.py | 14 ++++++++++ tests/hermes_cli/test_gateway_service.py | 34 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index c7abea5bad4..7dec83cbff9 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -2492,6 +2492,20 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False): hasn't fully exited yet. """ sys.path.insert(0, str(PROJECT_ROOT)) + + # Refresh the systemd unit definition on every boot so that restart + # settings (RestartSec, StartLimitIntervalSec, etc.) stay current even + # when the process was respawned via exit-code-75 (stale-code or + # /restart) rather than through `hermes gateway restart` which already + # calls refresh_systemd_unit_if_needed(). Without this, a code update + # that ships new unit settings won't take effect until the next manual + # `hermes gateway start/restart` — leaving the gateway vulnerable to + # the exact failure mode the new settings were meant to prevent. + if supports_systemd_services(): + try: + refresh_systemd_unit_if_needed(system=False) + except Exception: + pass # best-effort; don't block gateway startup from gateway.run import start_gateway diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index f2bfa8b870c..a2e3869c8c8 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -107,6 +107,40 @@ class TestSystemdServiceRefresh: ] + def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch): + """run_gateway() should refresh the systemd unit on boot so that + restart settings take effect even when the process was respawned + via exit-code-75 (bypassing `hermes gateway restart`).""" + unit_path = tmp_path / "hermes-gateway.service" + unit_path.write_text("old unit\n", encoding="utf-8") + + monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path) + monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n") + monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True) + + calls = [] + + def fake_run(cmd, check=True, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + # Prevent run_gateway from actually starting the gateway + def fake_start_gateway(**kwargs): + import asyncio + f = asyncio.Future() + f.set_result(True) + return f + + monkeypatch.setattr("gateway.run.start_gateway", fake_start_gateway) + + gateway_cli.run_gateway() + + assert unit_path.read_text(encoding="utf-8") == "new unit\n" + assert ["systemctl", "--user", "daemon-reload"] in calls + + class TestGeneratedSystemdUnits: def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): unit = gateway_cli.generate_systemd_unit(system=False) From 110387d1494af3fc01dc8431c91a0b4a8dcc847e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:36:18 -0700 Subject: [PATCH 116/171] =?UTF-8?q?docs(open-webui):=20fill=20gaps=20in=20?= =?UTF-8?q?quick=20setup=20=E2=80=94=20verify=20curls,=20ollama=20flag,=20?= =?UTF-8?q?restart=20note=20(#19654)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by @neopabo — the Open WebUI page was missing several steps users hit in practice: - Use hermes config set instead of hand-editing .env (matches current UX) - Restart-gateway note after enabling API_SERVER_ENABLED - curl /health + /v1/models verification step before jumping to Docker - ENABLE_OLLAMA_API=false in both docker run and compose snippets to suppress the empty Ollama backend that otherwise clutters the picker - 15-30s startup wait note for first-run embedding model download - Troubleshooting entry for the empty-Ollama-shadowing case - /v1/models troubleshoot command now includes the Authorization header --- .../docs/user-guide/messaging/open-webui.md | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/website/docs/user-guide/messaging/open-webui.md b/website/docs/user-guide/messaging/open-webui.md index efdf901371b..9c90eb79985 100644 --- a/website/docs/user-guide/messaging/open-webui.md +++ b/website/docs/user-guide/messaging/open-webui.md @@ -26,11 +26,15 @@ Open WebUI talks to Hermes server-to-server, so you do not need `API_SERVER_CORS ### 1. Enable the API server -Add to `~/.hermes/.env`: +```bash +hermes config set API_SERVER_ENABLED true +hermes config set API_SERVER_KEY your-secret-key +``` + +`hermes config set` auto-routes the flag to `config.yaml` and the secret to `~/.hermes/.env`. If the gateway is already running, restart it so the change takes effect: ```bash -API_SERVER_ENABLED=true -API_SERVER_KEY=your-secret-key +hermes gateway stop && hermes gateway ``` ### 2. Start Hermes Agent gateway @@ -45,12 +49,25 @@ You should see: [API Server] API server listening on http://127.0.0.1:8642 ``` -### 3. Start Open WebUI +### 3. Verify the API server is reachable + +```bash +curl -s http://127.0.0.1:8642/health +# {"status": "ok", ...} + +curl -s -H "Authorization: Bearer your-secret-key" http://127.0.0.1:8642/v1/models +# {"object":"list","data":[{"id":"hermes-agent", ...}]} +``` + +If `/health` fails, the gateway didn't pick up `API_SERVER_ENABLED=true` — restart it. If `/v1/models` returns `401`, your `Authorization` header doesn't match `API_SERVER_KEY`. + +### 4. Start Open WebUI ```bash docker run -d -p 3000:8080 \ -e OPENAI_API_BASE_URL=http://host.docker.internal:8642/v1 \ -e OPENAI_API_KEY=your-secret-key \ + -e ENABLE_OLLAMA_API=false \ --add-host=host.docker.internal:host-gateway \ -v open-webui:/app/backend/data \ --name open-webui \ @@ -58,7 +75,11 @@ docker run -d -p 3000:8080 \ ghcr.io/open-webui/open-webui:main ``` -### 4. Open the UI +`ENABLE_OLLAMA_API=false` suppresses the default Ollama backend, which would otherwise show up empty and clutter the model picker. Omit it if you actually have Ollama running alongside. + +First launch takes 15–30 seconds: Open WebUI downloads sentence-transformer embedding models (~150MB) the first time it starts. Wait for `docker logs open-webui` to settle before opening the UI. + +### 5. Open the UI Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see your agent in the model dropdown (named after your profile, or **hermes-agent** for the default profile). Start chatting! @@ -77,6 +98,7 @@ services: environment: - OPENAI_API_BASE_URL=http://host.docker.internal:8642/v1 - OPENAI_API_KEY=your-secret-key + - ENABLE_OLLAMA_API=false extra_hosts: - "host.docker.internal:host-gateway" restart: always @@ -181,8 +203,9 @@ With streaming enabled (the default), you'll see brief inline indicators as tool - **Check the URL has `/v1` suffix**: `http://host.docker.internal:8642/v1` (not just `:8642`) - **Verify the gateway is running**: `curl http://localhost:8642/health` should return `{"status": "ok"}` -- **Check model listing**: `curl http://localhost:8642/v1/models` should return a list with `hermes-agent` +- **Check model listing**: `curl -H "Authorization: Bearer your-secret-key" http://localhost:8642/v1/models` should return a list with `hermes-agent` - **Docker networking**: From inside Docker, `localhost` means the container, not your host. Use `host.docker.internal` or `--network=host`. +- **Empty Ollama backend shadowing the picker**: If you omitted `ENABLE_OLLAMA_API=false`, Open WebUI shows an empty Ollama section above your Hermes models. Restart the container with `-e ENABLE_OLLAMA_API=false` or disable Ollama in **Admin Settings → Connections**. ### Connection test passes but no models load From 844d4a32cecf09ccebbe1147849a648c6182eab8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:40:22 -0700 Subject: [PATCH 117/171] chore(release): AUTHOR_MAP entries for Tier 1e salvage batch --- scripts/release.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 2a4965f0237..4794f5bbfd9 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -683,6 +683,14 @@ AUTHOR_MAP = { "xyywtt@gmail.com": "xyiy001", "charliekerfoot@gmail.com": "CharlieKerfoot", "grey0202@users.noreply.github.com": "Grey0202", + "vominh1919@gmail.com": "vominh1919", + "giwavictor9@gmail.com": "giwaov", + "yoimexex@gmail.com": "Yoimex", + "76803960+atongrun@users.noreply.github.com": "atongrun", + "michaeldanko@icloud.com": "MichaelWDanko", + "xudavid429@gmail.com": "YX234", + "kathy@Kathy.local": "julysir", + "274902531@qq.com": "JanCong", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From d1d2d433877ac80728adf4ba7d69fdcd36949d77 Mon Sep 17 00:00:00 2001 From: vominh1919 <vominh1919@gmail.com> Date: Sun, 26 Apr 2026 19:53:50 +0700 Subject: [PATCH 118/171] fix(test): add skip marker for transcription tests requiring faster_whisper TestTranscribeLocalExtended patches faster_whisper.WhisperModel, which triggers an ImportError when the faster_whisper package is not installed. Added a pytest.mark.skipif marker using importlib.util.find_spec so these tests are gracefully skipped instead of failing with ModuleNotFoundError. --- tests/tools/test_transcription_tools.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index 5e4a9ad716e..e5b27d9e4d4 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -414,6 +414,10 @@ class TestTranscribeLocalCommand: # _transcribe_local — additional tests # ============================================================================ +@pytest.mark.skipif( + not __import__("importlib").util.find_spec("faster_whisper"), + reason="faster_whisper not installed", +) class TestTranscribeLocalExtended: def test_model_reuse_on_second_call(self, tmp_path): """Second call with same model should NOT reload the model.""" From 0d563621fbaf6e4d4ccbd0d29e829124b7c85170 Mon Sep 17 00:00:00 2001 From: vominh1919 <vominh1919@gmail.com> Date: Sun, 26 Apr 2026 20:37:39 +0700 Subject: [PATCH 119/171] fix(test): skip bedrock adapter tests when botocore is not installed Six tests in test_bedrock_adapter.py import botocore.exceptions directly (ConnectionClosedError, EndpointConnectionError, ReadTimeoutError, ClientError) without guarding the import. When botocore is not installed (it's an optional dependency), these tests fail with ModuleNotFoundError instead of being gracefully skipped. Added pytest.importorskip('botocore') to each affected test function, following the same pattern used elsewhere in the test suite (e.g. test_voice_mode.py for numpy, test_mcp_oauth.py for mcp). Tests affected: - TestIsStaleConnectionError: 3 tests - TestCallConverseInvalidatesOnStaleError: 3 tests Before: 6 FAIL with ModuleNotFoundError After: 6 SKIP with reason message --- tests/agent/test_bedrock_adapter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/agent/test_bedrock_adapter.py b/tests/agent/test_bedrock_adapter.py index 2005a6c13c9..27c55cb1e9b 100644 --- a/tests/agent/test_bedrock_adapter.py +++ b/tests/agent/test_bedrock_adapter.py @@ -1283,18 +1283,21 @@ class TestIsStaleConnectionError: """Classifier that decides whether an exception warrants client eviction.""" def test_detects_botocore_connection_closed_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import is_stale_connection_error from botocore.exceptions import ConnectionClosedError exc = ConnectionClosedError(endpoint_url="https://bedrock.example") assert is_stale_connection_error(exc) is True def test_detects_botocore_endpoint_connection_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import is_stale_connection_error from botocore.exceptions import EndpointConnectionError exc = EndpointConnectionError(endpoint_url="https://bedrock.example") assert is_stale_connection_error(exc) is True def test_detects_botocore_read_timeout(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import is_stale_connection_error from botocore.exceptions import ReadTimeoutError exc = ReadTimeoutError(endpoint_url="https://bedrock.example") @@ -1355,6 +1358,7 @@ class TestCallConverseInvalidatesOnStaleError: reconnects instead of reusing the dead socket.""" def test_converse_evicts_client_on_stale_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import ( _bedrock_runtime_client_cache, call_converse, @@ -1381,6 +1385,7 @@ class TestCallConverseInvalidatesOnStaleError: ) def test_converse_stream_evicts_client_on_stale_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import ( _bedrock_runtime_client_cache, call_converse_stream, @@ -1406,6 +1411,7 @@ class TestCallConverseInvalidatesOnStaleError: def test_converse_does_not_evict_on_non_stale_error(self): """Non-stale errors (e.g. ValidationException) leave the client cache alone.""" + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import ( _bedrock_runtime_client_cache, call_converse, From 135b4c8b351cda70da89868b9bc1a78bbbb8cf33 Mon Sep 17 00:00:00 2001 From: vominh1919 <vominh1919@gmail.com> Date: Sun, 26 Apr 2026 19:52:03 +0700 Subject: [PATCH 120/171] fix(mcp): decouple AnyUrl import from mcp dependency AnyUrl was imported inside the same try block as mcp.client.auth, so when the mcp package was not installed, AnyUrl was undefined and _build_client_metadata raised NameError at runtime. Moved the AnyUrl import to its own try/except block so it's available whenever pydantic is installed (which is a core dependency), regardless of whether the mcp SDK is present. Also added pytest.importorskip('mcp') to the three test_build_client_metadata tests that exercise _build_client_metadata, since that function depends on OAuthClientMetadata from the mcp package. --- tests/tools/test_mcp_oauth.py | 3 +++ tools/mcp_oauth.py | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_mcp_oauth.py b/tests/tools/test_mcp_oauth.py index db0342e9933..319620e4127 100644 --- a/tests/tools/test_mcp_oauth.py +++ b/tests/tools/test_mcp_oauth.py @@ -440,6 +440,7 @@ class TestBuildOAuthAuthNonInteractive: def test_build_client_metadata_basic(): """_build_client_metadata returns metadata with expected defaults.""" + pytest.importorskip("mcp") from tools.mcp_oauth import _build_client_metadata, _configure_callback_port cfg = {"client_name": "Test Client"} @@ -453,6 +454,7 @@ def test_build_client_metadata_basic(): def test_build_client_metadata_without_secret_is_public(): """Without client_secret, token endpoint auth is 'none' (public client).""" + pytest.importorskip("mcp") from tools.mcp_oauth import _build_client_metadata, _configure_callback_port cfg = {} @@ -463,6 +465,7 @@ def test_build_client_metadata_without_secret_is_public(): def test_build_client_metadata_with_secret_is_confidential(): """With client_secret, token endpoint auth is 'client_secret_post'.""" + pytest.importorskip("mcp") from tools.mcp_oauth import _build_client_metadata, _configure_callback_port cfg = {"client_secret": "shh"} diff --git a/tools/mcp_oauth.py b/tools/mcp_oauth.py index 51e243c6c11..80dacdc420c 100644 --- a/tools/mcp_oauth.py +++ b/tools/mcp_oauth.py @@ -53,7 +53,7 @@ logger = logging.getLogger(__name__) # Lazy imports -- MCP SDK with OAuth support is optional # --------------------------------------------------------------------------- -_OAUTH_AVAILABLE = False +_OAUTH_AVAILABLE=False try: from mcp.client.auth import OAuthClientProvider from mcp.shared.auth import ( @@ -61,12 +61,16 @@ try: OAuthClientMetadata, OAuthToken, ) - from pydantic import AnyUrl - _OAUTH_AVAILABLE = True + _OAUTH_AVAILABLE=True except ImportError: logger.debug("MCP OAuth types not available -- OAuth MCP auth disabled") +try: + from pydantic import AnyUrl +except ImportError: + AnyUrl = None # type: ignore[assignment, misc] + # --------------------------------------------------------------------------- # Exceptions From 5ec6baa40060ed677d6a3808fcb4eecc12545827 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:42:38 -0700 Subject: [PATCH 121/171] =?UTF-8?q?feat(kanban):=20multi-project=20boards?= =?UTF-8?q?=20=E2=80=94=20one=20install,=20many=20kanbans=20(#19653)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds first-class board support to kanban so users can separate unrelated streams of work (projects, repos, domains) into isolated queues. Single- project users stay on the 'default' board and see no UI change. Isolation model --------------- - Each board is a directory at `~/.hermes/kanban/boards/<slug>/` with its own `kanban.db`, `workspaces/`, and `logs/`. The 'default' board keeps its legacy path (`~/.hermes/kanban.db`) for back-compat — fresh installs and pre-boards users get zero migration. - Workers spawned by the dispatcher have `HERMES_KANBAN_BOARD` pinned in their env alongside the existing `HERMES_KANBAN_DB` / `HERMES_KANBAN_WORKSPACES_ROOT` pins, so workers physically cannot see other boards' tasks. - The gateway's single dispatcher loop now sweeps every board per tick; per-tick cost is a few extra filesystem stats. - CAS concurrency guarantees are preserved per-board (each board is its own SQLite DB, same WAL+IMMEDIATE machinery as before). CLI --- hermes kanban boards list|create|switch|show|rename|rm hermes kanban --board <slug> <any-subcommand> Board resolution order: `--board` flag → `HERMES_KANBAN_BOARD` env → `~/.hermes/kanban/current` file → `default`. Slug validation is strict: lowercase alphanumerics + hyphens + underscores, 1-64 chars, starts with alphanumeric. Uppercase is auto-downcased; slashes / dots / `..` / control chars are rejected so boards can't name their way out of the boards/ directory. Passive discoverability: when more than one board exists, `hermes kanban list` prints a one-line header ("Board: foo (2 other boards …)") so users who stumble across multi-project never have to hunt for the feature. Invisible for single-board installs. Dashboard --------- - New `BoardSwitcher` component at the top of the Kanban tab: dropdown with all boards + task counts, `+ New board` button, `Archive` button (non-default only). Hidden entirely when only `default` exists and is empty — single-project users never see it. - New `NewBoardDialog` modal: slug / display name / description / icon + "switch to this board after creating" checkbox. - Selected board persists to `localStorage` so browser users don't shift the CLI's active board out from under a terminal they left open. - New `?board=<slug>` query param on every existing endpoint plus a new `/boards` CRUD surface (`GET /boards`, `POST /boards`, `PATCH /boards/<slug>`, `DELETE /boards/<slug>`, `POST /boards/<slug>/switch`). - Events WebSocket is pinned to a board at connection time; switching opens a fresh WS against the new board. Also fixes a pre-existing bug in the plugin's tenant / assignee filters: the SDK's `Select` uses `onValueChange(value)`, not native `onChange(event)`, so those filters silently didn't work. New `selectChangeHandler` helper wires both signatures. Tests ----- 49 new tests in `tests/hermes_cli/test_kanban_boards.py` covering: slug validation (valid / invalid / auto-downcase), path resolution (default = legacy path, named = `boards/<slug>/`, env var override), current-board resolution chain (env > file > default), board CRUD + archive / hard-delete, per-board connection isolation (tasks don't leak), worker spawn env injection (`HERMES_KANBAN_BOARD`, `HERMES_KANBAN_DB`, `HERMES_KANBAN_WORKSPACES_ROOT` all point at the right board), and end-to-end CLI surface. Regression surface: all 264 pre-existing kanban tests continue to pass. Live-tested via the dashboard: created 3 boards (default, hermes-agent, atm10-server), created tasks on each via both CLI (`--board <slug> create`) and dashboard (inline create on the Ready column), confirmed zero cross-board leakage, confirmed `BoardSwitcher` + `NewBoardDialog` work end-to-end in the browser. --- gateway/run.py | 212 +++++--- hermes_cli/kanban.py | 308 +++++++++++ hermes_cli/kanban_db.py | 592 +++++++++++++++++++-- plugins/kanban/dashboard/dist/index.js | 414 ++++++++++++-- plugins/kanban/dashboard/dist/style.css | 54 ++ plugins/kanban/dashboard/plugin_api.py | 243 +++++++-- tests/hermes_cli/test_kanban_boards.py | 483 +++++++++++++++++ website/docs/user-guide/features/kanban.md | 97 +++- 8 files changed, 2191 insertions(+), 212 deletions(-) create mode 100644 tests/hermes_cli/test_kanban_boards.py diff --git a/gateway/run.py b/gateway/run.py index 4ca4711cdd0..28d13994bad 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3277,6 +3277,11 @@ class GatewayRunner: Runs in the gateway event loop; all SQLite work is pushed to a thread via ``asyncio.to_thread`` so the loop never blocks on the WAL lock. Failures in one tick don't stop subsequent ticks. + + **Multi-board:** iterates every board discovered on disk per + tick. Subscriptions live inside each board's own DB and cannot + cross boards, so delivery semantics are unchanged — this is + purely a fan-out of the single-DB poll. """ from gateway.config import Platform as _Platform try: @@ -3309,40 +3314,54 @@ class GatewayRunner: while self._running: try: def _collect(): - conn = _kb.connect() + deliveries: list[dict] = [] + # Enumerate every board on disk. Cheap: a few + # directory stat calls per tick. Missing/empty + # boards are silently skipped. try: - _kb.init_db() # idempotent; handles first-run + boards = _kb.list_boards(include_archived=False) except Exception: - pass - try: - subs = _kb.list_notify_subs(conn) - deliveries: list[dict] = [] - for sub in subs: - cursor, events = _kb.unseen_events_for_sub( - conn, - task_id=sub["task_id"], - platform=sub["platform"], - chat_id=sub["chat_id"], - thread_id=sub.get("thread_id") or "", - kinds=TERMINAL_KINDS, - ) - if not events: - continue - task = _kb.get_task(conn, sub["task_id"]) - deliveries.append({ - "sub": sub, - "cursor": cursor, - "events": events, - "task": task, - }) - return deliveries - finally: - conn.close() + boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)] + for board_meta in boards: + slug = board_meta.get("slug") or _kb.DEFAULT_BOARD + try: + conn = _kb.connect(board=slug) + except Exception: + continue + try: + try: + _kb.init_db(board=slug) # idempotent; handles first-run + except Exception: + pass + subs = _kb.list_notify_subs(conn) + for sub in subs: + cursor, events = _kb.unseen_events_for_sub( + conn, + task_id=sub["task_id"], + platform=sub["platform"], + chat_id=sub["chat_id"], + thread_id=sub.get("thread_id") or "", + kinds=TERMINAL_KINDS, + ) + if not events: + continue + task = _kb.get_task(conn, sub["task_id"]) + deliveries.append({ + "sub": sub, + "cursor": cursor, + "events": events, + "task": task, + "board": slug, + }) + finally: + conn.close() + return deliveries deliveries = await asyncio.to_thread(_collect) for d in deliveries: sub = d["sub"] task = d["task"] + board_slug = d.get("board") platform_str = (sub["platform"] or "").lower() try: plat = _Platform(platform_str) @@ -3350,7 +3369,7 @@ class GatewayRunner: # Unknown platform string; skip and advance cursor so # we don't replay forever. await asyncio.to_thread( - self._kanban_advance, sub, d["cursor"], + self._kanban_advance, sub, d["cursor"], board_slug, ) continue adapter = self.adapters.get(plat) @@ -3440,14 +3459,14 @@ class GatewayRunner: "%s on %s after %d consecutive send failures", sub["task_id"], platform_str, fails, ) - await asyncio.to_thread(self._kanban_unsub, sub) + await asyncio.to_thread(self._kanban_unsub, sub, board_slug) sub_fail_counts.pop(sub_key, None) # Don't advance cursor on send failure — retry next tick. break else: # All events delivered; advance cursor + maybe unsub. await asyncio.to_thread( - self._kanban_advance, sub, d["cursor"], + self._kanban_advance, sub, d["cursor"], board_slug, ) # Unsubscribe when the LAST delivered event is a # terminal kind (the task hit a "no further updates" @@ -3459,7 +3478,7 @@ class GatewayRunner: event_terminal = last_kind in TERMINAL_EVENT_KINDS if task_terminal or event_terminal: await asyncio.to_thread( - self._kanban_unsub, sub, + self._kanban_unsub, sub, board_slug, ) except Exception as exc: logger.warning("kanban notifier tick failed: %s", exc) @@ -3469,10 +3488,16 @@ class GatewayRunner: return await asyncio.sleep(1) - def _kanban_advance(self, sub: dict, cursor: int) -> None: - """Sync helper: advance a subscription's cursor. Runs in to_thread.""" + def _kanban_advance( + self, sub: dict, cursor: int, board: Optional[str] = None, + ) -> None: + """Sync helper: advance a subscription's cursor. Runs in to_thread. + + ``board`` scopes the DB connection to the board that owns this + subscription. Unsub cursors in one board can't touch another's. + """ from hermes_cli import kanban_db as _kb - conn = _kb.connect() + conn = _kb.connect(board=board) try: _kb.advance_notify_cursor( conn, @@ -3485,9 +3510,9 @@ class GatewayRunner: finally: conn.close() - def _kanban_unsub(self, sub: dict) -> None: + def _kanban_unsub(self, sub: dict, board: Optional[str] = None) -> None: from hermes_cli import kanban_db as _kb - conn = _kb.connect() + conn = _kb.connect(board=board) try: _kb.remove_notify_sub( conn, @@ -3565,20 +3590,25 @@ class GatewayRunner: bad_ticks = 0 last_warn_at = 0 - def _tick_once() -> "Optional[object]": - """Run one dispatch_once; return result or None on error. + def _tick_once_for_board(slug: str) -> "Optional[object]": + """Run one dispatch_once for a specific board. - Runs in a worker thread via `asyncio.to_thread`.""" + Runs in a worker thread via `asyncio.to_thread`. `board=slug` + is passed through `dispatch_once` so `resolve_workspace` and + `_default_spawn` see the right paths. The per-board DB is + opened explicitly so concurrent boards never share a + connection handle or accidentally claim across each other. + """ conn = None try: - conn = _kb.connect() + conn = _kb.connect(board=slug) try: - _kb.init_db() # idempotent, handles first-run + _kb.init_db(board=slug) # idempotent, handles first-run except Exception: pass - return _kb.dispatch_once(conn) + return _kb.dispatch_once(conn, board=slug) except Exception: - logger.exception("kanban dispatcher: tick failed") + logger.exception("kanban dispatcher: tick failed on board %s", slug) return None finally: if conn is not None: @@ -3587,49 +3617,77 @@ class GatewayRunner: except Exception: pass - def _ready_nonempty() -> bool: - """Cheap probe: is there at least one ready+assigned+unclaimed task?""" - conn = None + def _tick_once() -> "list[tuple[str, Optional[object]]]": + """Run one dispatch_once per board. Returns (slug, result) pairs. + + Enumerating boards on every tick keeps the dispatcher honest + when users create a new board mid-run: no restart required, + the next tick picks it up automatically. + """ try: - conn = _kb.connect() - row = conn.execute( - "SELECT 1 FROM tasks " - "WHERE status = 'ready' AND assignee IS NOT NULL " - " AND claim_lock IS NULL LIMIT 1" - ).fetchone() - return row is not None + boards = _kb.list_boards(include_archived=False) except Exception: - return False - finally: - if conn is not None: - try: - conn.close() - except Exception: - pass + boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)] + out: list[tuple[str, "Optional[object]"]] = [] + for b in boards: + slug = b.get("slug") or _kb.DEFAULT_BOARD + out.append((slug, _tick_once_for_board(slug))) + return out + + def _ready_nonempty() -> bool: + """Cheap probe: is there a ready+assigned+unclaimed task on ANY board?""" + try: + boards = _kb.list_boards(include_archived=False) + except Exception: + boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)] + for b in boards: + slug = b.get("slug") or _kb.DEFAULT_BOARD + conn = None + try: + conn = _kb.connect(board=slug) + row = conn.execute( + "SELECT 1 FROM tasks " + "WHERE status = 'ready' AND assignee IS NOT NULL " + " AND claim_lock IS NULL LIMIT 1" + ).fetchone() + if row is not None: + return True + except Exception: + continue + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + return False logger.info( "kanban dispatcher: embedded in gateway (interval=%.1fs)", interval ) while self._running: try: - res = await asyncio.to_thread(_tick_once) - if res is not None and getattr(res, "spawned", None): - # Quiet by default — only log when something actually - # happened, so an idle gateway stays silent. - logger.info( - "kanban dispatcher: tick spawned=%d reclaimed=%d " - "crashed=%d timed_out=%d promoted=%d auto_blocked=%d", - len(res.spawned), - res.reclaimed, - len(res.crashed) if hasattr(res.crashed, "__len__") else 0, - len(res.timed_out) if hasattr(res.timed_out, "__len__") else 0, - res.promoted, - len(res.auto_blocked) if hasattr(res.auto_blocked, "__len__") else 0, - ) - # Health telemetry + results = await asyncio.to_thread(_tick_once) + any_spawned = False + for slug, res in (results or []): + if res is not None and getattr(res, "spawned", None): + any_spawned = True + # Quiet by default — only log when something actually + # happened, so an idle gateway stays silent. + logger.info( + "kanban dispatcher [%s]: spawned=%d reclaimed=%d " + "crashed=%d timed_out=%d promoted=%d auto_blocked=%d", + slug, + len(res.spawned), + res.reclaimed, + len(res.crashed) if hasattr(res.crashed, "__len__") else 0, + len(res.timed_out) if hasattr(res.timed_out, "__len__") else 0, + res.promoted, + len(res.auto_blocked) if hasattr(res.auto_blocked, "__len__") else 0, + ) + # Health telemetry (aggregate across boards) ready_pending = await asyncio.to_thread(_ready_nonempty) - spawned_any = bool(res and getattr(res, "spawned", None)) - if ready_pending and not spawned_any: + if ready_pending and not any_spawned: bad_ticks += 1 else: bad_ticks = 0 diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py index 46ec6c32ab4..4befd64fa43 100644 --- a/hermes_cli/kanban.py +++ b/hermes_cli/kanban.py @@ -169,11 +169,93 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu "or docs/hermes-kanban-v1-spec.pdf for the full design." ), ) + # --- global --board flag --- + # Applies to every subcommand below. When set, scopes all reads and + # writes to that board's DB. When omitted, resolves via the + # HERMES_KANBAN_BOARD env var, then the persisted current-board + # file, then "default". See kanban_db.get_current_board(). + kanban_parser.add_argument( + "--board", + default=None, + metavar="<slug>", + help=( + "Board slug to operate on. Defaults to the current board " + "(set via `hermes kanban boards switch <slug>` or the " + "HERMES_KANBAN_BOARD env var). Use `hermes kanban boards list` " + "to see all boards." + ), + ) sub = kanban_parser.add_subparsers(dest="kanban_action") # --- init --- sub.add_parser("init", help="Create kanban.db if missing (idempotent)") + # --- boards (new in v2: multi-project support) --- + p_boards = sub.add_parser( + "boards", + help="Manage kanban boards (one board per project / workstream)", + description=( + "Boards let you separate unrelated streams of work " + "(projects, repos, domains) into isolated queues. Each " + "board has its own DB, workspaces directory, and dispatcher " + "loop — tasks on one board cannot collide with tasks on " + "another. The first board is 'default' and always exists." + ), + ) + boards_sub = p_boards.add_subparsers(dest="boards_action") + + b_list = boards_sub.add_parser( + "list", aliases=["ls"], + help="List all boards with task counts", + ) + b_list.add_argument("--json", action="store_true") + b_list.add_argument("--all", action="store_true", + help="Include archived boards too") + + b_create = boards_sub.add_parser( + "create", aliases=["new"], + help="Create a new board", + ) + b_create.add_argument("slug", + help="Board slug (kebab-case, e.g. atm10-server)") + b_create.add_argument("--name", default=None, + help="Human-readable display name (defaults to Title Case of slug)") + b_create.add_argument("--description", default=None, + help="Optional description") + b_create.add_argument("--icon", default=None, + help="Optional emoji or single-character icon for the dashboard") + b_create.add_argument("--color", default=None, + help="Optional hex color (e.g. '#8b5cf6') for the dashboard") + b_create.add_argument("--switch", action="store_true", + help="Switch to the new board after creating it") + + b_rm = boards_sub.add_parser( + "rm", aliases=["remove", "delete"], + help="Archive (default) or delete a board", + ) + b_rm.add_argument("slug") + b_rm.add_argument("--delete", action="store_true", + help="Hard-delete the board directory instead of archiving it. " + "Default is to move it to boards/_archived/ so it's recoverable.") + + b_switch = boards_sub.add_parser( + "switch", aliases=["use"], + help="Set the active board for subsequent CLI calls", + ) + b_switch.add_argument("slug") + + boards_sub.add_parser( + "show", aliases=["current"], + help="Print the currently-active board slug", + ) + + b_rename = boards_sub.add_parser( + "rename", + help="Change a board's human-readable display name (slug is immutable)", + ) + b_rename.add_argument("slug") + b_rename.add_argument("name", help="New display name") + # --- create --- p_create = sub.add_parser("create", help="Create a new task") p_create.add_argument("title", help="Task title") @@ -442,6 +524,38 @@ def kanban_command(args: argparse.Namespace) -> int: ) return 0 + # `--board <slug>` applies to every subcommand below by way of an + # env-var pin for the duration of this call. Using HERMES_KANBAN_BOARD + # (rather than threading `board=` through 50+ kb.connect() sites) + # keeps the patch small and inherits the exact same resolution the + # dispatcher uses for workers — consistency is a feature here. + board_override = getattr(args, "board", None) + if board_override: + try: + normed = kb._normalize_board_slug(board_override) + except ValueError as exc: + print(f"kanban: {exc}", file=sys.stderr) + return 2 + if not normed: + print("kanban: --board requires a slug", file=sys.stderr) + return 2 + # Boards other than 'default' must already exist — typoed slugs + # would otherwise silently create an empty board. + if normed != kb.DEFAULT_BOARD and not kb.board_exists(normed): + print( + f"kanban: board {normed!r} does not exist. " + f"Create it with `hermes kanban boards create {normed}`.", + file=sys.stderr, + ) + return 1 + os.environ["HERMES_KANBAN_BOARD"] = normed + + # Boards management doesn't touch the DB at all — dispatch early so + # fresh installs that haven't initialized any DB can still use + # `hermes kanban boards create …`. + if action == "boards": + return _dispatch_boards(args) + # Auto-initialize the DB before dispatching any subcommand. init_db # is idempotent, so running it every invocation is cheap (one # SELECT against sqlite_master when tables already exist) and @@ -513,6 +627,185 @@ def _profile_author() -> str: return "user" +# --------------------------------------------------------------------------- +# Boards management (hermes kanban boards …) +# --------------------------------------------------------------------------- + +def _dispatch_boards(args: argparse.Namespace) -> int: + """Handle ``hermes kanban boards <action>``. + + Boards management is deliberately separate from the task-level + commands: it operates on the filesystem (board directories, + ``current`` pointer, ``board.json``), not on the per-board SQLite + DB, so a fresh HERMES_HOME that has never called ``kanban init`` + can still run ``boards create`` / ``boards list``. + """ + sub = getattr(args, "boards_action", None) or "list" + if sub in ("list", "ls"): + return _cmd_boards_list(args) + if sub in ("create", "new"): + return _cmd_boards_create(args) + if sub in ("rm", "remove", "delete"): + return _cmd_boards_rm(args) + if sub in ("switch", "use"): + return _cmd_boards_switch(args) + if sub in ("show", "current"): + return _cmd_boards_show(args) + if sub == "rename": + return _cmd_boards_rename(args) + print(f"kanban boards: unknown action {sub!r}", file=sys.stderr) + return 2 + + +def _board_task_counts(slug: str) -> dict[str, int]: + """Return ``{status: count}`` for a board. Safe to call on an empty DB.""" + try: + path = kb.kanban_db_path(board=slug) + if not path.exists(): + return {} + with kb.connect(board=slug) as conn: + rows = conn.execute( + "SELECT status, COUNT(*) AS n FROM tasks GROUP BY status" + ).fetchall() + return {r["status"]: int(r["n"]) for r in rows} + except Exception: + return {} + + +def _cmd_boards_list(args: argparse.Namespace) -> int: + include_archived = bool(getattr(args, "all", False)) + boards = kb.list_boards(include_archived=include_archived) + # Enrich each entry with task counts + whether it's the current board. + current = kb.get_current_board() + for b in boards: + b["is_current"] = (b["slug"] == current) + b["counts"] = _board_task_counts(b["slug"]) + b["total"] = sum(b["counts"].values()) + if getattr(args, "json", False): + print(json.dumps(boards, indent=2, ensure_ascii=False)) + return 0 + # Human table: marker (•) for current, slug, display name, counts. + if not boards: + print("(no boards — create one with `hermes kanban boards create <slug>`)") + return 0 + print(f"{'':2s} {'SLUG':24s} {'NAME':28s} COUNTS") + for b in boards: + marker = "●" if b["is_current"] else " " + counts = b["counts"] or {} + counts_str = ( + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())) + or "(empty)" + ) + name = b.get("name") or "" + if b.get("archived"): + name += " [archived]" + print(f"{marker:2s} {b['slug']:24s} {name:28s} {counts_str}") + print() + print(f"Current board: {current}") + if len(boards) > 1: + print("Switch boards with `hermes kanban boards switch <slug>`.") + return 0 + + +def _cmd_boards_create(args: argparse.Namespace) -> int: + try: + normed = kb._normalize_board_slug(args.slug) + except ValueError as exc: + print(f"kanban boards create: {exc}", file=sys.stderr) + return 2 + if not normed: + print("kanban boards create: slug is required", file=sys.stderr) + return 2 + already = kb.board_exists(normed) and normed != kb.DEFAULT_BOARD + meta = kb.create_board( + normed, + name=args.name, + description=args.description, + icon=args.icon, + color=args.color, + ) + verb = "already exists" if already else "created" + print(f"Board {meta['slug']!r} {verb}.") + print(f" Display name: {meta.get('name', '')}") + print(f" DB path: {meta['db_path']}") + if getattr(args, "switch", False): + kb.set_current_board(meta["slug"]) + print(f" Switched to {meta['slug']!r}.") + else: + print(f" Use `hermes kanban boards switch {meta['slug']}` to make it current.") + return 0 + + +def _cmd_boards_rm(args: argparse.Namespace) -> int: + try: + res = kb.remove_board(args.slug, archive=not getattr(args, "delete", False)) + except ValueError as exc: + print(f"kanban boards rm: {exc}", file=sys.stderr) + return 1 + if res["action"] == "archived": + print(f"Board {res['slug']!r} archived → {res['new_path']}") + print("Recover by moving the directory back to " + "<root>/kanban/boards/<slug>/.") + else: + print(f"Board {res['slug']!r} deleted.") + return 0 + + +def _cmd_boards_switch(args: argparse.Namespace) -> int: + try: + normed = kb._normalize_board_slug(args.slug) + except ValueError as exc: + print(f"kanban boards switch: {exc}", file=sys.stderr) + return 2 + if not normed: + print("kanban boards switch: slug is required", file=sys.stderr) + return 2 + if not kb.board_exists(normed): + print( + f"kanban boards switch: board {normed!r} does not exist. " + f"Create it with `hermes kanban boards create {normed}`.", + file=sys.stderr, + ) + return 1 + kb.set_current_board(normed) + print(f"Active board is now {normed!r}.") + return 0 + + +def _cmd_boards_show(args: argparse.Namespace) -> int: + current = kb.get_current_board() + meta = kb.read_board_metadata(current) + counts = _board_task_counts(current) + total = sum(counts.values()) + print(f"Current board: {current}") + print(f" Display name: {meta.get('name', '')}") + if meta.get("description"): + print(f" Description: {meta['description']}") + print(f" DB path: {meta['db_path']}") + print(f" Tasks: {total} total" + + (f" ({', '.join(f'{k}={v}' for k, v in sorted(counts.items()))})" + if counts else "")) + return 0 + + +def _cmd_boards_rename(args: argparse.Namespace) -> int: + try: + normed = kb._normalize_board_slug(args.slug) + except ValueError as exc: + print(f"kanban boards rename: {exc}", file=sys.stderr) + return 2 + if not normed or not kb.board_exists(normed): + print(f"kanban boards rename: board {args.slug!r} does not exist", + file=sys.stderr) + return 1 + meta = kb.write_board_metadata(normed, name=args.name) + print(f"Board {normed!r} renamed to {meta['name']!r}.") + return 0 + + +# --------------------------------------------------------------------------- + + def _parse_duration(val) -> Optional[int]: """Parse ``30s`` / ``5m`` / ``2h`` / ``1d`` or a raw integer → seconds. @@ -662,6 +955,21 @@ def _cmd_list(args: argparse.Namespace) -> int: if getattr(args, "json", False): print(json.dumps([_task_to_dict(t) for t in tasks], indent=2, ensure_ascii=False)) return 0 + # Passive discoverability: when the user has multiple boards, surface + # which one they're looking at in the list header. Single-board users + # never see this — the feature stays invisible until you opt in. + try: + all_boards = kb.list_boards(include_archived=False) + except Exception: + all_boards = [] + if len(all_boards) > 1: + current = kb.get_current_board() + other_count = len(all_boards) - 1 + print( + f"Board: {current} " + f"({other_count} other board{'s' if other_count != 1 else ''} — " + f"`hermes kanban boards list`)\n" + ) if not tasks: print("(no matching tasks)") return 0 diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 98ee4828d3e..73445699243 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -1,28 +1,56 @@ -"""SQLite-backed Kanban board for multi-profile collaboration. +"""SQLite-backed Kanban board for multi-profile, multi-project collaboration. -The board lives at ``<root>/kanban.db`` where ``<root>`` is the **shared -Hermes root** (the parent of any active profile). Profiles intentionally -collapse onto a single board: it IS the cross-profile coordination -primitive. A worker spawned with ``hermes -p <profile>`` joins the same -board as the dispatcher that claimed the task. The same applies to -``<root>/kanban/workspaces/`` and ``<root>/kanban/logs/``. +In a fresh install the board lives at ``<root>/kanban.db`` where +``<root>`` is the **shared Hermes root** (the parent of any active +profile). Profiles intentionally collapse onto a shared board: it IS +the cross-profile coordination primitive. A worker spawned with +``hermes -p <profile>`` joins the same board as the dispatcher that +claimed the task. The same applies to ``<root>/kanban/workspaces/`` and +``<root>/kanban/logs/``. + +**Multiple boards (projects):** users can create additional boards to +separate unrelated streams of work (e.g. one per project / repo / domain). +Each board is a directory under ``<root>/kanban/boards/<slug>/`` with +its own ``kanban.db``, ``workspaces/``, and ``logs/``. All boards share +the profile's Hermes home but are otherwise isolated: a worker spawned +for a task on board ``atm10-server`` sees only that board's tasks, +cannot enumerate other boards, and its dispatcher ticks don't touch +other boards' DBs. + +The first (and for single-project users, only) board is ``default``. +For back-compat its on-disk DB is ``<root>/kanban.db`` (not +``boards/default/kanban.db``), so installs that predate the boards +feature keep working with zero migration. See :func:`kanban_db_path`. + +Board resolution order (highest precedence first, all optional): + +* ``board=`` argument passed directly to :func:`connect` / :func:`init_db` + (explicit — used by the CLI ``--board`` flag and the dashboard + ``?board=...`` query param). +* ``HERMES_KANBAN_BOARD`` env var (used by the dispatcher to pin workers + to the board their task lives on — workers cannot see other boards). +* ``HERMES_KANBAN_DB`` env var (pins the DB file path directly — legacy + override still honoured; highest precedence when the file path itself + is what the caller wants to force). +* ``<root>/kanban/current`` — a one-line text file holding the slug of + the "currently selected" board. Written by ``hermes kanban boards + switch <slug>``. When absent, the active board is ``default``. In standard installs ``<root>`` is ``~/.hermes``. In Docker / custom deployments where ``HERMES_HOME`` points outside ``~/.hermes`` (e.g. -``/opt/hermes``), ``<root>`` is ``HERMES_HOME``. Three env-var overrides -are available (highest precedence first, all optional): +``/opt/hermes``), ``<root>`` is ``HERMES_HOME``. Legacy env-var +overrides still work: * ``HERMES_KANBAN_DB`` — pin the database file path directly. * ``HERMES_KANBAN_WORKSPACES_ROOT`` — pin the workspaces root directly. -* ``HERMES_KANBAN_HOME`` — pin the umbrella root that anchors all three - kanban paths (db + workspaces + logs). Useful for tests and unusual - deployments where a single override is enough. +* ``HERMES_KANBAN_HOME`` — pin the umbrella root that anchors kanban + paths. Useful for tests and unusual deployments. -The dispatcher injects ``HERMES_KANBAN_DB`` and -``HERMES_KANBAN_WORKSPACES_ROOT`` into the worker subprocess env as a -defense-in-depth measure: even if the worker's ``get_default_hermes_root()`` -resolution somehow disagrees with the dispatcher's (unusual symlink or -Docker layout), the two processes still converge on the same files. +The dispatcher injects ``HERMES_KANBAN_DB``, +``HERMES_KANBAN_WORKSPACES_ROOT``, and ``HERMES_KANBAN_BOARD`` into +worker subprocess env so workers converge on the exact DB the +dispatcher used to claim their task — even under unusual symlink or +Docker layouts. Schema is intentionally small: tasks, task_links, task_comments, task_events. The ``workspace_kind`` field decouples coordination from git @@ -35,6 +63,9 @@ transactions + compare-and-swap (CAS) updates on ``tasks.status`` and ``tasks.claim_lock``. SQLite serializes writers via its WAL lock, so at most one claimer can win any given task. Losers observe zero affected rows and move on -- no retry loops, no distributed-lock machinery. +The CAS coordination is **per-board** — each board is a separate DB, +so multi-board installs get the same atomicity guarantees without any +new locking. """ from __future__ import annotations @@ -42,6 +73,7 @@ from __future__ import annotations import contextlib import json import os +import re import secrets import sqlite3 import sys @@ -81,6 +113,31 @@ _CTX_MAX_COMMENT_BYTES = 2 * 1024 # 2 KB per comment # Paths # --------------------------------------------------------------------------- +DEFAULT_BOARD = "default" + +# Slug validator: lowercase alphanumerics, digits, hyphens; 1–64 chars. +# Strict enough to stop traversal (`..`) and embedded path separators, loose +# enough that kebab-case names like ``atm10-server`` or ``hermes-agent`` +# pass without fuss. Board names with display formatting (spaces, emoji) +# live in ``board.json``; the slug is just the directory name. +_BOARD_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9\-_]{0,63}$") + + +def _normalize_board_slug(slug: Optional[str]) -> Optional[str]: + """Lowercase + strip a slug; validate; return ``None`` for empty.""" + if slug is None: + return None + s = str(slug).strip().lower() + if not s: + return None + if not _BOARD_SLUG_RE.match(s): + raise ValueError( + f"invalid board slug {slug!r}: must be 1-64 chars, lowercase " + f"alphanumerics / hyphens / underscores, not starting with '-' or '_'" + ) + return s + + def kanban_home() -> Path: """Return the shared Hermes root that anchors the kanban board. @@ -104,34 +161,390 @@ def kanban_home() -> Path: return get_default_hermes_root() -def kanban_db_path() -> Path: - """Return the path to the shared ``kanban.db``. +def boards_root() -> Path: + """Return ``<root>/kanban/boards`` — the parent of non-default board dirs. - Anchored at :func:`kanban_home`, not the active profile's - ``HERMES_HOME``, so profile workers and the dispatcher converge on - the same board. ``HERMES_KANBAN_DB`` pins the path directly (highest - precedence) — the dispatcher injects this into worker subprocess env - as defense-in-depth. + ``default`` is intentionally NOT under this directory — its DB lives at + ``<root>/kanban.db`` for back-compat with pre-boards installs. This + function returns the directory where *additional* named boards live, + used by :func:`list_boards` to enumerate them. + """ + return kanban_home() / "kanban" / "boards" + + +def current_board_path() -> Path: + """Return the path to ``<root>/kanban/current``. + + One-line text file written by ``hermes kanban boards switch <slug>`` + to persist the user's board selection across CLI invocations. Absent + by default (meaning: active board is ``default``). + """ + return kanban_home() / "kanban" / "current" + + +def get_current_board() -> str: + """Return the active board slug, honouring the resolution chain. + + Order (highest precedence first): + + 1. ``HERMES_KANBAN_BOARD`` env var (set by the dispatcher on worker + spawn, or manually for ad-hoc overrides). + 2. ``<root>/kanban/current`` on disk (set by ``hermes kanban boards + switch``). + 3. ``DEFAULT_BOARD`` (``"default"``). + + A malformed slug at any step falls through to the next layer with a + best-effort warning — the dispatcher must never crash because a user + hand-edited a file. + """ + env = os.environ.get("HERMES_KANBAN_BOARD", "").strip() + if env: + try: + normed = _normalize_board_slug(env) + if normed: + return normed + except ValueError: + pass + try: + f = current_board_path() + if f.exists(): + val = f.read_text(encoding="utf-8").strip() + if val: + try: + normed = _normalize_board_slug(val) + if normed: + return normed + except ValueError: + pass + except OSError: + pass + return DEFAULT_BOARD + + +def set_current_board(slug: str) -> Path: + """Persist ``slug`` as the active board. Returns the file written. + + Writes ``<root>/kanban/current``. The caller should validate the slug + exists first (via :func:`board_exists`) — this function does not — + so that ``hermes kanban boards switch <typo>`` returns an error + instead of silently pointing at nothing. + """ + normed = _normalize_board_slug(slug) + if not normed: + raise ValueError("board slug is required") + path = current_board_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(normed + "\n", encoding="utf-8") + return path + + +def clear_current_board() -> None: + """Remove ``<root>/kanban/current`` so the active board reverts to ``default``.""" + try: + current_board_path().unlink() + except FileNotFoundError: + pass + + +def board_dir(board: Optional[str] = None) -> Path: + """Return the on-disk directory for ``board``. + + ``default`` is ``<root>/kanban/boards/default/`` **for metadata only** + (board.json + workspaces/ + logs/). Its DB file stays at + ``<root>/kanban.db`` for back-compat — see :func:`kanban_db_path`. + + All other boards live at ``<root>/kanban/boards/<slug>/`` with + everything inside that directory including the ``kanban.db``. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + return boards_root() / slug + + +def board_exists(board: Optional[str] = None) -> bool: + """Return True if the board has a DB or a metadata dir on disk. + + ``default`` is considered to always exist — its DB is created + on first :func:`connect` and there's no way for it to be missing + in a configuration where the kanban feature is usable at all. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + if slug == DEFAULT_BOARD: + return True + d = board_dir(slug) + return d.is_dir() or (d / "kanban.db").exists() + + +def kanban_db_path(board: Optional[str] = None) -> Path: + """Return the path to the ``kanban.db`` for ``board``. + + Resolution (highest precedence first): + + 1. ``HERMES_KANBAN_DB`` env var — pins the path directly. Honoured for + back-compat and for the dispatcher→worker handoff (defense in + depth: dispatcher injects this into worker env so workers are + immune to any path-resolution disagreement). + 2. When ``board`` arg is None, the active board from + :func:`get_current_board` is used. + 3. Board ``default`` → ``<root>/kanban.db`` (back-compat path). + Other boards → ``<root>/kanban/boards/<slug>/kanban.db``. """ override = os.environ.get("HERMES_KANBAN_DB", "").strip() if override: return Path(override).expanduser() - return kanban_home() / "kanban.db" + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban.db" + return board_dir(slug) / "kanban.db" -def workspaces_root() -> Path: +def workspaces_root(board: Optional[str] = None) -> Path: """Return the directory under which ``scratch`` workspaces are created. - Anchored at :func:`kanban_home` so workspace paths are stable across - profile workers spawned by the dispatcher. + Anchored per-board so workspaces don't leak between projects. ``HERMES_KANBAN_WORKSPACES_ROOT`` pins the path directly (highest - precedence) — the dispatcher injects this into worker subprocess env - as defense-in-depth. + precedence) — the dispatcher injects this into worker env. + + ``default`` keeps the legacy path ``<root>/kanban/workspaces/`` so + that existing scratch workspaces from before the boards feature are + preserved. Other boards use ``<root>/kanban/boards/<slug>/workspaces/``. """ override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip() if override: return Path(override).expanduser() - return kanban_home() / "kanban" / "workspaces" + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban" / "workspaces" + return board_dir(slug) / "workspaces" + + +def worker_logs_dir(board: Optional[str] = None) -> Path: + """Return the directory under which per-task worker logs are written. + + ``default`` keeps the legacy path ``<root>/kanban/logs/``. Other + boards use ``<root>/kanban/boards/<slug>/logs/``. Logs follow the + board — makes ``hermes kanban log`` unambiguous even when multiple + boards have tasks with the same id. + """ + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban" / "logs" + return board_dir(slug) / "logs" + + +def board_metadata_path(board: Optional[str] = None) -> Path: + """Return the path to ``board.json`` for ``board``. + + Stores display metadata (display name, description, icon, color, + created_at). The on-disk slug is the canonical identity; this file + is purely for presentation in the CLI / dashboard. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + return board_dir(slug) / "board.json" + + +def _default_board_display_name(slug: str) -> str: + """Turn a slug into a reasonable default display name. + + ``atm10-server`` → ``Atm10 Server``. Users can override via + ``board.json`` but the default should look presentable in the + dashboard without any follow-up editing. + """ + return " ".join(part.capitalize() for part in slug.replace("_", "-").split("-") if part) or slug + + +def read_board_metadata(board: Optional[str] = None) -> dict: + """Return ``board.json`` contents (or synthesized defaults). + + Never raises — a missing / malformed ``board.json`` falls back to a + synthesised entry so the dashboard always has something to render. + Includes the canonical ``slug`` and ``db_path`` so the caller + doesn't need to reconstruct them. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + meta: dict[str, Any] = { + "slug": slug, + "name": _default_board_display_name(slug), + "description": "", + "icon": "", + "color": "", + "created_at": None, + "archived": False, + } + try: + p = board_metadata_path(slug) + if p.exists(): + raw = json.loads(p.read_text(encoding="utf-8")) + if isinstance(raw, dict): + # Never let the metadata file claim a different slug than + # its directory — trust the filesystem. + raw["slug"] = slug + meta.update(raw) + except (OSError, json.JSONDecodeError): + pass + meta["db_path"] = str(kanban_db_path(slug)) + return meta + + +def write_board_metadata( + board: Optional[str], + *, + name: Optional[str] = None, + description: Optional[str] = None, + icon: Optional[str] = None, + color: Optional[str] = None, + archived: Optional[bool] = None, +) -> dict: + """Create / update ``board.json`` for ``board``. + + Preserves any existing fields not mentioned in the call. Sets + ``created_at`` on first write. Returns the resulting metadata dict. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + meta = read_board_metadata(slug) + # Preserve existing DB-derived fields — they get re-computed each + # read but shouldn't be written into board.json. + meta.pop("db_path", None) + if name is not None: + meta["name"] = str(name).strip() or _default_board_display_name(slug) + if description is not None: + meta["description"] = str(description) + if icon is not None: + meta["icon"] = str(icon) + if color is not None: + meta["color"] = str(color) + if archived is not None: + meta["archived"] = bool(archived) + if not meta.get("created_at"): + meta["created_at"] = int(time.time()) + path = board_metadata_path(slug) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps(meta, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + meta["db_path"] = str(kanban_db_path(slug)) + return meta + + +def create_board( + slug: str, + *, + name: Optional[str] = None, + description: Optional[str] = None, + icon: Optional[str] = None, + color: Optional[str] = None, +) -> dict: + """Create a new board directory + DB + metadata. Idempotent. + + Returns the resulting metadata. Raises :class:`ValueError` for a + malformed slug; returns the existing metadata (not an error) if the + board already exists — matching ``mkdir -p`` semantics. + """ + normed = _normalize_board_slug(slug) + if not normed: + raise ValueError("board slug is required") + meta = write_board_metadata( + normed, + name=name, + description=description, + icon=icon, + color=color, + ) + # Touch the DB so list_boards() sees it immediately. + init_db(board=normed) + return meta + + +def list_boards(*, include_archived: bool = True) -> list[dict]: + """Enumerate all boards that exist on disk. + + Always includes ``default`` (even when the ``boards/default/`` + metadata dir doesn't exist, because its DB is at the legacy path). + Other boards are discovered by scanning ``boards/`` for subdirectories + that either contain a ``kanban.db`` or a ``board.json``. + + Returns a list of metadata dicts, sorted with ``default`` first and + the rest alphabetically. + """ + entries: list[dict] = [] + seen: set[str] = set() + + # Default board is always first. + entries.append(read_board_metadata(DEFAULT_BOARD)) + seen.add(DEFAULT_BOARD) + + root = boards_root() + if root.is_dir(): + for child in sorted(root.iterdir(), key=lambda p: p.name.lower()): + if not child.is_dir(): + continue + slug = child.name + # Keep slug normalisation soft for discovery — but skip dirs + # that don't parse as valid slugs so we don't surface junk. + try: + normed = _normalize_board_slug(slug) + except ValueError: + continue + if not normed or normed in seen: + continue + has_db = (child / "kanban.db").exists() + has_meta = (child / "board.json").exists() + if not (has_db or has_meta): + continue + meta = read_board_metadata(normed) + if meta.get("archived") and not include_archived: + continue + entries.append(meta) + seen.add(normed) + return entries + + +def remove_board(slug: str, *, archive: bool = True) -> dict: + """Remove or archive a board. + + ``archive=True`` (default) moves the board's directory to + ``<root>/kanban/boards/_archived/<slug>-<timestamp>/`` so the data + is recoverable. ``archive=False`` deletes the directory outright. + + The ``default`` board cannot be removed — raises :class:`ValueError`. + Returns a summary dict describing what happened (``{"slug", "action", + "new_path"}``). + """ + normed = _normalize_board_slug(slug) + if not normed: + raise ValueError("board slug is required") + if normed == DEFAULT_BOARD: + raise ValueError("the 'default' board cannot be removed") + d = board_dir(normed) + if not d.exists(): + raise ValueError(f"board {normed!r} does not exist") + + # If the user removed the currently-active board, revert to default. + if get_current_board() == normed: + clear_current_board() + + if archive: + archive_root = boards_root() / "_archived" + archive_root.mkdir(parents=True, exist_ok=True) + ts = int(time.time()) + target = archive_root / f"{normed}-{ts}" + # Avoid collision on rapid double-archives. + suffix = 1 + while target.exists(): + target = archive_root / f"{normed}-{ts}-{suffix}" + suffix += 1 + d.rename(target) + return {"slug": normed, "action": "archived", "new_path": str(target)} + else: + import shutil + shutil.rmtree(d) + return {"slug": normed, "action": "deleted", "new_path": ""} # --------------------------------------------------------------------------- @@ -429,7 +842,11 @@ CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_ _INITIALIZED_PATHS: set[str] = set() -def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: +def connect( + db_path: Optional[Path] = None, + *, + board: Optional[str] = None, +) -> sqlite3.Connection: """Open (and initialize if needed) the kanban DB. WAL mode is enabled on every connection; it's a no-op after the first @@ -439,8 +856,19 @@ def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: fresh installs and test harnesses that construct `connect()` directly don't have to remember a separate init step. Subsequent connections skip the schema check via a module-level path cache. + + Path resolution: + + * ``db_path`` explicit → used as-is (legacy callers, tests). + * ``board`` explicit → resolves to that board's DB. + * Neither → :func:`kanban_db_path` resolves via + ``HERMES_KANBAN_DB`` env → ``HERMES_KANBAN_BOARD`` env → + ``<root>/kanban/current`` → ``default``. """ - path = db_path or kanban_db_path() + if db_path is not None: + path = db_path + else: + path = kanban_db_path(board=board) path.parent.mkdir(parents=True, exist_ok=True) resolved = str(path.resolve()) needs_init = resolved not in _INITIALIZED_PATHS @@ -459,7 +887,11 @@ def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: return conn -def init_db(db_path: Optional[Path] = None) -> Path: +def init_db( + db_path: Optional[Path] = None, + *, + board: Optional[str] = None, +) -> Path: """Create the schema if it doesn't exist; return the path used. Kept as a public entry point so CLI ``hermes kanban init`` and the @@ -470,7 +902,10 @@ def init_db(db_path: Optional[Path] = None) -> Path: external tools that upgrade an old DB file — can call this to force re-migration. """ - path = db_path or kanban_db_path() + if db_path is not None: + path = db_path + else: + path = kanban_db_path(board=board) path.parent.mkdir(parents=True, exist_ok=True) resolved = str(path.resolve()) # Clear the cache entry so the underlying connect() re-runs the @@ -1574,13 +2009,13 @@ def archive_task(conn: sqlite3.Connection, task_id: str) -> bool: # Workspace resolution # --------------------------------------------------------------------------- -def resolve_workspace(task: Task) -> Path: +def resolve_workspace(task: Task, *, board: Optional[str] = None) -> Path: """Resolve (and create if needed) the workspace for a task. - - ``scratch``: a fresh dir under ``<kanban-root>/kanban/workspaces/<id>/``, - where ``<kanban-root>`` is the shared Hermes root (see - :func:`kanban_home`). The path is the same for the dispatcher and - every profile worker, so handoff is path-stable. + - ``scratch``: a fresh dir under ``<board-root>/workspaces/<id>/``, + where ``<board-root>`` is the active board's root. The path is the + same for the dispatcher and every profile worker, so handoff is + path-stable. - ``dir:<path>``: the path stored in ``workspace_path``. Created if missing. MUST be absolute — relative paths are rejected to prevent confused-deputy traversal where ``../../../tmp/attacker`` @@ -1607,7 +2042,7 @@ def resolve_workspace(task: Task) -> Path: f"{task.workspace_path!r}; workspace paths must be absolute" ) else: - p = workspaces_root() / task.id + p = workspaces_root(board=board) / task.id p.mkdir(parents=True, exist_ok=True) return p if kind == "dir": @@ -2021,6 +2456,7 @@ def dispatch_once( dry_run: bool = False, max_spawn: Optional[int] = None, failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT, + board: Optional[str] = None, ) -> DispatchResult: """Run one dispatcher tick. @@ -2029,15 +2465,17 @@ def dispatch_once( 2. Reclaim crashed running tasks (host-local PID no longer alive). 3. Promote todo -> ready where all parents are done. 4. For each ready task with an assignee, atomically claim and call - ``spawn_fn(task, workspace_path) -> Optional[int]``. The return - value (if any) is recorded as ``worker_pid`` so subsequent ticks - can detect crashes before the TTL expires. + ``spawn_fn(task, workspace_path, board) -> Optional[int]``. The + return value (if any) is recorded as ``worker_pid`` so subsequent + ticks can detect crashes before the TTL expires. Spawn failures are counted per-task. After ``failure_limit`` consecutive failures the task is auto-blocked with the last error as its reason — prevents the dispatcher from thrashing forever on an unfixable task. ``spawn_fn`` defaults to ``_default_spawn``. Tests pass a stub. + ``board`` pins workspace/log/db resolution for this tick to a specific + board. When omitted, the current-board resolution chain is used. """ result = DispatchResult() result.reclaimed = release_stale_claims(conn) @@ -2064,7 +2502,7 @@ def dispatch_once( if claimed is None: continue try: - workspace = resolve_workspace(claimed) + workspace = resolve_workspace(claimed, board=board) except Exception as exc: auto = _record_spawn_failure( conn, claimed.id, f"workspace: {exc}", @@ -2077,7 +2515,18 @@ def dispatch_once( set_workspace_path(conn, claimed.id, str(workspace)) _spawn = spawn_fn if spawn_fn is not None else _default_spawn try: - pid = _spawn(claimed, str(workspace)) + # Back-compat: older spawn_fn signatures accept only + # (task, workspace). Test stubs in the suite rely on that. + # Introspect the callable and pass `board` only when supported. + import inspect + try: + sig = inspect.signature(_spawn) + if "board" in sig.parameters: + pid = _spawn(claimed, str(workspace), board=board) + else: + pid = _spawn(claimed, str(workspace)) + except (TypeError, ValueError): + pid = _spawn(claimed, str(workspace)) if pid: _set_worker_pid(conn, claimed.id, int(pid)) _clear_spawn_failures(conn, claimed.id) @@ -2116,13 +2565,23 @@ def _rotate_worker_log(log_path: Path, max_bytes: int) -> None: pass -def _default_spawn(task: Task, workspace: str) -> Optional[int]: +def _default_spawn( + task: Task, + workspace: str, + *, + board: Optional[str] = None, +) -> Optional[int]: """Fire-and-forget ``hermes -p <profile> chat -q ...`` subprocess. Returns the spawned child's PID so the dispatcher can detect crashes before the claim TTL expires. The child's completion is still observed via the ``complete`` / ``block`` transitions the worker writes itself; the PID check is a safety net for crashes, OOM kills, and Ctrl+C. + + ``board`` pins the child's kanban context to that board: the child's + ``HERMES_KANBAN_DB`` / ``HERMES_KANBAN_BOARD`` / workspaces_root env + vars all resolve to the same board the dispatcher claimed the task + from. Workers cannot accidentally see other boards. """ import subprocess if not task.assignee: @@ -2140,8 +2599,13 @@ def _default_spawn(task: Task, workspace: str) -> Optional[int]: # dispatcher's. Belt-and-braces with the `get_default_hermes_root()` # resolution in `kanban_home()` — symmetric resolution is the norm, # but unusual symlink / Docker layouts are caught here too. - env["HERMES_KANBAN_DB"] = str(kanban_db_path()) - env["HERMES_KANBAN_WORKSPACES_ROOT"] = str(workspaces_root()) + env["HERMES_KANBAN_DB"] = str(kanban_db_path(board=board)) + env["HERMES_KANBAN_WORKSPACES_ROOT"] = str(workspaces_root(board=board)) + # Board slug — the final defense-in-depth pin. If the worker ever + # resolves kanban paths without the DB / workspaces env vars, the + # board slug still forces it to the right directory. + resolved_board = _normalize_board_slug(board) or get_current_board() + env["HERMES_KANBAN_BOARD"] = resolved_board # HERMES_PROFILE is the author the kanban_comment tool defaults to. # `hermes -p <assignee>` activates the profile, but the env var is # what the tool reads — set it explicitly here so comments are @@ -2176,10 +2640,11 @@ def _default_spawn(task: Task, workspace: str) -> Optional[int]: "chat", "-q", prompt, ]) - # Redirect output to a per-task log under <kanban-root>/kanban/logs/. - # Anchored at the shared kanban root, not the worker's profile home, - # so `hermes kanban tail` reads the same file the worker writes to. - log_dir = kanban_home() / "kanban" / "logs" + # Redirect output to a per-task log under <board-root>/logs/. + # Anchored at the board root (not the shared kanban root), so + # `hermes kanban log` on a specific board reads its own file and + # logs don't collide across boards that happen to share task ids. + log_dir = worker_logs_dir(board=board) log_dir.mkdir(parents=True, exist_ok=True) log_path = log_dir / f"{task.id}.log" _rotate_worker_log(log_path, DEFAULT_LOG_ROTATE_BYTES) @@ -2660,11 +3125,14 @@ def gc_events( def gc_worker_logs( *, older_than_seconds: int = 30 * 24 * 3600, + board: Optional[str] = None, ) -> int: """Delete worker log files older than ``older_than_seconds``. Returns the number of files removed. Kept separate from ``gc_events`` because - log files live on disk, not in SQLite.""" - log_dir = kanban_home() / "kanban" / "logs" + log files live on disk, not in SQLite. Scoped to ``board`` (defaults + to the active board) — per-board isolation means deleting logs from + board A cannot touch board B's logs.""" + log_dir = worker_logs_dir(board=board) if not log_dir.exists(): return 0 cutoff = time.time() - older_than_seconds @@ -2683,19 +3151,25 @@ def gc_worker_logs( # Worker log accessor # --------------------------------------------------------------------------- -def worker_log_path(task_id: str) -> Path: +def worker_log_path(task_id: str, *, board: Optional[str] = None) -> Path: """Return the path to a worker's log file. The file may not exist - (task never spawned, or log already GC'd).""" - return kanban_home() / "kanban" / "logs" / f"{task_id}.log" + (task never spawned, or log already GC'd). + + When ``board`` is None, resolves via the active board (env var → + current-board file → default). The dispatcher always passes the + board explicitly to avoid any resolution ambiguity when multiple + boards exist.""" + return worker_logs_dir(board=board) / f"{task_id}.log" def read_worker_log( task_id: str, *, tail_bytes: Optional[int] = None, + board: Optional[str] = None, ) -> Optional[str]: """Read the worker log for ``task_id``. Returns None if the file doesn't exist. If ``tail_bytes`` is set, only the last N bytes are returned (useful for the dashboard drawer which shouldn't page megabytes).""" - path = worker_log_path(task_id) + path = worker_log_path(task_id, board=board) if not path.exists(): return None try: diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js index a818514e2b6..3bdd92d47e1 100644 --- a/plugins/kanban/dashboard/dist/index.js +++ b/plugins/kanban/dashboard/dist/index.js @@ -63,6 +63,53 @@ const API = "/api/plugins/kanban"; const MIME_TASK = "text/x-hermes-task"; + // localStorage key for the user's selected board. Independent of the + // CLI's on-disk ``<root>/kanban/current`` pointer so browser users + // can inspect any board without shifting the CLI's active board out + // from under a terminal they left open. + const LS_BOARD_KEY = "hermes.kanban.selectedBoard"; + + function readSelectedBoard() { + try { + const v = window.localStorage.getItem(LS_BOARD_KEY); + return (v || "").trim() || null; + } catch (_e) { return null; } + } + + function writeSelectedBoard(slug) { + try { + if (slug && slug !== "default") window.localStorage.setItem(LS_BOARD_KEY, slug); + else window.localStorage.removeItem(LS_BOARD_KEY); + } catch (_e) { /* ignore quota / private mode */ } + } + + function withBoard(url, board) { + // Append ?board=<slug> when a non-default board is active. Omitted + // for default so the URL stays clean and the backend falls through + // to its own resolution chain (env var → ``current`` file → + // default) which is already correct. + if (!board || board === "default") return url; + const sep = url.indexOf("?") >= 0 ? "&" : "?"; + return `${url}${sep}board=${encodeURIComponent(board)}`; + } + + // The SDK's Select component fires ``onValueChange(value)`` directly + // (it's a shadcn-style popup, not a native <select>). Older plugin + // code calls ``onChange({target: {value}})`` which silently never + // fires. This helper wires both signatures so a setter works with + // either API — use it as: + // + // h(Select, {..., ...selectChangeHandler(setState), ...}) + function selectChangeHandler(setter) { + return { + onValueChange: function (v) { setter(v == null ? "" : v); }, + onChange: function (e) { + const v = e && e.target ? e.target.value : e; + setter(v == null ? "" : v); + }, + }; + } + // ------------------------------------------------------------------------- // Minimal safe markdown renderer. // @@ -245,7 +292,19 @@ // ------------------------------------------------------------------------- function KanbanPage() { - const [board, setBoard] = useState(null); + const [board, setBoard] = useState(() => readSelectedBoard() || "default"); + const [boardList, setBoardList] = useState([]); // [{slug, name, counts, ...}] + const [showNewBoard, setShowNewBoard] = useState(false); + + const [kanbanBoard, setKanbanBoard] = useState(null); // the grid data + // Alias so the rest of the function can keep using `board` semantically + // for the grid data (card columns + tenants + assignees) without + // colliding with the selected-board slug above. History: the old + // component had `const [board, setBoard]` for the grid data. We + // renamed the grid data to `kanbanBoard` so the more useful name + // (`board`) belongs to the selected slug. + const boardData = kanbanBoard; + const setBoardData = setKanbanBoard; const [config, setConfig] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); @@ -292,9 +351,9 @@ if (tenantFilter) qs.set("tenant", tenantFilter); if (includeArchived) qs.set("include_archived", "true"); const url = qs.toString() ? `${API}/board?${qs}` : `${API}/board`; - return SDK.fetchJSON(url) + return SDK.fetchJSON(withBoard(url, board)) .then(function (data) { - setBoard(data); + setBoardData(data); cursorRef.current = data.latest_event_id || 0; setError(null); }) @@ -302,7 +361,26 @@ setError(String(err && err.message ? err.message : err)); }) .finally(function () { setLoading(false); }); - }, [tenantFilter, includeArchived]); + }, [tenantFilter, includeArchived, board]); + + // --- load list of boards for the switcher ------------------------------ + const loadBoardList = useCallback(function () { + return SDK.fetchJSON(`${API}/boards`) + .then(function (data) { + const boards = (data && data.boards) || []; + setBoardList(boards); + // If the stored slug isn't in the list any longer (board was + // deleted in the CLI while dashboard was open), fall back to + // default so the UI doesn't hang on a 404. + if (board !== "default" && !boards.find(function (b) { return b.slug === board; })) { + setBoard("default"); + writeSelectedBoard("default"); + } + }) + .catch(function () { /* non-fatal */ }); + }, [board]); + + useEffect(function () { loadBoardList(); }, [loadBoardList]); const scheduleReload = useCallback(function () { if (reloadTimerRef.current) return; @@ -324,16 +402,21 @@ // --- WebSocket --------------------------------------------------------- useEffect(function () { - if (!board) return undefined; + if (!boardData) return undefined; wsClosedRef.current = false; function openWs() { if (wsClosedRef.current) return; const token = window.__HERMES_SESSION_TOKEN__ || ""; const proto = window.location.protocol === "https:" ? "wss:" : "ws:"; - const qs = new URLSearchParams({ + const qsParams = { since: String(cursorRef.current || 0), token: token, - }); + }; + // Pin the WS stream to the currently-selected board so events + // from other boards don't bleed in. Only set for non-default so + // single-board installs keep the cleaner URL. + if (board && board !== "default") qsParams.board = board; + const qs = new URLSearchParams(qsParams); const url = `${proto}//${window.location.host}${API}/events?${qs}`; let ws; try { ws = new WebSocket(url); } catch (_e) { return; } @@ -372,11 +455,11 @@ wsClosedRef.current = true; try { wsRef.current && wsRef.current.close(); } catch (_e) { /* noop */ } }; - }, [!!board, scheduleReload]); + }, [!!boardData, board, scheduleReload]); // --- filtering ---------------------------------------------------------- const filteredBoard = useMemo(function () { - if (!board) return null; + if (!boardData) return null; const q = search.trim().toLowerCase(); const filterTask = function (t) { if (assigneeFilter && t.assignee !== assigneeFilter) return false; @@ -386,18 +469,18 @@ } return true; }; - return Object.assign({}, board, { - columns: board.columns.map(function (col) { + return Object.assign({}, boardData, { + columns: boardData.columns.map(function (col) { return Object.assign({}, col, { tasks: col.tasks.filter(filterTask) }); }), }); - }, [board, assigneeFilter, search]); + }, [boardData, assigneeFilter, search]); // --- actions ------------------------------------------------------------ const moveTask = useCallback(function (taskId, newStatus) { const confirmMsg = DESTRUCTIVE_TRANSITIONS[newStatus]; if (confirmMsg && !window.confirm(confirmMsg)) return; - setBoard(function (b) { + setBoardData(function (b) { if (!b) return b; let moved = null; const columns = b.columns.map(function (col) { @@ -413,7 +496,7 @@ } return Object.assign({}, b, { columns }); }); - SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(taskId)}`, { + SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(taskId)}`, board), { method: "PATCH", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ status: newStatus }), @@ -421,10 +504,10 @@ setError(`Move failed: ${err.message || err}`); loadBoard(); }); - }, [loadBoard]); + }, [loadBoard, board]); const createTask = useCallback(function (body) { - return SDK.fetchJSON(`${API}/tasks`, { + return SDK.fetchJSON(withBoard(`${API}/tasks`, board), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -437,9 +520,10 @@ setError("Task created, but: " + res.warning); } loadBoard(); + loadBoardList(); // refresh counts in the switcher return res; }); - }, [loadBoard]); + }, [loadBoard, loadBoardList, board]); const toggleSelected = useCallback(function (id, additive) { setSelectedIds(function (prev) { @@ -455,7 +539,7 @@ if (selectedIds.size === 0) return; if (confirmMsg && !window.confirm(confirmMsg)) return; const body = Object.assign({ ids: Array.from(selectedIds) }, patch); - SDK.fetchJSON(`${API}/tasks/bulk`, { + SDK.fetchJSON(withBoard(`${API}/tasks/bulk`, board), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -470,14 +554,50 @@ loadBoard(); }) .catch(function (e) { setError(String(e.message || e)); }); - }, [selectedIds, loadBoard, clearSelected]); + }, [selectedIds, loadBoard, clearSelected, board]); + + // --- board switching ---------------------------------------------------- + const switchBoard = useCallback(function (nextSlug) { + if (!nextSlug || nextSlug === board) return; + // Optimistic UI: clear the current grid + show loading, reset the + // event cursor so the WS reopens aligned to the new board's + // latest_event_id on the next loadBoard. + setBoardData(null); + cursorRef.current = 0; + setLoading(true); + setBoard(nextSlug); + writeSelectedBoard(nextSlug); + }, [board]); + + const createNewBoard = useCallback(function (payload) { + return SDK.fetchJSON(`${API}/boards`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }).then(function (res) { + loadBoardList(); + const slug = res && res.board && res.board.slug; + if (slug && payload.switch) switchBoard(slug); + return res; + }); + }, [loadBoardList, switchBoard]); + + const deleteBoard = useCallback(function (slug) { + if (!slug || slug === "default") return Promise.resolve(); + return SDK.fetchJSON(`${API}/boards/${encodeURIComponent(slug)}`, { + method: "DELETE", + }).then(function () { + loadBoardList(); + if (board === slug) switchBoard("default"); + }); + }, [board, loadBoardList, switchBoard]); // --- render ------------------------------------------------------------- - if (loading && !board) { + if (loading && !boardData) { return h("div", { className: "p-8 text-sm text-muted-foreground" }, "Loading Kanban board…"); } - if (error && !board) { + if (error && !boardData) { return h(Card, null, h(CardContent, { className: "p-6" }, h("div", { className: "text-sm text-destructive" }, @@ -493,15 +613,28 @@ return h(ErrorBoundary, null, h("div", { className: "hermes-kanban flex flex-col gap-4" }, - h(BoardToolbar, { + h(BoardSwitcher, { board: board, + boardList: boardList, + onSwitch: switchBoard, + onNewClick: function () { setShowNewBoard(true); }, + onDeleteBoard: deleteBoard, + }), + showNewBoard ? h(NewBoardDialog, { + onCancel: function () { setShowNewBoard(false); }, + onCreate: function (payload) { + return createNewBoard(payload).then(function () { setShowNewBoard(false); }); + }, + }) : null, + h(BoardToolbar, { + board: boardData, tenantFilter, setTenantFilter, assigneeFilter, setAssigneeFilter, includeArchived, setIncludeArchived, laneByProfile, setLaneByProfile, search, setSearch, onNudgeDispatch: function () { - SDK.fetchJSON(`${API}/dispatch?max=8`, { method: "POST" }) + SDK.fetchJSON(withBoard(`${API}/dispatch?max=8`, board), { method: "POST" }) .then(loadBoard) .catch(function (e) { setError(String(e.message || e)); }); }, @@ -509,7 +642,7 @@ }), selectedIds.size > 0 ? h(BulkActionBar, { count: selectedIds.size, - assignees: (board && board.assignees) || [], + assignees: (boardData && boardData.assignees) || [], onApply: applyBulk, onClear: clearSelected, }) : null, @@ -522,20 +655,215 @@ onMove: moveTask, onOpen: setSelectedTaskId, onCreate: createTask, - allTasks: board.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), + allTasks: boardData.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), }), selectedTaskId ? h(TaskDrawer, { taskId: selectedTaskId, + boardSlug: board, onClose: function () { setSelectedTaskId(null); }, onRefresh: loadBoard, renderMarkdown: renderMd, - allTasks: board.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), + allTasks: boardData.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), eventTick: taskEventTick[selectedTaskId] || 0, }) : null, ), ); } + // ------------------------------------------------------------------------- + // Board switcher (multi-project) + // ------------------------------------------------------------------------- + + function BoardSwitcher(props) { + const list = props.boardList || []; + const current = list.find(function (b) { return b.slug === props.board; }); + const currentName = current && current.name ? current.name : props.board; + const currentTotal = current ? current.total : 0; + const hasMultipleBoards = list.length > 1; + + // Hide entirely when only the default board exists AND it's empty — + // single-project users never see boards UI unless they ask for it. + // We show the [+ New board] affordance as soon as any board has a + // task (so the user can discover multi-project before they need it) + // OR when any non-default board exists. + const totalAcrossAllBoards = list.reduce(function (n, b) { return n + (b.total || 0); }, 0); + const shouldShow = hasMultipleBoards || totalAcrossAllBoards > 0; + if (!shouldShow) { + return h("div", { + className: "hermes-kanban-boardswitcher-compact", + title: "Boards let you separate unrelated streams of work", + }, + h(Button, { + onClick: props.onNewClick, + size: "sm", + className: "h-7 text-xs", + }, "+ New board"), + ); + } + + return h("div", { className: "hermes-kanban-boardswitcher" }, + h("div", { className: "hermes-kanban-boardswitcher-inner" }, + h("div", { className: "flex flex-col gap-0.5" }, + h("div", { className: "text-[11px] uppercase tracking-wider text-muted-foreground" }, + "Board"), + h("div", { className: "flex items-center gap-2" }, + h(Select, Object.assign({ + value: props.board, + className: "h-8 min-w-[220px]", + "aria-label": "Switch kanban board", + }, selectChangeHandler(function (v) { if (v) props.onSwitch(v); })), + list.map(function (b) { + const label = b.total > 0 + ? `${b.name || b.slug} · ${b.total}` + : (b.name || b.slug); + return h(SelectOption, { key: b.slug, value: b.slug }, label); + }), + ), + h("span", { className: "text-xs text-muted-foreground" }, + `${currentTotal || 0} task${currentTotal === 1 ? "" : "s"}`), + ), + ), + h("div", { className: "flex-1" }), + h(Button, { + onClick: props.onNewClick, + size: "sm", + className: "h-8", + }, "+ New board"), + props.board !== "default" + ? h(Button, { + onClick: function () { + const msg = + `Archive board '${currentName}'? ` + + `It will be moved to boards/_archived/ so you can recover it later. ` + + `Tasks on this board will no longer appear anywhere in the UI.`; + if (window.confirm(msg)) props.onDeleteBoard(props.board); + }, + size: "sm", + className: "h-8", + title: "Archive this board", + }, "Archive") + : null, + ), + ); + } + + function NewBoardDialog(props) { + const [slug, setSlug] = useState(""); + const [name, setName] = useState(""); + const [description, setDescription] = useState(""); + const [icon, setIcon] = useState(""); + const [switchTo, setSwitchTo] = useState(true); + const [submitting, setSubmitting] = useState(false); + const [err, setErr] = useState(null); + + // Auto-derive a name from the slug if the user hasn't typed one. + const autoName = useMemo(function () { + if (!slug) return ""; + return slug.replace(/[-_]+/g, " ") + .split(" ") + .filter(Boolean) + .map(function (w) { return w[0].toUpperCase() + w.slice(1); }) + .join(" "); + }, [slug]); + + function onSubmit(ev) { + if (ev) ev.preventDefault(); + if (!slug.trim()) { setErr("slug is required"); return; } + setSubmitting(true); + setErr(null); + props.onCreate({ + slug: slug.trim(), + name: name.trim() || autoName || undefined, + description: description.trim() || undefined, + icon: icon.trim() || undefined, + switch: switchTo, + }).catch(function (e) { + setErr(String(e && e.message ? e.message : e)); + setSubmitting(false); + }); + } + + return h("div", { + className: "hermes-kanban-dialog-backdrop", + onClick: function (e) { if (e.target === e.currentTarget) props.onCancel(); }, + }, + h("form", { + className: "hermes-kanban-dialog", + onSubmit: onSubmit, + }, + h("div", { className: "hermes-kanban-dialog-title" }, "New board"), + h("div", { className: "text-xs text-muted-foreground mb-2" }, + "Boards let you separate unrelated streams of work — one per project, repo, or domain. Workers on one board never see another board's tasks."), + h("div", { className: "flex flex-col gap-3" }, + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Slug ", + h("span", { className: "text-muted-foreground" }, + "— lowercase, hyphens, e.g. atm10-server")), + h(Input, { + value: slug, + onChange: function (e) { setSlug(e.target.value.toLowerCase().replace(/[^a-z0-9\-_]/g, "-")); }, + placeholder: "atm10-server", + autoFocus: true, + className: "h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Display name ", + h("span", { className: "text-muted-foreground" }, "(optional)")), + h(Input, { + value: name, + onChange: function (e) { setName(e.target.value); }, + placeholder: autoName || "Display name", + className: "h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Description ", + h("span", { className: "text-muted-foreground" }, "(optional)")), + h(Input, { + value: description, + onChange: function (e) { setDescription(e.target.value); }, + placeholder: "What goes on this board?", + className: "h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Icon ", + h("span", { className: "text-muted-foreground" }, "(single character or emoji)")), + h(Input, { + value: icon, + onChange: function (e) { setIcon(e.target.value.slice(0, 4)); }, + placeholder: "📦", + className: "h-8 w-24", + }), + ), + h("label", { className: "flex items-center gap-2 text-xs" }, + h("input", { + type: "checkbox", + checked: switchTo, + onChange: function (e) { setSwitchTo(e.target.checked); }, + }), + "Switch to this board after creating it", + ), + ), + err ? h("div", { className: "text-xs text-destructive mt-2" }, err) : null, + h("div", { className: "hermes-kanban-dialog-actions" }, + h(Button, { + type: "button", + onClick: props.onCancel, + size: "sm", + disabled: submitting, + }, "Cancel"), + h(Button, { + type: "submit", + size: "sm", + disabled: submitting || !slug.trim(), + }, submitting ? "Creating…" : "Create board"), + ), + ), + ); + } + // ------------------------------------------------------------------------- // Toolbar // ------------------------------------------------------------------------- @@ -555,11 +883,10 @@ ), h("div", { className: "flex flex-col gap-1" }, h(Label, { className: "text-xs text-muted-foreground" }, "Tenant"), - h(Select, { + h(Select, Object.assign({ value: props.tenantFilter, - onChange: function (e) { props.setTenantFilter(e.target.value); }, className: "h-8", - }, + }, selectChangeHandler(props.setTenantFilter)), h(SelectOption, { value: "" }, "All tenants"), tenants.map(function (t) { return h(SelectOption, { key: t, value: t }, t); @@ -568,11 +895,10 @@ ), h("div", { className: "flex flex-col gap-1" }, h(Label, { className: "text-xs text-muted-foreground" }, "Assignee"), - h(Select, { + h(Select, Object.assign({ value: props.assigneeFilter, - onChange: function (e) { props.setAssigneeFilter(e.target.value); }, className: "h-8", - }, + }, selectChangeHandler(props.setAssigneeFilter)), h(SelectOption, { value: "" }, "All profiles"), assignees.map(function (a) { return h(SelectOption, { key: a, value: a }, a); @@ -1049,13 +1375,14 @@ const [err, setErr] = useState(null); const [newComment, setNewComment] = useState(""); const [editing, setEditing] = useState(false); + const boardSlug = props.boardSlug; const load = useCallback(function () { - return SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}`) + return SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}`, boardSlug)) .then(function (d) { setData(d); setErr(null); }) .catch(function (e) { setErr(String(e.message || e)); }) .finally(function () { setLoading(false); }); - }, [props.taskId]); + }, [props.taskId, boardSlug]); // Reload when the WS stream reports new events for this task id // (completion, block, crash, etc. — anything that'd make the drawer @@ -1070,7 +1397,7 @@ const handleComment = function () { const body = newComment.trim(); if (!body) return; - SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}/comments`, { + SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/comments`, boardSlug), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ body }), @@ -1085,7 +1412,7 @@ if (opts && opts.confirm && !window.confirm(opts.confirm)) { return Promise.resolve(); } - return SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}`, { + return SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}`, boardSlug), { method: "PATCH", headers: { "Content-Type": "application/json" }, body: JSON.stringify(patch), @@ -1093,7 +1420,7 @@ }; const addLink = function (parentId) { - return SDK.fetchJSON(`${API}/links`, { + return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ parent_id: parentId, child_id: props.taskId }), @@ -1102,12 +1429,12 @@ }; const removeLink = function (parentId) { const qs = new URLSearchParams({ parent_id: parentId, child_id: props.taskId }); - return SDK.fetchJSON(`${API}/links?${qs}`, { method: "DELETE" }) + return SDK.fetchJSON(withBoard(`${API}/links?${qs}`, boardSlug), { method: "DELETE" }) .then(function () { load(); props.onRefresh(); }) .catch(function (e) { setErr(String(e.message || e)); }); }; const addChild = function (childId) { - return SDK.fetchJSON(`${API}/links`, { + return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ parent_id: props.taskId, child_id: childId }), @@ -1116,7 +1443,7 @@ }; const removeChild = function (childId) { const qs = new URLSearchParams({ parent_id: props.taskId, child_id: childId }); - return SDK.fetchJSON(`${API}/links?${qs}`, { method: "DELETE" }) + return SDK.fetchJSON(withBoard(`${API}/links?${qs}`, boardSlug), { method: "DELETE" }) .then(function () { load(); props.onRefresh(); }) .catch(function (e) { setErr(String(e.message || e)); }); }; @@ -1141,6 +1468,7 @@ data, editing, setEditing, renderMarkdown: props.renderMarkdown, allTasks: props.allTasks, + boardSlug: boardSlug, onPatch: doPatch, onAddParent: addLink, onRemoveParent: removeLink, @@ -1253,7 +1581,7 @@ ); }), ), - h(WorkerLogSection, { taskId: t.id }), + h(WorkerLogSection, { taskId: t.id, boardSlug: props.boardSlug }), h(RunHistorySection, { runs: props.data.runs || [] }), ); } @@ -1324,10 +1652,10 @@ const [state, setState] = useState({ loading: false, data: null, err: null }); const load = useCallback(function () { setState({ loading: true, data: null, err: null }); - SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}/log?tail=100000`) + SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/log?tail=100000`, props.boardSlug)) .then(function (d) { setState({ loading: false, data: d, err: null }); }) .catch(function (e) { setState({ loading: false, data: null, err: String(e.message || e) }); }); - }, [props.taskId]); + }, [props.taskId, props.boardSlug]); // Auto-load when the section mounts; the user opened the drawer so the // cost is one small HTTP round-trip. diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css index c638946ad27..3c197e62095 100644 --- a/plugins/kanban/dashboard/dist/style.css +++ b/plugins/kanban/dashboard/dist/style.css @@ -769,3 +769,57 @@ word-break: break-word; font-family: var(--font-mono, ui-monospace, monospace); } + +/* ------------------------------------------------------------------------- + Multi-project: board switcher + create-board dialog + ------------------------------------------------------------------------- */ +.hermes-kanban-boardswitcher { + border: 1px solid var(--color-border, rgba(120, 120, 140, 0.25)); + border-radius: 0.5rem; + padding: 0.6rem 0.85rem; + background: var(--color-card-subtle, rgba(255, 255, 255, 0.02)); +} +.hermes-kanban-boardswitcher-inner { + display: flex; + align-items: flex-end; + gap: 0.75rem; + flex-wrap: wrap; +} +.hermes-kanban-boardswitcher-compact { + display: flex; + justify-content: flex-end; + padding: 0 0.25rem; +} +.hermes-kanban-dialog-backdrop { + position: fixed; + inset: 0; + background: rgba(8, 10, 16, 0.55); + backdrop-filter: blur(2px); + z-index: 60; + display: flex; + align-items: center; + justify-content: center; +} +.hermes-kanban-dialog { + background: var(--color-card, #121421); + color: var(--color-foreground); + border: 1px solid var(--color-border, rgba(120, 120, 140, 0.25)); + border-radius: 0.5rem; + padding: 1.1rem 1.2rem 1rem; + width: 28rem; + max-width: calc(100vw - 2rem); + max-height: calc(100vh - 3rem); + overflow: auto; + box-shadow: 0 18px 40px rgba(0, 0, 0, 0.5); +} +.hermes-kanban-dialog-title { + font-size: 1rem; + font-weight: 600; + margin-bottom: 0.25rem; +} +.hermes-kanban-dialog-actions { + display: flex; + justify-content: flex-end; + gap: 0.5rem; + margin-top: 1rem; +} diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py index acccf63c9de..d80296b888a 100644 --- a/plugins/kanban/dashboard/plugin_api.py +++ b/plugins/kanban/dashboard/plugin_api.py @@ -72,19 +72,45 @@ def _check_ws_token(provided: Optional[str]) -> bool: return hmac.compare_digest(str(provided), str(expected)) -def _conn(): +def _resolve_board(board: Optional[str]) -> Optional[str]: + """Validate and normalise a board slug from a query param. + + Raises :class:`HTTPException` 400 on malformed slugs so the browser + sees a clean error instead of a 500. Returns the normalised slug, + or ``None`` when the caller omitted the param (which then falls + through to the active board inside ``kb.connect()``). + """ + if board is None or board == "": + return None + try: + normed = kanban_db._normalize_board_slug(board) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if normed and normed != kanban_db.DEFAULT_BOARD and not kanban_db.board_exists(normed): + raise HTTPException( + status_code=404, + detail=f"board {normed!r} does not exist", + ) + return normed + + +def _conn(board: Optional[str] = None): """Open a kanban_db connection, creating the schema on first use. Every handler that mutates the DB goes through this so the plugin self-heals on a fresh install (no user-visible "no such table" error if somebody hits POST /tasks before GET /board). ``init_db`` is idempotent. + + ``board`` is the query-param slug (already normalised by + :func:`_resolve_board`). When ``None`` the active board is used + via the resolution chain (env var → ``current`` file → ``default``). """ try: - kanban_db.init_db() + kanban_db.init_db(board=board) except Exception as exc: log.warning("kanban init_db failed: %s", exc) - return kanban_db.connect() + return kanban_db.connect(board=board) # --------------------------------------------------------------------------- @@ -177,13 +203,19 @@ def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]: def get_board( tenant: Optional[str] = Query(None, description="Filter to a single tenant"), include_archived: bool = Query(False), + board: Optional[str] = Query(None, description="Kanban board slug (omit for current)"), ): """Return the full board grouped by status column. ``_conn()`` auto-initializes ``kanban.db`` on first call so a fresh install doesn't surface a "failed to load" error on the plugin tab. + + ``board`` selects which board to read from. Omitting it falls + through to the active board (``HERMES_KANBAN_BOARD`` env → on-disk + ``current`` pointer → ``default``). """ - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: tasks = kanban_db.list_tasks( conn, tenant=tenant, include_archived=include_archived @@ -274,8 +306,9 @@ def get_board( # --------------------------------------------------------------------------- @router.get("/tasks/{task_id}") -def get_task(task_id: str): - conn = _conn() +def get_task(task_id: str, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: task = kanban_db.get_task(conn, task_id) if task is None: @@ -311,8 +344,9 @@ class CreateTaskBody(BaseModel): @router.post("/tasks") -def create_task(payload: CreateTaskBody): - conn = _conn() +def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: task_id = kanban_db.create_task( conn, @@ -373,8 +407,9 @@ class UpdateTaskBody(BaseModel): @router.patch("/tasks/{task_id}") -def update_task(task_id: str, payload: UpdateTaskBody): - conn = _conn() +def update_task(task_id: str, payload: UpdateTaskBody, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: task = kanban_db.get_task(conn, task_id) if task is None: @@ -527,10 +562,11 @@ class CommentBody(BaseModel): @router.post("/tasks/{task_id}/comments") -def add_comment(task_id: str, payload: CommentBody): +def add_comment(task_id: str, payload: CommentBody, board: Optional[str] = Query(None)): if not payload.body.strip(): raise HTTPException(status_code=400, detail="body is required") - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: if kanban_db.get_task(conn, task_id) is None: raise HTTPException(status_code=404, detail=f"task {task_id} not found") @@ -552,8 +588,9 @@ class LinkBody(BaseModel): @router.post("/links") -def add_link(payload: LinkBody): - conn = _conn() +def add_link(payload: LinkBody, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: kanban_db.link_tasks(conn, payload.parent_id, payload.child_id) return {"ok": True} @@ -564,8 +601,13 @@ def add_link(payload: LinkBody): @router.delete("/links") -def delete_link(parent_id: str = Query(...), child_id: str = Query(...)): - conn = _conn() +def delete_link( + parent_id: str = Query(...), + child_id: str = Query(...), + board: Optional[str] = Query(None), +): + board = _resolve_board(board) + conn = _conn(board=board) try: ok = kanban_db.unlink_tasks(conn, parent_id, child_id) return {"ok": bool(ok)} @@ -586,7 +628,7 @@ class BulkTaskBody(BaseModel): @router.post("/tasks/bulk") -def bulk_update(payload: BulkTaskBody): +def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)): """Apply the same patch to every id in ``payload.ids``. This is an *independent* iteration — per-task failures don't abort @@ -596,7 +638,8 @@ def bulk_update(payload: BulkTaskBody): if not ids: raise HTTPException(status_code=400, detail="ids is required") results: list[dict] = [] - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: for tid in ids: entry: dict[str, Any] = {"id": tid, "ok": True} @@ -690,14 +733,15 @@ def get_config(): # --------------------------------------------------------------------------- @router.get("/stats") -def get_stats(): +def get_stats(board: Optional[str] = Query(None)): """Per-status + per-assignee counts + oldest-ready age. Designed for the dashboard HUD and for router profiles that need to answer "is this specialist overloaded?" without scanning the whole board themselves. """ - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: return kanban_db.board_stats(conn) finally: @@ -705,7 +749,7 @@ def get_stats(): @router.get("/assignees") -def get_assignees(): +def get_assignees(board: Optional[str] = Query(None)): """Known profiles + per-profile task counts. Returns the union of ``~/.hermes/profiles/*`` on disk and every @@ -713,7 +757,8 @@ def get_assignees(): this to populate its assignee dropdown so a freshly-created profile appears in the picker before it's been given any task. """ - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: return {"assignees": kanban_db.known_assignees(conn)} finally: @@ -725,7 +770,11 @@ def get_assignees(): # --------------------------------------------------------------------------- @router.get("/tasks/{task_id}/log") -def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)): +def get_task_log( + task_id: str, + tail: Optional[int] = Query(None, ge=1, le=2_000_000), + board: Optional[str] = Query(None), +): """Return the worker's stdout/stderr log. ``tail`` caps the response size (bytes) so the dashboard drawer @@ -734,15 +783,16 @@ def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_ ``_rotate_worker_log`` — a single ``.log.1`` is kept, no further generations, so disk usage per task is bounded at ~4 MiB. """ - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: task = kanban_db.get_task(conn, task_id) finally: conn.close() if task is None: raise HTTPException(status_code=404, detail=f"task {task_id} not found") - content = kanban_db.read_worker_log(task_id, tail_bytes=tail) - log_path = kanban_db.worker_log_path(task_id) + content = kanban_db.read_worker_log(task_id, tail_bytes=tail, board=board) + log_path = kanban_db.worker_log_path(task_id, board=board) size = log_path.stat().st_size if log_path.exists() else 0 return { "task_id": task_id, @@ -760,11 +810,16 @@ def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_ # --------------------------------------------------------------------------- @router.post("/dispatch") -def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")): - conn = _conn() +def dispatch( + dry_run: bool = Query(False), + max_n: int = Query(8, alias="max"), + board: Optional[str] = Query(None), +): + board = _resolve_board(board) + conn = _conn(board=board) try: result = kanban_db.dispatch_once( - conn, dry_run=dry_run, max_spawn=max_n, + conn, dry_run=dry_run, max_spawn=max_n, board=board, ) # DispatchResult is a dataclass. try: @@ -775,6 +830,124 @@ def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")): conn.close() +# --------------------------------------------------------------------------- +# Boards CRUD (multi-project support) +# --------------------------------------------------------------------------- + +class CreateBoardBody(BaseModel): + slug: str + name: Optional[str] = None + description: Optional[str] = None + icon: Optional[str] = None + color: Optional[str] = None + switch: bool = False + + +class RenameBoardBody(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + icon: Optional[str] = None + color: Optional[str] = None + + +def _board_counts(slug: str) -> dict[str, int]: + """Return ``{status: count}`` for a board. Safe on an empty DB.""" + try: + path = kanban_db.kanban_db_path(board=slug) + if not path.exists(): + return {} + conn = kanban_db.connect(board=slug) + try: + rows = conn.execute( + "SELECT status, COUNT(*) AS n FROM tasks GROUP BY status" + ).fetchall() + return {r["status"]: int(r["n"]) for r in rows} + finally: + conn.close() + except Exception: + return {} + + +@router.get("/boards") +def list_boards(include_archived: bool = Query(False)): + """Return every board on disk with task counts and the active slug.""" + boards = kanban_db.list_boards(include_archived=include_archived) + current = kanban_db.get_current_board() + for b in boards: + b["is_current"] = (b["slug"] == current) + b["counts"] = _board_counts(b["slug"]) + b["total"] = sum(b["counts"].values()) + return {"boards": boards, "current": current} + + +@router.post("/boards") +def create_board_endpoint(payload: CreateBoardBody): + """Create a new board. Idempotent — ``slug`` collision returns existing.""" + try: + meta = kanban_db.create_board( + payload.slug, + name=payload.name, + description=payload.description, + icon=payload.icon, + color=payload.color, + ) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if payload.switch: + try: + kanban_db.set_current_board(meta["slug"]) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return {"board": meta, "current": kanban_db.get_current_board()} + + +@router.patch("/boards/{slug}") +def rename_board(slug: str, payload: RenameBoardBody): + """Update a board's display metadata (slug is immutable — create a new one to rename the directory).""" + try: + normed = kanban_db._normalize_board_slug(slug) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if not normed or not kanban_db.board_exists(normed): + raise HTTPException(status_code=404, detail=f"board {slug!r} does not exist") + meta = kanban_db.write_board_metadata( + normed, + name=payload.name, + description=payload.description, + icon=payload.icon, + color=payload.color, + ) + return {"board": meta} + + +@router.delete("/boards/{slug}") +def delete_board(slug: str, delete: bool = Query(False, description="Hard-delete instead of archive")): + """Archive (default) or hard-delete a board.""" + try: + res = kanban_db.remove_board(slug, archive=not delete) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return {"result": res, "current": kanban_db.get_current_board()} + + +@router.post("/boards/{slug}/switch") +def switch_board(slug: str): + """Persist ``slug`` as the active board for subsequent CLI / slash calls. + + Dashboard users pick boards via a client-side ``localStorage`` — this + endpoint is for ``/kanban boards switch`` parity so gateway slash + commands and the CLI share the same current-board pointer. + """ + try: + normed = kanban_db._normalize_board_slug(slug) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if not normed or not kanban_db.board_exists(normed): + raise HTTPException(status_code=404, detail=f"board {slug!r} does not exist") + kanban_db.set_current_board(normed) + return {"current": normed} + + # --------------------------------------------------------------------------- # WebSocket: /events?since=<event_id> # --------------------------------------------------------------------------- @@ -802,8 +975,18 @@ async def stream_events(ws: WebSocket): except ValueError: cursor = 0 + # Board selection — pinned at the WS handshake; re-subscribe to + # switch boards. Changing boards mid-stream would require + # reconciling two cursors, so the UI just opens a new WS on + # board change. + ws_board_raw = ws.query_params.get("board") + try: + ws_board = kanban_db._normalize_board_slug(ws_board_raw) if ws_board_raw else None + except ValueError: + ws_board = None + def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]: - conn = kanban_db.connect() + conn = kanban_db.connect(board=ws_board) try: rows = conn.execute( "SELECT id, task_id, run_id, kind, payload, created_at " diff --git a/tests/hermes_cli/test_kanban_boards.py b/tests/hermes_cli/test_kanban_boards.py new file mode 100644 index 00000000000..a86a8713302 --- /dev/null +++ b/tests/hermes_cli/test_kanban_boards.py @@ -0,0 +1,483 @@ +"""Tests for the multi-board kanban layer (``hermes kanban boards …``). + +Covers the pieces added when boards became a first-class concept: + +* Slug validation and normalisation. +* Path resolution for ``default`` (legacy ``<root>/kanban.db``) vs + named boards (``<root>/kanban/boards/<slug>/kanban.db``). +* Current-board persistence via ``<root>/kanban/current`` and + ``HERMES_KANBAN_BOARD`` env var. +* ``connect(board=)`` isolation — writes on one board don't leak. +* ``create_board`` / ``list_boards`` / ``remove_board`` round trip. +* CLI surface: ``hermes kanban boards list/create/switch/rm``. +* ``_default_spawn`` injects ``HERMES_KANBAN_BOARD`` into worker env. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +# Ensure the worktree (not the stale global clone) is first on sys.path. +_WORKTREE = Path(__file__).resolve().parents[2] +if str(_WORKTREE) not in sys.path: + sys.path.insert(0, str(_WORKTREE)) + +from hermes_cli import kanban_db as kb + + +# --------------------------------------------------------------------------- +# Fixture +# --------------------------------------------------------------------------- + +@pytest.fixture +def fresh_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME with no prior kanban state. + + The autouse hermetic conftest already nukes credentials + TZ; this + fixture layers a per-test HERMES_HOME plus a path-init cache reset + so each test sees a truly empty board set. + """ + home = tmp_path / "hermes_home" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + for var in ( + "HERMES_KANBAN_DB", + "HERMES_KANBAN_WORKSPACES_ROOT", + "HERMES_KANBAN_HOME", + "HERMES_KANBAN_BOARD", + ): + monkeypatch.delenv(var, raising=False) + # Also reset hermes_constants cache so get_default_hermes_root() re-reads. + try: + import hermes_constants + hermes_constants._cached_default_hermes_root = None # type: ignore[attr-defined] + except Exception: + pass + # Kanban module-level init cache must not leak between tests. + kb._INITIALIZED_PATHS.clear() + return home + + +# --------------------------------------------------------------------------- +# Slug validation +# --------------------------------------------------------------------------- + +class TestSlugValidation: + @pytest.mark.parametrize("good", [ + "default", "atm10-server", "hermes-agent", "proj_1", "a", + "very-long-but-still-ok-slug-with-hyphens-and-numbers-1234", + ]) + def test_accepts_valid(self, good): + assert kb._normalize_board_slug(good) == good + + @pytest.mark.parametrize("bad", [ + "-leading-hyphen", "_leading_underscore", + "with/slash", "with space", + "has.dot", "has?question", + "..", "../etc", "foo\x00bar", + ]) + def test_rejects_invalid(self, bad): + with pytest.raises(ValueError): + kb._normalize_board_slug(bad) + + def test_empty_returns_none(self): + assert kb._normalize_board_slug(None) is None + assert kb._normalize_board_slug("") is None + assert kb._normalize_board_slug(" ") is None + + def test_auto_lowercases(self): + # Uppercase is auto-downcased (friendlier than rejecting). ``Default`` + # → ``default``, ``ATM10`` → ``atm10``. The on-disk slug is always + # lowercase regardless of what the user typed. + assert kb._normalize_board_slug("Default") == "default" + assert kb._normalize_board_slug("ATM10-Server") == "atm10-server" + + +# --------------------------------------------------------------------------- +# Path resolution +# --------------------------------------------------------------------------- + +class TestPathResolution: + def test_default_board_legacy_path(self, fresh_home): + """The default board's DB lives at ``<root>/kanban.db`` for back-compat.""" + assert kb.kanban_db_path() == fresh_home / "kanban.db" + assert kb.kanban_db_path(board="default") == fresh_home / "kanban.db" + + def test_named_board_under_boards_dir(self, fresh_home): + p = kb.kanban_db_path(board="atm10-server") + assert p == fresh_home / "kanban" / "boards" / "atm10-server" / "kanban.db" + + def test_workspaces_per_board(self, fresh_home): + assert kb.workspaces_root() == fresh_home / "kanban" / "workspaces" + # Uppercase input gets auto-downcased to the on-disk slug. + assert kb.workspaces_root(board="projA") == ( + fresh_home / "kanban" / "boards" / "proja" / "workspaces" + ) + + def test_logs_per_board(self, fresh_home): + assert kb.worker_logs_dir() == fresh_home / "kanban" / "logs" + assert kb.worker_logs_dir(board="other") == ( + fresh_home / "kanban" / "boards" / "other" / "logs" + ) + + def test_env_var_db_override_still_wins(self, fresh_home, tmp_path, monkeypatch): + """``HERMES_KANBAN_DB`` pins the file regardless of board= arg.""" + forced = tmp_path / "custom.db" + monkeypatch.setenv("HERMES_KANBAN_DB", str(forced)) + assert kb.kanban_db_path() == forced + assert kb.kanban_db_path(board="ignored") == forced + + def test_env_var_workspaces_override(self, fresh_home, tmp_path, monkeypatch): + forced = tmp_path / "ws" + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(forced)) + assert kb.workspaces_root(board="any") == forced + + +# --------------------------------------------------------------------------- +# Current-board resolution +# --------------------------------------------------------------------------- + +class TestCurrentBoard: + def test_default_when_unset(self, fresh_home): + assert kb.get_current_board() == "default" + + def test_env_var_takes_precedence(self, fresh_home, monkeypatch): + # Create the board so the env-var value is honoured (get_current_board + # trusts env-var validity, but the resolution chain doesn't require + # the board to exist; we just test that env trumps). + kb.create_board("envboard") + monkeypatch.setenv("HERMES_KANBAN_BOARD", "envboard") + assert kb.get_current_board() == "envboard" + + def test_file_pointer_honoured(self, fresh_home): + kb.create_board("filepick") + kb.set_current_board("filepick") + assert kb.get_current_board() == "filepick" + + def test_env_beats_file(self, fresh_home, monkeypatch): + kb.create_board("a") + kb.create_board("b") + kb.set_current_board("a") + monkeypatch.setenv("HERMES_KANBAN_BOARD", "b") + assert kb.get_current_board() == "b" + + def test_invalid_env_falls_through(self, fresh_home, monkeypatch): + monkeypatch.setenv("HERMES_KANBAN_BOARD", "!!bad!!") + # Should not crash — falls through to default. + assert kb.get_current_board() == "default" + + def test_clear_current_board(self, fresh_home): + kb.create_board("x") + kb.set_current_board("x") + kb.clear_current_board() + assert kb.get_current_board() == "default" + + def test_kanban_db_path_reads_current(self, fresh_home): + """kanban_db_path() with no args respects the on-disk pointer.""" + kb.create_board("my-proj") + kb.set_current_board("my-proj") + expected = fresh_home / "kanban" / "boards" / "my-proj" / "kanban.db" + assert kb.kanban_db_path() == expected + + +# --------------------------------------------------------------------------- +# Board CRUD +# --------------------------------------------------------------------------- + +class TestBoardCRUD: + def test_create_and_list(self, fresh_home): + assert [b["slug"] for b in kb.list_boards()] == ["default"] + kb.create_board("foo", name="Foo Board", description="test") + slugs = [b["slug"] for b in kb.list_boards()] + assert slugs == ["default", "foo"] + + def test_create_is_idempotent(self, fresh_home): + kb.create_board("bar") + kb.create_board("bar") # no error + slugs = [b["slug"] for b in kb.list_boards()] + assert slugs == ["default", "bar"] + + def test_create_writes_metadata(self, fresh_home): + meta = kb.create_board( + "baz", + name="Baz", + description="desc", + icon="📦", + color="#abcdef", + ) + assert meta["slug"] == "baz" + assert meta["name"] == "Baz" + assert meta["icon"] == "📦" + # Round-trip via read_board_metadata. + again = kb.read_board_metadata("baz") + assert again["name"] == "Baz" + assert again["description"] == "desc" + assert again["icon"] == "📦" + + def test_remove_archive(self, fresh_home): + kb.create_board("toremove") + res = kb.remove_board("toremove") + assert res["action"] == "archived" + assert Path(res["new_path"]).exists() + assert "toremove" not in [b["slug"] for b in kb.list_boards()] + + def test_remove_hard_delete(self, fresh_home): + kb.create_board("nuke") + d = kb.board_dir("nuke") + assert d.exists() + res = kb.remove_board("nuke", archive=False) + assert res["action"] == "deleted" + assert not d.exists() + + def test_remove_default_forbidden(self, fresh_home): + with pytest.raises(ValueError, match="default"): + kb.remove_board("default") + + def test_remove_nonexistent_raises(self, fresh_home): + with pytest.raises(ValueError, match="does not exist"): + kb.remove_board("nosuch") + + def test_remove_clears_current_pointer(self, fresh_home): + kb.create_board("pinned") + kb.set_current_board("pinned") + kb.remove_board("pinned") + assert kb.get_current_board() == "default" + + def test_rename_updates_metadata(self, fresh_home): + kb.create_board("slug-immutable") + kb.write_board_metadata("slug-immutable", name="New Display Name") + assert kb.read_board_metadata("slug-immutable")["name"] == "New Display Name" + # Slug must not change. + assert kb.board_exists("slug-immutable") + + +# --------------------------------------------------------------------------- +# Connection isolation +# --------------------------------------------------------------------------- + +class TestConnectionIsolation: + def test_tasks_do_not_leak_across_boards(self, fresh_home): + kb.create_board("alpha") + kb.create_board("beta") + + with kb.connect(board="alpha") as conn: + kb.create_task(conn, title="alpha-task-1", assignee="dev") + kb.create_task(conn, title="alpha-task-2", assignee="dev") + + with kb.connect(board="beta") as conn: + kb.create_task(conn, title="beta-only", assignee="dev") + + with kb.connect(board="alpha") as conn: + a = kb.list_tasks(conn) + with kb.connect(board="beta") as conn: + b = kb.list_tasks(conn) + with kb.connect(board="default") as conn: + d = kb.list_tasks(conn) + + assert {t.title for t in a} == {"alpha-task-1", "alpha-task-2"} + assert {t.title for t in b} == {"beta-only"} + assert d == [] + + def test_connect_without_args_uses_current(self, fresh_home): + kb.create_board("curr") + kb.set_current_board("curr") + with kb.connect() as conn: + kb.create_task(conn, title="implicit", assignee="x") + with kb.connect(board="curr") as conn: + tasks = kb.list_tasks(conn) + assert [t.title for t in tasks] == ["implicit"] + + def test_connect_env_var_overrides_current(self, fresh_home, monkeypatch): + kb.create_board("persist") + kb.create_board("envwin") + kb.set_current_board("persist") + monkeypatch.setenv("HERMES_KANBAN_BOARD", "envwin") + with kb.connect() as conn: + kb.create_task(conn, title="via-env", assignee="x") + with kb.connect(board="envwin") as conn: + assert [t.title for t in kb.list_tasks(conn)] == ["via-env"] + with kb.connect(board="persist") as conn: + assert kb.list_tasks(conn) == [] + + +# --------------------------------------------------------------------------- +# Worker spawn env injection +# --------------------------------------------------------------------------- + +class TestWorkerSpawnEnv: + """Ensure the dispatcher pins ``HERMES_KANBAN_BOARD`` / DB / workspaces on spawn. + + We monkey-patch ``subprocess.Popen`` to capture the child env without + actually spawning anything. + """ + + def test_default_spawn_sets_env_vars(self, fresh_home, monkeypatch): + captured = {} + + class FakeProc: + pid = 12345 + + def fake_popen(cmd, *args, **kwargs): + captured["cmd"] = cmd + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr(subprocess, "Popen", fake_popen) + kb.create_board("spawntest") + + task = kb.Task( + id="t_abc", + title="worker test", + body=None, + assignee="teknium", + status="ready", + priority=0, + created_by="user", + created_at=0, + started_at=None, + completed_at=None, + workspace_kind="scratch", + workspace_path=None, + claim_lock=None, + claim_expires=None, + tenant=None, + ) + + kb._default_spawn(task, str(fresh_home / "ws"), board="spawntest") + + env = captured["env"] + assert env["HERMES_KANBAN_BOARD"] == "spawntest" + assert env["HERMES_KANBAN_TASK"] == "t_abc" + # DB path should match the per-board DB, not the legacy default. + expected_db = fresh_home / "kanban" / "boards" / "spawntest" / "kanban.db" + assert env["HERMES_KANBAN_DB"] == str(expected_db) + expected_ws = fresh_home / "kanban" / "boards" / "spawntest" / "workspaces" + assert env["HERMES_KANBAN_WORKSPACES_ROOT"] == str(expected_ws) + + def test_default_board_spawn_keeps_legacy_paths(self, fresh_home, monkeypatch): + captured = {} + + class FakeProc: + pid = 1 + + def fake_popen(cmd, *args, **kwargs): + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr(subprocess, "Popen", fake_popen) + task = kb.Task( + id="t_def", + title="", + body=None, + assignee="teknium", + status="ready", + priority=0, + created_by=None, + created_at=0, + started_at=None, + completed_at=None, + workspace_kind="scratch", + workspace_path=None, + claim_lock=None, + claim_expires=None, + tenant=None, + ) + kb._default_spawn(task, str(fresh_home / "ws"), board=None) + env = captured["env"] + assert env["HERMES_KANBAN_BOARD"] == "default" + assert env["HERMES_KANBAN_DB"] == str(fresh_home / "kanban.db") + + +# --------------------------------------------------------------------------- +# CLI surface +# --------------------------------------------------------------------------- + +def _cli(args: list[str], env_extra: dict | None = None) -> subprocess.CompletedProcess: + """Run ``hermes kanban …`` with PYTHONPATH pinned to the worktree.""" + env = dict(os.environ) + env["PYTHONPATH"] = str(_WORKTREE) + if env_extra: + env.update(env_extra) + return subprocess.run( + [sys.executable, "-m", "hermes_cli.main", "kanban"] + args, + env=env, + capture_output=True, + text=True, + cwd=str(_WORKTREE), + timeout=30, + ) + + +class TestCLI: + def test_boards_list_default_only(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + res = _cli(["boards", "list", "--json"], env_extra=env) + assert res.returncode == 0, res.stderr + data = json.loads(res.stdout) + slugs = [b["slug"] for b in data] + assert slugs == ["default"] + assert data[0]["is_current"] is True + + def test_boards_create_and_switch(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + r1 = _cli( + ["boards", "create", "myproj", "--name", "My Project", "--switch"], + env_extra=env, + ) + assert r1.returncode == 0, r1.stderr + assert "created" in r1.stdout + assert "Switched" in r1.stdout + + r2 = _cli(["boards", "list", "--json"], env_extra=env) + data = json.loads(r2.stdout) + cur = [b for b in data if b["is_current"]][0] + assert cur["slug"] == "myproj" + + def test_per_board_task_isolation_via_cli(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + assert _cli(["boards", "create", "projA"], env_extra=env).returncode == 0 + assert _cli(["boards", "create", "projB"], env_extra=env).returncode == 0 + + # Create one task on each via --board. + r = _cli(["--board", "projA", "create", "Task A", "--assignee", "dev"], env_extra=env) + assert r.returncode == 0, r.stderr + r = _cli(["--board", "projB", "create", "Task B", "--assignee", "dev"], env_extra=env) + assert r.returncode == 0, r.stderr + + # list on each board only shows its own. + listA = _cli(["--board", "projA", "list", "--json"], env_extra=env) + listB = _cli(["--board", "projB", "list", "--json"], env_extra=env) + listD = _cli(["list", "--json"], env_extra=env) + + titlesA = [t["title"] for t in json.loads(listA.stdout)] + titlesB = [t["title"] for t in json.loads(listB.stdout)] + titlesD = [t["title"] for t in json.loads(listD.stdout)] + + assert titlesA == ["Task A"] + assert titlesB == ["Task B"] + assert titlesD == [] + + def test_board_flag_rejects_unknown(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + r = _cli(["--board", "ghost", "list"], env_extra=env) + # main.py's dispatcher doesn't propagate return codes today, so we + # assert the user-visible signal: a stderr error message. Whether + # the exit code stays 0 is a separate (pre-existing) issue. + assert "does not exist" in r.stderr + + def test_boards_rm_archives(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + _cli(["boards", "create", "rmme"], env_extra=env) + r = _cli(["boards", "rm", "rmme"], env_extra=env) + assert r.returncode == 0, r.stderr + assert "archived" in r.stdout + # Default board list no longer shows it. + res = _cli(["boards", "list", "--json"], env_extra=env) + slugs = [b["slug"] for b in json.loads(res.stdout)] + assert "rmme" not in slugs diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md index adf52f4a89d..f1bad41a205 100644 --- a/website/docs/user-guide/features/kanban.md +++ b/website/docs/user-guide/features/kanban.md @@ -54,15 +54,106 @@ They coexist: a kanban worker may call `delegate_task` internally during its run ## Core concepts +- **Board** — a standalone queue of tasks with its own SQLite DB, workspaces + directory, and dispatcher loop. A single install can have many boards + (e.g. one per project, repo, or domain); see [Boards (multi-project)](#boards-multi-project) + below. Single-project users stay on the `default` board and never see the + word "board" outside this docs section. - **Task** — a row with title, optional body, one assignee (a profile name), status (`triage | todo | ready | running | blocked | done | archived`), optional tenant namespace, optional idempotency key (dedup for retried automation). - **Link** — `task_links` row recording a parent → child dependency. The dispatcher promotes `todo → ready` when all parents are `done`. - **Comment** — the inter-agent protocol. Agents and humans append comments; when a worker is (re-)spawned it reads the full comment thread as part of its context. - **Workspace** — the directory a worker operates in. Three kinds: - - `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/`. + - `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/` (or `~/.hermes/kanban/boards/<slug>/workspaces/<id>/` on non-default boards). - `dir:<path>` — an existing shared directory (Obsidian vault, mail ops dir, per-account folder). **Must be an absolute path.** Relative paths like `dir:../tenants/foo/` are rejected at dispatch because they'd resolve against whatever CWD the dispatcher happens to be in, which is ambiguous and a confused-deputy escape vector. The path is otherwise trusted — it's your box, your filesystem, the worker runs with your uid. This is the trusted-local-user threat model; kanban is single-host by design. - `worktree` — a git worktree under `.worktrees/<id>/` for coding tasks. Worker-side `git worktree add` creates it. -- **Dispatcher** — a long-lived loop that, every N seconds (default 60): reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes ready tasks, atomically claims, spawns assigned profiles. Runs **inside the gateway** by default (`kanban.dispatch_in_gateway: true`). After ~5 consecutive spawn failures on the same task the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc. -- **Tenant** — optional string namespace. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix. +- **Dispatcher** — a long-lived loop that, every N seconds (default 60): reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes ready tasks, atomically claims, spawns assigned profiles. Runs **inside the gateway** by default (`kanban.dispatch_in_gateway: true`). One dispatcher sweeps all boards per tick; workers are spawned with `HERMES_KANBAN_BOARD` pinned so they can't see other boards. After ~5 consecutive spawn failures on the same task the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc. +- **Tenant** — optional string namespace *within* a board. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix. Tenants are a soft filter; boards are the hard isolation boundary. + +## Boards (multi-project) + +Boards let you separate unrelated streams of work — one per project, repo, +or domain — into isolated queues. A new install has exactly one board +called `default` (DB at `~/.hermes/kanban.db` for back-compat). Users who +only want one stream of work never need to know about boards; the feature +is opt-in. + +Per-board isolation is absolute: + +- Separate SQLite DB per board (`~/.hermes/kanban/boards/<slug>/kanban.db`). +- Separate `workspaces/` and `logs/` directories. +- Workers spawned for a task see **only** their board's tasks — the + dispatcher sets `HERMES_KANBAN_BOARD` in the child env and every + `kanban_*` tool the worker has access to reads it. +- Linking tasks across boards is not allowed (keeps the schema simple; if + you really need cross-project refs, use free-text mentions and look + them up by id manually). + +### Managing boards from the CLI + +```bash +# See what's on disk. Fresh installs show only "default". +hermes kanban boards list + +# Create a new board. +hermes kanban boards create atm10-server \ + --name "ATM10 Server" \ + --description "Minecraft modded server ops" \ + --icon 🎮 \ + --switch # optional: make it the active board + +# Operate on a specific board without switching. +hermes kanban --board atm10-server list +hermes kanban --board atm10-server create "Restart ATM server" --assignee ops + +# Change which board is "current" for subsequent calls. +hermes kanban boards switch atm10-server +hermes kanban boards show # who's active right now? + +# Rename the display name (the slug is immutable — it's the directory name). +hermes kanban boards rename atm10-server "ATM10 (Prod)" + +# Archive (default) — moves the board's dir to boards/_archived/<slug>-<ts>/. +# Recoverable by moving the dir back. +hermes kanban boards rm atm10-server + +# Hard delete — `rm -rf` the board dir. No recovery. +hermes kanban boards rm atm10-server --delete +``` + +Board resolution order (highest precedence first): + +1. Explicit `--board <slug>` on the CLI call. +2. `HERMES_KANBAN_BOARD` env var (set by the dispatcher when spawning a + worker, so workers can't see other boards). +3. `~/.hermes/kanban/current` — the slug persisted by `hermes kanban + boards switch`. +4. `default`. + +Slugs are validated: lowercase alphanumerics + hyphens + underscores, 1-64 +chars, must start with alphanumeric. Uppercase input is auto-downcased. +Anything else (slashes, spaces, dots, `..`) is rejected at the CLI layer +so path-traversal tricks can't name a board. + +### Managing boards from the dashboard + +`hermes dashboard` → Kanban tab shows a board switcher at the top as soon +as more than one board exists (or any board has tasks). Single-board users +see only a small `+ New board` button; the switcher is hidden until it +matters. + +- **Board dropdown** — pick the active board. Your selection is saved to + the browser's `localStorage` so it persists across reloads without + shifting the CLI's `current` pointer out from under a terminal you left + open. +- **+ New board** — opens a modal asking for slug, display name, + description, and icon. Option to auto-switch to the new board. +- **Archive** — only shown on non-`default` boards. Confirms, then moves + the board dir to `boards/_archived/`. + +All dashboard API endpoints accept `?board=<slug>` for board scoping. The +events WebSocket is pinned to a board at connection time; switching in +the UI opens a fresh WS against the new board. + ## Quick start From 75b4a3467032f3382cc860a73a06a706ed580b12 Mon Sep 17 00:00:00 2001 From: atongrun <76803960+atongrun@users.noreply.github.com> Date: Sun, 26 Apr 2026 13:01:45 +0800 Subject: [PATCH 122/171] fix(cli): check updates against upstream/main for fork users --- hermes_cli/main.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 89cc2e40d98..4f15cd26d59 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -6495,13 +6495,29 @@ def _cmd_update_check(): if sys.platform == "win32": git_cmd = ["git", "-c", "windows.appendAtomically=false"] - print("→ Fetching from origin...") + # Fetch both origin and upstream; prefer upstream as the canonical reference + print("→ Fetching from upstream...") fetch_result = subprocess.run( - git_cmd + ["fetch", "origin"], + git_cmd + ["fetch", "upstream"], cwd=PROJECT_ROOT, capture_output=True, text=True, ) + if fetch_result.returncode != 0: + # Fallback to origin if upstream doesn't exist + print("→ Fetching from origin...") + fetch_result = subprocess.run( + git_cmd + ["fetch", "origin"], + cwd=PROJECT_ROOT, + capture_output=True, + text=True, + ) + upstream_exists = False + compare_branch = "origin/main" + else: + upstream_exists = True + compare_branch = "upstream/main" + if fetch_result.returncode != 0: stderr = fetch_result.stderr.strip() if "Could not resolve host" in stderr or "unable to access" in stderr: @@ -6509,13 +6525,13 @@ def _cmd_update_check(): elif "Authentication failed" in stderr or "could not read Username" in stderr: print("✗ Authentication failed — check your git credentials or SSH key.") else: - print("✗ Failed to fetch from origin.") + print("✗ Failed to fetch.") if stderr: print(f" {stderr.splitlines()[0]}") sys.exit(1) rev_result = subprocess.run( - git_cmd + ["rev-list", "HEAD..origin/main", "--count"], + git_cmd + ["rev-list", f"HEAD..{compare_branch}", "--count"], cwd=PROJECT_ROOT, capture_output=True, text=True, @@ -6527,7 +6543,7 @@ def _cmd_update_check(): print("✓ Already up to date.") else: commits_word = "commit" if behind == 1 else "commits" - print(f"⚕ Update available: {behind} {commits_word} behind origin/main.") + print(f"⚕ Update available: {behind} {commits_word} behind {compare_branch}.") from hermes_cli.config import recommended_update_command print(f" Run '{recommended_update_command()}' to install.") From 239ea1bdeabb4bc5b56571b3f5b0aa63956b982e Mon Sep 17 00:00:00 2001 From: Cameron Aragon <69489633+camaragon@users.noreply.github.com> Date: Thu, 30 Apr 2026 01:23:57 +0000 Subject: [PATCH 123/171] fix(image-gen): preserve xAI API error status --- plugins/image_gen/xai/__init__.py | 7 ++++--- tests/plugins/image_gen/test_xai_provider.py | 21 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py index b1ec4368efa..93fd10ce390 100644 --- a/plugins/image_gen/xai/__init__.py +++ b/plugins/image_gen/xai/__init__.py @@ -203,11 +203,12 @@ class XAIImageGenProvider(ImageGenProvider): ) response.raise_for_status() except requests.HTTPError as exc: - status = exc.response.status_code if exc.response else 0 + response = exc.response + status = response.status_code if response is not None else 0 try: - err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300]) + err_msg = response.json().get("error", {}).get("message", response.text[:300]) except Exception: - err_msg = exc.response.text[:300] if exc.response else str(exc) + err_msg = response.text[:300] if response is not None else str(exc) logger.error("xAI image gen failed (%d): %s", status, err_msg) return error_response( error=f"xAI image generation failed ({status}): {err_msg}", diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py index ab1bf88345a..0da46d43ec9 100644 --- a/tests/plugins/image_gen/test_xai_provider.py +++ b/tests/plugins/image_gen/test_xai_provider.py @@ -172,6 +172,27 @@ class TestGenerate: assert result["success"] is False assert result["error_type"] == "api_error" + def test_api_error_preserves_real_response_status(self): + import requests as req_lib + from plugins.image_gen.xai import XAIImageGenProvider + + response = req_lib.Response() + response.status_code = 401 + response._content = json.dumps({"error": {"message": "Invalid API key"}}).encode() + response.headers["Content-Type"] = "application/json" + + response.raise_for_status = MagicMock( + side_effect=req_lib.HTTPError(response=response) + ) + + with patch("plugins.image_gen.xai.requests.post", return_value=response): + provider = XAIImageGenProvider() + result = provider.generate(prompt="test") + + assert result["success"] is False + assert result["error_type"] == "api_error" + assert "xAI image generation failed (401): Invalid API key" in result["error"] + def test_timeout(self): import requests as req_lib From da8654bb4134634f924d275bc0bc562521887cca Mon Sep 17 00:00:00 2001 From: MichaelWDanko <michaeldanko@icloud.com> Date: Sun, 26 Apr 2026 00:25:57 -0400 Subject: [PATCH 124/171] fix(dashboard): show custom theme palette swatches --- web/src/components/ThemeSwitcher.tsx | 17 ++++++++--------- web/src/themes/context.tsx | 14 ++++++++++---- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/web/src/components/ThemeSwitcher.tsx b/web/src/components/ThemeSwitcher.tsx index 4d50e611efa..462ccaacfc9 100644 --- a/web/src/components/ThemeSwitcher.tsx +++ b/web/src/components/ThemeSwitcher.tsx @@ -4,6 +4,7 @@ import { Button } from "@nous-research/ui/ui/components/button"; import { ListItem } from "@nous-research/ui/ui/components/list-item"; import { Typography } from "@/components/NouiTypography"; import { BUILTIN_THEMES, useTheme } from "@/themes"; +import type { DashboardTheme } from "@/themes"; import { useI18n } from "@/i18n"; import { cn } from "@/lib/utils"; @@ -11,8 +12,8 @@ import { cn } from "@/lib/utils"; * Compact theme picker mounted next to the language switcher in the header. * Each dropdown row shows a 3-stop swatch (background / midground / warm * glow) so users can preview the palette before committing. User-defined - * themes from `~/.hermes/dashboard-themes/*.yaml` that aren't in - * `BUILTIN_THEMES` render without swatches and apply the default palette. + * themes from `~/.hermes/dashboard-themes/*.yaml` use their API-provided + * definitions so they show real palette swatches just like built-ins. * * When placed at the bottom of a container (e.g. the sidebar rail), pass * `dropUp` so the menu opens above the trigger instead of clipping below @@ -95,7 +96,7 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { {availableThemes.map((th) => { const isActive = th.name === themeName; - const preset = BUILTIN_THEMES[th.name]; + const paletteTheme = BUILTIN_THEMES[th.name] ?? th.definition; return ( <ListItem @@ -109,8 +110,8 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { }} className="gap-3" > - {preset ? ( - <ThemeSwatch theme={preset.name} /> + {paletteTheme ? ( + <ThemeSwatch theme={paletteTheme} /> ) : ( <PlaceholderSwatch /> )} @@ -144,10 +145,8 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { ); } -function ThemeSwatch({ theme }: { theme: string }) { - const preset = BUILTIN_THEMES[theme]; - if (!preset) return <PlaceholderSwatch />; - const { background, midground, warmGlow } = preset.palette; +function ThemeSwatch({ theme }: { theme: DashboardTheme }) { + const { background, midground, warmGlow } = theme.palette; return ( <div aria-hidden diff --git a/web/src/themes/context.tsx b/web/src/themes/context.tsx index efc99b6317f..3c14771d321 100644 --- a/web/src/themes/context.tsx +++ b/web/src/themes/context.tsx @@ -311,9 +311,7 @@ export function ThemeProvider({ children }: { children: ReactNode }) { /** All selectable themes (shown in the picker). Starts with just the * built-ins; the API call below merges in user themes. */ - const [availableThemes, setAvailableThemes] = useState< - Array<{ description: string; label: string; name: string }> - >(() => + const [availableThemes, setAvailableThemes] = useState<ThemeSummary[]>(() => Object.values(BUILTIN_THEMES).map((t) => ({ name: t.name, label: t.label, @@ -360,6 +358,7 @@ export function ThemeProvider({ children }: { children: ReactNode }) { name: t.name, label: t.label, description: t.description, + definition: t.definition, })), ); // Index any definitions the server shipped (user themes). @@ -430,8 +429,15 @@ const ThemeContext = createContext<ThemeContextValue>({ }); interface ThemeContextValue { - availableThemes: Array<{ description: string; label: string; name: string }>; + availableThemes: ThemeSummary[]; setTheme: (name: string) => void; theme: DashboardTheme; themeName: string; } + +interface ThemeSummary { + description: string; + label: string; + name: string; + definition?: DashboardTheme; +} From 60c4bc96fd81b51277663a8283fa5eea2be8ab51 Mon Sep 17 00:00:00 2001 From: Yuyang Xu <xudavid429@gmail.com> Date: Sun, 26 Apr 2026 15:26:14 -0400 Subject: [PATCH 125/171] fix(security): restore .env/auth.json/state.db with 0600 perms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `hermes import` was creating secret files with the process umask (typically 0644) instead of 0600. zipfile.open() does not honor the Unix mode bits stored in zip member external_attr; the restore loop used open(target, "wb") which always falls back to umask. Threat: silent privilege downgrade after a routine restore on multi-user systems (shared dev boxes, CI runners, jump hosts) — any local user could read API keys and OAuth tokens from ~/.hermes/. Fix mirrors the convention already used at file creation (hermes_cli/auth.py: stat.S_IRUSR | stat.S_IWUSR for auth.json). The quick-snapshot restore path (restore_quick_snapshot) is unaffected — it uses shutil.copy2 which preserves perms via copystat(). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- hermes_cli/backup.py | 5 +++++ tests/hermes_cli/test_backup.py | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 2a766f7502a..20ddb3c87d4 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -61,6 +61,9 @@ _EXCLUDED_NAMES = { "cron.pid", } +# zipfile.open() drops Unix mode bits on extract; restore tightens these to 0600. +_SECRET_FILE_NAMES = {".env", "auth.json", "state.db"} + def _should_exclude(rel_path: Path) -> bool: """Return True if *rel_path* (relative to hermes root) should be skipped.""" @@ -381,6 +384,8 @@ def run_import(args) -> None: target.parent.mkdir(parents=True, exist_ok=True) with zf.open(member) as src, open(target, "wb") as dst: dst.write(src.read()) + if target.name in _SECRET_FILE_NAMES: + os.chmod(target, 0o600) restored += 1 except (PermissionError, OSError) as exc: errors.append(f" {rel}: {exc}") diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py index 346c38dbe63..9a99a035faa 100644 --- a/tests/hermes_cli/test_backup.py +++ b/tests/hermes_cli/test_backup.py @@ -471,6 +471,32 @@ class TestImport: with pytest.raises(SystemExit): run_import(args) + @pytest.mark.skipif(os.name != "posix", reason="POSIX file permissions only") + def test_restores_secret_files_with_0600_perms(self, tmp_path, monkeypatch): + """Secret files must end up at 0600 after restore (zipfile drops mode bits).""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + zip_path = tmp_path / "backup.zip" + self._make_backup_zip(zip_path, { + "config.yaml": "model: openrouter\n", + ".env": "OPENROUTER_API_KEY=sk-secret\n", + "auth.json": '{"providers": {"nous": "token"}}', + "state.db": b"SQLite format 3\x00", + "profiles/coder/.env": "ANTHROPIC_API_KEY=sk-ant-secret\n", + }) + + args = Namespace(zipfile=str(zip_path), force=True) + + from hermes_cli.backup import run_import + run_import(args) + + for rel in (".env", "auth.json", "state.db", "profiles/coder/.env"): + mode = (hermes_home / rel).stat().st_mode & 0o777 + assert mode == 0o600, f"{rel} restored with mode {oct(mode)}, expected 0o600" + # --------------------------------------------------------------------------- # Round-trip test From a31477dabb9b02c85283070d0069c78b76d860bb Mon Sep 17 00:00:00 2001 From: changchun989 <changchun989@proton.me> Date: Sat, 2 May 2026 03:03:30 +0800 Subject: [PATCH 126/171] fix(profiles): normalize profile IDs for Kanban assignees and lookups - Add normalize_profile_name() for lowercase canonical IDs and Default alias - Use canonical names in create/delete/rename/export/import/set_active paths - Canonicalize Kanban assignee on create/assign, list filter, and worker spawn - Tests for mixed-case assignees and profile resolution (fixes #18498) --- hermes_cli/kanban_db.py | 21 +++- hermes_cli/profiles.py | 167 +++++++++++++++++------------ tests/hermes_cli/test_kanban_db.py | 16 +++ tests/hermes_cli/test_profiles.py | 28 ++++- 4 files changed, 160 insertions(+), 72 deletions(-) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 73445699243..a58e542ac63 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -1086,6 +1086,15 @@ def _claimer_id() -> str: # Task creation / mutation # --------------------------------------------------------------------------- +def _canonical_assignee(assignee: Optional[str]) -> Optional[str]: + """Lowercase-assignee normalization for Kanban rows (dashboard/CLI parity).""" + if assignee is None: + return None + from hermes_cli.profiles import normalize_profile_name + + return normalize_profile_name(assignee) + + def create_task( conn: sqlite3.Connection, *, @@ -1127,6 +1136,7 @@ def create_task( (e.g. ``skills=["translation"]`` so the worker loads the translation skill regardless of the profile's default config). """ + assignee = _canonical_assignee(assignee) if not title or not title.strip(): raise ValueError("title is required") if workspace_kind not in VALID_WORKSPACE_KINDS: @@ -1291,7 +1301,7 @@ def list_tasks( params: list[Any] = [] if assignee is not None: query += " AND assignee = ?" - params.append(assignee) + params.append(_canonical_assignee(assignee)) if status is not None: if status not in VALID_STATUSES: raise ValueError(f"status must be one of {sorted(VALID_STATUSES)}") @@ -1315,6 +1325,7 @@ def assign_task(conn: sqlite3.Connection, task_id: str, profile: Optional[str]) Refuses to reassign a task that's currently running (claim_lock set). Reassign after the current run completes if needed. """ + profile = _canonical_assignee(profile) with write_txn(conn): row = conn.execute( "SELECT status, claim_lock FROM tasks WHERE id = ?", (task_id,) @@ -2587,6 +2598,10 @@ def _default_spawn( if not task.assignee: raise ValueError(f"task {task.id} has no assignee") + from hermes_cli.profiles import normalize_profile_name + + profile_arg = normalize_profile_name(task.assignee) + prompt = f"work kanban task {task.id}" env = dict(os.environ) if task.tenant: @@ -2610,11 +2625,11 @@ def _default_spawn( # `hermes -p <assignee>` activates the profile, but the env var is # what the tool reads — set it explicitly here so comments are # attributed correctly regardless of how the child loads config. - env["HERMES_PROFILE"] = task.assignee + env["HERMES_PROFILE"] = profile_arg cmd = [ "hermes", - "-p", task.assignee, + "-p", profile_arg, # Auto-load the kanban-worker skill so every dispatched worker # has the pattern library (good summary/metadata shapes, retry # diagnostics, block-reason examples) in its context, even if diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index dd5fabcec43..a6fb2761893 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -179,29 +179,50 @@ def _get_wrapper_dir() -> Path: # Validation # --------------------------------------------------------------------------- +def normalize_profile_name(name: str) -> str: + """Return the canonical profile id used on disk and in CLI ``-p`` argv. + + Named profiles are stored lowercase under ``profiles/<id>/``. The special + alias ``default`` is matched case-insensitively (``Default`` → ``default``). + Dashboards and tools may pass title-cased display labels; normalize before + validation, assignment, and subprocess spawn (see issue #18498). + """ + if not isinstance(name, str): + name = str(name) + stripped = name.strip() + if not stripped: + raise ValueError("profile name cannot be empty") + if stripped.casefold() == "default": + return "default" + return stripped.lower() + + def validate_profile_name(name: str) -> None: """Raise ``ValueError`` if *name* is not a valid profile identifier.""" - if name == "default": + canon = normalize_profile_name(name) + if canon == "default": return # special alias for ~/.hermes - if not _PROFILE_ID_RE.match(name): + if not _PROFILE_ID_RE.match(canon): raise ValueError( - f"Invalid profile name {name!r}. Must match " + f"Invalid profile name {canon!r}. Must match " f"[a-z0-9][a-z0-9_-]{{0,63}}" ) def get_profile_dir(name: str) -> Path: """Resolve a profile name to its HERMES_HOME directory.""" - if name == "default": + canon = normalize_profile_name(name) + if canon == "default": return _get_default_hermes_home() - return _get_profiles_root() / name + return _get_profiles_root() / canon def profile_exists(name: str) -> bool: """Check whether a profile directory exists.""" - if name == "default": + canon = normalize_profile_name(name) + if canon == "default": return True - return get_profile_dir(name).is_dir() + return get_profile_dir(canon).is_dir() # --------------------------------------------------------------------------- @@ -213,28 +234,29 @@ def check_alias_collision(name: str) -> Optional[str]: Checks: reserved names, hermes subcommands, existing binaries in PATH. """ - if name in _RESERVED_NAMES: - return f"'{name}' is a reserved name" - if name in _HERMES_SUBCOMMANDS: - return f"'{name}' conflicts with a hermes subcommand" + canon = normalize_profile_name(name) + if canon in _RESERVED_NAMES: + return f"'{canon}' is a reserved name" + if canon in _HERMES_SUBCOMMANDS: + return f"'{canon}' conflicts with a hermes subcommand" # Check existing commands in PATH wrapper_dir = _get_wrapper_dir() try: result = subprocess.run( - ["which", name], capture_output=True, text=True, timeout=5, + ["which", canon], capture_output=True, text=True, timeout=5, ) if result.returncode == 0: existing_path = result.stdout.strip() # Allow overwriting our own wrappers - if existing_path == str(wrapper_dir / name): + if existing_path == str(wrapper_dir / canon): try: - content = (wrapper_dir / name).read_text() + content = (wrapper_dir / canon).read_text() if "hermes -p" in content: return None # it's our wrapper, safe to overwrite except Exception: pass - return f"'{name}' conflicts with an existing command ({existing_path})" + return f"'{canon}' conflicts with an existing command ({existing_path})" except (FileNotFoundError, subprocess.TimeoutExpired): pass @@ -252,6 +274,7 @@ def create_wrapper_script(name: str) -> Optional[Path]: Returns the path to the created wrapper, or None if creation failed. """ + canon = normalize_profile_name(name) wrapper_dir = _get_wrapper_dir() try: wrapper_dir.mkdir(parents=True, exist_ok=True) @@ -259,9 +282,9 @@ def create_wrapper_script(name: str) -> Optional[Path]: print(f"⚠ Could not create {wrapper_dir}: {e}") return None - wrapper_path = wrapper_dir / name + wrapper_path = wrapper_dir / canon try: - wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {name} "$@"\n') + wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {canon} "$@"\n') wrapper_path.chmod(wrapper_path.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH) return wrapper_path except OSError as e: @@ -271,7 +294,7 @@ def create_wrapper_script(name: str) -> Optional[Path]: def remove_wrapper_script(name: str) -> bool: """Remove the wrapper script for a profile. Returns True if removed.""" - wrapper_path = _get_wrapper_dir() / name + wrapper_path = _get_wrapper_dir() / normalize_profile_name(name) if wrapper_path.exists(): try: # Verify it's our wrapper before removing @@ -422,15 +445,16 @@ def create_profile( The newly created profile directory. """ validate_profile_name(name) + canon = normalize_profile_name(name) - if name == "default": + if canon == "default": raise ValueError( "Cannot create a profile named 'default' — it is the built-in profile (~/.hermes)." ) - profile_dir = get_profile_dir(name) + profile_dir = get_profile_dir(canon) if profile_dir.exists(): - raise FileExistsError(f"Profile '{name}' already exists at {profile_dir}") + raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}") # Resolve clone source source_dir = None @@ -541,23 +565,24 @@ def delete_profile(name: str, yes: bool = False) -> Path: Returns the path that was removed. """ validate_profile_name(name) + canon = normalize_profile_name(name) - if name == "default": + if canon == "default": raise ValueError( "Cannot delete the default profile (~/.hermes).\n" "To remove everything, use: hermes uninstall" ) - profile_dir = get_profile_dir(name) + profile_dir = get_profile_dir(canon) if not profile_dir.is_dir(): - raise FileNotFoundError(f"Profile '{name}' does not exist.") + raise FileNotFoundError(f"Profile '{canon}' does not exist.") # Show what will be deleted model, provider = _read_config_model(profile_dir) gw_running = _check_gateway_running(profile_dir) skill_count = _count_skills(profile_dir) - print(f"\nProfile: {name}") + print(f"\nProfile: {canon}") print(f"Path: {profile_dir}") if model: print(f"Model: {model}" + (f" ({provider})" if provider else "")) @@ -569,7 +594,7 @@ def delete_profile(name: str, yes: bool = False) -> Path: ] # Check for service - wrapper_path = _get_wrapper_dir() / name + wrapper_path = _get_wrapper_dir() / canon has_wrapper = wrapper_path.exists() if has_wrapper: items.append(f"Command alias ({wrapper_path})") @@ -584,16 +609,16 @@ def delete_profile(name: str, yes: bool = False) -> Path: if not yes: print() try: - confirm = input(f"Type '{name}' to confirm: ").strip() + confirm = input(f"Type '{canon}' to confirm: ").strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") return profile_dir - if confirm != name: + if confirm != canon: print("Cancelled.") return profile_dir # 1. Disable service (prevents auto-restart) - _cleanup_gateway_service(name, profile_dir) + _cleanup_gateway_service(canon, profile_dir) # 2. Stop running gateway if gw_running: @@ -601,7 +626,7 @@ def delete_profile(name: str, yes: bool = False) -> Path: # 3. Remove wrapper script if has_wrapper: - if remove_wrapper_script(name): + if remove_wrapper_script(canon): print(f"✓ Removed {wrapper_path}") # 4. Remove profile directory @@ -614,13 +639,13 @@ def delete_profile(name: str, yes: bool = False) -> Path: # 5. Clear active_profile if it pointed to this profile try: active = get_active_profile() - if active == name: + if active == canon: set_active_profile("default") print("✓ Active profile reset to default") except Exception: pass - print(f"\nProfile '{name}' deleted.") + print(f"\nProfile '{canon}' deleted.") return profile_dir @@ -731,21 +756,22 @@ def set_active_profile(name: str) -> None: Writes to ``~/.hermes/active_profile``. Use ``"default"`` to clear. """ validate_profile_name(name) - if name != "default" and not profile_exists(name): + canon = normalize_profile_name(name) + if canon != "default" and not profile_exists(canon): raise FileNotFoundError( - f"Profile '{name}' does not exist. " - f"Create it with: hermes profile create {name}" + f"Profile '{canon}' does not exist. " + f"Create it with: hermes profile create {canon}" ) path = _get_active_profile_path() path.parent.mkdir(parents=True, exist_ok=True) - if name == "default": + if canon == "default": # Remove the file to indicate default path.unlink(missing_ok=True) else: # Atomic write tmp = path.with_suffix(".tmp") - tmp.write_text(name + "\n") + tmp.write_text(canon + "\n") tmp.replace(path) @@ -812,15 +838,16 @@ def export_profile(name: str, output_path: str) -> Path: import tempfile validate_profile_name(name) - profile_dir = get_profile_dir(name) + canon = normalize_profile_name(name) + profile_dir = get_profile_dir(canon) if not profile_dir.is_dir(): - raise FileNotFoundError(f"Profile '{name}' does not exist.") + raise FileNotFoundError(f"Profile '{canon}' does not exist.") output = Path(output_path) # shutil.make_archive wants the base name without extension base = str(output).removesuffix(".tar.gz").removesuffix(".tgz") - if name == "default": + if canon == "default": # The default profile IS ~/.hermes itself — its parent is ~/ and its # directory name is ".hermes", not "default". We stage a clean copy # under a temp dir so the archive contains ``default/...``. @@ -836,14 +863,14 @@ def export_profile(name: str, output_path: str) -> Path: # Named profiles — stage a filtered copy to exclude credentials with tempfile.TemporaryDirectory() as tmpdir: - staged = Path(tmpdir) / name + staged = Path(tmpdir) / canon _CREDENTIAL_FILES = {"auth.json", ".env"} shutil.copytree( profile_dir, staged, ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents), ) - result = shutil.make_archive(base, "gztar", tmpdir, name) + result = shutil.make_archive(base, "gztar", tmpdir, canon) return Path(result) @@ -952,16 +979,17 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: # Archives exported from the default profile have "default/" as top-level # dir. Importing as "default" would target ~/.hermes itself — disallow # that and guide the user toward a named profile. - if inferred_name == "default": + validate_profile_name(inferred_name) + canon = normalize_profile_name(inferred_name) + if canon == "default": raise ValueError( "Cannot import as 'default' — that is the built-in root profile (~/.hermes). " "Specify a different name: hermes profile import <archive> --name <name>" ) - validate_profile_name(inferred_name) - profile_dir = get_profile_dir(inferred_name) + profile_dir = get_profile_dir(canon) if profile_dir.exists(): - raise FileExistsError(f"Profile '{inferred_name}' already exists at {profile_dir}") + raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}") profiles_root = _get_profiles_root() profiles_root.mkdir(parents=True, exist_ok=True) @@ -977,8 +1005,8 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: ) final_source = extracted - if archive_root != inferred_name: - final_source = staging_root / inferred_name + if archive_root != canon: + final_source = staging_root / canon extracted.rename(final_source) shutil.move(str(final_source), str(profile_dir)) @@ -1050,23 +1078,25 @@ def rename_profile(old_name: str, new_name: str) -> Path: """ validate_profile_name(old_name) validate_profile_name(new_name) + old_canon = normalize_profile_name(old_name) + new_canon = normalize_profile_name(new_name) - if old_name == "default": + if old_canon == "default": raise ValueError("Cannot rename the default profile.") - if new_name == "default": + if new_canon == "default": raise ValueError("Cannot rename to 'default' — it is reserved.") - old_dir = get_profile_dir(old_name) - new_dir = get_profile_dir(new_name) + old_dir = get_profile_dir(old_canon) + new_dir = get_profile_dir(new_canon) if not old_dir.is_dir(): - raise FileNotFoundError(f"Profile '{old_name}' does not exist.") + raise FileNotFoundError(f"Profile '{old_canon}' does not exist.") if new_dir.exists(): - raise FileExistsError(f"Profile '{new_name}' already exists.") + raise FileExistsError(f"Profile '{new_canon}' already exists.") # 1. Stop gateway if running if _check_gateway_running(old_dir): - _cleanup_gateway_service(old_name, old_dir) + _cleanup_gateway_service(old_canon, old_dir) _stop_gateway_process(old_dir) # 2. Rename directory @@ -1074,22 +1104,22 @@ def rename_profile(old_name: str, new_name: str) -> Path: print(f"✓ Renamed {old_dir.name} → {new_dir.name}") # 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity - _migrate_honcho_profile_host(old_name, new_name, new_dir) + _migrate_honcho_profile_host(old_canon, new_canon, new_dir) # 4. Update wrapper script - remove_wrapper_script(old_name) - collision = check_alias_collision(new_name) + remove_wrapper_script(old_canon) + collision = check_alias_collision(new_canon) if not collision: - create_wrapper_script(new_name) - print(f"✓ Alias updated: {new_name}") + create_wrapper_script(new_canon) + print(f"✓ Alias updated: {new_canon}") else: - print(f"⚠ Cannot create alias '{new_name}' — {collision}") + print(f"⚠ Cannot create alias '{new_canon}' — {collision}") # 5. Update active_profile if it pointed to old name try: - if get_active_profile() == old_name: - set_active_profile(new_name) - print(f"✓ Active profile updated: {new_name}") + if get_active_profile() == old_canon: + set_active_profile(new_canon) + print(f"✓ Active profile updated: {new_canon}") except Exception: pass @@ -1192,12 +1222,13 @@ def resolve_profile_env(profile_name: str) -> str: are imported, to set the HERMES_HOME environment variable. """ validate_profile_name(profile_name) - profile_dir = get_profile_dir(profile_name) + canon = normalize_profile_name(profile_name) + profile_dir = get_profile_dir(canon) - if profile_name != "default" and not profile_dir.is_dir(): + if canon != "default" and not profile_dir.is_dir(): raise FileNotFoundError( - f"Profile '{profile_name}' does not exist. " - f"Create it with: hermes profile create {profile_name}" + f"Profile '{canon}' does not exist. " + f"Create it with: hermes profile create {canon}" ) return str(profile_dir) diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index 66992a721c5..1907938b429 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -252,6 +252,22 @@ def test_assign_reassigns_when_not_running(kanban_home): assert kb.get_task(conn, t).assignee == "b" +def test_assignee_normalized_to_lowercase_on_create_and_assign(kanban_home): + """Dashboard/CLI may pass title-cased profile labels; DB + spawn use canonical id.""" + with kb.connect() as conn: + tid = kb.create_task(conn, title="cased", assignee="Jules") + assert kb.get_task(conn, tid).assignee == "jules" + assert kb.assign_task(conn, tid, "Librarian") + assert kb.get_task(conn, tid).assignee == "librarian" + + +def test_list_tasks_assignee_filter_case_insensitive(kanban_home): + with kb.connect() as conn: + tid = kb.create_task(conn, title="q", assignee="jules") + found = kb.list_tasks(conn, assignee="Jules") + assert len(found) == 1 and found[0].id == tid + + def test_archive_hides_from_default_list(kanban_home): with kb.connect() as conn: t = kb.create_task(conn, title="x") diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py index 9177930f225..9dd783c2ef1 100644 --- a/tests/hermes_cli/test_profiles.py +++ b/tests/hermes_cli/test_profiles.py @@ -15,6 +15,7 @@ from unittest.mock import patch, MagicMock import pytest from hermes_cli.profiles import ( + normalize_profile_name, validate_profile_name, get_profile_dir, create_profile, @@ -58,6 +59,24 @@ def profile_env(tmp_path, monkeypatch): # TestValidateProfileName # =================================================================== +class TestNormalizeProfileName: + """Tests for normalize_profile_name().""" + + def test_title_case_normalized(self): + assert normalize_profile_name("Jules") == "jules" + assert normalize_profile_name(" Librarian ") == "librarian" + + def test_default_case_insensitive(self): + assert normalize_profile_name("Default") == "default" + assert normalize_profile_name("DEFAULT") == "default" + + def test_empty_raises(self): + with pytest.raises(ValueError, match="cannot be empty"): + normalize_profile_name("") + with pytest.raises(ValueError, match="cannot be empty"): + normalize_profile_name(" ") + + class TestValidateProfileName: """Tests for validate_profile_name().""" @@ -66,7 +85,10 @@ class TestValidateProfileName: # Should not raise validate_profile_name(name) - @pytest.mark.parametrize("name", ["UPPER", "has space", ".hidden", "-leading"]) + def test_uppercase_accepted_via_normalization(self): + validate_profile_name("Jules") + + @pytest.mark.parametrize("name", ["has space", ".hidden", "-leading"]) def test_invalid_names_rejected(self, name): with pytest.raises(ValueError): validate_profile_name(name) @@ -107,6 +129,10 @@ class TestGetProfileDir: result = get_profile_dir("coder") assert result == tmp_path / ".hermes" / "profiles" / "coder" + def test_named_profile_matching_is_case_insensitive(self, profile_env): + tmp_path = profile_env + assert get_profile_dir("Coder") == tmp_path / ".hermes" / "profiles" / "coder" + # =================================================================== # TestCreateProfile From ae40fca95523b2daf7d8c3245dd27ea28059a5cb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:44:00 -0700 Subject: [PATCH 127/171] fix(profiles): keep validate_profile_name strict; callers normalize first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to @changchun989's cherry-pick: reverts the validate-via- normalize change so validate_profile_name remains a strict regex check on the input AS-GIVEN. Callers that accept mixed-case user input (dashboard UI, CLI args, import flows) call normalize_profile_name() first, then validate the result. This keeps validate honest about what the on-disk directory name must look like — e.g. ' jules ' (trailing whitespace) is now rejected instead of silently trimmed and accepted. - validate_profile_name: strict lowercase/regex check again, 'UPPER' back in the invalid-names parametrize - 8 call sites in profiles.py (create_profile, delete_profile, set_active_profile, export_profile, import_profile, rename_profile, resolve_profile_env, plus the clone_from branch): swap the normalize-then-validate order - scripts/release.py: add changchun989@proton.me -> changchun989 to AUTHOR_MAP so CI doesn't block on the unmapped contributor email All kanban + profile tests pass (268 across test_profiles.py + test_kanban_db.py + test_kanban_core_functionality.py, plus 73 in test_kanban_tools.py + test_kanban_dashboard_plugin.py). Closes #18498. --- hermes_cli/profiles.py | 33 +++++++++++++++++++------------ scripts/release.py | 1 + tests/hermes_cli/test_profiles.py | 8 +++++--- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index a6fb2761893..10cd36b88c9 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -198,13 +198,19 @@ def normalize_profile_name(name: str) -> str: def validate_profile_name(name: str) -> None: - """Raise ``ValueError`` if *name* is not a valid profile identifier.""" - canon = normalize_profile_name(name) - if canon == "default": + """Raise ``ValueError`` if *name* is not a valid profile identifier. + + Validates the input as-given — strict lowercase match. Callers that accept + mixed-case or title-cased input from users (dashboard UI, CLI args) should + call :func:`normalize_profile_name` first. This separation keeps validate + honest about what the on-disk directory name must look like, while + ingress-point normalization handles UX flexibility (see #18498). + """ + if name == "default": return # special alias for ~/.hermes - if not _PROFILE_ID_RE.match(canon): + if not _PROFILE_ID_RE.match(name): raise ValueError( - f"Invalid profile name {canon!r}. Must match " + f"Invalid profile name {name!r}. Must match " f"[a-z0-9][a-z0-9_-]{{0,63}}" ) @@ -444,8 +450,8 @@ def create_profile( Path The newly created profile directory. """ - validate_profile_name(name) canon = normalize_profile_name(name) + validate_profile_name(canon) if canon == "default": raise ValueError( @@ -464,6 +470,7 @@ def create_profile( from hermes_constants import get_hermes_home source_dir = get_hermes_home() else: + clone_from = normalize_profile_name(clone_from) validate_profile_name(clone_from) source_dir = get_profile_dir(clone_from) if not source_dir.is_dir(): @@ -564,8 +571,8 @@ def delete_profile(name: str, yes: bool = False) -> Path: Returns the path that was removed. """ - validate_profile_name(name) canon = normalize_profile_name(name) + validate_profile_name(canon) if canon == "default": raise ValueError( @@ -755,8 +762,8 @@ def set_active_profile(name: str) -> None: Writes to ``~/.hermes/active_profile``. Use ``"default"`` to clear. """ - validate_profile_name(name) canon = normalize_profile_name(name) + validate_profile_name(canon) if canon != "default" and not profile_exists(canon): raise FileNotFoundError( f"Profile '{canon}' does not exist. " @@ -837,8 +844,8 @@ def export_profile(name: str, output_path: str) -> Path: """ import tempfile - validate_profile_name(name) canon = normalize_profile_name(name) + validate_profile_name(canon) profile_dir = get_profile_dir(canon) if not profile_dir.is_dir(): raise FileNotFoundError(f"Profile '{canon}' does not exist.") @@ -979,8 +986,8 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: # Archives exported from the default profile have "default/" as top-level # dir. Importing as "default" would target ~/.hermes itself — disallow # that and guide the user toward a named profile. - validate_profile_name(inferred_name) canon = normalize_profile_name(inferred_name) + validate_profile_name(canon) if canon == "default": raise ValueError( "Cannot import as 'default' — that is the built-in root profile (~/.hermes). " @@ -1076,10 +1083,10 @@ def rename_profile(old_name: str, new_name: str) -> Path: Returns the new profile directory. """ - validate_profile_name(old_name) - validate_profile_name(new_name) old_canon = normalize_profile_name(old_name) new_canon = normalize_profile_name(new_name) + validate_profile_name(old_canon) + validate_profile_name(new_canon) if old_canon == "default": raise ValueError("Cannot rename the default profile.") @@ -1221,8 +1228,8 @@ def resolve_profile_env(profile_name: str) -> str: Called early in the CLI entry point, before any hermes modules are imported, to set the HERMES_HOME environment variable. """ - validate_profile_name(profile_name) canon = normalize_profile_name(profile_name) + validate_profile_name(canon) profile_dir = get_profile_dir(canon) if canon != "default" and not profile_dir.is_dir(): diff --git a/scripts/release.py b/scripts/release.py index 4794f5bbfd9..cfafa36e2a6 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -97,6 +97,7 @@ AUTHOR_MAP = { "252818347@qq.com": "hejuntt1014", "uzmpsk.dilekakbas@gmail.com": "dlkakbs", "beliefanx@gmail.com": "BeliefanX", + "changchun989@proton.me": "changchun989", "jefferson@heimdallstrategy.com": "Mind-Dragon", "44753291+Nanako0129@users.noreply.github.com": "Nanako0129", "steve.westerhouse@origami-analytics.com": "westers", diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py index 9dd783c2ef1..7ddb8fd20a8 100644 --- a/tests/hermes_cli/test_profiles.py +++ b/tests/hermes_cli/test_profiles.py @@ -85,10 +85,12 @@ class TestValidateProfileName: # Should not raise validate_profile_name(name) - def test_uppercase_accepted_via_normalization(self): - validate_profile_name("Jules") + def test_uppercase_rejected(self): + # validate_profile_name is strict — callers normalize first, then validate. + with pytest.raises(ValueError): + validate_profile_name("Jules") - @pytest.mark.parametrize("name", ["has space", ".hidden", "-leading"]) + @pytest.mark.parametrize("name", ["UPPER", "has space", ".hidden", "-leading"]) def test_invalid_names_rejected(self, name): with pytest.raises(ValueError): validate_profile_name(name) From edf9c75621e6b50c912b77b86b13543008f47f80 Mon Sep 17 00:00:00 2001 From: Yoimex <yoimexex@gmail.com> Date: Sat, 25 Apr 2026 07:56:29 +0300 Subject: [PATCH 128/171] fix(env): pass -- to cd for hyphen-prefixed workdirs --- tests/tools/test_base_environment.py | 21 ++++++++++++++------- tools/environments/base.py | 3 ++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/tools/test_base_environment.py b/tests/tools/test_base_environment.py index 28ce08e840c..eb3661cafd3 100644 --- a/tests/tools/test_base_environment.py +++ b/tests/tools/test_base_environment.py @@ -30,7 +30,7 @@ class TestWrapCommand: wrapped = env._wrap_command("echo hello", "/tmp") assert "source" in wrapped - assert "cd /tmp" in wrapped or "cd '/tmp'" in wrapped + assert "cd -- /tmp" in wrapped or "cd -- '/tmp'" in wrapped assert "eval 'echo hello'" in wrapped assert "__hermes_ec=$?" in wrapped assert "export -p >" in wrapped @@ -57,24 +57,31 @@ class TestWrapCommand: env._snapshot_ready = True wrapped = env._wrap_command("ls", "~") - assert "cd ~" in wrapped - assert "cd '~'" not in wrapped + assert "cd -- ~" in wrapped + assert "cd -- '~'" not in wrapped def test_tilde_subpath_with_spaces_uses_home_and_quotes_suffix(self): env = _TestableEnv() env._snapshot_ready = True wrapped = env._wrap_command("ls", "~/my repo") - assert "cd $HOME/'my repo'" in wrapped - assert "cd ~/my repo" not in wrapped + assert "cd -- $HOME/'my repo'" in wrapped + assert "cd -- ~/my repo" not in wrapped def test_tilde_slash_maps_to_home(self): env = _TestableEnv() env._snapshot_ready = True wrapped = env._wrap_command("ls", "~/") - assert "cd $HOME" in wrapped - assert "cd ~/" not in wrapped + assert "cd -- $HOME" in wrapped + assert "cd -- ~/" not in wrapped + + def test_hyphen_prefixed_workdir_is_passed_after_double_dash(self): + env = _TestableEnv() + env._snapshot_ready = True + wrapped = env._wrap_command("pwd", "-demo") + + assert "builtin cd -- -demo || exit 126" in wrapped def test_cd_failure_exit_126(self): env = _TestableEnv() diff --git a/tools/environments/base.py b/tools/environments/base.py index 2f565fe5f87..3f21f1294be 100644 --- a/tools/environments/base.py +++ b/tools/environments/base.py @@ -405,7 +405,8 @@ class BaseEnvironment(ABC): # Preserve bare ``~`` expansion, but rewrite ``~/...`` through # ``$HOME`` so suffixes with spaces remain a single shell word. quoted_cwd = self._quote_cwd_for_cd(cwd) - parts.append(f"builtin cd {quoted_cwd} || exit 126") + # ``--`` keeps hyphen-prefixed directory names from being parsed as options. + parts.append(f"builtin cd -- {quoted_cwd} || exit 126") # Run the actual command parts.append(f"eval '{escaped}'") From 652f8e6f3ebea9551a5761668d5eeba215245abb Mon Sep 17 00:00:00 2001 From: vominh1919 <vominh1919@gmail.com> Date: Sun, 26 Apr 2026 19:50:04 +0700 Subject: [PATCH 129/171] fix(test): correct _coerce_number inf/nan test assertions The test 'test_inf_stays_string_for_integer_only' incorrectly asserted that _coerce_number('inf') returns float('inf'), but the function correctly returns the original string 'inf' because infinity is not JSON-serializable. Fixed the assertion to expect the string 'inf', and added two new tests for negative infinity and NaN edge cases to improve coverage of the non-JSON-serializable number guard in _coerce_number(). --- tests/run_agent/test_tool_arg_coercion.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/run_agent/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py index 8a14da9ea27..a9d768bdcf1 100644 --- a/tests/run_agent/test_tool_arg_coercion.py +++ b/tests/run_agent/test_tool_arg_coercion.py @@ -64,10 +64,23 @@ class TestCoerceNumber: def test_scientific_notation(self): assert _coerce_number("1e5") == 100000 - def test_inf_stays_string_for_integer_only(self): - """Infinity should not be converted to int.""" + def test_inf_stays_string(self): + """Infinity is not JSON-serializable, so it should stay as string.""" result = _coerce_number("inf") assert result == "inf" + assert isinstance(result, str) + + def test_negative_inf_stays_string(self): + """Negative infinity should also stay as string.""" + result = _coerce_number("-inf") + assert result == "-inf" + assert isinstance(result, str) + + def test_nan_stays_string(self): + """NaN is not JSON-serializable, so it should stay as string.""" + result = _coerce_number("nan") + assert result == "nan" + assert isinstance(result, str) def test_negative_float(self): assert _coerce_number("-2.5") == -2.5 From 6b3efcee49afed5fde590c56766634e8cbdf921f Mon Sep 17 00:00:00 2001 From: luyao618 <364939526@qq.com> Date: Mon, 4 May 2026 14:47:13 +0800 Subject: [PATCH 130/171] fix(kanban): reject direct status transition to 'running' via dashboard API The PATCH /tasks/:id endpoint allows setting status='running' via _set_status_direct(), bypassing the dispatcher/claim path that creates run rows, claim locks, expiry, and worker process metadata. This can leave tasks stuck in 'running' with no active worker. Fix: reject status='running' with HTTP 400, requiring all transitions to 'running' to go through the canonical claim_task() path. Closes #19535 --- plugins/kanban/dashboard/plugin_api.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py index d80296b888a..1c25f372e61 100644 --- a/plugins/kanban/dashboard/plugin_api.py +++ b/plugins/kanban/dashboard/plugin_api.py @@ -449,7 +449,12 @@ def update_task(task_id: str, payload: UpdateTaskBody, board: Optional[str] = Qu ok = _set_status_direct(conn, task_id, "ready") elif s == "archived": ok = kanban_db.archive_task(conn, task_id) - elif s in ("todo", "running", "triage"): + elif s == "running": + raise HTTPException( + status_code=400, + detail="Cannot set status to 'running' directly; use the dispatcher/claim path", + ) + elif s in ("todo", "triage"): ok = _set_status_direct(conn, task_id, s) else: raise HTTPException(status_code=400, detail=f"unknown status: {s}") From a8b689f0c2541ce71afbe9052ddc5d50c1abd71d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:46:26 -0700 Subject: [PATCH 131/171] test(kanban): regression for status=running rejection at dashboard PATCH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reporter of #19535 explicitly asked for a regression test — covers it here so a future refactor of _set_status_direct can't silently re-enable the direct ready/todo -> running bypass. Asserts both: (a) HTTP 400 with 'running' in the detail message, and (b) the task's status is unchanged after the rejected PATCH (pre-request status preserved, no partial mutation). --- tests/plugins/test_kanban_dashboard_plugin.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/plugins/test_kanban_dashboard_plugin.py b/tests/plugins/test_kanban_dashboard_plugin.py index 4bbc621f1aa..0055fc80f04 100644 --- a/tests/plugins/test_kanban_dashboard_plugin.py +++ b/tests/plugins/test_kanban_dashboard_plugin.py @@ -253,6 +253,33 @@ def test_patch_invalid_status(client): assert r.status_code == 400 +def test_patch_status_running_rejected(client): + """Dashboard PATCH cannot transition a task directly to 'running'. + + The only legitimate path into 'running' is through the dispatcher's + ``claim_task`` — which atomically creates a ``task_runs`` row, + claim_lock, expiry, and worker-PID metadata. Allowing a direct set + creates orphaned 'running' tasks with no run row or claim, which + violate the board's run-history invariants. See issue #19535. + """ + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"status": "running"}, + ) + assert r.status_code == 400 + assert "running" in r.json()["detail"] + # Task's status should still be its pre-request value — the direct-set + # was rejected before any mutation. + board = client.get("/api/plugins/kanban/board").json() + statuses = { + tt["id"]: col["name"] + for col in board["columns"] + for tt in col["tasks"] + } + assert statuses.get(t["id"]) != "running" + + # --------------------------------------------------------------------------- # Comments + Links # --------------------------------------------------------------------------- From b2b479b40ece1d0eec8eaf20382bed15d9c25a6d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:47:19 -0700 Subject: [PATCH 132/171] docs(kanban): backfill multi-board refs in reference docs (#19704) Followup to #19653. The feature PR updated the Kanban user guide but missed four other pages that document the same surface. Caught when Teknium asked 'did you add docs to the guide and any other kanban related docs around this?'. - reference/cli-commands.md: rewrite the `hermes kanban` section to document the `--board <slug>` global flag, the `boards` subcommand group (list/create/switch/show/rename/rm), board resolution order, and worked examples. Also fills in the `create` / `complete` flag lists that had drifted from the current CLI (`--summary`, `--metadata`, `--triage`, `--idempotency-key`, `--max-runtime`, `--skill`). - reference/environment-variables.md: add `HERMES_KANBAN_BOARD` row, update `HERMES_KANBAN_DB` precedence note. - reference/slash-commands.md: add `/kanban boards ...` and `/kanban --board <slug> ...` to the two `/kanban` rows (CLI table + gateway table). - features/kanban-tutorial.md: the walkthrough uses the `default` board, so just a note pointing readers at the overview's Boards section if they want multiple queues, plus the corrected per-board DB path. Skill docs (devops-kanban-orchestrator, -worker) intentionally not updated: those are agent-facing lifecycle playbooks and boards are transparent to workers (HERMES_KANBAN_BOARD env var pins the DB automatically), so there's nothing new for a worker to know. --- website/docs/reference/cli-commands.md | 48 +++++++++++++++---- .../docs/reference/environment-variables.md | 3 +- website/docs/reference/slash-commands.md | 4 +- .../user-guide/features/kanban-tutorial.md | 4 +- 4 files changed, 46 insertions(+), 13 deletions(-) diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index c8a9dd66c5a..4f307f15e7b 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -340,34 +340,64 @@ hermes cron <list|create|edit|pause|resume|run|remove|status|tick> ## `hermes kanban` ```bash -hermes kanban <action> [options] +hermes kanban [--board <slug>] <action> [options] ``` -Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. A `cron`-driven dispatcher (`hermes kanban dispatch`) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace. +Multi-profile, multi-project collaboration board. Each install can host many boards (one per project, repo, or domain); each board is a standalone queue with its own SQLite DB and dispatcher scope. New installs start with one board called `default`, whose DB is `~/.hermes/kanban.db` for back-compat; additional boards live at `~/.hermes/kanban/boards/<slug>/kanban.db`. The gateway-embedded dispatcher sweeps every board per tick. -**This is the human / scripting surface.** Agent workers spawned by the dispatcher drive the board through a dedicated `kanban_*` [toolset](/docs/user-guide/features/kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_create`, `kanban_link`, `kanban_comment`, `kanban_heartbeat`) instead of shelling to `hermes kanban`. Both surfaces route through the same `kanban_db` layer, so state is consistent either way. +**Global flags (apply to every action below):** + +| Flag | Purpose | +|------|---------| +| `--board <slug>` | Operate on a specific board. Defaults to the current board (set via `hermes kanban boards switch`, the `HERMES_KANBAN_BOARD` env var, or `default`). | + +**This is the human / scripting surface.** Agent workers spawned by the dispatcher drive the board through a dedicated `kanban_*` [toolset](/docs/user-guide/features/kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_create`, `kanban_link`, `kanban_comment`, `kanban_heartbeat`) instead of shelling to `hermes kanban`. Workers have `HERMES_KANBAN_BOARD` pinned in their env so they physically cannot see other boards. | Action | Purpose | |--------|---------| | `init` | Create `kanban.db` if missing. Idempotent. | -| `create "<title>"` | Create a new task. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`. | -| `list` / `ls` | List tasks. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. | +| `boards list` / `boards ls` | List all boards with task counts. `--json`, `--all` (include archived). | +| `boards create <slug>` | Create a new board. Flags: `--name`, `--description`, `--icon`, `--color`, `--switch` (make active). Slug is kebab-case, auto-downcased. | +| `boards switch <slug>` / `boards use` | Persist `<slug>` as the active board (writes `~/.hermes/kanban/current`). | +| `boards show` / `boards current` | Print the currently-active board's name, DB path, and task counts. | +| `boards rename <slug> "<name>"` | Change a board's display name. Slug is immutable. | +| `boards rm <slug>` | Archive (default) or hard-delete a board. `--delete` skips the archive step. Archived boards move to `boards/_archived/<slug>-<ts>/`. Refused for `default`. | +| `create "<title>"` | Create a new task on the active board. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`, `--triage`, `--idempotency-key`, `--max-runtime`, `--skill` (repeatable). | +| `list` / `ls` | List tasks on the active board. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. | | `show <id>` | Show a task with comments and events. `--json` for machine output. | | `assign <id> <profile>` | Assign or reassign. Use `none` to unassign. Refused while task is running. | -| `link <parent> <child>` | Add a dependency. Cycle-detected. | +| `link <parent> <child>` | Add a dependency. Cycle-detected. Both tasks must be on the same board. | | `unlink <parent> <child>` | Remove a dependency. | | `claim <id>` | Atomically claim a ready task. Prints resolved workspace path. | | `comment <id> "<text>"` | Append a comment. The next worker that claims the task reads it as part of its `kanban_show()` response. | -| `complete <id>` | Mark task done. Flag: `--result "<summary>"` (goes into children's parent-result context). | +| `complete <id>` | Mark task done. Flags: `--result`, `--summary`, `--metadata`. | | `block <id> "<reason>"` | Mark task blocked. Also appends the reason as a comment. | | `unblock <id>` | Return a blocked task to ready. | | `archive <id>` | Hide from default list. `gc` will remove scratch workspaces. | | `tail <id>` | Follow a task's event stream. | -| `dispatch` | One dispatcher pass. Flags: `--dry-run`, `--max N`, `--json`. | +| `dispatch` | One dispatcher pass on the active board. Flags: `--dry-run`, `--max N`, `--json`. | | `context <id>` | Print the full context a worker would see (title + body + parent results + comments). | | `gc` | Remove scratch workspaces for archived tasks. | -All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface. +Examples: + +```bash +# Create a second board and put a task on it without switching away. +hermes kanban boards create atm10-server --name "ATM10 Server" --icon 🎮 +hermes kanban --board atm10-server create "Restart server" --assignee ops + +# Switch the active board for subsequent calls. +hermes kanban boards switch atm10-server +hermes kanban list # shows atm10-server tasks + +# Archive a board (recoverable) or hard-delete it. +hermes kanban boards rm atm10-server +hermes kanban boards rm atm10-server --delete +``` + +Board resolution order (highest precedence first): `--board <slug>` flag → `HERMES_KANBAN_BOARD` env var → `~/.hermes/kanban/current` file → `default`. + +All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface — including `boards` subcommands and the `--board` flag. For the full design — comparison with Cline Kanban / Paperclip / NanoClaw / Gemini Enterprise, eight collaboration patterns, four user stories, concurrency correctness proof — see `docs/hermes-kanban-v1-spec.pdf` in the repository or the [Kanban user guide](/docs/user-guide/features/kanban). diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index ec2c5ec0e8c..9bcda5695eb 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -89,7 +89,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `HERMES_LOCAL_STT_LANGUAGE` | Default language passed to `HERMES_LOCAL_STT_COMMAND` or auto-detected local `whisper` CLI fallback (default: `en`) | | `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`). Also scopes the gateway PID file and systemd service name, so multiple installations can run concurrently | | `HERMES_KANBAN_HOME` | Override the shared Hermes root that anchors the kanban board (db + workspaces + worker logs). Falls back to `get_default_hermes_root()` (the parent of any active profile). Useful for tests and unusual deployments | -| `HERMES_KANBAN_DB` | Pin the kanban database file path directly (highest precedence; beats `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env so profile workers converge on the dispatcher's board | +| `HERMES_KANBAN_BOARD` | Pin the active kanban board for this process. Takes precedence over `~/.hermes/kanban/current`; the dispatcher injects this into worker subprocess env so workers physically cannot see tasks on other boards. Defaults to `default`. Slug validation: lowercase alphanumerics + hyphens + underscores, 1-64 chars | +| `HERMES_KANBAN_DB` | Pin the kanban database file path directly (highest precedence; beats `HERMES_KANBAN_BOARD` and `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env so profile workers converge on the dispatcher's board | | `HERMES_KANBAN_WORKSPACES_ROOT` | Pin the kanban workspaces root directly (highest precedence for workspaces; beats `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env | ## Provider Auth (OAuth) diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 0e71a932772..ceab9190b84 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -70,7 +70,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/skills` | Search, install, inspect, or manage skills from online registries | | `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) | | `/curator` | Background skill maintenance — `status`, `run`, `pin`, `archive`. See [Curator](/docs/user-guide/features/curator). | -| `/kanban <action>` | Drive the multi-profile collaboration board without leaving chat. Full `hermes kanban` surface is available: `/kanban list`, `/kanban show t_abc`, `/kanban create "title" --assignee X`, `/kanban comment t_abc "text"`, `/kanban unblock t_abc`, `/kanban dispatch`, etc. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | +| `/kanban <action>` | Drive the multi-profile, multi-project collaboration board without leaving chat. Full `hermes kanban` surface is available: `/kanban list`, `/kanban show t_abc`, `/kanban create "title" --assignee X`, `/kanban comment t_abc "text"`, `/kanban unblock t_abc`, `/kanban dispatch`, etc. Multi-board support included: `/kanban boards list`, `/kanban boards create <slug>`, `/kanban boards switch <slug>`, `/kanban --board <slug> <action>`. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config.yaml | | `/reload` | Reload `.env` variables into the running session (picks up new API keys without restarting) | | `/plugins` | List installed plugins and their status | @@ -158,7 +158,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/goal <text>` | Set a standing goal Hermes works toward across turns — our take on the Ralph loop. A judge model checks after each turn; if not done, Hermes auto-continues until it is, you pause/clear it, or the turn budget (default 20) is hit. Subcommands: `/goal status`, `/goal pause`, `/goal resume`, `/goal clear`. Safe to run mid-agent for status/pause/clear; setting a new goal requires `/stop` first. See [Persistent Goals](/docs/user-guide/features/goals). | | `/footer [on\|off\|status]` | Toggle the runtime-metadata footer on final replies (shows model, tool counts, timing). | | `/curator [status\|run\|pin\|archive]` | Background skill maintenance controls. | -| `/kanban <action>` | Drive the multi-profile collaboration board from chat — identical argument surface to the CLI. Bypasses the running-agent guard, so `/kanban unblock t_abc`, `/kanban comment t_abc "…"`, `/kanban list --mine`, etc. work mid-turn. `/kanban create …` auto-subscribes the originating chat to the new task's terminal events. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | +| `/kanban <action>` | Drive the multi-profile, multi-project collaboration board from chat — identical argument surface to the CLI. Bypasses the running-agent guard, so `/kanban unblock t_abc`, `/kanban comment t_abc "…"`, `/kanban list --mine`, `/kanban boards switch <slug>`, etc. work mid-turn. `/kanban create …` auto-subscribes the originating chat to the new task's terminal events. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config. | | `/yolo` | Toggle YOLO mode — skip all dangerous command approval prompts. | | `/commands [page]` | Browse all commands and skills (paginated). | diff --git a/website/docs/user-guide/features/kanban-tutorial.md b/website/docs/user-guide/features/kanban-tutorial.md index 08f3d7ccc4d..f8d9501cb2a 100644 --- a/website/docs/user-guide/features/kanban-tutorial.md +++ b/website/docs/user-guide/features/kanban-tutorial.md @@ -10,7 +10,9 @@ hermes dashboard # opens http://127.0.0.1:9119 in your browser # click Kanban in the left nav ``` -The dashboard is the most comfortable place for **you** to watch the system. Agent workers the dispatcher spawns never see the dashboard or the CLI — they drive the board through a dedicated `kanban_*` [toolset](./kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`). All three surfaces — dashboard, CLI, worker tools — route through the same `~/.hermes/kanban.db`, so the board is consistent no matter which side of the fence a change came from. +The dashboard is the most comfortable place for **you** to watch the system. Agent workers the dispatcher spawns never see the dashboard or the CLI — they drive the board through a dedicated `kanban_*` [toolset](./kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`). All three surfaces — dashboard, CLI, worker tools — route through the same per-board SQLite DB (`~/.hermes/kanban.db` for the default board, `~/.hermes/kanban/boards/<slug>/kanban.db` for any board you create later), so each board is consistent no matter which side of the fence a change came from. + +This tutorial uses the `default` board throughout. If you want multiple isolated queues (one per project / repo / domain), see [Boards (multi-project)](./kanban#boards-multi-project) in the overview — the same CLI / dashboard / worker flows apply per board, and workers physically cannot see tasks on other boards. Throughout the tutorial, **code blocks labelled `bash` are commands *you* run.** Code blocks labelled `# worker tool calls` are what the spawned worker's model emits as tool calls — shown here so you can see the loop end-to-end, not because you'd ever run them yourself. From 297eaa3533f6c98a45db4cb5c63fa07c008fd67e Mon Sep 17 00:00:00 2001 From: ygd58 <buraysandro9@gmail.com> Date: Sat, 25 Apr 2026 10:33:40 +0200 Subject: [PATCH 133/171] fix(api_server): emit run.failed when run_conversation returns failed=True MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When run_conversation encounters a non-retryable client error (401, 400, etc.), it returns a dict with failed=True instead of raising. The gateway's _run_and_close only branched on exceptions, so it always emitted run.completed even for failed runs — clients could not distinguish success from failure. Inspect the result dict before emitting: if failed=True, emit run.failed with the error message; otherwise emit run.completed as before. The existing except Exception path is unchanged for genuine programming errors. Fixes #15561 --- gateway/platforms/api_server.py | 48 ++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index b4d3ccb20cd..230859023b7 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -2578,21 +2578,39 @@ class APIServerAdapter(BasePlatformAdapter): return r, u result, usage = await asyncio.get_running_loop().run_in_executor(None, _run_sync) - final_response = result.get("final_response", "") if isinstance(result, dict) else "" - q.put_nowait({ - "event": "run.completed", - "run_id": run_id, - "timestamp": time.time(), - "output": final_response, - "usage": usage, - }) - self._set_run_status( - run_id, - "completed", - output=final_response, - usage=usage, - last_event="run.completed", - ) + # Check for structured failure (non-retryable client errors like + # 401/400 return failed=True instead of raising, so the except + # block below never fires — issue #15561). + if isinstance(result, dict) and result.get("failed"): + error_msg = result.get("error") or "agent run failed" + q.put_nowait({ + "event": "run.failed", + "run_id": run_id, + "timestamp": time.time(), + "error": error_msg, + }) + self._set_run_status( + run_id, + "failed", + error=error_msg, + last_event="run.failed", + ) + else: + final_response = result.get("final_response", "") if isinstance(result, dict) else "" + q.put_nowait({ + "event": "run.completed", + "run_id": run_id, + "timestamp": time.time(), + "output": final_response, + "usage": usage, + }) + self._set_run_status( + run_id, + "completed", + output=final_response, + usage=usage, + last_event="run.completed", + ) except asyncio.CancelledError: self._set_run_status( run_id, From c5789f4309f3bfc54c73623b25849ac7f12a1d80 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:47:53 -0700 Subject: [PATCH 134/171] feat(achievements): share card render on unlocked badges (#19657) * feat(achievements): share card render on unlocked badges Adds a Share button to each unlocked achievement card that opens a modal and renders a 1200x630 PNG share card client-side via Canvas2D (no backend, no network, no new deps). Two actions: Download PNG and Copy image to clipboard. Card layout mirrors the in-dashboard visual language: tier-colored glow, icon from the existing LUCIDE sprite set, achievement name, tier badge pill, description, progress stat line, and a Hermes Agent watermark. Sized for X/Twitter, Discord, LinkedIn, Bluesky link previews. Vendored on top of the upstream @PCinkusz bundle; the 'in-progress scan banner' precedent already established this divergence pattern. Manifest bumped 0.3.1 -> 0.4.0. * feat(achievements): share-on-X as primary action on share dialog Adds a 'Share on X' button as the primary action in the share dialog. Opens https://x.com/intent/post with a pre-filled tweet referencing the achievement name, tier, @NousResearch, and the Hermes docs URL. Copy image and Download PNG become secondary actions: users who want the badge attached can Copy image, paste into the X composer, post. Primary button styled as X's signature black-on-white fill so the action is unambiguous. --- plugins/hermes-achievements/README.md | 2 + .../dashboard/dist/index.js | 305 +++++++++++++++++- .../dashboard/dist/style.css | 26 ++ .../dashboard/manifest.json | 2 +- 4 files changed, 332 insertions(+), 3 deletions(-) diff --git a/plugins/hermes-achievements/README.md b/plugins/hermes-achievements/README.md index dd360197e8c..33641a9d726 100644 --- a/plugins/hermes-achievements/README.md +++ b/plugins/hermes-achievements/README.md @@ -11,6 +11,8 @@ Achievement system for the Hermes Dashboard: collectible, tiered badges generate The screenshots use temporary demo tier data to show the full visual range. The plugin itself reads real local Hermes session history by default. > **Update notice (2026-04-29):** If you installed this plugin before today, update to the latest version. The achievements scan path was refactored for much faster warm loads (snapshot cache + incremental checkpoint scan). +> +> **Share cards (2026-05-04, vendored in hermes-agent v0.4.0):** Unlocked achievement cards now have a "Share" button that renders a 1200×630 PNG share card (client-side canvas, no backend, no network) with Download + Copy-to-clipboard actions. Fits X/Twitter, Discord, LinkedIn, Bluesky link-preview dimensions. ## What it does diff --git a/plugins/hermes-achievements/dashboard/dist/index.js b/plugins/hermes-achievements/dashboard/dist/index.js index 56b9427e84a..d30f34e11e9 100644 --- a/plugins/hermes-achievements/dashboard/dist/index.js +++ b/plugins/hermes-achievements/dashboard/dist/index.js @@ -66,6 +66,296 @@ }); } + const TIER_HEX = { + "Copper": "#b87333", + "Silver": "#c0c7d2", + "Gold": "#f2c94c", + "Diamond": "#67e8f9", + "Olympian": "#c084fc", + }; + + function tierHex(tier) { + return TIER_HEX[tier] || "#67e8f9"; + } + + // Render a LUCIDE icon path fragment into a standalone SVG string with an + // explicit stroke color so it can be rasterized onto a <canvas> via Image. + // The normal render path uses stroke="currentColor" which browsers honor in + // DOM but NOT when the SVG is drawn to a canvas from a data URL. + function iconSvgForCanvas(iconKey, strokeColor) { + const paths = LUCIDE[iconKey] || LUCIDE.secret; + return "<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\" fill=\"none\" " + + "stroke=\"" + strokeColor + "\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\">" + + paths + "</svg>"; + } + + function loadSvgImage(svgString) { + return new Promise(function (resolve, reject) { + const blob = new Blob([svgString], { type: "image/svg+xml;charset=utf-8" }); + const url = URL.createObjectURL(blob); + const img = new Image(); + img.onload = function () { URL.revokeObjectURL(url); resolve(img); }; + img.onerror = function (e) { URL.revokeObjectURL(url); reject(e); }; + img.src = url; + }); + } + + function wrapText(ctx, text, maxWidth) { + const words = String(text || "").split(/\s+/).filter(Boolean); + const lines = []; + let current = ""; + for (let i = 0; i < words.length; i++) { + const candidate = current ? current + " " + words[i] : words[i]; + if (ctx.measureText(candidate).width <= maxWidth) { + current = candidate; + } else { + if (current) lines.push(current); + current = words[i]; + } + } + if (current) lines.push(current); + return lines; + } + + // Build a 1200x630 share card PNG for a single achievement. Returns a Blob. + // Pure client-side render via Canvas2D — no external deps, no network. + async function buildShareImage(achievement) { + const W = 1200; + const H = 630; + const canvas = document.createElement("canvas"); + canvas.width = W; + canvas.height = H; + const ctx = canvas.getContext("2d"); + + const tier = achievement.tier || achievement.next_tier || "Copper"; + const color = tierHex(tier); + + // Background: dark charcoal with a tier-tinted radial highlight on the + // top-left, echoing the card visual language. + ctx.fillStyle = "#0b0d11"; + ctx.fillRect(0, 0, W, H); + const bgGrad = ctx.createRadialGradient(260, 220, 60, 260, 220, 820); + bgGrad.addColorStop(0, color + "33"); + bgGrad.addColorStop(0.55, color + "0a"); + bgGrad.addColorStop(1, "#0b0d1100"); + ctx.fillStyle = bgGrad; + ctx.fillRect(0, 0, W, H); + + // Outer border + ctx.strokeStyle = color + "66"; + ctx.lineWidth = 2; + ctx.strokeRect(1, 1, W - 2, H - 2); + + // Icon block — 380x380 on the left + try { + const svg = iconSvgForCanvas(achievement.icon || "secret", color); + const iconImg = await loadSvgImage(svg); + const ix = 90; + const iy = 125; + const isize = 380; + // Icon glow + ctx.save(); + ctx.shadowColor = color; + ctx.shadowBlur = 40; + ctx.drawImage(iconImg, ix, iy, isize, isize); + ctx.restore(); + } catch (_) { + // Icon render failure is non-fatal; card still useful without it. + } + + // Right column text layout + const rx = 520; + const rMaxWidth = W - rx - 70; + + // Category label (kicker) + ctx.fillStyle = "#8b95a8"; + ctx.font = "600 22px ui-monospace, 'SF Mono', Menlo, monospace"; + ctx.textBaseline = "top"; + ctx.fillText((achievement.category || "").toUpperCase(), rx, 112); + + // Achievement name — wrap to 2 lines if needed + ctx.fillStyle = "#ffffff"; + ctx.font = "780 68px system-ui, -apple-system, 'Segoe UI', sans-serif"; + const nameLines = wrapText(ctx, achievement.name || "Achievement", rMaxWidth).slice(0, 2); + let cursorY = 150; + for (let i = 0; i < nameLines.length; i++) { + ctx.fillText(nameLines[i], rx, cursorY); + cursorY += 76; + } + + // Tier badge pill + const badgeLabel = tier.toUpperCase() + " TIER"; + ctx.font = "700 22px ui-monospace, 'SF Mono', Menlo, monospace"; + const badgeWidth = ctx.measureText(badgeLabel).width + 32; + const badgeX = rx; + const badgeY = cursorY + 14; + const badgeH = 40; + ctx.fillStyle = color + "1f"; + ctx.strokeStyle = color; + ctx.lineWidth = 1.5; + ctx.beginPath(); + ctx.rect(badgeX, badgeY, badgeWidth, badgeH); + ctx.fill(); + ctx.stroke(); + ctx.fillStyle = color; + ctx.textBaseline = "middle"; + ctx.fillText(badgeLabel, badgeX + 16, badgeY + badgeH / 2 + 1); + ctx.textBaseline = "top"; + + // Description — wrap up to 3 lines + ctx.fillStyle = "#c3cad6"; + ctx.font = "400 26px system-ui, -apple-system, 'Segoe UI', sans-serif"; + const descLines = wrapText(ctx, achievement.description || "", rMaxWidth).slice(0, 3); + let descY = badgeY + badgeH + 28; + for (let i = 0; i < descLines.length; i++) { + ctx.fillText(descLines[i], rx, descY); + descY += 34; + } + + // Progress / stat line (if meaningful) + const progressValue = achievement.progress; + const threshold = achievement.next_threshold; + let statLine = null; + if (progressValue && threshold) { + statLine = progressValue.toLocaleString() + " / " + threshold.toLocaleString(); + } else if (progressValue) { + statLine = progressValue.toLocaleString(); + } + if (statLine) { + ctx.fillStyle = color; + ctx.font = "700 28px ui-monospace, 'SF Mono', Menlo, monospace"; + ctx.fillText(statLine, rx, descY + 14); + } + + // Footer watermark + ctx.fillStyle = "#8b95a8"; + ctx.font = "600 20px ui-monospace, 'SF Mono', Menlo, monospace"; + ctx.textBaseline = "bottom"; + ctx.fillText("HERMES AGENT · hermes-agent.nousresearch.com", 70, H - 40); + + // "UNLOCKED" stamp upper-right + ctx.textBaseline = "top"; + ctx.fillStyle = color; + ctx.font = "800 24px ui-monospace, 'SF Mono', Menlo, monospace"; + const stamp = "◆ UNLOCKED"; + const stampW = ctx.measureText(stamp).width; + ctx.fillText(stamp, W - 70 - stampW, 70); + + return await new Promise(function (resolve, reject) { + canvas.toBlob(function (blob) { + if (blob) resolve(blob); else reject(new Error("canvas.toBlob returned null")); + }, "image/png"); + }); + } + + function ShareDialog({ achievement, onClose }) { + const [status, setStatus] = hooks.useState("rendering"); // rendering | ready | copied | error + const [errorMsg, setErrorMsg] = hooks.useState(null); + const [previewUrl, setPreviewUrl] = hooks.useState(null); + const blobRef = React.useRef(null); + + hooks.useEffect(function () { + let cancelled = false; + let createdUrl = null; + buildShareImage(achievement).then(function (blob) { + if (cancelled) return; + blobRef.current = blob; + createdUrl = URL.createObjectURL(blob); + setPreviewUrl(createdUrl); + setStatus("ready"); + }).catch(function (err) { + if (cancelled) return; + setErrorMsg(String(err && err.message || err)); + setStatus("error"); + }); + return function () { + cancelled = true; + if (createdUrl) URL.revokeObjectURL(createdUrl); + }; + }, [achievement.id]); + + function download() { + if (!blobRef.current) return; + const url = URL.createObjectURL(blobRef.current); + const a = document.createElement("a"); + a.href = url; + a.download = "hermes-achievement-" + (achievement.id || "badge") + ".png"; + document.body.appendChild(a); + a.click(); + a.remove(); + setTimeout(function () { URL.revokeObjectURL(url); }, 1000); + } + + async function copyToClipboard() { + if (!blobRef.current) return; + try { + if (!navigator.clipboard || !window.ClipboardItem) { + throw new Error("Clipboard image copy not supported in this browser — use Download instead."); + } + await navigator.clipboard.write([ + new window.ClipboardItem({ "image/png": blobRef.current }), + ]); + setStatus("copied"); + setTimeout(function () { setStatus("ready"); }, 1800); + } catch (err) { + setErrorMsg(String(err && err.message || err)); + setStatus("error"); + } + } + + // Build the pre-filled tweet text. Keep it short so X doesn't truncate + // when the user hasn't attached the PNG yet — they'll copy-image and + // paste in the same flow. + function tweetText() { + const tierPart = achievement.tier ? (achievement.tier + " tier ") : ""; + return "Just unlocked " + tierPart + "\"" + achievement.name + "\" in Hermes Agent ☤\n\n" + + "@NousResearch · https://hermes-agent.nousresearch.com"; + } + + function shareOnX() { + const url = "https://x.com/intent/post?text=" + encodeURIComponent(tweetText()); + window.open(url, "_blank", "noopener,noreferrer"); + } + + return React.createElement("div", { + className: "ha-share-backdrop", + onClick: function (e) { if (e.target === e.currentTarget) onClose(); }, + }, + React.createElement("div", { className: "ha-share-dialog", role: "dialog", "aria-label": "Share achievement" }, + React.createElement("div", { className: "ha-share-head" }, + React.createElement("strong", null, "Share: " + achievement.name), + React.createElement("button", { className: "ha-share-close", onClick: onClose, "aria-label": "Close" }, "×") + ), + React.createElement("div", { className: "ha-share-preview" }, + status === "rendering" && React.createElement("div", { className: "ha-share-placeholder" }, "Rendering…"), + previewUrl && React.createElement("img", { src: previewUrl, alt: achievement.name + " share card" }) + ), + status === "error" && React.createElement("div", { className: "ha-share-error" }, errorMsg || "Something went wrong."), + React.createElement("div", { className: "ha-share-actions" }, + React.createElement("button", { + className: "ha-share-btn ha-share-btn-primary", + onClick: shareOnX, + title: "Opens X with a pre-filled post", + }, "Share on X"), + React.createElement("button", { + className: "ha-share-btn", + onClick: copyToClipboard, + disabled: status !== "ready" && status !== "copied", + title: "Copy the image to paste into your post", + }, status === "copied" ? "Copied ✓" : "Copy image"), + React.createElement("button", { + className: "ha-share-btn", + onClick: download, + disabled: status !== "ready" && status !== "copied", + }, "Download PNG") + ), + React.createElement("p", { className: "ha-share-hint" }, + "Share on X opens a pre-filled post in a new tab. Click Copy image first if you want the 1200×630 badge attached — X lets you paste it right into the tweet composer. Download PNG saves the file for use anywhere." + ) + ) + ); + } + function StatCard(props) { return React.createElement(C.Card, { className: "ha-stat" }, React.createElement(C.CardContent, { className: "ha-stat-content" }, @@ -170,6 +460,7 @@ const targetTier = achievement.next_tier || achievement.tier; const tierLabel = achievement.tier ? achievement.tier : (targetTier ? "Target " + targetTier : (state === "secret" ? "Hidden" : (unlocked ? "Complete" : "Objective"))); const progressText = state === "secret" ? "hidden" : (progress + (achievement.next_threshold ? " / " + achievement.next_threshold : "")); + const [shareOpen, setShareOpen] = hooks.useState(false); return React.createElement(C.Card, { className: cn("ha-card", "ha-state-" + state, tierClass(achievement.tier || achievement.next_tier)) }, React.createElement(C.CardContent, { className: "ha-card-content" }, React.createElement("div", { className: "ha-card-head" }, @@ -180,7 +471,13 @@ ), React.createElement("div", { className: "ha-badges" }, React.createElement("span", { className: "ha-state-badge" }, stateLabel), - React.createElement("span", { className: "ha-tier-badge" }, tierLabel) + React.createElement("span", { className: "ha-tier-badge" }, tierLabel), + state === "unlocked" && React.createElement("button", { + className: "ha-share-trigger", + onClick: function () { setShareOpen(true); }, + title: "Share this achievement", + "aria-label": "Share " + achievement.name, + }, "Share") ) ), React.createElement("p", { className: "ha-description" }, achievement.description), @@ -200,7 +497,11 @@ ), React.createElement("span", { className: "ha-progress-text" }, progressText) ) - ) + ), + shareOpen && React.createElement(ShareDialog, { + achievement: achievement, + onClose: function () { setShareOpen(false); }, + }) ); } diff --git a/plugins/hermes-achievements/dashboard/dist/style.css b/plugins/hermes-achievements/dashboard/dist/style.css index fc0e138f4ea..2b4321ec254 100644 --- a/plugins/hermes-achievements/dashboard/dist/style.css +++ b/plugins/hermes-achievements/dashboard/dist/style.css @@ -118,3 +118,29 @@ .ha-scan-banner-text p { margin: .25rem 0 0; font-size: .78rem; line-height: 1.35; color: var(--color-muted-foreground); text-transform: none; letter-spacing: normal; } .ha-scan-progress-track { height: .4rem; border: 1px solid color-mix(in srgb, #67e8f9 28%, var(--color-border)); background: rgba(0,0,0,.22); overflow: hidden; } .ha-scan-progress-fill { height: 100%; background: linear-gradient(90deg, #67e8f9, color-mix(in srgb, #67e8f9 48%, white)); transition: width .4s ease-out; } + +/* Share achievement — trigger button on unlocked cards + modal dialog. + * Added to the vendored bundle (on top of the upstream PCinkusz base). + * Canvas rendering is pure client-side, no backend, no network. + */ +.ha-share-trigger { border: 1px solid color-mix(in srgb, var(--ha-tier) 58%, var(--color-border)); color: var(--ha-tier); background: color-mix(in srgb, var(--ha-tier) 8%, transparent); padding: .18rem .42rem; font-size: .66rem; text-transform: uppercase; letter-spacing: .08em; font-family: var(--font-mono, ui-monospace, monospace); cursor: pointer; margin-top: .05rem; transition: background .12s ease, border-color .12s ease; } +.ha-share-trigger:hover { background: color-mix(in srgb, var(--ha-tier) 20%, transparent); border-color: var(--ha-tier); } +.ha-share-trigger:focus-visible { outline: 2px solid var(--ha-tier); outline-offset: 2px; } + +.ha-share-backdrop { position: fixed; inset: 0; z-index: 1000; background: rgba(4,6,10,.72); backdrop-filter: blur(6px); display: flex; align-items: center; justify-content: center; padding: 1.5rem; animation: ha-fade-in .14s ease-out; } +.ha-share-dialog { width: min(760px, 100%); max-height: calc(100vh - 3rem); overflow: auto; border: 1px solid color-mix(in srgb, var(--color-border) 70%, var(--color-ring)); background: color-mix(in srgb, var(--color-card) 94%, #000); box-shadow: 0 24px 60px rgba(0,0,0,.55); display: flex; flex-direction: column; gap: .9rem; padding: 1rem 1.1rem 1.1rem; } +.ha-share-head { display: flex; align-items: center; justify-content: space-between; gap: .75rem; } +.ha-share-head strong { font-size: .82rem; text-transform: uppercase; letter-spacing: .1em; font-family: var(--font-mono, ui-monospace, monospace); color: var(--color-foreground); } +.ha-share-close { width: 1.9rem; height: 1.9rem; display: grid; place-items: center; border: 1px solid var(--color-border); background: transparent; color: var(--color-muted-foreground); font-size: 1.1rem; cursor: pointer; line-height: 1; } +.ha-share-close:hover { color: var(--color-foreground); border-color: var(--color-ring); } +.ha-share-preview { position: relative; border: 1px solid var(--color-border); background: #0b0d11; overflow: hidden; aspect-ratio: 1200 / 630; } +.ha-share-preview img { display: block; width: 100%; height: 100%; object-fit: contain; } +.ha-share-placeholder { position: absolute; inset: 0; display: grid; place-items: center; color: var(--color-muted-foreground); font-family: var(--font-mono, ui-monospace, monospace); font-size: .82rem; text-transform: uppercase; letter-spacing: .1em; animation: ha-pulse 1.4s ease-in-out infinite; border-radius: 0; } +.ha-share-error { border: 1px solid #ef4444; color: #fecaca; background: color-mix(in srgb, #ef4444 10%, transparent); padding: .55rem .7rem; font-size: .78rem; font-family: var(--font-mono, ui-monospace, monospace); } +.ha-share-actions { display: flex; gap: .55rem; flex-wrap: wrap; } +.ha-share-btn { border: 1px solid var(--color-border); background: color-mix(in srgb, var(--color-card) 72%, transparent); color: var(--color-foreground); padding: .5rem .85rem; font-size: .82rem; font-family: var(--font-mono, ui-monospace, monospace); text-transform: uppercase; letter-spacing: .08em; cursor: pointer; transition: border-color .12s ease, background .12s ease; } +.ha-share-btn:hover:not(:disabled) { border-color: var(--color-ring); background: color-mix(in srgb, var(--color-primary) 16%, var(--color-card)); } +.ha-share-btn:disabled { opacity: .5; cursor: not-allowed; } +.ha-share-btn-primary { border-color: #ffffff; color: #ffffff; background: #000000; } +.ha-share-btn-primary:hover:not(:disabled) { background: #1a1a1a; border-color: #67e8f9; color: #67e8f9; } +.ha-share-hint { margin: 0; color: var(--color-muted-foreground); font-size: .76rem; line-height: 1.45; } diff --git a/plugins/hermes-achievements/dashboard/manifest.json b/plugins/hermes-achievements/dashboard/manifest.json index 02c4050f34e..5fcc39313bb 100644 --- a/plugins/hermes-achievements/dashboard/manifest.json +++ b/plugins/hermes-achievements/dashboard/manifest.json @@ -3,7 +3,7 @@ "label": "Achievements", "description": "Steam-style achievements for vibe coding and agentic Hermes workflows.", "icon": "Star", - "version": "0.3.1", + "version": "0.4.0", "tab": { "path": "/achievements", "position": "after:analytics" }, "entry": "dist/index.js", "css": "dist/style.css", From 8c8f95bc8e4e5d8fb7f06be8154afc3488fab787 Mon Sep 17 00:00:00 2001 From: h0tp-ftw <141889580+h0tp-ftw@users.noreply.github.com> Date: Sun, 26 Apr 2026 00:35:59 -0600 Subject: [PATCH 135/171] fix(gateway): show friendly error when service is not installed Instead of an unhelpful CalledProcessError traceback when running `hermes gateway start/stop/restart` without first installing the service, check for the unit file and exit with an actionable install hint. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- hermes_cli/gateway.py | 12 ++++++++++++ tests/hermes_cli/test_gateway_service.py | 23 +++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 7dec83cbff9..dff0a4aa755 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -1971,6 +1971,15 @@ def systemd_uninstall(system: bool = False): print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled") +def _require_service_installed(action: str, system: bool = False) -> None: + unit_path = get_systemd_unit_path(system=system) + if not unit_path.exists(): + scope_flag = " --system" if system else "" + print(f"✗ Gateway service is not installed") + print(f" Run: {'sudo ' if system else ''}hermes gateway install{scope_flag}") + sys.exit(1) + + def systemd_start(system: bool = False): system = _select_systemd_scope(system) if system: @@ -1980,6 +1989,7 @@ def systemd_start(system: bool = False): # reachable (common on fresh RHEL/Debian SSH sessions without linger). # Raises UserSystemdUnavailableError with a remediation message. _preflight_user_systemd() + _require_service_installed("start", system=system) refresh_systemd_unit_if_needed(system=system) _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30) print(f"✓ {_service_scope_label(system).capitalize()} service started") @@ -1990,6 +2000,7 @@ def systemd_stop(system: bool = False): system = _select_systemd_scope(system) if system: _require_root_for_system_service("stop") + _require_service_installed("stop", system=system) _run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90) print(f"✓ {_service_scope_label(system).capitalize()} service stopped") @@ -2001,6 +2012,7 @@ def systemd_restart(system: bool = False): _require_root_for_system_service("restart") else: _preflight_user_systemd() + _require_service_installed("restart", system=system) refresh_systemd_unit_if_needed(system=system) from gateway.status import get_running_pid diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index a2e3869c8c8..210c9c144e7 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -141,6 +141,27 @@ class TestSystemdServiceRefresh: assert ["systemctl", "--user", "daemon-reload"] in calls +class TestRequireServiceInstalled: + def test_exits_with_install_hint_when_unit_missing(self, tmp_path, monkeypatch, capsys): + unit_path = tmp_path / "hermes-gateway.service" + monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path) + + with pytest.raises(SystemExit) as exc_info: + gateway_cli._require_service_installed("start") + + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "not installed" in out + assert "hermes gateway install" in out + + def test_passes_when_unit_exists(self, tmp_path, monkeypatch): + unit_path = tmp_path / "hermes-gateway.service" + unit_path.write_text("[Unit]\n", encoding="utf-8") + monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path) + + gateway_cli._require_service_installed("start") + + class TestGeneratedSystemdUnits: def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): unit = gateway_cli.generate_systemd_unit(system=False) @@ -521,6 +542,7 @@ class TestGatewaySystemServiceRouting: calls = [] monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False) + monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None) monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system))) monkeypatch.setattr( "gateway.status.get_running_pid", @@ -575,6 +597,7 @@ class TestGatewaySystemServiceRouting: def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys): monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False) + monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None) monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None) monkeypatch.setattr( "gateway.status.read_runtime_status", From 3ccf723bf999d02698e44a30e1d6a9a90d7713f7 Mon Sep 17 00:00:00 2001 From: cong <274902531@qq.com> Date: Mon, 27 Apr 2026 21:54:44 +0800 Subject: [PATCH 136/171] fix(gateway): read context_length from custom_providers in session info header --- gateway/run.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 28d13994bad..6047de32203 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6792,6 +6792,7 @@ class GatewayRunner: base_url = None api_key = None custom_provs = None + data = None try: data = _load_gateway_config() @@ -6814,6 +6815,41 @@ class GatewayRunner: except Exception: pass + # Also check custom_providers for context_length when top-level model.context_length is not set + if config_context_length is None and data: + try: + custom_providers = data.get("custom_providers", []) + if custom_providers: + for cp in custom_providers: + if not isinstance(cp, dict): + continue + cp_model = cp.get("model") or "" + cp_models = cp.get("models") or {} + # Match provider model to current model + if cp_model and cp_model == model: + raw_cp_ctx = cp.get("context_length") + if raw_cp_ctx is not None: + try: + config_context_length = int(raw_cp_ctx) + break + except (TypeError, ValueError): + pass + # Also check per-model context_length + if isinstance(cp_models, dict): + model_entry = cp_models.get(model) + if isinstance(model_entry, dict): + model_ctx = model_entry.get("context_length") + else: + model_ctx = model_entry + if model_ctx is not None and isinstance(model_ctx, (int, float)): + try: + config_context_length = int(model_ctx) + break + except (TypeError, ValueError): + pass + except Exception: + pass + # Resolve runtime credentials for probing try: runtime = _resolve_runtime_agent_kwargs() From a79b0ec46157efc91537e634a3dcc44a76f6dc7e Mon Sep 17 00:00:00 2001 From: Kathy <kathy@Kathy.local> Date: Sat, 18 Apr 2026 00:26:38 +0800 Subject: [PATCH 137/171] fix: keep Feishu topic replies from falling back to new threads (local patch) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --- gateway/platforms/base.py | 23 ++++++++++++++++++++--- gateway/platforms/feishu.py | 13 ++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 78e0dd7e25c..4d611fdaa53 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -2506,7 +2506,13 @@ class BasePlatformAdapter(ABC): _r = await self._send_with_retry( chat_id=event.source.chat_id, content=_text, - reply_to=event.message_id, + reply_to=( + event.reply_to_message_id + if event.source.platform == Platform.FEISHU + and event.source.thread_id + and event.reply_to_message_id + else event.message_id + ), metadata=thread_meta, ) if _eph_ttl > 0 and _r.success and _r.message_id: @@ -2606,7 +2612,13 @@ class BasePlatformAdapter(ABC): _r = await self._send_with_retry( chat_id=event.source.chat_id, content=_text, - reply_to=event.message_id, + reply_to=( + event.reply_to_message_id + if event.source.platform == Platform.FEISHU + and event.source.thread_id + and event.reply_to_message_id + else event.message_id + ), metadata=_thread_meta, ) if _eph_ttl > 0 and _r.success and _r.message_id: @@ -2810,10 +2822,15 @@ class BasePlatformAdapter(ABC): # Send the text portion if text_content: logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id) + _reply_anchor = ( + event.reply_to_message_id + if event.source.platform == Platform.FEISHU and event.source.thread_id and event.reply_to_message_id + else event.message_id + ) result = await self._send_with_retry( chat_id=event.source.chat_id, content=text_content, - reply_to=event.message_id, + reply_to=_reply_anchor, metadata=_thread_metadata, ) _record_delivery(result) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index a6b522c4a24..ac920bab69d 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -2757,9 +2757,11 @@ class FeishuAdapter(BasePlatformAdapter): if hint: text = f"{hint}\n\n{text}" if text else hint + thread_id = getattr(message, "thread_id", None) or getattr(message, "root_id", None) or None reply_to_message_id = ( getattr(message, "parent_id", None) or getattr(message, "upper_message_id", None) + or getattr(message, "root_id", None) or None ) reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None @@ -2791,7 +2793,7 @@ class FeishuAdapter(BasePlatformAdapter): chat_type=self._resolve_source_chat_type(chat_info=chat_info, event_chat_type=chat_type), user_id=sender_profile["user_id"], user_name=sender_profile["user_name"], - thread_id=getattr(message, "thread_id", None) or None, + thread_id=thread_id, user_id_alt=sender_profile["user_id_alt"], is_bot=is_bot, ) @@ -4227,6 +4229,15 @@ class FeishuAdapter(BasePlatformAdapter): if active_reply_to and not self._response_succeeded(response): code = getattr(response, "code", None) if code in _FEISHU_REPLY_FALLBACK_CODES: + if (metadata or {}).get("thread_id"): + logger.warning( + "[Feishu] Reply to %s failed in thread %s (code %s — message withdrawn/missing); " + "skipping top-level fallback to avoid creating a new topic", + active_reply_to, + (metadata or {}).get("thread_id"), + code, + ) + return response logger.warning( "[Feishu] Reply to %s failed (code %s — message withdrawn/missing); " "falling back to new message in chat %s", From 1bd5ac7f2f839cd047366749ebbbf901220c7afe Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:53:44 -0700 Subject: [PATCH 138/171] fix(self-improvement-loop): bump background-review budget to 16 and suppress status leaks (#19710) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The background memory/skill review fork had two user-visible issues: 1. max_iterations=8 was too tight for multi-step reviews. A review that needs to skill_view one or two candidate skills, add a memory entry, and patch a skill routinely blew the budget — surfacing an 'Iteration budget exhausted (8/8)' warning to the user and leaving the review half-finished. 2. Mid-review lifecycle messages leaked into the user's terminal past the existing quiet_mode + redirect_stdout/stderr guards. _emit_status and _emit_warning route through _vprint(force=True) -> _print_fn / status_callback, which bypass sys.stdout entirely. The stdout redirect only catches raw print() calls. Changes: - Bump the review fork's max_iterations from 8 to 16. - Set review_agent.suppress_status_output = True on the fork. This short-circuits _vprint unconditionally so _emit_status/_emit_warning emissions (iteration-budget warnings, rate-limit retries, compression messages) never reach the user. The only user-visible output remains the compact final summary line ('💾 Self-improvement review: ...') which is printed via self._safe_print on the *main* agent (outside the fork's redirect/suppress scope). Summarizer filter is already correct — _summarize_background_review_actions only surfaces tool calls with data.get('success') is truthy, so failed attempts and reasoning text never reach the summary line. --- run_agent.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index e3823551dc1..17b8b01db1f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3611,7 +3611,7 @@ class AIAgent: _parent_runtime = self._current_main_runtime() review_agent = AIAgent( model=self.model, - max_iterations=8, + max_iterations=16, quiet_mode=True, platform=self.platform, provider=self.provider, @@ -3629,6 +3629,14 @@ class AIAgent: review_agent._user_profile_enabled = self._user_profile_enabled review_agent._memory_nudge_interval = 0 review_agent._skill_nudge_interval = 0 + # Suppress all status/warning emits from the fork so the + # user only sees the final successful-action summary. + # Without this, mid-review "Iteration budget exhausted", + # rate-limit retries, compression warnings, and other + # lifecycle messages bubble up through _emit_status -> + # _vprint and leak past the stdout redirect (they go via + # _print_fn/status_callback, which bypass sys.stdout). + review_agent.suppress_status_output = True review_agent.run_conversation( user_message=prompt, From d3b22b76d8b63f81c4f70a1d1aae748b883484ab Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:54:02 -0700 Subject: [PATCH 139/171] fix(kanban): enforce worker task-ownership on destructive tool calls (#19713) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #19534 (security). A worker spawned by the kanban dispatcher has HERMES_KANBAN_TASK set to its own task id. The destructive tools (kanban_complete, kanban_block, kanban_heartbeat) resolved task_id via _default_task_id() which preferred an explicit arg over the env var, with no ownership check — so a buggy or prompt-injected worker could complete / block / heartbeat any OTHER task (sibling, cross-tenant, anything) by supplying its id. Reporter's repro: worker for t_A passed task_id=t_B to kanban_complete and got {"ok": true}. Fix: add _enforce_worker_task_ownership(tid). If HERMES_KANBAN_TASK is set and tid doesn't match, return a structured tool error with guidance to use kanban_comment (for information handoff across tasks) or kanban_create (for follow-up work). Orchestrator profiles (no env var, but kanban toolset enabled per #18968) are exempt — their job is routing and sometimes includes closing out child tasks. Kept unrestricted (deliberately): - kanban_show — workers legitimately read parent/sibling handoff context - kanban_comment — cross-task comments are the handoff mechanism - kanban_create — orchestrator fan-out, worker follow-up spawning - kanban_link — parent/child linking Tests: 5 new regression tests in tests/tools/test_kanban_tools.py covering the grid (worker-attacks-foreign ×3 tools, worker-own-task preserved, orchestrator-unrestricted). 36/36 pass. --- tests/tools/test_kanban_tools.py | 118 +++++++++++++++++++++++++++++++ tools/kanban_tools.py | 41 +++++++++++ 2 files changed, 159 insertions(+) diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py index 1217e7c7387..9031d81d8eb 100644 --- a/tests/tools/test_kanban_tools.py +++ b/tests/tools/test_kanban_tools.py @@ -492,3 +492,121 @@ def test_kanban_guidance_prompt_size_bounded(monkeypatch, tmp_path): assert 1_500 < len(KANBAN_GUIDANCE) < 4_096, ( f"KANBAN_GUIDANCE is {len(KANBAN_GUIDANCE)} chars — too short (missing?) or too long" ) + + +# --------------------------------------------------------------------------- +# Worker task-ownership enforcement (regression tests for #19534) +# --------------------------------------------------------------------------- +# +# A worker process has HERMES_KANBAN_TASK set to its own task id. The +# destructive tools (kanban_complete, kanban_block, kanban_heartbeat) +# must refuse to operate on any OTHER task id, even if the caller +# supplies an explicit `task_id` argument. Workers legitimately call +# kanban_show / kanban_comment / kanban_create / kanban_link on other +# tasks, so those are unrestricted. +# +# Orchestrator profiles (no HERMES_KANBAN_TASK in env) are intentionally +# exempt — their job is routing, and they sometimes close out child +# tasks on behalf of the child. + + +def test_worker_complete_rejects_foreign_task_id(worker_env): + """A worker cannot complete a task that isn't its own (#19534).""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="sibling") + conn.execute("UPDATE tasks SET status='ready' WHERE id=?", (other,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_complete({"task_id": other, "summary": "HIJACK"}) + d = json.loads(out) + assert d.get("ok") is not True + assert "refusing to mutate" in d.get("error", "") + + # Sibling task must be untouched. + conn = kb.connect() + try: + assert kb.get_task(conn, other).status == "ready" + finally: + conn.close() + + +def test_worker_block_rejects_foreign_task_id(worker_env): + """A worker cannot block a task that isn't its own (#19534).""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="sibling") + conn.execute("UPDATE tasks SET status='ready' WHERE id=?", (other,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_block({"task_id": other, "reason": "evil"}) + d = json.loads(out) + assert "refusing to mutate" in d.get("error", "") + + conn = kb.connect() + try: + assert kb.get_task(conn, other).status == "ready" + finally: + conn.close() + + +def test_worker_heartbeat_rejects_foreign_task_id(worker_env): + """A worker cannot heartbeat a task that isn't its own (#19534).""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="sibling") + # Put sibling in running state so heartbeat would otherwise succeed. + conn.execute("UPDATE tasks SET status='running' WHERE id=?", (other,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_heartbeat({"task_id": other}) + d = json.loads(out) + assert "refusing to mutate" in d.get("error", "") + + +def test_worker_complete_own_task_still_works(worker_env): + """The ownership check doesn't break the normal own-task happy path.""" + from tools import kanban_tools as kt + # Both implicit (no task_id arg) and explicit (matching env) must work. + out = kt._handle_complete({"task_id": worker_env, "summary": "explicit own"}) + d = json.loads(out) + assert d.get("ok") is True and d.get("task_id") == worker_env + + +def test_orchestrator_complete_any_task_allowed(monkeypatch, tmp_path): + """Orchestrator profiles (no HERMES_KANBAN_TASK) can still complete + any task via explicit task_id. The check only applies to workers.""" + monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False) + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + from pathlib import Path as _P + monkeypatch.setattr(_P, "home", lambda: tmp_path) + + from hermes_cli import kanban_db as kb + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="child to close out") + conn.execute("UPDATE tasks SET status='ready' WHERE id=?", (tid,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_complete({"task_id": tid, "summary": "orchestrator close"}) + d = json.loads(out) + assert d.get("ok") is True and d.get("task_id") == tid diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index d0023a30784..1f99f6896cb 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -79,6 +79,38 @@ def _default_task_id(arg: Optional[str]) -> Optional[str]: return env_tid or None +def _enforce_worker_task_ownership(tid: str) -> Optional[str]: + """Reject worker-driven destructive calls on foreign task IDs. + + A process spawned by the dispatcher has ``HERMES_KANBAN_TASK`` set + to its own task id. Tools like ``kanban_complete`` / ``kanban_block`` + / ``kanban_heartbeat`` mutate run-lifecycle state, so a buggy or + prompt-injected worker that passed an explicit ``task_id`` for some + other task could corrupt sibling or cross-tenant runs (see #19534). + + Orchestrator profiles (kanban toolset enabled but **no** + ``HERMES_KANBAN_TASK`` in env) aren't subject to this check — their + job is routing, and they sometimes legitimately close out child + tasks or reopen blocked ones. Workers are narrowly scoped to their + one task. + + Returns ``None`` when the call is allowed, or a tool-error string + when it must be rejected. Callers should ``return`` the error + verbatim. + """ + env_tid = os.environ.get("HERMES_KANBAN_TASK") + if not env_tid: + # Orchestrator or CLI context — no task-scope restriction. + return None + if tid != env_tid: + return tool_error( + f"worker is scoped to task {env_tid}; refusing to mutate " + f"{tid}. Use kanban_comment to hand off information to other " + f"tasks, or kanban_create to spawn follow-up work." + ) + return None + + def _connect(): """Import + connect lazily so the module imports cleanly in non-kanban contexts (e.g. test rigs that import every tool module).""" @@ -172,6 +204,9 @@ def _handle_complete(args: dict, **kw) -> str: return tool_error( "task_id is required (or set HERMES_KANBAN_TASK in the env)" ) + ownership_err = _enforce_worker_task_ownership(tid) + if ownership_err: + return ownership_err summary = args.get("summary") metadata = args.get("metadata") result = args.get("result") @@ -210,6 +245,9 @@ def _handle_block(args: dict, **kw) -> str: return tool_error( "task_id is required (or set HERMES_KANBAN_TASK in the env)" ) + ownership_err = _enforce_worker_task_ownership(tid) + if ownership_err: + return ownership_err reason = args.get("reason") if not reason or not str(reason).strip(): return tool_error("reason is required — explain what input you need") @@ -238,6 +276,9 @@ def _handle_heartbeat(args: dict, **kw) -> str: return tool_error( "task_id is required (or set HERMES_KANBAN_TASK in the env)" ) + ownership_err = _enforce_worker_task_ownership(tid) + if ownership_err: + return ownership_err note = args.get("note") try: kb, conn = _connect() From 69fc6d9c1e82ec87ec08765f10e92e8d08029851 Mon Sep 17 00:00:00 2001 From: QifengKuang <k2767567815@gmail.com> Date: Mon, 4 May 2026 04:54:18 -0700 Subject: [PATCH 140/171] fix(telegram): fall back to document on any send_photo failure, not just dim errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadens the existing fallback (previously only fired for Photo_invalid_dimensions) to cover every send_photo exception class: rate limits, corrupt file markers, format edge cases. The expected dimension case still logs at INFO (document is the right path); all other cases log at WARNING with exc_info so they're visible in logs. If send_document itself fails, we still fall back to the base adapter's text-only 'Image: /path' rendering as a last resort. Salvage of #15837 — original PR author QifengKuang proposed the broader try/except-style fallback. Adapted to keep the existing INFO-vs-WARNING log split for dimension errors (the expected case). Co-authored-by: QifengKuang <k2767567815@gmail.com> --- gateway/platforms/telegram.py | 46 ++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 247b5fbb932..167d47237e4 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2268,14 +2268,36 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: error_str = str(e) - # Check for dimension-related errors - fallback to document mode - if "Photo_invalid_dimensions" in error_str or "PHOTO_INVALID_DIMENSIONS" in error_str: + # Dimension-related errors are the expected case for valid image + # files that Telegram just refuses as photos (screenshots, extreme + # aspect ratios). Log at INFO because the document fallback is + # the correct path. Any other send_photo failure also falls back + # to document (rate limits, corrupt file markers, format edge + # cases), but at WARNING because it's unexpected and worth + # surfacing in logs. + is_dim_error = ( + "Photo_invalid_dimensions" in error_str + or "PHOTO_INVALID_DIMENSIONS" in error_str + ) + if is_dim_error: logger.info( - "[%s] Image dimensions exceed Telegram photo limits, sending as document: %s", + "[%s] Image dimensions exceed Telegram photo limits, " + "sending as document: %s", self.name, image_path, ) - # Fallback to sending as document (file) - no dimension limits, only 50MB size limit + else: + logger.warning( + "[%s] Failed to send Telegram local image as photo, " + "trying document fallback: %s", + self.name, + e, + exc_info=True, + ) + # Fallback to sending as document (file) — no dimension limit, + # only 50MB size limit. If even that fails, fall back to the + # base adapter's text-only "Image: /path" rendering. + try: return await self.send_document( chat_id=chat_id, file_path=image_path, @@ -2284,13 +2306,15 @@ class TelegramAdapter(BasePlatformAdapter): reply_to=reply_to, metadata=metadata, ) - logger.error( - "[%s] Failed to send Telegram local image, falling back to base adapter: %s", - self.name, - e, - exc_info=True, - ) - return await super().send_image_file(chat_id, image_path, caption, reply_to) + except Exception as doc_err: + logger.error( + "[%s] Failed to send Telegram local image as document, " + "falling back to base adapter: %s", + self.name, + doc_err, + exc_info=True, + ) + return await super().send_image_file(chat_id, image_path, caption, reply_to) async def send_document( self, From a175f395776a83e54ac838ade06ad3b837051249 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 04:54:55 -0700 Subject: [PATCH 141/171] feat(nous): persist Nous OAuth across profiles via shared token store (#19712) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the Codex auto-import UX. On successful Nous login (either `hermes auth add nous --type oauth` or `hermes login nous`), tokens are mirrored to `$HERMES_SHARED_AUTH_DIR/nous_auth.json` (default `~/.hermes/shared/nous_auth.json`, outside any named profile's HERMES_HOME). On next login in a new profile, the flow offers to import those credentials ("Import these credentials? [Y/n]") and rehydrates via a forced refresh+mint instead of running the full device-code flow. Runtime refresh in any profile syncs the rotated refresh_token back to the shared store so sibling profiles don't hit stale-token fallback after rotation. The volatile 24h agent_key is NOT persisted to the shared store — only the long-lived OAuth tokens are cross-profile useful. - `HERMES_SHARED_AUTH_DIR` env var for tests + custom layouts - Pytest seat belt mirrors the existing `_auth_file_path` guard so forgetting to redirect the store in a test fails loudly - File mode 0600 where platform supports it - Runtime credential resolution is unchanged — shared store is only consulted during the login flow, so profile isolation at runtime is preserved - Stale refresh_token + portal-down cases gracefully fall back to device-code Addresses a user report from Mike Nguyen: running `hermes --profile <name> auth add nous --type oauth` for every new profile is unnecessary friction now that Codex has a shared-import flow via `~/.codex/auth.json`. --- hermes_cli/auth.py | 270 ++++++++++++++++++- hermes_cli/auth_commands.py | 41 +++ tests/hermes_cli/test_auth_nous_provider.py | 283 ++++++++++++++++++++ 3 files changed, 583 insertions(+), 11 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 56e72d5eb07..5b63d41eb1f 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2589,6 +2589,208 @@ def _poll_for_token( # Nous Portal — token refresh, agent key minting, model discovery # ============================================================================= +# ----------------------------------------------------------------------------- +# Shared Nous token store — lets OAuth credentials persist across profiles +# so a new `hermes --profile <name> auth add nous --type oauth` can one-tap +# import instead of running the full device-code flow every time. +# +# File lives at ${HERMES_SHARED_AUTH_DIR}/nous_auth.json, defaulting to +# ~/.hermes/shared/nous_auth.json. It is OUTSIDE any named profile's +# HERMES_HOME so named profiles (which typically live under +# ~/.hermes/profiles/<name>/) all see the same file. +# +# Written on successful login and on every runtime refresh so the stored +# refresh_token stays current even if one profile refreshes and rotates it. +# If ever the stored refresh_token does go stale server-side, import fails +# gracefully and the user falls back to the normal device-code flow. +# ----------------------------------------------------------------------------- + +NOUS_SHARED_STORE_FILENAME = "nous_auth.json" + + +def _nous_shared_auth_dir() -> Path: + """Resolve the directory that holds the shared Nous token store. + + Honors ``HERMES_SHARED_AUTH_DIR`` so tests can redirect it to a tmp + path without touching the real user's home. Defaults to + ``~/.hermes/shared/``. + """ + override = os.getenv("HERMES_SHARED_AUTH_DIR", "").strip() + if override: + return Path(override).expanduser() + return Path.home() / ".hermes" / "shared" + + +def _nous_shared_store_path() -> Path: + path = _nous_shared_auth_dir() / NOUS_SHARED_STORE_FILENAME + # Seat belt: if pytest is running and this resolves to a path under the + # real user's home, refuse rather than silently corrupt cross-profile + # state. Tests must set HERMES_SHARED_AUTH_DIR to a tmp_path (conftest + # does not do this automatically — mirror the _auth_file_path() guard + # so forgetting to set it fails loudly instead of writing to the real + # shared store). + if os.environ.get("PYTEST_CURRENT_TEST"): + real_home_shared = ( + Path.home() / ".hermes" / "shared" / NOUS_SHARED_STORE_FILENAME + ).resolve(strict=False) + try: + resolved = path.resolve(strict=False) + except Exception: + resolved = path + if resolved == real_home_shared: + raise RuntimeError( + f"Refusing to touch real user shared Nous auth store during test run: " + f"{path}. Set HERMES_SHARED_AUTH_DIR to a tmp_path in your test fixture." + ) + return path + + +def _write_shared_nous_state(state: Dict[str, Any]) -> None: + """Persist a minimal copy of the Nous OAuth state to the shared store. + + Best-effort: any failure is swallowed after logging. The shared store + is a convenience layer; the per-profile auth.json remains the source + of truth. + + We deliberately omit the short-lived ``agent_key`` (24h TTL, profile- + specific) — only the long-lived OAuth tokens are cross-profile useful. + """ + refresh_token = state.get("refresh_token") + access_token = state.get("access_token") + if not (isinstance(refresh_token, str) and refresh_token.strip()): + # No refresh_token = nothing worth sharing across profiles + return + if not (isinstance(access_token, str) and access_token.strip()): + return + + shared = { + "_schema": 1, + "access_token": access_token, + "refresh_token": refresh_token, + "token_type": state.get("token_type") or "Bearer", + "scope": state.get("scope") or DEFAULT_NOUS_SCOPE, + "client_id": state.get("client_id") or DEFAULT_NOUS_CLIENT_ID, + "portal_base_url": state.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL, + "inference_base_url": state.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL, + "obtained_at": state.get("obtained_at"), + "expires_at": state.get("expires_at"), + "updated_at": datetime.now(timezone.utc).isoformat(), + } + try: + path = _nous_shared_store_path() + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(path.suffix + ".tmp") + tmp.write_text(json.dumps(shared, indent=2, sort_keys=True)) + try: + os.chmod(tmp, 0o600) + except OSError: + pass + os.replace(tmp, path) + _oauth_trace( + "nous_shared_store_written", + path=str(path), + refresh_token_fp=_token_fingerprint(refresh_token), + ) + except Exception as exc: + logger.debug("Failed to write shared Nous auth store: %s", exc) + + +def _read_shared_nous_state() -> Optional[Dict[str, Any]]: + """Return the shared Nous OAuth state if present and well-formed. + + Returns ``None`` when the file is missing, unreadable, malformed, or + lacks required fields. Callers should treat ``None`` as "no shared + credentials available — fall through to device-code". + """ + try: + path = _nous_shared_store_path() + except RuntimeError: + # Test seat belt tripped — treat as missing + return None + if not path.is_file(): + return None + try: + payload = json.loads(path.read_text()) + except (OSError, ValueError) as exc: + logger.debug("Shared Nous auth store at %s is unreadable: %s", path, exc) + return None + if not isinstance(payload, dict): + return None + refresh_token = payload.get("refresh_token") + access_token = payload.get("access_token") + if not (isinstance(refresh_token, str) and refresh_token.strip()): + return None + if not (isinstance(access_token, str) and access_token.strip()): + return None + return payload + + +def _try_import_shared_nous_state( + *, + timeout_seconds: float = 15.0, + min_key_ttl_seconds: int = 5 * 60, +) -> Optional[Dict[str, Any]]: + """Attempt to rehydrate Nous OAuth state from the shared store. + + Reads the shared file (if present), runs a forced refresh+mint using + the stored refresh_token to produce a fresh access_token + agent_key + scoped to this profile, and returns the full auth_state dict ready + for ``persist_nous_credentials()``. + + Returns ``None`` when no shared state is available or the rehydrate + fails for any reason (expired refresh_token, portal unreachable, + etc.) — caller should then fall through to the normal device-code + flow. + """ + shared = _read_shared_nous_state() + if not shared: + return None + + # Build a full state dict so refresh_nous_oauth_from_state has every + # field it needs. force_refresh=True gets us a fresh access_token + # for this profile; force_mint=True gets us a fresh agent_key. + state: Dict[str, Any] = { + "access_token": shared.get("access_token"), + "refresh_token": shared.get("refresh_token"), + "client_id": shared.get("client_id") or DEFAULT_NOUS_CLIENT_ID, + "portal_base_url": shared.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL, + "inference_base_url": shared.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL, + "token_type": shared.get("token_type") or "Bearer", + "scope": shared.get("scope") or DEFAULT_NOUS_SCOPE, + "obtained_at": shared.get("obtained_at"), + "expires_at": shared.get("expires_at"), + "agent_key": None, + "agent_key_expires_at": None, + "tls": {"insecure": False, "ca_bundle": None}, + } + + try: + refreshed = refresh_nous_oauth_from_state( + state, + min_key_ttl_seconds=min_key_ttl_seconds, + timeout_seconds=timeout_seconds, + force_refresh=True, + force_mint=True, + ) + except AuthError as exc: + _oauth_trace( + "nous_shared_import_failed", + error_type=type(exc).__name__, + error_code=getattr(exc, "code", None), + ) + logger.debug("Shared Nous import failed: %s", exc) + return None + except Exception as exc: + _oauth_trace( + "nous_shared_import_failed", + error_type=type(exc).__name__, + ) + logger.debug("Shared Nous import failed: %s", exc) + return None + + return refreshed + + def _refresh_access_token( *, client: httpx.Client, @@ -2991,6 +3193,12 @@ def persist_nous_credentials( _save_provider_state(auth_store, "nous", state) _save_auth_store(auth_store) + # Mirror to the shared store so a new profile can one-tap import + # these credentials via `hermes auth add nous --type oauth`. Best- + # effort: any I/O failure is logged and swallowed (the per-profile + # auth.json is still the source of truth). + _write_shared_nous_state(state) + pool = load_pool("nous") return next( (e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE), @@ -3059,6 +3267,11 @@ def resolve_nous_runtime_credentials( refresh_token_fp=_token_fingerprint(state.get("refresh_token")), access_token_fp=_token_fingerprint(state.get("access_token")), ) + # Mirror post-refresh state to the shared store so sibling + # profiles don't hold stale refresh_tokens after rotation. + # Best-effort — any failure is logged and swallowed inside + # _write_shared_nous_state. + _write_shared_nous_state(state) verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state) timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0) @@ -4600,17 +4813,47 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) try: - auth_state = _nous_device_code_login( - portal_base_url=getattr(args, "portal_url", None), - inference_base_url=getattr(args, "inference_url", None), - client_id=getattr(args, "client_id", None) or pconfig.client_id, - scope=getattr(args, "scope", None) or pconfig.scope, - open_browser=not getattr(args, "no_browser", False), - timeout_seconds=timeout_seconds, - insecure=insecure, - ca_bundle=ca_bundle, - min_key_ttl_seconds=5 * 60, - ) + auth_state = None + + # Codex-style auto-import: before launching a fresh device-code + # flow, check the shared store for an existing Nous credential + # from any other profile. If present, offer to rehydrate it. + shared = _read_shared_nous_state() + if shared: + try: + shared_path = _nous_shared_store_path() + except RuntimeError: + shared_path = None + print() + if shared_path: + print(f"Found existing Nous OAuth credentials at {shared_path}") + else: + print("Found existing shared Nous OAuth credentials") + try: + do_import = input("Import these credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + do_import = "y" + if do_import in ("", "y", "yes"): + print("Rehydrating Nous session from shared credentials...") + auth_state = _try_import_shared_nous_state( + timeout_seconds=timeout_seconds, + min_key_ttl_seconds=5 * 60, + ) + if auth_state is None: + print("Could not refresh shared credentials — falling back to device-code login.") + + if auth_state is None: + auth_state = _nous_device_code_login( + portal_base_url=getattr(args, "portal_url", None), + inference_base_url=getattr(args, "inference_url", None), + client_id=getattr(args, "client_id", None) or pconfig.client_id, + scope=getattr(args, "scope", None) or pconfig.scope, + open_browser=not getattr(args, "no_browser", False), + timeout_seconds=timeout_seconds, + insecure=insecure, + ca_bundle=ca_bundle, + min_key_ttl_seconds=5 * 60, + ) inference_base_url = auth_state["inference_base_url"] @@ -4627,6 +4870,11 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: _save_provider_state(auth_store, "nous", auth_state) saved_to = _save_auth_store(auth_store) + # Mirror to the shared store so other profiles can one-tap import + # these credentials. Best-effort: any I/O failure is logged and + # swallowed inside the helper. + _write_shared_nous_state(auth_state) + print() print("Login successful!") print(f" Auth state: {saved_to}") diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index a9eb206647d..a29776aea23 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -245,6 +245,47 @@ def auth_add_command(args) -> None: return if provider == "nous": + # Codex-style auto-import: if a shared Nous credential lives at + # ~/.hermes/shared/nous_auth.json (written by any previous + # successful login), offer to import it instead of running the + # full device-code flow. This makes `hermes --profile <name> + # auth add nous --type oauth` a one-tap operation for users who + # run multiple profiles. + shared = auth_mod._read_shared_nous_state() + if shared: + try: + path = auth_mod._nous_shared_store_path() + except RuntimeError: + path = None + print() + if path: + print(f"Found existing Nous OAuth credentials at {path}") + else: + print("Found existing shared Nous OAuth credentials") + try: + do_import = input("Import these credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + do_import = "y" + if do_import in ("", "y", "yes"): + print("Rehydrating Nous session from shared credentials...") + rehydrated = auth_mod._try_import_shared_nous_state( + timeout_seconds=getattr(args, "timeout", None) or 15.0, + min_key_ttl_seconds=max( + 60, int(getattr(args, "min_key_ttl_seconds", 5 * 60)) + ), + ) + if rehydrated is not None: + custom_label = (getattr(args, "label", None) or "").strip() or None + entry = auth_mod.persist_nous_credentials(rehydrated, label=custom_label) + shown_label = entry.label if entry is not None else label_from_token( + rehydrated.get("access_token", ""), _oauth_default_label(provider, 1), + ) + print(f'Imported {provider} OAuth credentials: "{shown_label}"') + return + # Rehydrate failed (expired refresh_token, portal down, etc.) + # — fall through to device-code flow. + print("Could not refresh shared credentials — falling back to device-code login.") + creds = auth_mod._nous_device_code_login( portal_base_url=getattr(args, "portal_url", None), inference_base_url=getattr(args, "inference_url", None), diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index a8e337c1a0d..d0e24aeaabe 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -896,3 +896,286 @@ def test_refresh_non_reuse_error_keeps_original_description(): assert "Refresh session has been revoked" in str(exc_info.value) # Must not have been rewritten with the reuse message. assert "external process" not in str(exc_info.value).lower() + + +# ============================================================================= +# Shared Nous token store — cross-profile persistence (Codex-style auto-import) +# ============================================================================= + + +@pytest.fixture +def shared_store_env(tmp_path, monkeypatch): + """Redirect HERMES_SHARED_AUTH_DIR to a tmp_path. + + Required for every test that exercises the shared Nous store — the + in-auth.py seat belt refuses to touch the real user's shared store + under pytest, so tests that forget this fixture fail loudly instead + of corrupting real state. + """ + shared_dir = tmp_path / "shared" + monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(shared_dir)) + return shared_dir + + +def test_shared_store_seat_belt_refuses_real_home_under_pytest(monkeypatch): + """Without HERMES_SHARED_AUTH_DIR override, the seat belt must trip. + + Mirrors the existing ``_auth_file_path`` seat belt: forgetting to + redirect this store in a test must fail loudly instead of silently + writing to the user's real ``~/.hermes/shared/`` across CI runs. + """ + from hermes_cli.auth import _nous_shared_store_path + + monkeypatch.delenv("HERMES_SHARED_AUTH_DIR", raising=False) + + with pytest.raises(RuntimeError, match="shared Nous auth store"): + _nous_shared_store_path() + + +def test_shared_store_honors_env_override(tmp_path, monkeypatch): + """HERMES_SHARED_AUTH_DIR must redirect the path.""" + from hermes_cli.auth import _nous_shared_store_path, NOUS_SHARED_STORE_FILENAME + + custom_dir = tmp_path / "custom_shared" + monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(custom_dir)) + + path = _nous_shared_store_path() + assert path == custom_dir / NOUS_SHARED_STORE_FILENAME + + +def test_shared_store_read_missing_returns_none(shared_store_env): + """Missing file → ``_read_shared_nous_state()`` returns None.""" + from hermes_cli.auth import _read_shared_nous_state + + assert _read_shared_nous_state() is None + + +def test_shared_store_read_malformed_returns_none(shared_store_env): + """Unreadable / non-JSON file → None, not an exception.""" + from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state + + path = _nous_shared_store_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("{ not json") + + assert _read_shared_nous_state() is None + + +def test_shared_store_read_missing_required_fields_returns_none(shared_store_env): + """Payload without refresh_token → None (nothing worth importing).""" + from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state + + path = _nous_shared_store_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"_schema": 1, "access_token": "abc"})) + + assert _read_shared_nous_state() is None + + +def test_shared_store_write_and_read_roundtrip(shared_store_env): + """Write → read must preserve refresh_token + OAuth URLs.""" + from hermes_cli.auth import ( + _nous_shared_store_path, + _read_shared_nous_state, + _write_shared_nous_state, + ) + + _write_shared_nous_state(_full_state_fixture()) + + path = _nous_shared_store_path() + assert path.is_file() + + # Permissions should be 0600 where the platform supports it. + mode = path.stat().st_mode & 0o777 + assert mode == 0o600 or mode == 0o644 # 0o644 on platforms without chmod + + loaded = _read_shared_nous_state() + assert loaded is not None + assert loaded["refresh_token"] == "refresh-tok" + assert loaded["access_token"] == "access-tok" + assert loaded["portal_base_url"] == "https://portal.example.com" + assert loaded["inference_base_url"] == "https://inference.example.com/v1" + # Volatile agent_key MUST NOT be persisted to the shared store + # (24h TTL, profile-specific — only long-lived OAuth tokens are + # cross-profile useful). + assert "agent_key" not in loaded + + +def test_shared_store_write_skips_when_refresh_token_missing(shared_store_env): + """Write is a no-op when refresh_token is absent (nothing to share).""" + from hermes_cli.auth import _nous_shared_store_path, _write_shared_nous_state + + state = dict(_full_state_fixture()) + state["refresh_token"] = "" + + _write_shared_nous_state(state) + + assert not _nous_shared_store_path().is_file() + + +def test_persist_nous_credentials_mirrors_to_shared_store( + tmp_path, monkeypatch, shared_store_env, +): + """persist_nous_credentials must populate BOTH per-profile auth.json + AND the shared store, so a future profile's `hermes auth add nous + --type oauth` can one-tap import instead of redoing device-code. + """ + from hermes_cli.auth import ( + _nous_shared_store_path, + _read_shared_nous_state, + persist_nous_credentials, + ) + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text( + json.dumps({"version": 1, "providers": {}}) + ) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + persist_nous_credentials(_full_state_fixture()) + + # Per-profile auth.json populated + payload = json.loads((hermes_home / "auth.json").read_text()) + assert "nous" in payload.get("providers", {}) + + # Shared store populated with the same refresh_token + shared = _read_shared_nous_state() + assert shared is not None + assert shared["refresh_token"] == "refresh-tok" + + # Shared file path lives under the tmp override, NOT the real home + assert str(_nous_shared_store_path()).startswith(str(shared_store_env)) + + +def test_try_import_shared_returns_none_when_store_missing(shared_store_env): + """No shared store → no rehydrate (fall through to device-code).""" + from hermes_cli.auth import _try_import_shared_nous_state + + assert _try_import_shared_nous_state() is None + + +def test_try_import_shared_returns_none_on_refresh_failure( + shared_store_env, monkeypatch, +): + """If the portal rejects the stored refresh_token (revoked, expired, + portal down), _try_import_shared_nous_state must return None so the + login flow falls back to a fresh device-code run. + """ + from hermes_cli import auth as auth_mod + + # Seed the shared store + auth_mod._write_shared_nous_state(_full_state_fixture()) + + # Make refresh fail + def _boom(*_args, **_kwargs): + raise AuthError( + "Refresh session has been revoked", + provider="nous", + code="invalid_grant", + relogin_required=True, + ) + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _boom) + + assert auth_mod._try_import_shared_nous_state() is None + + +def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch): + """Happy path: stored refresh_token is accepted, forced refresh+mint + returns a fresh access_token + agent_key, and the returned dict has + every field persist_nous_credentials() needs. + """ + from hermes_cli import auth as auth_mod + + auth_mod._write_shared_nous_state(_full_state_fixture()) + + def _fake_refresh(state, **kwargs): + # Simulate portal returning fresh tokens + a new agent_key + assert kwargs.get("force_refresh") is True + assert kwargs.get("force_mint") is True + return { + **state, + "access_token": "fresh-access-tok", + "refresh_token": "fresh-refresh-tok", # rotated + "agent_key": "new-agent-key", + "agent_key_expires_at": "2026-04-19T22:00:00+00:00", + } + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh) + + result = auth_mod._try_import_shared_nous_state() + + assert result is not None + assert result["access_token"] == "fresh-access-tok" + assert result["refresh_token"] == "fresh-refresh-tok" + assert result["agent_key"] == "new-agent-key" + # Preserved from shared state + assert result["portal_base_url"] == "https://portal.example.com" + assert result["client_id"] == "hermes-cli" + + +def test_shared_store_survives_across_profile_switch( + tmp_path, monkeypatch, shared_store_env, +): + """End-to-end: profile A logs in → shared store populated → profile B + (different HERMES_HOME) sees the same shared state and can rehydrate + without re-running device-code. + """ + from hermes_cli import auth as auth_mod + + # Profile A: login, which mirrors to shared store + profile_a = tmp_path / "profile_a" + profile_a.mkdir(parents=True, exist_ok=True) + (profile_a / "auth.json").write_text( + json.dumps({"version": 1, "providers": {}}) + ) + monkeypatch.setenv("HERMES_HOME", str(profile_a)) + auth_mod.persist_nous_credentials(_full_state_fixture()) + + # Profile A's auth.json has nous + a_payload = json.loads((profile_a / "auth.json").read_text()) + assert "nous" in a_payload.get("providers", {}) + + # Profile B: fresh HERMES_HOME, no auth yet, but the shared store + # persists — _read_shared_nous_state() must still return the tokens. + profile_b = tmp_path / "profile_b" + profile_b.mkdir(parents=True, exist_ok=True) + (profile_b / "auth.json").write_text( + json.dumps({"version": 1, "providers": {}}) + ) + monkeypatch.setenv("HERMES_HOME", str(profile_b)) + + # B's own auth.json has no nous + b_payload = json.loads((profile_b / "auth.json").read_text()) + assert "nous" not in b_payload.get("providers", {}) + + # But the shared store is visible + shared = auth_mod._read_shared_nous_state() + assert shared is not None + assert shared["refresh_token"] == "refresh-tok" + + # And a successful rehydrate + persist lands nous into profile B + def _fake_refresh(state, **kwargs): + return { + **state, + "access_token": "b-access-tok", + "refresh_token": "b-refresh-tok", + "agent_key": "b-agent-key", + "agent_key_expires_at": "2026-04-19T22:00:00+00:00", + } + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh) + result = auth_mod._try_import_shared_nous_state() + assert result is not None + + auth_mod.persist_nous_credentials(result) + + b_payload = json.loads((profile_b / "auth.json").read_text()) + assert "nous" in b_payload.get("providers", {}) + assert b_payload["providers"]["nous"]["refresh_token"] == "b-refresh-tok" + + # Shared store was updated with the rotated refresh_token too + shared_after = auth_mod._read_shared_nous_state() + assert shared_after is not None + assert shared_after["refresh_token"] == "b-refresh-tok" From 6f864f8f942b3532bea8e10584024a509bd248b4 Mon Sep 17 00:00:00 2001 From: ms-alan <chenb19870707@gmail.com> Date: Mon, 27 Apr 2026 00:05:49 +0800 Subject: [PATCH 142/171] fix(redact): add code_file param to skip false-positive ENV/JSON patterns ENV-assignment and JSON-field regex patterns in redact_sensitive_text() cause false positives when reading source code files: - MAX_TOKENS=*** triggers the ENV assignment pattern - "apiKey": "test" in test fixtures triggers the JSON field pattern Add code_file=False parameter. When code_file=True, skip only the ENV-assignment and JSON-field regex passes; all other patterns (prefixes, auth headers, private keys, DB connstrings, JWTs, URL secrets) are still applied. Update file_tools.py (read_file and search_files) to pass code_file=True so agent code analysis is not polluted by false-positive redactions. Closes #15934 --- agent/redact.py | 28 +++++++++++++++++----------- tools/file_tools.py | 4 ++-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/agent/redact.py b/agent/redact.py index 970ad5adfb3..afdee652888 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -305,13 +305,18 @@ def _redact_form_body(text: str) -> str: return _redact_query_string(text.strip()) -def redact_sensitive_text(text: str, *, force: bool = False) -> str: +def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str: """Apply all redaction patterns to a block of text. Safe to call on any string -- non-matching text passes through unchanged. Disabled by default — enable via security.redact_secrets: true in config.yaml. Set force=True for safety boundaries that must never return raw secrets regardless of the user's global logging redaction preference. + + Set code_file=True to skip the ENV-assignment and JSON-field regex + patterns when the text is known to be source code (e.g. MAX_TOKENS=*** + constants, "apiKey": "test" fixtures). Prefix patterns, auth headers, + private keys, DB connstrings, JWTs, and URL secrets are still redacted. """ if text is None: return None @@ -325,17 +330,18 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str: # Known prefixes (sk-, ghp_, etc.) text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text) - # ENV assignments: OPENAI_API_KEY=sk-abc... - def _redact_env(m): - name, quote, value = m.group(1), m.group(2), m.group(3) - return f"{name}={quote}{_mask_token(value)}{quote}" - text = _ENV_ASSIGN_RE.sub(_redact_env, text) + # ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives) + if not code_file: + def _redact_env(m): + name, quote, value = m.group(1), m.group(2), m.group(3) + return f"{name}={quote}{_mask_token(value)}{quote}" + text = _ENV_ASSIGN_RE.sub(_redact_env, text) - # JSON fields: "apiKey": "value" - def _redact_json(m): - key, value = m.group(1), m.group(2) - return f'{key}: "{_mask_token(value)}"' - text = _JSON_FIELD_RE.sub(_redact_json, text) + # JSON fields: "apiKey": "***" (skip for code files — false positives) + def _redact_json(m): + key, value = m.group(1), m.group(2) + return f'{key}: "{_mask_token(value)}"' + text = _JSON_FIELD_RE.sub(_redact_json, text) # Authorization headers text = _AUTH_HEADER_RE.sub( diff --git a/tools/file_tools.py b/tools/file_tools.py index 6022eee9124..106bd295be9 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -570,7 +570,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # ── Redact secrets (after guard check to skip oversized content) ── if result.content: - result.content = redact_sensitive_text(result.content) + result.content = redact_sensitive_text(result.content, code_file=True) result_dict["content"] = result.content # Large-file hint: if the file is big and the caller didn't ask @@ -993,7 +993,7 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", if hasattr(result, 'matches'): for m in result.matches: if hasattr(m, 'content') and m.content: - m.content = redact_sensitive_text(m.content) + m.content = redact_sensitive_text(m.content, code_file=True) result_dict = result.to_dict() if count >= 3: From fdf9343c51467c12c5bc8f89b488340f6d14b7dc Mon Sep 17 00:00:00 2001 From: Nikolay Gusev <ngusev@astralinux.ru> Date: Mon, 4 May 2026 04:58:35 -0700 Subject: [PATCH 143/171] fix(tools): wrap bare scalars in single-element list for array-typed args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Open-weight models (DeepSeek, Qwen, GLM) sometimes emit tool calls like `{"urls": "https://a.com"}` when the tool schema declares `type: array`. The call was JSON-valid but semantically wrong, and `coerce_tool_args` would pass the bare string through — the tool then failed with a confusing type error. `coerce_tool_args` now wraps non-list, non-null values in a single-element list when the schema declares `array`. Strings still go through `_coerce_value` first so JSON-encoded arrays (`'["a","b"]'`) parse correctly and nullable `"null"` still becomes `None`. `None` itself is preserved — tools with sensible defaults already handle it, and we don't want to silently mask a deliberate null. Salvaged from #19652 (NikolayGusev-astra) — the broader validate-then- repair layer had several issues (duplicated existing coercion, mis-classified `old_string` as a path field, prepended non-JSON prefixes to tool results that break downstream JSON parsing, hardcoded offset/limit defaults unsuitable for non-read_file tools). The one genuinely new capability is wrapping bare scalars, which is implemented here directly inside the existing coercion path. Co-authored-by: Nikolay Gusev <ngusev@astralinux.ru> --- model_tools.py | 41 +++++++++++++-- scripts/release.py | 1 + tests/run_agent/test_tool_arg_coercion.py | 62 +++++++++++++++++++++-- 3 files changed, 98 insertions(+), 6 deletions(-) diff --git a/model_tools.py b/model_tools.py index 2eb31ab0df7..8721e9ee6a7 100644 --- a/model_tools.py +++ b/model_tools.py @@ -511,6 +511,12 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]: Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``, and union types (``"type": ["integer", "string"]``). + + Also wraps bare scalar values in a single-element list when the schema + declares ``"type": "array"``. Open-weight models (DeepSeek, Qwen, GLM) + sometimes emit ``{"urls": "https://a.com"}`` when the tool expects + ``{"urls": ["https://a.com"]}``; wrapping here avoids a confusing tool + failure on what is otherwise a well-formed call. """ if not args or not isinstance(args, dict): return args @@ -523,13 +529,42 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]: if not properties: return args - for key, value in args.items(): - if not isinstance(value, str): - continue + for key, value in list(args.items()): prop_schema = properties.get(key) if not prop_schema: continue expected = prop_schema.get("type") + + # Wrap bare non-list values when the schema declares ``array``. + # Strings still go through _coerce_value first so JSON-encoded + # arrays (``'["a","b"]'``) get parsed and nullable ``"null"`` + # becomes ``None`` rather than ``["null"]``. + # ``None`` itself is preserved — we don't know whether the model + # meant "omit" or "empty list", and tools with sensible defaults + # (e.g. read_file's normalize_read_pagination) already handle it. + if expected == "array" and value is not None and not isinstance(value, (list, tuple)): + if isinstance(value, str): + coerced = _coerce_value(value, expected, schema=prop_schema) + if coerced is not value: + # _coerce_value handled it (JSON-parsed list or + # nullable "null" → None). + args[key] = coerced + continue + args[key] = [value] + logger.info( + "coerce_tool_args: wrapped bare string in list for %s.%s", + tool_name, key, + ) + continue + args[key] = [value] + logger.info( + "coerce_tool_args: wrapped bare %s in list for %s.%s", + type(value).__name__, tool_name, key, + ) + continue + + if not isinstance(value, str): + continue if not expected and not _schema_allows_null(prop_schema): continue coerced = _coerce_value(value, expected, schema=prop_schema) diff --git a/scripts/release.py b/scripts/release.py index cfafa36e2a6..bc2dc1d26d3 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -48,6 +48,7 @@ AUTHOR_MAP = { "127238744+teknium1@users.noreply.github.com": "teknium1", "159539633+MottledShadow@users.noreply.github.com": "MottledShadow", "aludwin+gh@gmail.com": "adamludwin", + "ngusev@astralinux.ru": "NikolayGusev-astra", "2093036+exiao@users.noreply.github.com": "exiao", "rylen.anil@gmail.com": "rylena", "godnanijatin@gmail.com": "jatingodnani", diff --git a/tests/run_agent/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py index a9d768bdcf1..d9ac5dd20fa 100644 --- a/tests/run_agent/test_tool_arg_coercion.py +++ b/tests/run_agent/test_tool_arg_coercion.py @@ -297,13 +297,69 @@ class TestCoerceToolArgs: result = coerce_tool_args("test_tool", args) assert result["stages"] is None - def test_invalid_json_array_preserved_as_string(self): - """If the string isn't valid JSON, pass it through — let the tool decide.""" + def test_invalid_json_array_wrapped_in_single_element_list(self): + """A bare string gets wrapped into ``[value]`` when the schema says array. + + Open-weight models (DeepSeek, Qwen, GLM) sometimes emit + ``{"urls": "https://a.com"}`` when the tool expects a list. + Wrapping produces a valid dispatch rather than a confusing tool + failure. This supersedes the earlier "pass the string through" + behavior — no real tool handles a bare string as an array + gracefully. + """ schema = self._mock_schema({"items": {"type": "array"}}) with patch("model_tools.registry.get_schema", return_value=schema): args = {"items": "not-json"} result = coerce_tool_args("test_tool", args) - assert result["items"] == "not-json" + assert result["items"] == ["not-json"] + + def test_bare_string_wrapped_as_array(self): + """Bare string on array field → single-element list.""" + schema = self._mock_schema({"urls": {"type": "array", "items": {"type": "string"}}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"urls": "https://a.com"} + result = coerce_tool_args("test_tool", args) + assert result["urls"] == ["https://a.com"] + + def test_bare_int_wrapped_as_array(self): + """Bare non-string scalars (int, bool, float) also get wrapped.""" + schema = self._mock_schema({"ids": {"type": "array", "items": {"type": "integer"}}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"ids": 5} + result = coerce_tool_args("test_tool", args) + assert result["ids"] == [5] + + def test_bare_dict_wrapped_as_array(self): + """Bare dict on array field → single-element list.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": {"a": 1}} + result = coerce_tool_args("test_tool", args) + assert result["items"] == [{"a": 1}] + + def test_none_on_array_field_preserved(self): + """``None`` is never wrapped — tools with defaults handle it.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": None} + result = coerce_tool_args("test_tool", args) + assert result["items"] is None + + def test_existing_list_passthrough(self): + """An already-valid list is not touched.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": ["a", "b"]} + result = coerce_tool_args("test_tool", args) + assert result["items"] == ["a", "b"] + + def test_json_encoded_array_still_parses(self): + """JSON-encoded strings still parse (not double-wrapped).""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": '["a","b"]'} + result = coerce_tool_args("test_tool", args) + assert result["items"] == ["a", "b"] def test_extra_args_without_schema_left_alone(self): """Args not in the schema properties are not touched.""" From ff3d2773e2a3aab49f282b9b075b2e0d07b18560 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 05:02:23 -0700 Subject: [PATCH 144/171] feat(kanban): auto-subscribe gateway chat on tool-driven kanban_create (#19718) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #19479. When an orchestrator agent calls kanban_create from a gateway session (e.g. a Telegram user delegating to an orchestrator profile), auto- subscribe the originating (platform, chat, thread, user) to the new task's terminal events. Mirrors the behavior of the /kanban create slash command in gateway/run.py so tool-driven creation is at parity with human-driven creation. Without this, a user who interacts with an orchestrator exclusively via the gateway never receives blocked / completed / gave_up notifications for tasks the orchestrator created on their behalf — silently breaking the gateway-first multi-agent flow the reporter describes. Reads the context-local HERMES_SESSION_* vars via get_session_env() (not os.environ — those are contextvars for asyncio concurrency safety). Falls through cleanly in CLI / cron contexts with no session active (subscribed=False in the response). Best-effort: if the gateway module isn't importable (test rigs stubbing gateway.*), the task still creates, we just skip the subscription. Response gains a 'subscribed' bool so the orchestrator knows whether terminal events will land back in the originating chat or whether it needs to poll / unblock manually. Tests: 4 new in tests/tools/test_kanban_tools.py covering CLI/no-subscribe, telegram/gateway-auto-subscribe, discord-DM/no- thread subscribe, and partial-ctx/no-chat_id no-subscribe. 40/40 kanban tool tests pass. --- tests/tools/test_kanban_tools.py | 100 +++++++++++++++++++++++++++++++ tools/kanban_tools.py | 30 ++++++++++ 2 files changed, 130 insertions(+) diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py index 9031d81d8eb..fdde48a2aa7 100644 --- a/tests/tools/test_kanban_tools.py +++ b/tests/tools/test_kanban_tools.py @@ -610,3 +610,103 @@ def test_orchestrator_complete_any_task_allowed(monkeypatch, tmp_path): out = kt._handle_complete({"task_id": tid, "summary": "orchestrator close"}) d = json.loads(out) assert d.get("ok") is True and d.get("task_id") == tid + + +# --------------------------------------------------------------------------- +# kanban_create auto-subscribe to gateway notifications (#19479) +# --------------------------------------------------------------------------- +# +# When an orchestrator agent (running under the gateway) calls kanban_create, +# the originating (platform, chat, thread) should be auto-subscribed to the +# new task's terminal events — matching the /kanban create slash-command +# behavior in gateway/run.py. In CLI / cron contexts (no session vars set), +# no subscription row is written. + + +def test_create_no_subscribe_in_cli_context(worker_env): + """Classic CLI: no gateway session vars -> no notify subscription.""" + from tools import kanban_tools as kt + from hermes_cli import kanban_db as kb + out = kt._handle_create({"title": "cli task", "assignee": "peer"}) + d = json.loads(out) + assert d.get("ok") is True + assert d.get("subscribed") is False + conn = kb.connect() + try: + assert kb.list_notify_subs(conn, d["task_id"]) == [] + finally: + conn.close() + + +def test_create_auto_subscribes_in_gateway_context(worker_env): + """Gateway session vars set -> auto-subscribe the originating source.""" + from gateway.session_context import set_session_vars, clear_session_vars + from tools import kanban_tools as kt + from hermes_cli import kanban_db as kb + + tokens = set_session_vars( + platform="telegram", + chat_id="1234567", + thread_id="42", + user_id="u_alice", + ) + try: + out = kt._handle_create({"title": "gateway task", "assignee": "peer"}) + d = json.loads(out) + assert d.get("ok") is True + assert d.get("subscribed") is True + conn = kb.connect() + try: + subs = kb.list_notify_subs(conn, d["task_id"]) + finally: + conn.close() + assert len(subs) == 1 + assert subs[0]["platform"] == "telegram" + assert subs[0]["chat_id"] == "1234567" + assert subs[0]["thread_id"] == "42" + assert subs[0]["user_id"] == "u_alice" + finally: + clear_session_vars(tokens) + + +def test_create_subscribe_without_thread_id(worker_env): + """DM / no-thread platforms subscribe without a thread_id.""" + from gateway.session_context import set_session_vars, clear_session_vars + from tools import kanban_tools as kt + from hermes_cli import kanban_db as kb + + tokens = set_session_vars(platform="discord", chat_id="ch_dm_789") + try: + out = kt._handle_create({"title": "dm task", "assignee": "peer"}) + d = json.loads(out) + assert d.get("subscribed") is True + conn = kb.connect() + try: + subs = kb.list_notify_subs(conn, d["task_id"]) + finally: + conn.close() + assert len(subs) == 1 + assert subs[0]["thread_id"] == "" + assert subs[0]["user_id"] is None + finally: + clear_session_vars(tokens) + + +def test_create_no_subscribe_when_chat_id_missing(worker_env): + """Partial gateway context (platform but no chat_id) -> no subscription.""" + from gateway.session_context import set_session_vars, clear_session_vars + from tools import kanban_tools as kt + from hermes_cli import kanban_db as kb + + tokens = set_session_vars(platform="telegram", chat_id="") + try: + out = kt._handle_create({"title": "partial ctx", "assignee": "peer"}) + d = json.loads(out) + assert d.get("subscribed") is False + conn = kb.connect() + try: + assert kb.list_notify_subs(conn, d["task_id"]) == [] + finally: + conn.close() + finally: + clear_session_vars(tokens) diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index 1f99f6896cb..baa5f2210a6 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -380,10 +380,40 @@ def _handle_create(args: dict, **kw) -> str: skills=skills, created_by=os.environ.get("HERMES_PROFILE") or "worker", ) + # Auto-subscribe the originating gateway source (if any) to the + # new task's terminal events. Mirrors the behavior of the + # `/kanban create` slash command in gateway/run.py so that + # tool-driven creation (orchestrator agents calling kanban_create) + # gets the same blocked/completed/gave_up notifications as human- + # driven creation. No-op in CLI / cron contexts where no gateway + # session context is active. See issue #19479. + subscribed = False + try: + from gateway.session_context import get_session_env + platform = get_session_env("HERMES_SESSION_PLATFORM") + chat_id = get_session_env("HERMES_SESSION_CHAT_ID") + thread_id = get_session_env("HERMES_SESSION_THREAD_ID") or None + user_id = get_session_env("HERMES_SESSION_USER_ID") or None + if platform and chat_id: + kb.add_notify_sub( + conn, + task_id=new_tid, + platform=platform, + chat_id=chat_id, + thread_id=thread_id, + user_id=user_id, + ) + subscribed = True + except Exception: + # Subscription is best-effort; don't fail the whole create + # if the gateway context module isn't importable (e.g. in + # test rigs that stub out gateway.*). + logger.debug("kanban_create notify-sub skipped", exc_info=True) new_task = kb.get_task(conn, new_tid) return _ok( task_id=new_tid, status=new_task.status if new_task else None, + subscribed=subscribed, ) finally: conn.close() From 25b7b0f8e6a359ba05e1e16fd2f74293daba6ea4 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 05:02:59 -0700 Subject: [PATCH 145/171] chore(release): AUTHOR_MAP entries for Tier 1f salvage batch --- scripts/release.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index bc2dc1d26d3..2edc78aed65 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -693,6 +693,18 @@ AUTHOR_MAP = { "xudavid429@gmail.com": "YX234", "kathy@Kathy.local": "julysir", "274902531@qq.com": "JanCong", + "225304168+e-shizz@users.noreply.github.com": "e-shizz", + "vincent_hh@users.noreply.github.com": "VinVC", + "1243352777@qq.com": "zons-zhaozhy", + "dejie.guo@gmail.com": "JayGwod", + "52840391+swithek@users.noreply.github.com": "swithek", + "raipratik0101@gmail.com": "PratikRai0101", + "code@sasha.id": "sasha-id", + "chen.yunbo@xydigit.com": "chenyunbo411", + "openclaw@local": "Asce66", + "59465365+0xsir0000@users.noreply.github.com": "0xsir0000", + "lisanhu2014@hotmail.com": "lisanhu", + "0668001438@zte.com.cn": "chenyunbo411", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", From 3fb35520c6f50626050f3cce16199984f1623004 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 4 May 2026 05:04:01 -0700 Subject: [PATCH 146/171] revert: auto-subscribe gateway chat on tool-driven kanban_create (#19718) (#19721) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts ff3d2773e2. Teknium reviewed the merged PR and decided this behavior isn't wanted — tool-driven kanban_create should not mirror the slash-command path's auto-subscribe. Orchestrators that want their originating chat notified can call kanban_notify-subscribe explicitly; we're not going to make it implicit. --- tests/tools/test_kanban_tools.py | 100 ------------------------------- tools/kanban_tools.py | 30 ---------- 2 files changed, 130 deletions(-) diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py index fdde48a2aa7..9031d81d8eb 100644 --- a/tests/tools/test_kanban_tools.py +++ b/tests/tools/test_kanban_tools.py @@ -610,103 +610,3 @@ def test_orchestrator_complete_any_task_allowed(monkeypatch, tmp_path): out = kt._handle_complete({"task_id": tid, "summary": "orchestrator close"}) d = json.loads(out) assert d.get("ok") is True and d.get("task_id") == tid - - -# --------------------------------------------------------------------------- -# kanban_create auto-subscribe to gateway notifications (#19479) -# --------------------------------------------------------------------------- -# -# When an orchestrator agent (running under the gateway) calls kanban_create, -# the originating (platform, chat, thread) should be auto-subscribed to the -# new task's terminal events — matching the /kanban create slash-command -# behavior in gateway/run.py. In CLI / cron contexts (no session vars set), -# no subscription row is written. - - -def test_create_no_subscribe_in_cli_context(worker_env): - """Classic CLI: no gateway session vars -> no notify subscription.""" - from tools import kanban_tools as kt - from hermes_cli import kanban_db as kb - out = kt._handle_create({"title": "cli task", "assignee": "peer"}) - d = json.loads(out) - assert d.get("ok") is True - assert d.get("subscribed") is False - conn = kb.connect() - try: - assert kb.list_notify_subs(conn, d["task_id"]) == [] - finally: - conn.close() - - -def test_create_auto_subscribes_in_gateway_context(worker_env): - """Gateway session vars set -> auto-subscribe the originating source.""" - from gateway.session_context import set_session_vars, clear_session_vars - from tools import kanban_tools as kt - from hermes_cli import kanban_db as kb - - tokens = set_session_vars( - platform="telegram", - chat_id="1234567", - thread_id="42", - user_id="u_alice", - ) - try: - out = kt._handle_create({"title": "gateway task", "assignee": "peer"}) - d = json.loads(out) - assert d.get("ok") is True - assert d.get("subscribed") is True - conn = kb.connect() - try: - subs = kb.list_notify_subs(conn, d["task_id"]) - finally: - conn.close() - assert len(subs) == 1 - assert subs[0]["platform"] == "telegram" - assert subs[0]["chat_id"] == "1234567" - assert subs[0]["thread_id"] == "42" - assert subs[0]["user_id"] == "u_alice" - finally: - clear_session_vars(tokens) - - -def test_create_subscribe_without_thread_id(worker_env): - """DM / no-thread platforms subscribe without a thread_id.""" - from gateway.session_context import set_session_vars, clear_session_vars - from tools import kanban_tools as kt - from hermes_cli import kanban_db as kb - - tokens = set_session_vars(platform="discord", chat_id="ch_dm_789") - try: - out = kt._handle_create({"title": "dm task", "assignee": "peer"}) - d = json.loads(out) - assert d.get("subscribed") is True - conn = kb.connect() - try: - subs = kb.list_notify_subs(conn, d["task_id"]) - finally: - conn.close() - assert len(subs) == 1 - assert subs[0]["thread_id"] == "" - assert subs[0]["user_id"] is None - finally: - clear_session_vars(tokens) - - -def test_create_no_subscribe_when_chat_id_missing(worker_env): - """Partial gateway context (platform but no chat_id) -> no subscription.""" - from gateway.session_context import set_session_vars, clear_session_vars - from tools import kanban_tools as kt - from hermes_cli import kanban_db as kb - - tokens = set_session_vars(platform="telegram", chat_id="") - try: - out = kt._handle_create({"title": "partial ctx", "assignee": "peer"}) - d = json.loads(out) - assert d.get("subscribed") is False - conn = kb.connect() - try: - assert kb.list_notify_subs(conn, d["task_id"]) == [] - finally: - conn.close() - finally: - clear_session_vars(tokens) diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index baa5f2210a6..1f99f6896cb 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -380,40 +380,10 @@ def _handle_create(args: dict, **kw) -> str: skills=skills, created_by=os.environ.get("HERMES_PROFILE") or "worker", ) - # Auto-subscribe the originating gateway source (if any) to the - # new task's terminal events. Mirrors the behavior of the - # `/kanban create` slash command in gateway/run.py so that - # tool-driven creation (orchestrator agents calling kanban_create) - # gets the same blocked/completed/gave_up notifications as human- - # driven creation. No-op in CLI / cron contexts where no gateway - # session context is active. See issue #19479. - subscribed = False - try: - from gateway.session_context import get_session_env - platform = get_session_env("HERMES_SESSION_PLATFORM") - chat_id = get_session_env("HERMES_SESSION_CHAT_ID") - thread_id = get_session_env("HERMES_SESSION_THREAD_ID") or None - user_id = get_session_env("HERMES_SESSION_USER_ID") or None - if platform and chat_id: - kb.add_notify_sub( - conn, - task_id=new_tid, - platform=platform, - chat_id=chat_id, - thread_id=thread_id, - user_id=user_id, - ) - subscribed = True - except Exception: - # Subscription is best-effort; don't fail the whole create - # if the gateway context module isn't importable (e.g. in - # test rigs that stub out gateway.*). - logger.debug("kanban_create notify-sub skipped", exc_info=True) new_task = kb.get_task(conn, new_tid) return _ok( task_id=new_tid, status=new_task.status if new_task else None, - subscribed=subscribed, ) finally: conn.close() From 026a5e47df53ed84c2b6d3573d605fe7a93b8611 Mon Sep 17 00:00:00 2001 From: giwaov <giwavictor9@gmail.com> Date: Mon, 27 Apr 2026 13:45:35 +0100 Subject: [PATCH 147/171] fix(cli): preserve Windows hidden-dir paths in markdown --- cli.py | 23 +++++++++++++++++++++++ tests/cli/test_cli_markdown_rendering.py | 17 +++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/cli.py b/cli.py index e8c804a9e89..3b9f6af5311 100644 --- a/cli.py +++ b/cli.py @@ -1226,6 +1226,28 @@ def _strip_markdown_syntax(text: str) -> str: return plain.strip("\n") +_WINDOWS_PATH_WITH_DOT_SEGMENT_RE = re.compile( + r"(?i)(?:\b[a-z]:\\|\\\\)[^\s`]*\\\.[^\s`]*" +) + + +def _preserve_windows_dot_segments_for_markdown(text: str) -> str: + r"""Keep Windows path separators before hidden directories in Markdown. + + CommonMark treats ``\.`` as an escaped literal dot, so Rich Markdown would + render ``D:\repo\.ai`` as ``D:\repo.ai``. Doubling only that separator + inside Windows path-looking tokens preserves the path without changing + ordinary markdown escapes like ``1\. not a list``. + """ + if "\\." not in text: + return text + + def _protect(match: re.Match[str]) -> str: + return re.sub(r"(?<!\\)\\(?=\.)", r"\\\\", match.group(0)) + + return _WINDOWS_PATH_WITH_DOT_SEGMENT_RE.sub(_protect, text) + + def _render_final_assistant_content(text: str, mode: str = "render"): """Render final assistant content as markdown, stripped text, or raw text.""" from rich.markdown import Markdown @@ -1237,6 +1259,7 @@ def _render_final_assistant_content(text: str, mode: str = "render"): return _rich_text_from_ansi(text or "") plain = _rich_text_from_ansi(text or "").plain + plain = _preserve_windows_dot_segments_for_markdown(plain) return Markdown(plain) diff --git a/tests/cli/test_cli_markdown_rendering.py b/tests/cli/test_cli_markdown_rendering.py index 01f0bab6c64..032c8875b3a 100644 --- a/tests/cli/test_cli_markdown_rendering.py +++ b/tests/cli/test_cli_markdown_rendering.py @@ -22,6 +22,23 @@ def test_final_assistant_content_uses_markdown_renderable(): assert "two" in output +def test_final_assistant_content_preserves_windows_hidden_dir_paths(): + renderable = _render_final_assistant_content( + r"D:\Projects\SourceCode\hermes-agent\.ai\skills" + "\\" + ) + + output = _render_to_text(renderable) + assert r"D:\Projects\SourceCode\hermes-agent\.ai\skills" + "\\" in output + + +def test_final_assistant_content_keeps_non_path_markdown_escapes(): + renderable = _render_final_assistant_content(r"1\. Not an ordered list") + + output = _render_to_text(renderable) + assert "1. Not an ordered list" in output + assert r"1\." not in output + + def test_final_assistant_content_strips_ansi_before_markdown_rendering(): renderable = _render_final_assistant_content("\x1b[31m# Title\x1b[0m") From d29f90e89d0263d390a71b359e1afa4f5a91e1e9 Mon Sep 17 00:00:00 2001 From: Dejie Guo <dejie.guo@gmail.com> Date: Mon, 27 Apr 2026 12:09:53 +0800 Subject: [PATCH 148/171] fix(error_classifier): avoid large-context false overflow heuristics Generic 400 and server-disconnect heuristics used absolute token/message-count fallbacks that are too aggressive for 1M context sessions. Gate those absolute fallbacks to smaller context windows while preserving relative pressure checks. Fixes #16351 --- agent/error_classifier.py | 14 ++++++++++-- tests/agent/test_error_classifier.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 86e99ec1ac5..67feaa4304f 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -520,7 +520,12 @@ def classify_api_error( is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS) if is_disconnect and not status_code: - is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200 + # Absolute token/message-count thresholds are only a proxy for smaller + # context windows. Large-context sessions can have hundreds of + # messages while still being far below their actual token budget. + is_large = approx_tokens > context_length * 0.6 or ( + context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200) + ) if is_large: return _result( FailoverReason.context_overflow, @@ -766,7 +771,12 @@ def _classify_400( if not err_body_msg: err_body_msg = str(body.get("message") or "").strip().lower() is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "") - is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80 + # Absolute token/message-count thresholds are only a proxy for smaller + # context windows. Large-context sessions can have many messages while + # still being far below their actual token budget. + is_large = approx_tokens > context_length * 0.4 or ( + context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80) + ) if is_generic and is_large: return result_fn( diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index 9d52c7bdf28..5a287973490 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -410,6 +410,24 @@ class TestClassifyApiError: result = classify_api_error(e, approx_tokens=1000, context_length=200000) assert result.reason == FailoverReason.format_error + def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self): + """Large-context sessions should not overflow solely due to message count.""" + e = MockAPIError( + "Error", + status_code=400, + body={"error": {"message": "Error"}}, + ) + result = classify_api_error( + e, + provider="openai-codex", + model="gpt-5.5", + approx_tokens=74320, + context_length=1_000_000, + num_messages=432, + ) + assert result.reason == FailoverReason.format_error + assert result.should_compress is False + # ── Server disconnect + large session ── def test_disconnect_large_session_context_overflow(self): @@ -425,6 +443,20 @@ class TestClassifyApiError: result = classify_api_error(e, approx_tokens=5000, context_length=200000) assert result.reason == FailoverReason.timeout + def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self): + """Large-context disconnects should not overflow solely due to message count.""" + e = Exception("server disconnected without sending complete message") + result = classify_api_error( + e, + provider="openai-codex", + model="gpt-5.5", + approx_tokens=74320, + context_length=1_000_000, + num_messages=432, + ) + assert result.reason == FailoverReason.timeout + assert result.should_compress is False + # ── Provider-specific: Anthropic thinking signature ── def test_anthropic_thinking_signature(self): From b7bbc62503d54cd95de413df7cda2e802fec0206 Mon Sep 17 00:00:00 2001 From: swithek <52840391+swithek@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:50:40 +0200 Subject: [PATCH 149/171] fix(compressor): _prune_old_tool_results boundary direction --- agent/context_compressor.py | 11 ++++++- tests/agent/test_context_compressor.py | 41 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 44d54d530c3..69151a117a8 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -554,7 +554,16 @@ class ContextCompressor(ContextEngine): break accumulated += msg_tokens boundary = i - prune_boundary = max(boundary, len(result) - min_protect) + # Translate the budget walk into a "protected count", apply the + # floor in count-space (where `max` reads naturally: protect at + # least `min_protect` messages or whatever the budget reserved, + # whichever is more), then convert back to a prune boundary. + # Doing this in index-space with `max` would invert the direction + # (smaller index = MORE protected), so a generous budget would + # silently get truncated back down to `min_protect`. + budget_protect_count = len(result) - boundary + protected_count = max(budget_protect_count, min_protect) + prune_boundary = len(result) - protected_count else: prune_boundary = len(result) - protect_tail_count diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 8d1de377b0e..fd88cc7a96e 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1281,6 +1281,47 @@ class TestTokenBudgetTailProtection: assert isinstance(cut, int) assert 0 <= cut <= len(messages) + def test_generous_budget_protects_everything_floor_does_not_override( + self, budget_compressor + ): + """A budget that covers the whole transcript must prune nothing — + ``protect_tail_count`` is a minimum floor, not a ceiling.""" + c = budget_compressor + + # 100 alternating assistant/tool messages. Each tool result has + # *unique* content so the dedup pass (Pass 1, which is independent + # of prune_boundary) is a no-op and we isolate the boundary logic. + messages = [] + for i in range(50): + messages.append({ + "role": "assistant", "content": None, + "tool_calls": [{ + "id": f"c{i}", + "type": "function", + "function": {"name": "noop", "arguments": "{}"}, + }], + }) + messages.append({ + "role": "tool", + "tool_call_id": f"c{i}", + "content": f"unique-tool-output-{i:03d}-" + ("x" * 250), + }) + + # Budget large enough to cover the whole transcript many times over, + # so the budget walk completes without hitting its break condition + # and the boundary lands at 0 ("protect everything"). + _, pruned = c._prune_old_tool_results( + messages, + protect_tail_count=20, + protect_tail_tokens=10_000_000, + ) + + assert pruned == 0, ( + "budget said protect everything, but the floor still pruned " + f"{pruned} messages — protect_tail_count is acting as a ceiling, " + "not a minimum floor" + ) + class TestUpdateModelBudgets: """Regression: update_model() must recalculate token budgets.""" From 6cf7a9e330cadabd2a0b7ae21f25dc400fc3aa63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E8=BF=90=E6=B3=A20668001438?= <chen.yunbo@xydigit.com> Date: Mon, 27 Apr 2026 10:43:52 +0800 Subject: [PATCH 150/171] fix(vision): preserve explicit provider auth with custom base_url Keep the configured vision provider when base_url is overridden so credential-pool lookup still resolves provider-specific API keys (e.g. ZAI_API_KEY), and add a regression test for this path. --- agent/auxiliary_client.py | 9 +++-- tests/agent/test_vision_resolved_args.py | 44 ++++++++++++++++++------ 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 0c688d23dca..4c706748a0b 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -2648,8 +2648,11 @@ def resolve_vision_provider_client( return resolved_provider, sync_client, final_model if resolved_base_url: + provider_for_base_override = ( + requested if requested and requested not in ("", "auto") else "custom" + ) client, final_model = resolve_provider_client( - "custom", + provider_for_base_override, model=resolved_model, async_mode=async_mode, explicit_base_url=resolved_base_url, @@ -2657,8 +2660,8 @@ def resolve_vision_provider_client( api_mode=resolved_api_mode, ) if client is None: - return "custom", None, None - return "custom", client, final_model + return provider_for_base_override, None, None + return provider_for_base_override, client, final_model if requested == "auto": # Vision auto-detection order: diff --git a/tests/agent/test_vision_resolved_args.py b/tests/agent/test_vision_resolved_args.py index aace4357849..6558effadda 100644 --- a/tests/agent/test_vision_resolved_args.py +++ b/tests/agent/test_vision_resolved_args.py @@ -13,16 +13,13 @@ def test_vision_call_uses_resolved_provider_args(): usage=MagicMock(prompt_tokens=10, completion_tokens=5), ) - with ( - patch( - "agent.auxiliary_client._resolve_task_provider_model", - return_value=("my-resolved-provider", "my-resolved-model", "http://resolved", "resolved-key", "chat_completions"), - ), - patch( - "agent.auxiliary_client.resolve_vision_provider_client", - return_value=("my-resolved-provider", fake_client, "my-resolved-model"), - ) as mock_vision, - ): + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("my-resolved-provider", "my-resolved-model", "http://resolved", "resolved-key", "chat_completions"), + ), patch( + "agent.auxiliary_client.resolve_vision_provider_client", + return_value=("my-resolved-provider", fake_client, "my-resolved-model"), + ) as mock_vision: call_llm( "vision", provider="raw-provider", @@ -38,3 +35,30 @@ def test_vision_call_uses_resolved_provider_args(): assert call_args.kwargs["model"] == "my-resolved-model" assert call_args.kwargs["base_url"] == "http://resolved" assert call_args.kwargs["api_key"] == "resolved-key" + + +def test_vision_base_url_override_keeps_explicit_provider(): + """Explicit provider should still drive credential resolution with custom base_url.""" + from agent.auxiliary_client import resolve_vision_provider_client + + fake_client = MagicMock() + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=( + "zai", + "glm-4v", + "https://open.bigmodel.cn/api/paas/v4", + None, + "chat_completions", + ), + ), patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(fake_client, "glm-4v"), + ) as mock_resolve: + provider, client, model = resolve_vision_provider_client() + + assert provider == "zai" + assert client is fake_client + assert model == "glm-4v" + assert mock_resolve.call_args.args[0] == "zai" + assert mock_resolve.call_args.kwargs["explicit_base_url"] == "https://open.bigmodel.cn/api/paas/v4" From 0443484115fb6f3a664defd3969ec6206786d625 Mon Sep 17 00:00:00 2001 From: OpenClaw Bot <openclaw@local> Date: Sun, 26 Apr 2026 15:19:48 +0800 Subject: [PATCH 151/171] fix(qqbot): honor proxy env vars for websocket --- gateway/platforms/qqbot/adapter.py | 13 ++++++++- tests/gateway/test_qqbot.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index c6e5d428c6e..f8d7aed7872 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -397,13 +397,24 @@ class QQAdapter(BasePlatformAdapter): await self._session.close() self._session = None - self._session = aiohttp.ClientSession() + # Honor WSL proxy env for QQ WebSocket. Hermes upgrades overwrite this + # local patch, so QQ can regress to direct-connect timeouts after update. + self._session = aiohttp.ClientSession(trust_env=True) + ws_proxy = ( + os.getenv("WSS_PROXY") + or os.getenv("wss_proxy") + or os.getenv("HTTPS_PROXY") + or os.getenv("https_proxy") + or os.getenv("ALL_PROXY") + or os.getenv("all_proxy") + ) self._ws = await self._session.ws_connect( gateway_url, headers={ "User-Agent": build_user_agent(), }, timeout=CONNECT_TIMEOUT_SECONDS, + proxy=ws_proxy, ) logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url) diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py index a5aeb62516a..a01bb946ad0 100644 --- a/tests/gateway/test_qqbot.py +++ b/tests/gateway/test_qqbot.py @@ -191,6 +191,50 @@ class TestVoiceAttachmentSSRFProtection: assert kwargs.get("follow_redirects") is True assert kwargs.get("event_hooks", {}).get("response") == [_ssrf_redirect_guard] + +# --------------------------------------------------------------------------- +# WebSocket proxy handling +# --------------------------------------------------------------------------- + +class TestQQWebSocketProxy: + @pytest.mark.asyncio + async def test_open_ws_honors_proxy_env(self, monkeypatch): + from gateway.platforms.qqbot import QQAdapter + + for key in ( + "WSS_PROXY", + "wss_proxy", + "HTTPS_PROXY", + "https_proxy", + "ALL_PROXY", + "all_proxy", + ): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://127.0.0.1:7897") + + adapter = QQAdapter(_make_config(app_id="a", client_secret="b")) + + seen_session_kwargs = {} + seen_ws_kwargs = {} + + class FakeSession: + def __init__(self, **kwargs): + seen_session_kwargs.update(kwargs) + self.closed = False + + async def close(self): + self.closed = True + + async def ws_connect(self, *args, **kwargs): + seen_ws_kwargs.update(kwargs) + return mock.AsyncMock(closed=False) + + with mock.patch("gateway.platforms.qqbot.adapter.aiohttp.ClientSession", side_effect=FakeSession): + await adapter._open_ws("wss://api.sgroup.qq.com/websocket") + + assert seen_session_kwargs.get("trust_env") is True + assert seen_ws_kwargs.get("proxy") == "http://127.0.0.1:7897" + # --------------------------------------------------------------------------- # _strip_at_mention # --------------------------------------------------------------------------- From 52882dade6f96bf88f37535925dbaeed8616cbe7 Mon Sep 17 00:00:00 2001 From: 0xsir0000 <59465365+0xsir0000@users.noreply.github.com> Date: Mon, 27 Apr 2026 17:51:11 +0800 Subject: [PATCH 152/171] fix(agent): include name field on every role:tool message for Gemini compatibility (#16478) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gemini's OpenAI-compatibility endpoint strictly requires the `name` field on `role: tool` messages — it returns HTTP 400 ("Request contains an invalid argument") when the function name is missing. OpenAI/Anthropic/ ollama tolerate the absence, so the gap stays invisible until the conversation accumulates a tool turn and the user routes it through Gemini (direct API or via ollama-cloud proxy). Fix: add a `_get_tool_call_name_static()` helper alongside the existing `_get_tool_call_id_static()`, and populate `name` at every site that constructs a `role: tool` message — the pre-call sanitizer stub, the tool-call args repair marker, both interrupt-skip paths, both result-append paths (parallel + sequential), the invalid-tool-name recovery, the invalid-JSON-args recovery, and the exception fallback. Each call site was already in scope of the function name (`function_name`, `skipped_name`, `name`, or a dict tool_call), so the change is local — no new lookups, no behavior change for providers that already worked. Fixes #16478 --- run_agent.py | 27 ++++++++++++++++ tests/run_agent/test_agent_guardrails.py | 31 +++++++++++++++++++ .../test_tool_call_args_sanitizer.py | 1 + 3 files changed, 59 insertions(+) diff --git a/run_agent.py b/run_agent.py index 17b8b01db1f..c8388bd0ae2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5064,6 +5064,23 @@ class AIAgent: return tc.get("call_id", "") or tc.get("id", "") or "" return getattr(tc, "call_id", "") or getattr(tc, "id", "") or "" + @staticmethod + def _get_tool_call_name_static(tc) -> str: + """Extract function name from a tool_call entry (dict or object). + + Gemini's OpenAI-compatibility endpoint requires every `role: tool` + message to carry the matching function name. OpenAI/Anthropic/ollama + tolerate its absence, so the field is best-effort: callers fall back + to "" and the message still works elsewhere. + """ + if isinstance(tc, dict): + fn = tc.get("function") + if isinstance(fn, dict): + return fn.get("name", "") or "" + return "" + fn = getattr(tc, "function", None) + return getattr(fn, "name", "") or "" + _VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"}) @staticmethod @@ -5126,6 +5143,7 @@ class AIAgent: if cid in missing_results: patched.append({ "role": "tool", + "name": AIAgent._get_tool_call_name_static(tc), "content": "[Result unavailable — see context summary above]", "tool_call_id": cid, }) @@ -9030,6 +9048,7 @@ class AIAgent: insert_at, { "role": "tool", + "name": function_name if function_name != "?" else "", "tool_call_id": tool_call_id, "content": marker, }, @@ -9434,6 +9453,7 @@ class AIAgent: for tc in tool_calls: messages.append({ "role": "tool", + "name": tc.function.name, "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]", "tool_call_id": tc.id, }) @@ -9775,6 +9795,7 @@ class AIAgent: tool_msg = { "role": "tool", + "name": name, "content": function_result, "tool_call_id": tc.id, } @@ -9812,6 +9833,7 @@ class AIAgent: skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", + "name": skipped_name, "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]", "tool_call_id": skipped_tc.id, } @@ -10162,6 +10184,7 @@ class AIAgent: tool_msg = { "role": "tool", + "name": function_name, "content": function_result, "tool_call_id": tool_call.id } @@ -10188,6 +10211,7 @@ class AIAgent: skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", + "name": skipped_name, "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]", "tool_call_id": skipped_tc.id } @@ -13110,6 +13134,7 @@ class AIAgent: content = "Skipped: another tool call in this turn used an invalid name. Please retry this tool call." messages.append({ "role": "tool", + "name": tc.function.name, "tool_call_id": tc.id, "content": content, }) @@ -13201,6 +13226,7 @@ class AIAgent: tool_result = "Skipped: other tool call in this response had invalid JSON." messages.append({ "role": "tool", + "name": tc.function.name, "tool_call_id": tc.id, "content": tool_result, }) @@ -13717,6 +13743,7 @@ class AIAgent: if tc["id"] not in answered_ids: err_msg = { "role": "tool", + "name": AIAgent._get_tool_call_name_static(tc), "tool_call_id": tc["id"], "content": f"Error executing tool: {error_msg}", } diff --git a/tests/run_agent/test_agent_guardrails.py b/tests/run_agent/test_agent_guardrails.py index 032057d59f1..b222b3320e2 100644 --- a/tests/run_agent/test_agent_guardrails.py +++ b/tests/run_agent/test_agent_guardrails.py @@ -263,3 +263,34 @@ class TestGetToolCallIdStatic: def test_object_without_id_attr(self): tc = types.SimpleNamespace() assert AIAgent._get_tool_call_id_static(tc) == "" + + +# --------------------------------------------------------------------------- +# _get_tool_call_name_static +# --------------------------------------------------------------------------- + +class TestGetToolCallNameStatic: + + def test_dict_with_valid_name(self): + assert AIAgent._get_tool_call_name_static( + {"id": "call_1", "function": {"name": "terminal", "arguments": "{}"}} + ) == "terminal" + + def test_dict_with_missing_function(self): + assert AIAgent._get_tool_call_name_static({"id": "call_1"}) == "" + + def test_dict_with_none_function(self): + assert AIAgent._get_tool_call_name_static({"id": "call_1", "function": None}) == "" + + def test_dict_with_none_name(self): + assert AIAgent._get_tool_call_name_static( + {"function": {"name": None, "arguments": "{}"}} + ) == "" + + def test_object_with_valid_name(self): + tc = make_tc("read_file") + assert AIAgent._get_tool_call_name_static(tc) == "read_file" + + def test_object_without_function_attr(self): + tc = types.SimpleNamespace(id="call_1") + assert AIAgent._get_tool_call_name_static(tc) == "" diff --git a/tests/run_agent/test_tool_call_args_sanitizer.py b/tests/run_agent/test_tool_call_args_sanitizer.py index 79f4d82c5a1..57ba9839fac 100644 --- a/tests/run_agent/test_tool_call_args_sanitizer.py +++ b/tests/run_agent/test_tool_call_args_sanitizer.py @@ -96,6 +96,7 @@ def test_marker_message_inserted_when_missing(): assert repaired == 1 assert messages[1] == { "role": "tool", + "name": "read_file", "tool_call_id": "call_1", "content": marker, } From ef8c213e880858dc18af1141d14e9f409b19b1d4 Mon Sep 17 00:00:00 2001 From: Sanhu Li <lisanhu2014@hotmail.com> Date: Sun, 26 Apr 2026 16:02:03 +0800 Subject: [PATCH 153/171] fix(model-switch): soft-accept unlisted openai-codex models --- hermes_cli/models.py | 7 ++- ..._openai_codex_model_validation_fallback.py | 55 +++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 tests/hermes_cli/test_openai_codex_model_validation_fallback.py diff --git a/hermes_cli/models.py b/hermes_cli/models.py index d7bae9ab09e..984685e6c3e 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -3185,11 +3185,12 @@ def validate_requested_model( if suggestions: suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Model `{requested}` was not found in the OpenAI Codex model listing." + f"Note: `{requested}` was not found in the OpenAI Codex model listing. " + "It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID." f"{suggestion_text}" ), } diff --git a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py new file mode 100644 index 00000000000..e33dbe2ba44 --- /dev/null +++ b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py @@ -0,0 +1,55 @@ +"""Regression tests for OpenAI Codex model validation when the listing lags behind +actually usable backend model IDs. + +The bug: `/model` and `switch_model()` reject `gpt-5.3-codex-spark` because the +OpenAI Codex listing omits it, even though direct runtime calls with +`--provider openai-codex -m gpt-5.3-codex-spark` succeed. +""" + +from unittest.mock import patch + +from hermes_cli.model_switch import switch_model +from hermes_cli.models import validate_requested_model + + +def test_openai_codex_unknown_but_plausible_model_is_accepted_with_warning(): + """If the Codex listing is incomplete, `/model` should soft-accept the model + with a warning instead of hard-rejecting it. + """ + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"], + ): + result = validate_requested_model("gpt-5.3-codex-spark", "openai-codex") + + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "gpt-5.3-codex-spark" in result["message"] + assert "OpenAI Codex model listing" in result["message"] + assert "Similar models" in result["message"] + assert "gpt-5.3-codex" in result["message"] + + +def test_switch_model_allows_openai_codex_model_missing_from_listing(): + """switch_model() should succeed for Codex models that the runtime accepts + even when the listing has not caught up yet. + """ + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"], + ): + result = switch_model( + "gpt-5.3-codex-spark", + current_provider="openai-codex", + current_model="gpt-5.4", + current_base_url="", + current_api_key="", + user_providers=None, + ) + + assert result.success is True + assert result.new_model == "gpt-5.3-codex-spark" + assert result.target_provider == "openai-codex" + assert result.warning_message + assert "OpenAI Codex model listing" in result.warning_message From b46b0c98885c78c171d8bd52aee5dd28e082acec Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Mon, 27 Apr 2026 06:18:33 -0700 Subject: [PATCH 154/171] fix(backup): floor pre-update backup_keep to 1 so the new backup survives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `updates.backup_keep: 0` (or any negative value) wiped the freshly- created pre-update zip: _prune_pre_update_backups(backup_dir, keep=0): backups = sorted(..., reverse=True) # newest first, includes # the zip we just wrote for p in backups[0:]: # = all of them p.unlink() The wrapper in `main.py` then printed `Saved: <path>` for a file that no longer existed (the size lookup is wrapped in `try/except OSError` which silently degrades to "0 B"), leaving operators believing they had a recovery point when they had none. This is a real footgun because some config systems treat 0 as "keep unlimited"; here it does the opposite — every backup is destroyed right after creation. Fix: clamp `keep` to a minimum of 1 inside `_prune_pre_update_backups` since that helper is only invoked immediately after a fresh backup is written. Operators who genuinely want no backups should set `updates.pre_update_backup: false` (which gates creation entirely) rather than relying on `backup_keep: 0`. Also extends the `backup_keep` config docstring to spell out the floor and point at `pre_update_backup: false` as the off-switch. ## Tests Three regression tests added in `TestPreUpdateBackup`: - `test_keep_zero_does_not_delete_freshly_created_backup` — asserts the file persists after `keep=0` - `test_keep_negative_does_not_delete_freshly_created_backup` — same for negative values - `test_keep_zero_still_prunes_older_backups` — proves the floor only protects the new backup; older ones are still rotated out Verified the new tests fail on origin/main (without the floor) and pass with it; full `tests/hermes_cli/test_backup.py` suite green (84 tests). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- hermes_cli/backup.py | 12 +++++++-- hermes_cli/config.py | 5 +++- tests/hermes_cli/test_backup.py | 47 +++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 20ddb3c87d4..dce199a5ab4 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -793,9 +793,17 @@ def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int: Returns the number of files deleted. Only touches files matching ``pre-update-*.zip`` so hand-made zips dropped in the same directory are never touched. + + ``keep`` is floored to 1 because this helper is only called immediately + after a fresh backup is written: deleting that backup right after the + user paid the disk/CPU cost to create it would leave them worse off + than no backup at all (and the wrapper in ``main.py`` would still print + a misleading ``Saved: <path>`` line for a file that no longer exists). + Operators who genuinely don't want a backup should set + ``updates.pre_update_backup: false`` in config — that gates creation. """ - if keep < 0: - keep = 0 + if keep < 1: + keep = 1 if not backup_dir.exists(): return 0 diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 98317a9043f..0f34d985280 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1286,7 +1286,10 @@ DEFAULT_CONFIG = { # for a single update run. "pre_update_backup": False, # How many pre-update backup zips to retain. Older ones are pruned - # automatically after each successful backup. + # automatically after each successful backup. Values below 1 are + # floored to 1 — the backup just created is always preserved. To + # disable backups entirely, set ``pre_update_backup: false`` above + # rather than ``backup_keep: 0``. "backup_keep": 5, }, diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py index 9a99a035faa..ab7ba21370a 100644 --- a/tests/hermes_cli/test_backup.py +++ b/tests/hermes_cli/test_backup.py @@ -1374,6 +1374,53 @@ class TestPreUpdateBackup: from hermes_cli.backup import create_pre_update_backup assert create_pre_update_backup(hermes_home=tmp_path / "does-not-exist") is None + def test_keep_zero_does_not_delete_freshly_created_backup(self, hermes_home): + """Regression: ``backup_keep: 0`` previously triggered ``backups[0:]`` + in the pruner — wiping the just-created zip and leaving the user + with no recovery point. The floor (keep>=1) preserves the new file + regardless of misconfiguration; users who don't want backups should + set ``pre_update_backup: false`` instead. + """ + from hermes_cli.backup import create_pre_update_backup + out = create_pre_update_backup(hermes_home=hermes_home, keep=0) + assert out is not None + assert out.exists(), ( + "keep=0 silently deleted the freshly-created backup; floor " + "should preserve the just-written file." + ) + + def test_keep_negative_does_not_delete_freshly_created_backup(self, hermes_home): + """Mirror coverage: any value <1 should be floored, not literally + applied as a slice index.""" + from hermes_cli.backup import create_pre_update_backup + out = create_pre_update_backup(hermes_home=hermes_home, keep=-3) + assert out is not None + assert out.exists() + + def test_keep_zero_still_prunes_older_backups(self, hermes_home): + """The floor preserves the new backup but should NOT regress the + rotation behaviour for older zips: a third call with keep=0 must + still remove pre-existing backups beyond the (floored) limit of 1. + """ + import time as _t + from hermes_cli.backup import create_pre_update_backup + + first = create_pre_update_backup(hermes_home=hermes_home, keep=5) + _t.sleep(1.05) + second = create_pre_update_backup(hermes_home=hermes_home, keep=5) + _t.sleep(1.05) + third = create_pre_update_backup(hermes_home=hermes_home, keep=0) + + remaining = { + p.name for p in (hermes_home / "backups").iterdir() + if p.name.startswith("pre-update-") + } + assert third.name in remaining, "Floor must preserve the new backup" + assert first.name not in remaining and second.name not in remaining, ( + f"keep=0 floor of 1 should still prune older backups; " + f"remaining={remaining}" + ) + class TestRunPreUpdateBackup: """Tests for the ``_run_pre_update_backup`` wrapper in main.py — From 6b4ccb9b148573f0c9a675b9ed24528824b0d87f Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Sat, 25 Apr 2026 23:24:56 -0700 Subject: [PATCH 155/171] fix(session-search): report source from resolved parent, not FTS5 child session (#15909) When a delegation child session (e.g. source='telegram') contains the FTS5 hit but _resolve_to_parent() maps it to a different root session (source='api_server'), the result entry was still reporting the child's source because the loop discarded session_meta as `_` and fell back to match_info.get('source'), which carries the child session's value. Use the resolved parent's session_meta for source, model, and started_at with match_info as a fallback, so the output accurately reflects the session the user actually interacted with. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- tests/tools/test_session_search.py | 62 ++++++++++++++++++++++++++++++ tools/session_search_tool.py | 15 ++++++-- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index 304387e1fe5..468a492ad8e 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -498,3 +498,65 @@ class TestSessionSearch: assert result["count"] == 0 assert result["results"] == [] assert result["sessions_searched"] == 0 + + def test_source_from_resolved_parent_not_fts5_child(self): + """source in output must reflect the resolved parent session, not the child that matched FTS5. + + Regression test for #15909: when a delegation child session (source='telegram') + resolves to a parent (source='api_server'), the result entry must report + 'api_server', not 'telegram'. + """ + from unittest.mock import MagicMock, AsyncMock, patch as _patch + from tools.session_search_tool import session_search + + mock_db = MagicMock() + # FTS5 hit is in the child delegation session which carries source='telegram' + mock_db.search_messages.return_value = [ + { + "session_id": "child_sid", + "content": "hello world", + "source": "telegram", # child session source — wrong value to surface + "session_started": 1709400000, + "model": "gpt-4o-mini", + }, + ] + + def _get_session(session_id): + if session_id == "child_sid": + return { + "id": "child_sid", + "parent_session_id": "parent_sid", + "source": "telegram", + "started_at": 1709400000, + "model": "gpt-4o-mini", + } + if session_id == "parent_sid": + return { + "id": "parent_sid", + "parent_session_id": None, + "source": "api_server", # correct parent source + "started_at": 1709300000, + "model": "gpt-4o-mini", + } + return None + + mock_db.get_session.side_effect = _get_session + mock_db.get_messages_as_conversation.return_value = [ + {"role": "user", "content": "hello world"}, + {"role": "assistant", "content": "hi there"}, + ] + + with _patch( + "tools.session_search_tool.async_call_llm", + new_callable=AsyncMock, + side_effect=RuntimeError("no provider"), + ): + result = json.loads(session_search(query="hello world", db=mock_db)) + + assert result["success"] is True + assert result["count"] == 1 + entry = result["results"][0] + assert entry["session_id"] == "parent_sid", "should report resolved parent session ID" + assert entry["source"] == "api_server", ( + f"source should be parent's 'api_server', got {entry['source']!r}" + ) diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index c043ede6a78..efc450b322e 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -486,7 +486,7 @@ def session_search( }, ensure_ascii=False) summaries = [] - for (session_id, match_info, conversation_text, _), result in zip(tasks, results): + for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results): if isinstance(result, Exception): logging.warning( "Failed to summarize session %s: %s", @@ -494,11 +494,18 @@ def session_search( ) result = None + # Prefer resolved parent session metadata over FTS5 match metadata. + # match_info carries source/model from the *child* session that contained + # the FTS5 hit; after _resolve_to_parent() the session_id points to the + # root, so session_meta has the authoritative platform/source for the + # session the user actually cares about (#15909). entry = { "session_id": session_id, - "when": _format_timestamp(match_info.get("session_started")), - "source": match_info.get("source", "unknown"), - "model": match_info.get("model"), + "when": _format_timestamp( + session_meta.get("started_at") or match_info.get("session_started") + ), + "source": session_meta.get("source") or match_info.get("source", "unknown"), + "model": session_meta.get("model") or match_info.get("model"), } if result: From 0cc63043e085dc6c12bc80007b6e6e3fafb7b3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E6=B3=A5=E8=B1=86?= <1243352777@qq.com> Date: Sun, 26 Apr 2026 14:37:12 +0800 Subject: [PATCH 156/171] fix(delegation): increase heartbeat stale thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The heartbeat stale detection was too aggressive: - idle: 5 * 30s = 150s — LLM inference on slow providers (Zhipu/GLM) frequently exceeds 150s, causing heartbeat to stop prematurely - in-tool: 20 * 30s = 600s — borderline for long tool calls When heartbeat stops, parent._last_activity_ts freezes, eventually triggering gateway timeout and killing the entire delegation. New thresholds: - idle: 15 * 30s = 450s — accommodates slow LLM inference - in-tool: 40 * 30s = 1200s — accommodates long-running tool calls child_timeout_seconds (config: delegation.child_timeout_seconds) remains the hard cap for total delegation duration. --- tools/delegate_tool.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 56556316625..c288e7b28ab 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -483,8 +483,8 @@ _HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during de # The idle ceiling stays tight so genuinely stuck children don't mask the gateway # timeout. The in-tool ceiling is much higher so legit long-running tools get # time to finish; child_timeout_seconds (default 600s) is still the hard cap. -_HEARTBEAT_STALE_CYCLES_IDLE = 5 # 5 * 30s = 150s idle between turns → stale -_HEARTBEAT_STALE_CYCLES_IN_TOOL = 20 # 20 * 30s = 600s stuck on same tool → stale +_HEARTBEAT_STALE_CYCLES_IDLE = 15 # 15 * 30s = 450s idle between turns → stale +_HEARTBEAT_STALE_CYCLES_IN_TOOL = 40 # 40 * 30s = 1200s stuck on same tool → stale DEFAULT_TOOLSETS = ["terminal", "file", "web"] From 0e9416036aa4fe3a600a48aefe4212fa77191190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E6=B3=A5=E8=B1=86?= <1243352777@qq.com> Date: Sun, 26 Apr 2026 14:55:06 +0800 Subject: [PATCH 157/171] test: add unit tests for heartbeat stale threshold increase --- .../tools/test_heartbeat_stale_thresholds.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/tools/test_heartbeat_stale_thresholds.py diff --git a/tests/tools/test_heartbeat_stale_thresholds.py b/tests/tools/test_heartbeat_stale_thresholds.py new file mode 100644 index 00000000000..fb7db68efb9 --- /dev/null +++ b/tests/tools/test_heartbeat_stale_thresholds.py @@ -0,0 +1,35 @@ +"""Tests for delegate heartbeat stale threshold configuration.""" + +import pytest + + +class TestHeartbeatStaleThresholds: + """Verify the heartbeat stale threshold constants are correct.""" + + def test_idle_cycles_value(self): + """IDLE stale cycles should be 15 (15 * 30s = 450s).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IDLE + assert _HEARTBEAT_STALE_CYCLES_IDLE == 15 + + def test_in_tool_cycles_value(self): + """IN_TOOL stale cycles should be 40 (40 * 30s = 1200s).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IN_TOOL + assert _HEARTBEAT_STALE_CYCLES_IN_TOOL == 40 + + def test_idle_timeout_seconds(self): + """Effective idle stale timeout: 15 * 30 = 450s (> typical LLM response time).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IDLE, _HEARTBEAT_INTERVAL + effective = _HEARTBEAT_STALE_CYCLES_IDLE * _HEARTBEAT_INTERVAL + assert effective == 450 + assert effective > 300 # Must be > 5 minutes for slow LLM responses + + def test_in_tool_timeout_seconds(self): + """Effective in-tool stale timeout: 40 * 30 = 1200s (= 20 minutes).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IN_TOOL, _HEARTBEAT_INTERVAL + effective = _HEARTBEAT_STALE_CYCLES_IN_TOOL * _HEARTBEAT_INTERVAL + assert effective == 1200 + + def test_interval_unchanged(self): + """Heartbeat interval should remain 30s.""" + from tools.delegate_tool import _HEARTBEAT_INTERVAL + assert _HEARTBEAT_INTERVAL == 30 From 5d6431c11454bf9d5ef4973505dc7a35cb153c58 Mon Sep 17 00:00:00 2001 From: VinVC <vincent_hh@users.noreply.github.com> Date: Sun, 3 May 2026 10:49:34 +0800 Subject: [PATCH 158/171] fix(doctor): resolve merge conflicts, add kimi-coding-cn test - Rebased on upstream/main to resolve conflicts - Added test_run_doctor_accepts_kimi_coding_cn_provider test - All 30 tests pass --- tests/hermes_cli/test_doctor.py | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py index 4a5981c07a7..de80e240d1c 100644 --- a/tests/hermes_cli/test_doctor.py +++ b/tests/hermes_cli/test_doctor.py @@ -481,6 +481,46 @@ def test_run_doctor_accepts_hermes_provider_ids_that_catalog_aliases( ) + + +def test_run_doctor_accepts_kimi_coding_cn_provider(monkeypatch, tmp_path): + home = tmp_path / ".hermes" + home.mkdir(parents=True, exist_ok=True) + (home / ".env").write_text("KIMI_CN_API_KEY=***\n", encoding="utf-8") + (home / "config.yaml").write_text( + "model:\n" + " provider: kimi-coding-cn\n" + " default: kimi-k2.6\n", + encoding="utf-8", + ) + + monkeypatch.setattr(doctor_mod, "HERMES_HOME", home) + monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project") + monkeypatch.setattr(doctor_mod, "_DHH", str(home)) + (tmp_path / "project").mkdir(exist_ok=True) + + fake_model_tools = types.SimpleNamespace( + check_tool_availability=lambda *a, **kw: ([], []), + TOOLSET_REQUIREMENTS={}, + ) + monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools) + + try: + from hermes_cli import auth as _auth_mod + monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, "get_auth_status", lambda provider: {"logged_in": True}) + except Exception: + pass + + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + doctor_mod.run_doctor(Namespace(fix=False)) + + out = buf.getvalue() + assert "model.provider 'kimi-coding-cn' is not a recognised provider" not in out + + def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path): home = tmp_path / ".hermes" home.mkdir(parents=True, exist_ok=True) From 42d72b59223dca923f8bbc4c723c54837b282fc8 Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Mon, 4 May 2026 05:13:46 -0700 Subject: [PATCH 159/171] fix(status): add missing popular provider API keys to hermes status display Closes #16082. `hermes status` silently omitted four widely-used LLM providers (Google/Gemini, DeepSeek, xAI/Grok, NVIDIA NIM) from the API Keys and API-Key Providers sections. Add them, along with tuple-valued env var support (first found wins) so Google can accept either GOOGLE_API_KEY or GEMINI_API_KEY. Also deduplicates the "NVIDIA" and "NVIDIA NIM" rows that were both pointing at NVIDIA_API_KEY. Salvage of #16159 (core behavior preserved + NVIDIA dedup fixup on top of the tuple-support refactor). Co-authored-by: briandevans <252620095+briandevans@users.noreply.github.com> --- hermes_cli/status.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 3a4219fd4b5..9a40c8d9b78 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -122,11 +122,16 @@ def show_status(args): print() print(color("◆ API Keys", Colors.CYAN, Colors.BOLD)) - keys = { + # Values may be a single env var name (str) or a tuple of alternates (first found wins). + keys: dict[str, str | tuple[str, ...]] = { "OpenRouter": "OPENROUTER_API_KEY", "OpenAI": "OPENAI_API_KEY", - "NVIDIA": "NVIDIA_API_KEY", - "Z.AI/GLM": "GLM_API_KEY", + "Anthropic": ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN"), + "Google / Gemini": ("GOOGLE_API_KEY", "GEMINI_API_KEY"), + "DeepSeek": "DEEPSEEK_API_KEY", + "xAI / Grok": "XAI_API_KEY", + "NVIDIA NIM": "NVIDIA_API_KEY", + "Z.AI / GLM": "GLM_API_KEY", "Kimi": "KIMI_API_KEY", "StepFun Step Plan": "STEPFUN_API_KEY", "MiniMax": "MINIMAX_API_KEY", @@ -140,11 +145,25 @@ def show_status(args): "WandB": "WANDB_API_KEY", "ElevenLabs": "ELEVENLABS_API_KEY", "GitHub": "GITHUB_TOKEN", - "NVIDIA NIM": "NVIDIA_API_KEY", } - for name, env_var in keys.items(): - value = get_env_value(env_var) or "" + def _resolve_env(env_ref) -> str: + """Return first non-empty env var value from a str or tuple of names.""" + if isinstance(env_ref, tuple): + for candidate in env_ref: + v = get_env_value(candidate) or "" + if v: + return v + return "" + return get_env_value(env_ref) or "" + + for name, env_ref in keys.items(): + # Anthropic already has a dedicated lookup below; keep that as the + # single source of truth (it also resolves OAuth tokens), skip here + # so we don't print two "Anthropic" rows. + if name == "Anthropic": + continue + value = _resolve_env(env_ref) has_key = bool(value) display = redact_key(value) if not show_all else value print(f" {name:<12} {check_mark(has_key)} {display}") From 0b5fd40a01f6d48549a2d9130e0cb1443be1900c Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Sat, 25 Apr 2026 19:22:10 -0700 Subject: [PATCH 160/171] =?UTF-8?q?fix(delegate):=20correct=20=5Fspawn=5Fc?= =?UTF-8?q?hild=20=E2=86=92=20=5Fbuild=5Fchild=5Fagent=20in=20comments?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- tests/tools/test_delegate.py | 15 +++++++++------ tools/delegate_tool.py | 29 ++++++++++++++++++----------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 089c46da09b..dfe35ea19c8 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -821,7 +821,9 @@ class TestDelegationCredentialResolution(unittest.TestCase): self.assertEqual(creds["api_key"], "local-key") self.assertEqual(creds["api_mode"], "chat_completions") - def test_direct_endpoint_falls_back_to_openai_api_key_env(self): + def test_direct_endpoint_returns_none_api_key_when_not_configured(self): + # When base_url is set without api_key, api_key should be None so + # _build_child_agent inherits the parent's key (effective_api_key = override or parent). parent = _make_mock_parent(depth=0) cfg = { "model": "qwen2.5-coder", @@ -829,10 +831,11 @@ class TestDelegationCredentialResolution(unittest.TestCase): } with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False): creds = _resolve_delegation_credentials(cfg, parent) - self.assertEqual(creds["api_key"], "env-openai-key") + self.assertIsNone(creds["api_key"]) self.assertEqual(creds["provider"], "custom") - def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self): + def test_direct_endpoint_no_raise_when_only_provider_env_key_present(self): + # Even if OPENAI_API_KEY is absent, no ValueError — _build_child_agent uses parent key. parent = _make_mock_parent(depth=0) cfg = { "model": "qwen2.5-coder", @@ -846,9 +849,9 @@ class TestDelegationCredentialResolution(unittest.TestCase): }, clear=False, ): - with self.assertRaises(ValueError) as ctx: - _resolve_delegation_credentials(cfg, parent) - self.assertIn("OPENAI_API_KEY", str(ctx.exception)) + creds = _resolve_delegation_credentials(cfg, parent) + self.assertIsNone(creds["api_key"]) + self.assertEqual(creds["provider"], "custom") @patch("hermes_cli.runtime_provider.resolve_runtime_provider") def test_nous_provider_resolves_nous_credentials(self, mock_resolve): diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index c288e7b28ab..5968697e943 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -2237,11 +2237,17 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: """Resolve credentials for subagent delegation. If ``delegation.base_url`` is configured, subagents use that direct - OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is - configured, the full credential bundle (base_url, api_key, api_mode, - provider) is resolved via the runtime provider system — the same path used - by CLI/gateway startup. This lets subagents run on a completely different - provider:model pair. + OpenAI-compatible endpoint. ``delegation.api_key`` overrides the key; when + omitted, ``api_key`` is returned as ``None`` so ``_build_child_agent`` + inherits the parent agent's key (``effective_api_key = override_api_key or + parent_api_key``). This lets providers that store their key outside + ``OPENAI_API_KEY`` (e.g. ``MINIMAX_API_KEY``, ``DASHSCOPE_API_KEY``) work + without a duplicate config entry. + + Otherwise, if ``delegation.provider`` is configured, the full credential + bundle (base_url, api_key, api_mode, provider) is resolved via the runtime + provider system — the same path used by CLI/gateway startup. This lets + subagents run on a completely different provider:model pair. If neither base_url nor provider is configured, returns None values so the child inherits everything from the parent agent. @@ -2254,12 +2260,13 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: configured_api_key = str(cfg.get("api_key") or "").strip() or None if configured_base_url: - api_key = configured_api_key or os.getenv("OPENAI_API_KEY", "").strip() - if not api_key: - raise ValueError( - "Delegation base_url is configured but no API key was found. " - "Set delegation.api_key or OPENAI_API_KEY." - ) + # When delegation.api_key is not set, return None so _build_child_agent + # falls back to the parent agent's API key via the credential inheritance + # path (effective_api_key = override_api_key or parent_api_key). This + # lets providers that store their key in a non-OPENAI_API_KEY env var + # (e.g. MINIMAX_API_KEY, DASHSCOPE_API_KEY) work without requiring + # callers to duplicate the key under delegation.api_key. + api_key = configured_api_key # None → inherited from parent in _build_child_agent base_lower = configured_base_url.lower() provider = "custom" From 7a8ee8b29d86dcb7019677504f5c63587dc70b3b Mon Sep 17 00:00:00 2001 From: Pratik Rai <raipratik0101@gmail.com> Date: Mon, 27 Apr 2026 00:39:11 +0530 Subject: [PATCH 161/171] fix(gateway): deduplicate Weixin messages by content fingerprint --- gateway/platforms/weixin.py | 11 ++++++++-- tests/gateway/test_weixin.py | 42 +++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py index 3fd7174270c..482692ee7a1 100644 --- a/gateway/platforms/weixin.py +++ b/gateway/platforms/weixin.py @@ -1333,6 +1333,15 @@ class WeixinAdapter(BasePlatformAdapter): if message_id and self._dedup.is_duplicate(message_id): return + # Secondary content-fingerprint dedup for text messages + item_list = message.get("item_list") or [] + text = _extract_text(item_list) + if text: + content_key = f"content:{sender_id}:{hashlib.md5(text.encode()).hexdigest()}" + if self._dedup.is_duplicate(content_key): + logger.debug("[%s] Content-dedup: skipping duplicate message from %s", self.name, sender_id) + return + chat_type, effective_chat_id = _guess_chat_type(message, self._account_id) if chat_type == "group": if self._group_policy == "disabled": @@ -1347,8 +1356,6 @@ class WeixinAdapter(BasePlatformAdapter): self._token_store.set(self._account_id, sender_id, context_token) asyncio.create_task(self._maybe_fetch_typing_ticket(sender_id, context_token or None)) - item_list = message.get("item_list") or [] - text = _extract_text(item_list) media_paths: List[str] = [] media_types: List[str] = [] diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py index 506936f7110..8deccf18cb7 100644 --- a/tests/gateway/test_weixin.py +++ b/tests/gateway/test_weixin.py @@ -5,7 +5,7 @@ import base64 import json import os from pathlib import Path -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, Mock, patch from gateway.config import PlatformConfig from gateway.config import GatewayConfig, HomeChannel, Platform, _apply_env_overrides @@ -788,3 +788,43 @@ class TestIsStaleSessionRet: def test_success_codes_are_not_stale(self): assert weixin._is_stale_session_ret(0, 0, "") is False assert weixin._is_stale_session_ret(None, None, "unknown error") is False + + +class TestWeixinContentDedup: + """Regression tests for Issue #16182 — upstream API sends duplicate content + with different message_ids, bypassing message_id deduplication. + """ + + def test_duplicate_content_with_different_message_ids_is_dropped(self): + adapter = _make_adapter() + adapter._poll_session = object() + adapter.handle_message = AsyncMock() + + base_msg = { + "from_user_id": "wxid_user1", + "item_list": [{"type": 1, "text_item": {"text": "hello world"}}], + } + + asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-1"})) + asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-2"})) + + assert adapter.handle_message.await_count == 1 + event = adapter.handle_message.await_args[0][0] + assert event.text == "hello world" + + def test_content_dedup_not_called_for_messages_without_text(self): + adapter = _make_adapter() + adapter._poll_session = object() + adapter.handle_message = AsyncMock() + adapter._dedup.is_duplicate = Mock(return_value=False) + + empty_msg = { + "from_user_id": "wxid_user1", + "message_id": "msg-1", + "item_list": [], + } + asyncio.run(adapter._process_message(empty_msg)) + + assert adapter.handle_message.await_count == 0 + # is_duplicate should only be called for message_id, never for content + assert all("content:" not in str(call) for call in adapter._dedup.is_duplicate.call_args_list) From 83080772f28793ac388d9218c394019ff8554ad0 Mon Sep 17 00:00:00 2001 From: 0668001438 <0668001438@zte.com.cn> Date: Mon, 27 Apr 2026 11:06:41 +0800 Subject: [PATCH 162/171] fix(delegation): honor provider override for subagents Clear inherited provider preference filters when delegation.provider is set so delegated children do not route back to the parent provider. Add a regression test for cross-provider delegation with parent OpenRouter filters. Closes #10653 --- tests/tools/test_delegate.py | 42 ++++++++++++++++++++++++++++++++++++ tools/delegate_tool.py | 25 +++++++++++++++++---- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index dfe35ea19c8..c45de2a581f 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -980,6 +980,48 @@ class TestDelegationProviderIntegration(unittest.TestCase): self.assertNotEqual(kwargs["base_url"], parent.base_url) self.assertNotEqual(kwargs["api_key"], parent.api_key) + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_provider_override_clears_parent_openrouter_filters( + self, mock_creds, mock_cfg + ): + """Delegated provider should not inherit parent provider-preference filters.""" + mock_cfg.return_value = { + "max_iterations": 45, + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + } + mock_creds.return_value = { + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-key", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + parent.providers_allowed = ["anthropic/claude-3.5-sonnet"] + parent.providers_ignored = ["openai/gpt-4o-mini"] + parent.providers_order = ["google/gemini-2.5-pro"] + parent.provider_sort = "price" + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", + "completed": True, + "api_calls": 1, + } + MockAgent.return_value = mock_child + + delegate_task(goal="Cross-provider test", parent_agent=parent) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["provider"], "openrouter") + self.assertIsNone(kwargs["providers_allowed"]) + self.assertIsNone(kwargs["providers_ignored"]) + self.assertIsNone(kwargs["providers_order"]) + self.assertIsNone(kwargs["provider_sort"]) + @patch("tools.delegate_tool._load_config") @patch("tools.delegate_tool._resolve_delegation_credentials") def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg): diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 5968697e943..5c7c431b253 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1032,6 +1032,23 @@ def _build_child_agent( # fallback_model parameter (which handles both list and dict forms). parent_fallback = getattr(parent_agent, "_fallback_chain", None) or None + # Inherit the parent's OpenRouter provider-preference filters by default + # (so subagents routed to the same provider honour the same routing + # constraints). BUT: when `delegation.provider` is set the user is + # explicitly asking the child to run on a different provider, and + # parent-level OpenRouter filters (e.g. `only=["Anthropic"]`) would + # silently force the child back onto the parent's provider. Clear the + # filters in that case so the delegated provider is honoured. + child_providers_allowed = getattr(parent_agent, "providers_allowed", None) + child_providers_ignored = getattr(parent_agent, "providers_ignored", None) + child_providers_order = getattr(parent_agent, "providers_order", None) + child_provider_sort = getattr(parent_agent, "provider_sort", None) + if override_provider: + child_providers_allowed = None + child_providers_ignored = None + child_providers_order = None + child_provider_sort = None + child = AIAgent( base_url=effective_base_url, api_key=effective_api_key, @@ -1056,10 +1073,10 @@ def _build_child_agent( thinking_callback=child_thinking_cb, session_db=getattr(parent_agent, "_session_db", None), parent_session_id=getattr(parent_agent, "session_id", None), - providers_allowed=parent_agent.providers_allowed, - providers_ignored=parent_agent.providers_ignored, - providers_order=parent_agent.providers_order, - provider_sort=parent_agent.provider_sort, + providers_allowed=child_providers_allowed, + providers_ignored=child_providers_ignored, + providers_order=child_providers_order, + provider_sort=child_provider_sort, tool_progress_callback=child_progress_cb, iteration_budget=None, # fresh budget per subagent ) From ce22301dc650219140e4d7d267a8d07c015f53d5 Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:34:10 -0700 Subject: [PATCH 163/171] test(sms): use clear=True in test_missing_phone_number_is_non_retryable Prevents pre-existing TWILIO_PHONE_NUMBER or SMS_WEBHOOK_URL values in the outer test environment from leaking into the assertion context. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --- gateway/platforms/sms.py | 14 +++++++----- tests/gateway/test_sms.py | 46 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/gateway/platforms/sms.py b/gateway/platforms/sms.py index 161949dab3d..2cf7db69b74 100644 --- a/gateway/platforms/sms.py +++ b/gateway/platforms/sms.py @@ -10,7 +10,7 @@ Shares credentials with the optional telephony skill — same env vars: Gateway-specific env vars: - SMS_WEBHOOK_PORT (default 8080) - - SMS_WEBHOOK_HOST (default 0.0.0.0) + - SMS_WEBHOOK_HOST (default 127.0.0.1) - SMS_WEBHOOK_URL (public URL for Twilio signature validation — required) - SMS_INSECURE_NO_SIGNATURE (true to disable signature validation — dev only) - SMS_ALLOWED_USERS (comma-separated E.164 phone numbers) @@ -41,7 +41,7 @@ logger = logging.getLogger(__name__) TWILIO_API_BASE = "https://api.twilio.com/2010-04-01/Accounts" MAX_SMS_LENGTH = 1600 # ~10 SMS segments DEFAULT_WEBHOOK_PORT = 8080 -DEFAULT_WEBHOOK_HOST = "0.0.0.0" +DEFAULT_WEBHOOK_HOST = "127.0.0.1" def check_sms_requirements() -> bool: @@ -91,19 +91,23 @@ class SmsAdapter(BasePlatformAdapter): from aiohttp import web if not self._from_number: - logger.error("[sms] TWILIO_PHONE_NUMBER not set — cannot send replies") + msg = "[sms] TWILIO_PHONE_NUMBER not set — cannot send replies" + logger.error(msg) + self._set_fatal_error("sms_missing_phone_number", msg, retryable=False) return False insecure_no_sig = os.getenv("SMS_INSECURE_NO_SIGNATURE", "").lower() == "true" if not self._webhook_url and not insecure_no_sig: - logger.error( + msg = ( "[sms] Refusing to start: SMS_WEBHOOK_URL is required for Twilio " "signature validation. Set it to the public URL configured in your " "Twilio console (e.g. https://example.com/webhooks/twilio). " "For local development without validation, set " - "SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production).", + "SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production)." ) + logger.error(msg) + self._set_fatal_error("sms_missing_webhook_url", msg, retryable=False) return False if insecure_no_sig and not self._webhook_url: diff --git a/tests/gateway/test_sms.py b/tests/gateway/test_sms.py index 524d540f810..e3ec86d90af 100644 --- a/tests/gateway/test_sms.py +++ b/tests/gateway/test_sms.py @@ -169,9 +169,9 @@ class TestSmsRequirements: class TestWebhookHostConfig: """Verify SMS_WEBHOOK_HOST env var and default.""" - def test_default_host_is_all_interfaces(self): + def test_default_host_is_localhost(self): from gateway.platforms.sms import DEFAULT_WEBHOOK_HOST - assert DEFAULT_WEBHOOK_HOST == "0.0.0.0" + assert DEFAULT_WEBHOOK_HOST == "127.0.0.1" def test_host_from_env(self): from gateway.platforms.sms import SmsAdapter @@ -242,6 +242,48 @@ class TestStartupGuard: result = await adapter.connect() assert result is False + @pytest.mark.asyncio + async def test_missing_webhook_url_is_non_retryable(self): + adapter = self._make_adapter() + await adapter.connect() + assert adapter.has_fatal_error is True + assert adapter.fatal_error_retryable is False + assert "sms_missing_webhook_url" == adapter.fatal_error_code + + @pytest.mark.asyncio + async def test_missing_phone_number_is_non_retryable(self): + from gateway.platforms.sms import SmsAdapter + + env = { + "TWILIO_ACCOUNT_SID": "ACtest", + "TWILIO_AUTH_TOKEN": "tok", + "TWILIO_PHONE_NUMBER": "", + "SMS_WEBHOOK_URL": "", + } + with patch.dict(os.environ, env, clear=True): + pc = PlatformConfig(enabled=True, api_key="tok") + adapter = SmsAdapter(pc) + await adapter.connect() + assert adapter.has_fatal_error is True + assert adapter.fatal_error_retryable is False + assert adapter.fatal_error_code == "sms_missing_phone_number" + + @pytest.mark.asyncio + async def test_insecure_flag_does_not_set_fatal_error(self): + mock_session = AsyncMock() + with patch.dict(os.environ, {"SMS_INSECURE_NO_SIGNATURE": "true"}), \ + patch("aiohttp.web.AppRunner") as mock_runner_cls, \ + patch("aiohttp.web.TCPSite") as mock_site_cls, \ + patch("aiohttp.ClientSession", return_value=mock_session): + mock_runner_cls.return_value.setup = AsyncMock() + mock_runner_cls.return_value.cleanup = AsyncMock() + mock_site_cls.return_value.start = AsyncMock() + adapter = self._make_adapter() + result = await adapter.connect() + assert result is True + assert adapter.has_fatal_error is False + await adapter.disconnect() + @pytest.mark.asyncio async def test_insecure_flag_allows_start_without_url(self): mock_session = AsyncMock() From 74c1b946e00c89b3b7ff315033d579ccb653de2d Mon Sep 17 00:00:00 2001 From: ygd58 <buraysandro9@gmail.com> Date: Mon, 4 May 2026 05:26:57 -0700 Subject: [PATCH 164/171] fix(browser): inject --no-sandbox for root and AppArmor userns restrictions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On VPS/Docker and some Ubuntu 23.10+ hosts, Chromium refuses to start without --no-sandbox: - uid=0 (root): hard requirement (VPS/Docker deployments) - AppArmor apparmor_restrict_unprivileged_userns=1 (Ubuntu 23.10+): non-root too, under systemd or unprivileged containers Detect both conditions and inject AGENT_BROWSER_CHROME_FLAGS with --no-sandbox --disable-dev-shm-usage when the user hasn't already set the flags themselves. Salvage of #15771 — only the browser_tool.py fix is cherry-picked. The PR's accompanying MCP preset addition (new feature surface) was dropped so the bug fix can land independently. Co-authored-by: ygd58 <buraysandro9@gmail.com> --- tools/browser_tool.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 768cec7f714..f394e5b2f67 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1482,6 +1482,34 @@ def _run_browser_command( if "AGENT_BROWSER_IDLE_TIMEOUT_MS" not in browser_env: idle_ms = str(BROWSER_SESSION_INACTIVITY_TIMEOUT * 1000) browser_env["AGENT_BROWSER_IDLE_TIMEOUT_MS"] = idle_ms + + # Inject --no-sandbox when needed (issue #15765): + # - Running as root: Chromium always refuses to start without it + # - Ubuntu 23.10+ / AppArmor systems: unprivileged user namespaces + # are restricted, causing Chromium to exit with "No usable sandbox" + # even for non-root users running under systemd or containers. + if "AGENT_BROWSER_CHROME_FLAGS" not in browser_env: + _needs_sandbox_bypass = False + if hasattr(os, "geteuid") and os.geteuid() == 0: + _needs_sandbox_bypass = True + logger.debug("browser: running as root — injecting --no-sandbox") + else: + # Detect AppArmor user namespace restrictions (Ubuntu 23.10+) + _userns_restrict = "/proc/sys/kernel/apparmor_restrict_unprivileged_userns" + try: + with open(_userns_restrict) as _f: + if _f.read().strip() == "1": + _needs_sandbox_bypass = True + logger.debug( + "browser: AppArmor userns restrictions detected — " + "injecting --no-sandbox" + ) + except OSError: + pass + if _needs_sandbox_bypass: + browser_env["AGENT_BROWSER_CHROME_FLAGS"] = ( + "--no-sandbox --disable-dev-shm-usage" + ) # Use temp files for stdout/stderr instead of pipes. # agent-browser starts a background daemon that inherits file From eeb05cf556433c529935f71aa5ed6b234d1507c8 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Mon, 4 May 2026 18:22:41 +0530 Subject: [PATCH 165/171] docs: default custom tool creation to plugins Steers custom tool creation toward the plugin route by default. The adding-tools.md guide is now explicitly for built-in core Hermes tools only. Key fixes: - Plugin quickstart: ctx.register_tool() now uses correct keyword-arg API (name=, toolset=, schema=, handler=) instead of broken 3-arg call - Handler signature: (params, **kwargs) instead of (params) - Handler return: json.dumps({...}) instead of plain string - AGENTS.md: mentions plugin route before built-in tool instructions - learning-path.md: plugins listed before core tool development - contributing.md: separates plugin vs core tool paths Based on PR #13138 by @helix4u. --- AGENTS.md | 11 +++++++++- website/docs/developer-guide/adding-tools.md | 17 ++++++++++++-- website/docs/developer-guide/contributing.md | 3 ++- website/docs/getting-started/learning-path.md | 17 ++++++++------ website/docs/user-guide/features/plugins.md | 22 +++++++++++++++---- 5 files changed, 55 insertions(+), 15 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index df14c68df2a..f09258061fd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -257,7 +257,16 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes ## Adding New Tools -Requires changes in **2 files**: +For most custom or local-only tools, do **not** edit Hermes core. Use the plugin +route instead: create `~/.hermes/plugins/<name>/plugin.yaml` and +`~/.hermes/plugins/<name>/__init__.py`, then register tools with +`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be +enabled or disabled without touching `tools/` or `toolsets.py`. + +Use the built-in route below only when the user is explicitly contributing a new +core Hermes tool that should ship in the base system. + +Built-in/core tools require changes in **2 files**: **1. Create `tools/your_tool.py`:** ```python diff --git a/website/docs/developer-guide/adding-tools.md b/website/docs/developer-guide/adding-tools.md index f1ab79f31ef..6bd4c7cca4a 100644 --- a/website/docs/developer-guide/adding-tools.md +++ b/website/docs/developer-guide/adding-tools.md @@ -8,6 +8,18 @@ description: "How to add a new tool to Hermes Agent — schemas, handlers, regis Before writing a tool, ask yourself: **should this be a [skill](creating-skills.md) instead?** +:::warning Built-in Core Tools Only +This page is for adding a **built-in Hermes tool** to the repository itself. +If you want a personal, project-local, or otherwise custom tool without +modifying Hermes core, use the plugin route instead: + +- [Plugins](/docs/user-guide/features/plugins) +- [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin) + +Default to plugins for most custom tool creation. Only follow this page when +you explicitly want to ship a new built-in tool in `tools/` and `toolsets.py`. +::: + Make it a **Skill** when the capability can be expressed as instructions + shell commands + existing tools (arXiv search, git workflows, Docker management, PDF processing). Make it a **Tool** when it requires end-to-end integration with API keys, custom processing logic, binary data handling, or streaming (browser automation, TTS, vision analysis). @@ -21,7 +33,7 @@ Adding a tool touches **2 files**: Any `tools/*.py` file with a top-level `registry.register()` call is auto-discovered at startup — no manual import list required. -## Step 1: Create the Tool File +## Step 1: Create the Built-in Tool File Every tool file follows the same structure: @@ -106,7 +118,7 @@ registry.register( - The `handler` receives `(args: dict, **kwargs)` where `args` is the LLM's tool call arguments ::: -## Step 2: Add to a Toolset +## Step 2: Add the Built-in Tool to a Toolset In `toolsets.py`, add the tool name: @@ -192,6 +204,7 @@ OPTIONAL_ENV_VARS = { - [ ] Tool file created with handler, schema, check function, and registration - [ ] Added to appropriate toolset in `toolsets.py` +- [ ] Confirmed this really should be a built-in/core tool and not a plugin - [ ] Handler returns JSON strings, errors returned as `{"error": "..."}` - [ ] Optional: API key added to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` - [ ] Optional: Added to `toolset_distributions.py` for batch processing diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md index f75fd85ebb2..8cfa618ad6a 100644 --- a/website/docs/developer-guide/contributing.md +++ b/website/docs/developer-guide/contributing.md @@ -22,7 +22,8 @@ We value contributions in this order: ## Common contribution paths -- Building a new tool? Start with [Adding Tools](./adding-tools.md) +- Building a custom/local tool without modifying Hermes core? Start with [Build a Hermes Plugin](../guides/build-a-hermes-plugin.md) +- Building a new built-in core tool for Hermes itself? Start with [Adding Tools](./adding-tools.md) - Building a new skill? Start with [Creating Skills](./creating-skills.md) - Building a new inference provider? Start with [Adding Providers](./adding-providers.md) diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md index 41170ccccdb..79953751a1e 100644 --- a/website/docs/getting-started/learning-path.md +++ b/website/docs/getting-started/learning-path.md @@ -80,15 +80,18 @@ Cron jobs let Hermes Agent run tasks on a schedule — daily summaries, periodic Extend Hermes Agent with your own tools and reusable skill packages. -1. [Tools Overview](/docs/user-guide/features/tools) -2. [Skills Overview](/docs/user-guide/features/skills) -3. [MCP (Model Context Protocol)](/docs/user-guide/features/mcp) -4. [Architecture](/docs/developer-guide/architecture) -5. [Adding Tools](/docs/developer-guide/adding-tools) -6. [Creating Skills](/docs/developer-guide/creating-skills) +1. [Plugins](/docs/user-guide/features/plugins) +2. [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin) +3. [Tools Overview](/docs/user-guide/features/tools) +4. [Skills Overview](/docs/user-guide/features/skills) +5. [MCP (Model Context Protocol)](/docs/user-guide/features/mcp) +6. [Architecture](/docs/developer-guide/architecture) +7. [Adding Tools](/docs/developer-guide/adding-tools) +8. [Creating Skills](/docs/developer-guide/creating-skills) :::tip -Tools are individual functions the agent can call. Skills are bundles of tools, prompts, and configuration packaged together. Start with tools, graduate to skills. +For most custom tool creation, start with plugins. The [Adding Tools](/docs/developer-guide/adding-tools) +page is for built-in Hermes core development, not the usual user/custom-tool path. ::: ### "I want to train models" diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index 0e99fd12d2e..ee198882258 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -9,6 +9,11 @@ description: "Extend Hermes with custom tools, hooks, and integrations via the p Hermes has a plugin system for adding custom tools, hooks, and integrations without modifying core code. +If you want to create a custom tool for yourself, your team, or one project, +this is usually the right path. The developer guide's +[Adding Tools](/docs/developer-guide/adding-tools) page is for built-in Hermes +core tools that live in `tools/` and `toolsets.py`. + **→ [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin)** — step-by-step guide with a complete working example. ## Quick overview @@ -42,6 +47,8 @@ description: A minimal example plugin ```python """Minimal Hermes plugin — registers a tool and a hook.""" +import json + def register(ctx): # --- Tool: hello_world --- @@ -60,11 +67,18 @@ def register(ctx): }, } - def handle_hello(params): + def handle_hello(params, **kwargs): + del kwargs name = params.get("name", "World") - return f"Hello, {name}! 👋 (from the hello-world plugin)" + return json.dumps({"success": True, "greeting": f"Hello, {name}!"}) - ctx.register_tool("hello_world", schema, handle_hello) + ctx.register_tool( + name="hello_world", + toolset="hello_world", + schema=schema, + handler=handle_hello, + description="Return a friendly greeting for the given name.", + ) # --- Hook: log every tool call --- def on_tool_call(tool_name, params, result): @@ -81,7 +95,7 @@ Project-local plugins under `./.hermes/plugins/` are disabled by default. Enable | Capability | How | |-----------|-----| -| Add tools | `ctx.register_tool(name, schema, handler)` | +| Add tools | `ctx.register_tool(name=..., toolset=..., schema=..., handler=...)` | | Add hooks | `ctx.register_hook("post_tool_call", callback)` | | Add slash commands | `ctx.register_command(name, handler, description)` — adds `/name` in CLI and gateway sessions | | Add CLI commands | `ctx.register_cli_command(name, help, setup_fn, handler_fn)` — adds `hermes <plugin> <subcommand>` | From a7417f8a4a413196dac350e357dec43b8f8eb3e0 Mon Sep 17 00:00:00 2001 From: JasonOA888 <jason@outland.art> Date: Mon, 4 May 2026 13:01:28 +0800 Subject: [PATCH 166/171] fix(compressor): skip non-string tool content in summarization pass to prevent AttributeError Commit 408dd8aa added a non-string guard for Pass 1 (dedup), but the same pattern exists in Pass 2 (summarization/pruning) where content.startswith() and len() are called on potentially non-string tool content. When a provider returns tool results with non-string content (e.g. dict or int from llama.cpp or similar), the pruning pass crashes with AttributeError. Add the same isinstance(content, str) guard to Pass 2 for consistency. --- agent/context_compressor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 69151a117a8..f9111f96004 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -600,6 +600,8 @@ class ContextCompressor(ContextEngine): # Skip multimodal content (list of content blocks) if isinstance(content, list): continue + if not isinstance(content, str): + continue if not content or content == _PRUNED_TOOL_PLACEHOLDER: continue # Skip already-deduplicated or previously-summarized results From d89e7a3cd42eb7cb30ee06e73cf2b4abbaee3248 Mon Sep 17 00:00:00 2001 From: bobashopcashier <77253505+bobashopcashier@users.noreply.github.com> Date: Sun, 3 May 2026 21:02:16 -0700 Subject: [PATCH 167/171] fix(anthropic): restrict fast mode to Opus 4.6 (Anthropic API contract) Per https://platform.claude.com/docs/en/build-with-claude/fast-mode: "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast with an unsupported model returns an error." Pre-fix, _is_anthropic_fast_model() returned True for any claude-* model, so /fast on Opus 4.7 (or Sonnet/Haiku) would persist agent.service_tier=fast in config.yaml and the adapter would inject extra_body["speed"] = "fast" on every subsequent request. Opus 4.7 returns: HTTP 400: 'claude-opus-4-7' does not support the `speed` parameter. This wedged sessions across model upgrades (a user who ran /fast on Opus 4.6 and later switched the default model to 4.7 hit a hard 400 on every turn until they manually edited config.yaml). Changes: - _is_anthropic_fast_model: gate on "opus-4-6" / "opus-4.6" only - anthropic_adapter: add _supports_fast_mode predicate as defensive guard so stale request_overrides on an unsupported model are dropped silently instead of 400'ing - Tests: flip the assertions that mirrored the bug (Sonnet/Haiku/Opus 4.7 asserting fast-mode support) to match the documented API contract --- agent/anthropic_adapter.py | 24 +++++++- hermes_cli/models.py | 14 ++++- tests/agent/test_anthropic_adapter.py | 39 ++++++++++++ tests/cli/test_fast_command.py | 85 +++++++++++++++++++++------ 4 files changed, 138 insertions(+), 24 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 8c468e8686b..bb1b33fcc82 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7") # Models where temperature/top_p/top_k return 400 if set to non-default values. # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it. _NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7") +_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6") # ── Max output token limits per Anthropic model ─────────────────────── # Source: Anthropic docs + Cline model catalog. Anthropic's API requires @@ -219,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool: return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS) +def _supports_fast_mode(model: str) -> bool: + """Return True for models that support Anthropic Fast Mode (speed=fast). + + Per Anthropic docs, fast mode is currently supported on Opus 4.6 only. + Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7) + returns HTTP 400. This guard prevents silently 400'ing when stale config + or older callers leave fast mode enabled across a model upgrade. + """ + return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS) + + # Beta headers for enhanced features (sent with ALL auth types). # As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the # beta headers are still accepted (harmless no-op) but not required. Kept @@ -1932,9 +1944,15 @@ def build_anthropic_kwargs( # ── Fast mode (Opus 4.6 only) ──────────────────────────────────── # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x - # output speed. Only for native Anthropic endpoints — third-party - # providers would reject the unknown beta header and speed parameter. - if fast_mode and not _is_third_party_anthropic_endpoint(base_url): + # output speed. Per Anthropic docs, fast mode is only supported on + # Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter. + # Only for native Anthropic endpoints — third-party providers would + # reject the unknown beta header and speed parameter. + if ( + fast_mode + and not _is_third_party_anthropic_endpoint(base_url) + and _supports_fast_mode(model) + ): kwargs.setdefault("extra_body", {})["speed"] = "fast" # Build extra_headers with ALL applicable betas (the per-request # extra_headers override the client-level anthropic-beta header). diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 984685e6c3e..b1630b3d837 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1740,10 +1740,20 @@ def model_supports_fast_mode(model_id: Optional[str]) -> bool: def _is_anthropic_fast_model(model_id: Optional[str]) -> bool: - """Return True if the model is a Claude model eligible for Anthropic Fast Mode.""" + """Return True if the model is a Claude model eligible for Anthropic Fast Mode. + + Fast mode is currently supported on Claude Opus 4.6 only. Per Anthropic's + docs (https://platform.claude.com/docs/en/build-with-claude/fast-mode): + "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast + with an unsupported model returns an error." Opus 4.7 explicitly rejects + the ``speed`` parameter with HTTP 400. + """ raw = _strip_vendor_prefix(str(model_id or "")) base = raw.split(":")[0] - return base.startswith("claude-") + if not base.startswith("claude-"): + return False + # Only Opus 4.6 supports fast mode at present. + return "opus-4-6" in base or "opus-4.6" in base def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None: diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 2e676aef628..0bb607d7412 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -1113,6 +1113,45 @@ class TestBuildAnthropicKwargs: assert _forbids_sampling_params("claude-opus-4-6") is False assert _forbids_sampling_params("claude-sonnet-4-5") is False + def test_supports_fast_mode_predicate(self): + """Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded.""" + from agent.anthropic_adapter import _supports_fast_mode + assert _supports_fast_mode("claude-opus-4-6") is True + assert _supports_fast_mode("anthropic/claude-opus-4-6") is True + assert _supports_fast_mode("claude-opus-4-7") is False + assert _supports_fast_mode("claude-sonnet-4-6") is False + assert _supports_fast_mode("claude-haiku-4-5") is False + assert _supports_fast_mode("") is False + + def test_fast_mode_omitted_for_unsupported_model(self): + """fast_mode=True on Opus 4.7 must NOT inject speed=fast (API 400s).""" + kwargs = build_anthropic_kwargs( + model="claude-opus-4-7", + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=1024, + reasoning_config=None, + fast_mode=True, + ) + # extra_body either absent or doesn't carry "speed" + assert "speed" not in kwargs.get("extra_body", {}) + # No fast-mode beta header should be added either + beta_header = (kwargs.get("extra_headers") or {}).get("anthropic-beta", "") + assert "fast-mode-2026-02-01" not in beta_header + + def test_fast_mode_still_applied_on_opus_46(self): + """Regression guard — fast mode must still work on Opus 4.6.""" + kwargs = build_anthropic_kwargs( + model="claude-opus-4-6", + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=1024, + reasoning_config=None, + fast_mode=True, + ) + assert kwargs.get("extra_body", {}).get("speed") == "fast" + assert "fast-mode-2026-02-01" in kwargs["extra_headers"]["anthropic-beta"] + def test_reasoning_disabled(self): kwargs = build_anthropic_kwargs( model="claude-sonnet-4-20250514", diff --git a/tests/cli/test_fast_command.py b/tests/cli/test_fast_command.py index 343c05658c0..a98ae754444 100644 --- a/tests/cli/test_fast_command.py +++ b/tests/cli/test_fast_command.py @@ -128,17 +128,34 @@ class TestPriorityProcessingModels(unittest.TestCase): assert model_supports_fast_mode(model), f"{model} should support fast mode" def test_all_anthropic_models_supported(self): + """Per Anthropic docs, fast mode is currently Opus 4.6 only. + + Sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400. + Pre-fix this test asserted all Claude variants supported fast mode, + which mirrored the bug rather than the API contract. + """ from hermes_cli.models import model_supports_fast_mode - # All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku. + # Supported: Opus 4.6 in any form supported = [ - "claude-opus-4-7", "claude-opus-4-6", "claude-opus-4.6", - "claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4", - "claude-haiku-4-5", "claude-3-5-haiku", + "claude-opus-4-6", "claude-opus-4.6", + "anthropic/claude-opus-4-6", "anthropic/claude-opus-4.6", ] for model in supported: assert model_supports_fast_mode(model), f"{model} should support fast mode" + # Unsupported per Anthropic API: Opus 4.7, Sonnet, Haiku + unsupported = [ + "claude-opus-4-7", + "claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4", + "claude-haiku-4-5", "claude-3-5-haiku", + ] + for model in unsupported: + assert not model_supports_fast_mode(model), ( + f"{model} should NOT support fast mode — Anthropic restricts " + f"speed=fast to Opus 4.6" + ) + def test_codex_models_excluded(self): """Codex models route through Responses API and don't accept service_tier.""" from hermes_cli.models import model_supports_fast_mode @@ -257,18 +274,20 @@ class TestAnthropicFastMode(unittest.TestCase): assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True - def test_anthropic_all_claude_models_supported(self): + def test_anthropic_non_opus46_models_excluded(self): + """Anthropic restricts fast mode to Opus 4.6 — others must be excluded. + + Per https://platform.claude.com/docs/en/build-with-claude/fast-mode, + sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400. + """ from hermes_cli.models import model_supports_fast_mode - # All Claude models support fast mode — Opus, Sonnet, Haiku. - # The anthropic adapter gates speed=fast on native Anthropic - # endpoints only, so third-party proxies that reject the beta - # are protected downstream (see _is_third_party_anthropic_endpoint). - assert model_supports_fast_mode("claude-sonnet-4-6") is True - assert model_supports_fast_mode("claude-sonnet-4.6") is True - assert model_supports_fast_mode("claude-haiku-4-5") is True - assert model_supports_fast_mode("claude-opus-4-7") is True - assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True + assert model_supports_fast_mode("claude-sonnet-4-6") is False + assert model_supports_fast_mode("claude-sonnet-4.6") is False + assert model_supports_fast_mode("claude-haiku-4-5") is False + assert model_supports_fast_mode("claude-opus-4-7") is False + assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False + assert model_supports_fast_mode("anthropic/claude-opus-4-7") is False def test_non_claude_models_not_anthropic_fast(self): """Non-Claude models should not be treated as Anthropic fast-mode.""" @@ -294,6 +313,17 @@ class TestAnthropicFastMode(unittest.TestCase): result = resolve_fast_mode_overrides("anthropic/claude-opus-4.6") assert result == {"speed": "fast"} + def test_resolve_overrides_returns_none_for_unsupported_claude(self): + """Opus 4.7 and other Claude models don't support fast mode (API 400s). + + Per Anthropic docs, fast mode is currently Opus 4.6 only. + """ + from hermes_cli.models import resolve_fast_mode_overrides + + assert resolve_fast_mode_overrides("claude-opus-4-7") is None + assert resolve_fast_mode_overrides("claude-sonnet-4-6") is None + assert resolve_fast_mode_overrides("claude-haiku-4-5") is None + def test_resolve_overrides_returns_service_tier_for_openai(self): """OpenAI models should still get service_tier, not speed.""" from hermes_cli.models import resolve_fast_mode_overrides @@ -302,13 +332,21 @@ class TestAnthropicFastMode(unittest.TestCase): assert result == {"service_tier": "priority"} def test_is_anthropic_fast_model(self): + """Fast mode is currently Opus 4.6 only — other Claude variants must be excluded.""" from hermes_cli.models import _is_anthropic_fast_model + # Supported: Opus 4.6 in any form assert _is_anthropic_fast_model("claude-opus-4-6") is True assert _is_anthropic_fast_model("claude-opus-4.6") is True - assert _is_anthropic_fast_model("claude-sonnet-4-6") is True - assert _is_anthropic_fast_model("claude-haiku-4-5") is True assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True + assert _is_anthropic_fast_model("claude-opus-4.6:fast") is True + + # Unsupported per Anthropic API contract — would 400 if we sent speed=fast + assert _is_anthropic_fast_model("claude-opus-4-7") is False + assert _is_anthropic_fast_model("claude-sonnet-4-6") is False + assert _is_anthropic_fast_model("claude-haiku-4-5") is False + + # Non-Claude assert _is_anthropic_fast_model("gpt-5.4") is False assert _is_anthropic_fast_model("") is False @@ -320,14 +358,23 @@ class TestAnthropicFastMode(unittest.TestCase): ) assert cli_mod.HermesCLI._fast_command_available(stub) is True - def test_fast_command_exposed_for_anthropic_sonnet(self): - """Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url.""" + def test_fast_command_hidden_for_anthropic_sonnet(self): + """Sonnet doesn't support fast mode (Opus 4.6 only) — /fast must be hidden.""" cli_mod = _import_cli() stub = SimpleNamespace( provider="anthropic", requested_provider="anthropic", model="claude-sonnet-4-6", agent=None, ) - assert cli_mod.HermesCLI._fast_command_available(stub) is True + assert cli_mod.HermesCLI._fast_command_available(stub) is False + + def test_fast_command_hidden_for_anthropic_opus_47(self): + """Opus 4.7 doesn't support fast mode — /fast must be hidden.""" + cli_mod = _import_cli() + stub = SimpleNamespace( + provider="anthropic", requested_provider="anthropic", + model="claude-opus-4-7", agent=None, + ) + assert cli_mod.HermesCLI._fast_command_available(stub) is False def test_fast_command_hidden_for_non_claude_non_openai(self): """Non-Claude, non-OpenAI models should not expose /fast.""" From cfd86dcdb806611bdd0fb4112f05e7e2af87cefc Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 4 May 2026 18:51:05 +0530 Subject: [PATCH 168/171] chore: add bobashopcashier noreply email to AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 2edc78aed65..d868fb13692 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -343,6 +343,7 @@ AUTHOR_MAP = { "haileymarshall005@gmail.com": "haileymarshall", "greer.guthrie@gmail.com": "g-guthrie", "kennyx102@gmail.com": "bobashopcashier", + "77253505+bobashopcashier@users.noreply.github.com": "bobashopcashier", "shokatalishaikh95@gmail.com": "areu01or00", "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", From 38adfebe78fb210921ece43c62c46cd966279c5b Mon Sep 17 00:00:00 2001 From: megastary <25355950+megastary@users.noreply.github.com> Date: Fri, 1 May 2026 11:34:21 +0200 Subject: [PATCH 169/171] fix(teams): import prompt/print helpers from cli_output, not config The Teams adapter's interactive_setup() tried to import prompt, prompt_yes_no, print_info, print_success, and print_warning from hermes_cli.config, but those helpers live in hermes_cli.cli_output. Only get_env_value/save_env_value live in hermes_cli.config. This caused 'hermes setup' to crash with ImportError as soon as the user picked Teams in the messaging-platforms wizard. Split the import accordingly. --- plugins/platforms/teams/adapter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/platforms/teams/adapter.py b/plugins/platforms/teams/adapter.py index 945ffa07958..d0a2b7adbc6 100644 --- a/plugins/platforms/teams/adapter.py +++ b/plugins/platforms/teams/adapter.py @@ -592,6 +592,8 @@ def interactive_setup() -> None: from hermes_cli.config import ( get_env_value, save_env_value, + ) + from hermes_cli.cli_output import ( prompt, prompt_yes_no, print_info, From 54e78cadb2e6d16a4f82185b2dc81c9dc176a813 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 4 May 2026 19:19:50 +0530 Subject: [PATCH 170/171] test: add regression test for Teams interactive_setup import fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted from PR #19188 by @LeonSGP43 — mocks cli_output helpers and verifies interactive_setup persists credentials to .env without crashing. Also adds megastary to AUTHOR_MAP. --- scripts/release.py | 1 + tests/gateway/test_teams.py | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/scripts/release.py b/scripts/release.py index d868fb13692..7197f3d8330 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -344,6 +344,7 @@ AUTHOR_MAP = { "greer.guthrie@gmail.com": "g-guthrie", "kennyx102@gmail.com": "bobashopcashier", "77253505+bobashopcashier@users.noreply.github.com": "bobashopcashier", + "25355950+megastary@users.noreply.github.com": "megastary", # PR #18325 "shokatalishaikh95@gmail.com": "areu01or00", "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", diff --git a/tests/gateway/test_teams.py b/tests/gateway/test_teams.py index 7a035142ed6..2befceec948 100644 --- a/tests/gateway/test_teams.py +++ b/tests/gateway/test_teams.py @@ -313,9 +313,33 @@ class TestTeamsPluginRegistration: # --------------------------------------------------------------------------- -# Tests: Connect / Disconnect +# Tests: Interactive setup (import fix regression — #18325 / #19173) # --------------------------------------------------------------------------- +class TestTeamsInteractiveSetup: + def test_interactive_setup_persists_credentials(self, tmp_path, monkeypatch): + """Regression for #19173: interactive_setup must import prompt helpers + from hermes_cli.cli_output (not hermes_cli.config) and persist + credentials to .env without crashing. + """ + hermes_home = tmp_path / "hermes" + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + import hermes_cli.cli_output as cli_output_mod + + answers = iter(["client-id", "client-secret", "tenant-id", "aad-1, aad-2"]) + monkeypatch.setattr(cli_output_mod, "prompt", lambda *_a, **_kw: next(answers)) + monkeypatch.setattr(cli_output_mod, "prompt_yes_no", lambda *_a, **_kw: True) + monkeypatch.setattr(cli_output_mod, "print_info", lambda *_a, **_kw: None) + monkeypatch.setattr(cli_output_mod, "print_success", lambda *_a, **_kw: None) + monkeypatch.setattr(cli_output_mod, "print_warning", lambda *_a, **_kw: None) + + _teams_mod.interactive_setup() + + env_text = (hermes_home / ".env").read_text(encoding="utf-8") + assert "TEAMS_CLIENT_ID=client-id" in env_text + assert "TEAMS_TENANT_ID=tenant-id" in env_text + class TestTeamsConnect: @pytest.mark.asyncio async def test_connect_fails_without_sdk(self, monkeypatch): From 0ce1b9fe20a53459b37b7ab27dcb88336dbea781 Mon Sep 17 00:00:00 2001 From: asheriif <30965123+asheriif@users.noreply.github.com> Date: Mon, 4 May 2026 18:58:40 +0200 Subject: [PATCH 171/171] fix(tui): preserve prompt separator width (#19340) * fix(tui): preserve prompt separator width * fix(tui): align transcript height estimates with prompt width --- ui-tui/src/__tests__/messages.test.ts | 50 +++++++++++++++++++++ ui-tui/src/__tests__/virtualHeights.test.ts | 7 +++ ui-tui/src/app/useMainApp.ts | 9 ++-- ui-tui/src/components/messageLine.tsx | 6 ++- ui-tui/src/lib/inputMetrics.ts | 10 +++++ ui-tui/src/lib/virtualHeights.ts | 10 ++++- 6 files changed, 85 insertions(+), 7 deletions(-) diff --git a/ui-tui/src/__tests__/messages.test.ts b/ui-tui/src/__tests__/messages.test.ts index 1da4bfd4ae2..1ad2b788df7 100644 --- a/ui-tui/src/__tests__/messages.test.ts +++ b/ui-tui/src/__tests__/messages.test.ts @@ -1,7 +1,13 @@ +import { renderSync } from '@hermes/ink' +import React from 'react' +import { PassThrough } from 'stream' import { describe, expect, it } from 'vitest' +import { MessageLine } from '../components/messageLine.js' import { toTranscriptMessages } from '../domain/messages.js' import { upsert } from '../lib/messages.js' +import { stripAnsi } from '../lib/text.js' +import { DEFAULT_THEME } from '../theme.js' describe('toTranscriptMessages', () => { it('preserves assistant tool-call rows so resume does not drop prior turns', () => { @@ -21,6 +27,50 @@ describe('toTranscriptMessages', () => { }) }) +describe('MessageLine', () => { + it('preserves a separator after compound user prompt glyphs in transcript rows', () => { + const stdout = new PassThrough() + const stdin = new PassThrough() + const stderr = new PassThrough() + let output = '' + + Object.assign(stdout, { columns: 80, isTTY: false, rows: 24 }) + Object.assign(stdin, { isTTY: false }) + Object.assign(stderr, { isTTY: false }) + stdout.on('data', chunk => { + output += chunk.toString() + }) + + const t = { + ...DEFAULT_THEME, + brand: { ...DEFAULT_THEME.brand, prompt: 'Ψ >' } + } + + const instance = renderSync( + React.createElement(MessageLine, { + cols: 80, + msg: { role: 'user', text: 'Okay' }, + t + }), + { + patchConsole: false, + stderr: stderr as NodeJS.WriteStream, + stdin: stdin as NodeJS.ReadStream, + stdout: stdout as NodeJS.WriteStream + } + ) + + instance.unmount() + instance.cleanup() + + const renderedLine = stripAnsi(output) + .split('\n') + .find(line => line.includes('Okay')) + + expect(renderedLine).toContain('Ψ > Okay') + }) +}) + describe('upsert', () => { it('appends when last role differs', () => { expect(upsert([{ role: 'user', text: 'hi' }], 'assistant', 'hello')).toHaveLength(2) diff --git a/ui-tui/src/__tests__/virtualHeights.test.ts b/ui-tui/src/__tests__/virtualHeights.test.ts index 4b05aa39960..f407976db35 100644 --- a/ui-tui/src/__tests__/virtualHeights.test.ts +++ b/ui-tui/src/__tests__/virtualHeights.test.ts @@ -17,6 +17,13 @@ describe('virtual height estimates', () => { expect(estimatedMsgHeight(msg, 35, { compact: false, details: false })).toBeGreaterThan(5) }) + it('uses compound user prompt width when estimating user message wrapping', () => { + const msg: Msg = { role: 'user', text: 'x'.repeat(21) } + + expect(estimatedMsgHeight(msg, 26, { compact: false, details: false, userPrompt: '❯' })).toBe(3) + expect(estimatedMsgHeight(msg, 26, { compact: false, details: false, userPrompt: 'Ψ >' })).toBe(4) + }) + it('includes detail sections when visible', () => { const msg: Msg = { role: 'assistant', text: 'ok', thinking: 'line 1\nline 2', tools: ['Tool A', 'Tool B'] } diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 17924ca4a69..218654f531e 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -17,6 +17,7 @@ import type { import { useGitBranch } from '../hooks/useGitBranch.js' import { useVirtualHistory } from '../hooks/useVirtualHistory.js' import { appendTranscriptMessage } from '../lib/messages.js' +import { composerPromptWidth } from '../lib/inputMetrics.js' import { isMac } from '../lib/platform.js' import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js' import { terminalParityHints } from '../lib/terminalParity.js' @@ -244,7 +245,8 @@ export function useMainApp(gw: GatewayClient) { }, [ui.detailsMode, ui.detailsModeCommandOverride, ui.sections]) const detailsVisible = detailsLayoutKey !== 'hidden:hidden' - const heightCacheKey = `${ui.sid ?? 'draft'}:${cols}:${ui.compact ? '1' : '0'}:${detailsLayoutKey}` + const userPromptWidth = composerPromptWidth(ui.theme.brand.prompt) + const heightCacheKey = `${ui.sid ?? 'draft'}:${cols}:${userPromptWidth}:${ui.compact ? '1' : '0'}:${detailsLayoutKey}` const heightCache = useMemo(() => { let cache = heightCachesRef.current.get(heightCacheKey) @@ -266,9 +268,10 @@ export function useMainApp(gw: GatewayClient) { estimatedMsgHeight(virtualRows[index]!.msg, cols, { compact: ui.compact, details: detailsVisible, - limitHistory: index < virtualRows.length - FULL_RENDER_TAIL_ITEMS + limitHistory: index < virtualRows.length - FULL_RENDER_TAIL_ITEMS, + userPrompt: ui.theme.brand.prompt }), - [cols, detailsVisible, ui.compact, virtualRows] + [cols, detailsVisible, ui.compact, ui.theme.brand.prompt, virtualRows] ) const syncHeightCache = useCallback( diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx index 0bf9ba6d9b4..7bdfb443b7c 100644 --- a/ui-tui/src/components/messageLine.tsx +++ b/ui-tui/src/components/messageLine.tsx @@ -5,6 +5,7 @@ import { LONG_MSG } from '../config/limits.js' import { sectionMode } from '../domain/details.js' import { userDisplay } from '../domain/messages.js' import { ROLE } from '../domain/roles.js' +import { transcriptBodyWidth, transcriptGutterWidth } from '../lib/inputMetrics.js' import { boundedHistoryRenderText, boundedLiveRenderText, @@ -95,6 +96,7 @@ export const MessageLine = memo(function MessageLine({ } const { body, glyph, prefix } = ROLE[msg.role](t) + const gutterWidth = transcriptGutterWidth(msg.role, t.brand.prompt) const showDetails = (toolsMode !== 'hidden' && Boolean(msg.tools?.length)) || (thinkingMode !== 'hidden' && Boolean(thinking)) @@ -163,13 +165,13 @@ export const MessageLine = memo(function MessageLine({ )} <Box> - <NoSelect flexShrink={0} fromLeftEdge width={3}> + <NoSelect flexShrink={0} fromLeftEdge width={gutterWidth}> <Text bold={msg.role === 'user'} color={prefix}> {glyph}{' '} </Text> </NoSelect> - <Box width={Math.max(20, cols - 5)}>{content}</Box> + <Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt)}>{content}</Box> </Box> </Box> ) diff --git a/ui-tui/src/lib/inputMetrics.ts b/ui-tui/src/lib/inputMetrics.ts index 245baae96f1..b5645b43310 100644 --- a/ui-tui/src/lib/inputMetrics.ts +++ b/ui-tui/src/lib/inputMetrics.ts @@ -1,5 +1,7 @@ import { stringWidth } from '@hermes/ink' +import type { Role } from '../types.js' + export const COMPOSER_PROMPT_GAP_WIDTH = 1 let _seg: Intl.Segmenter | null = null @@ -162,6 +164,14 @@ export function composerPromptWidth(promptText: string) { return Math.max(1, stringWidth(promptText)) + COMPOSER_PROMPT_GAP_WIDTH } +export function transcriptGutterWidth(role: Role, userPrompt: string) { + return role === 'user' ? composerPromptWidth(userPrompt) : 3 +} + +export function transcriptBodyWidth(totalCols: number, role: Role, userPrompt: string) { + return Math.max(20, totalCols - transcriptGutterWidth(role, userPrompt) - 2) +} + export function stableComposerColumns(totalCols: number, promptWidth: number) { // Physical render/wrap width. Always reserve outer composer padding and // prompt prefix. Only reserve the transcript scrollbar gutter when the diff --git a/ui-tui/src/lib/virtualHeights.ts b/ui-tui/src/lib/virtualHeights.ts index 0c673fd93a2..e9439d42dd5 100644 --- a/ui-tui/src/lib/virtualHeights.ts +++ b/ui-tui/src/lib/virtualHeights.ts @@ -1,5 +1,6 @@ import type { Msg } from '../types.js' +import { transcriptBodyWidth } from './inputMetrics.js' import { boundedHistoryRenderText } from './text.js' const hashText = (text: string) => { @@ -38,7 +39,12 @@ export const wrappedLines = (text: string, width: number) => { export const estimatedMsgHeight = ( msg: Msg, cols: number, - { compact, details, limitHistory = false }: { compact: boolean; details: boolean; limitHistory?: boolean } + { + compact, + details, + limitHistory = false, + userPrompt = '' + }: { compact: boolean; details: boolean; limitHistory?: boolean; userPrompt?: string } ) => { if (msg.kind === 'intro') { return msg.info?.version ? 9 : 5 @@ -56,7 +62,7 @@ export const estimatedMsgHeight = ( return Math.max(2, msg.todos.length + 2) } - const bodyWidth = Math.max(20, cols - 5) + const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt) const text = msg.role === 'assistant' && limitHistory ? boundedHistoryRenderText(msg.text) : msg.text let h = wrappedLines(text || ' ', bodyWidth)