From 4d0f2bd241694e91d1172194f7a0f73d2d585ba6 Mon Sep 17 00:00:00 2001
From: Cornna <96944678+ymylive@users.noreply.github.com>
Date: Thu, 28 May 2026 18:35:16 +0800
Subject: [PATCH 001/174] fix(gateway): use FIFO queue for busy_input_mode
pending messages
Closes #28503
---
gateway/run.py | 43 +++++++++++-
tests/gateway/test_queue_consumption.py | 92 +++++++++++++++++++++++++
2 files changed, 134 insertions(+), 1 deletion(-)
diff --git a/gateway/run.py b/gateway/run.py
index 3f950685f1c..917ce2a28cc 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -3328,11 +3328,52 @@ class GatewayRunner:
except Exception:
return False
+ # Hard cap on per-session pending follow-ups for busy_input_mode=queue
+ # (and the draining/steer-fallback/subagent-demotion paths that share
+ # this entry point). Without a cap, a stuck agent + a rapid-fire user
+ # could grow the overflow list unboundedly. 32 turns of queued
+ # follow-ups is far beyond any realistic conversational backlog while
+ # still small enough to never threaten memory.
+ _BUSY_QUEUE_MAX_PENDING = 32
+
def _queue_or_replace_pending_event(self, session_key: str, event: MessageEvent) -> None:
adapter = self.adapters.get(event.source.platform)
if not adapter:
return
- merge_pending_message_event(adapter._pending_messages, session_key, event)
+ # #28503 — Previously this called ``merge_pending_message_event``
+ # with the default ``merge_text=False``, which silently OVERWROTE
+ # the single pending slot when consecutive text messages arrived
+ # in ``busy_input_mode: queue``. Route through the FIFO
+ # infrastructure shared with ``/queue`` so each follow-up gets
+ # its own turn in arrival order. Photo bursts still merge into
+ # the head slot via ``merge_pending_message_event`` (album
+ # semantics); everything else appends to the overflow tail.
+ pending_slot = getattr(adapter, "_pending_messages", None)
+ existing = pending_slot.get(session_key) if isinstance(pending_slot, dict) else None
+ if existing is not None and (
+ getattr(existing, "message_type", None) == MessageType.PHOTO
+ or event.message_type == MessageType.PHOTO
+ or bool(getattr(existing, "media_urls", None))
+ or bool(getattr(event, "media_urls", None))
+ ):
+ # Preserve photo-burst / media-merge semantics for the head slot.
+ merge_pending_message_event(
+ adapter._pending_messages,
+ session_key,
+ event,
+ merge_text=event.message_type == MessageType.TEXT,
+ )
+ return
+
+ if self._queue_depth(session_key, adapter=adapter) >= self._BUSY_QUEUE_MAX_PENDING:
+ logger.warning(
+ "Dropping busy-mode follow-up for session %s — pending queue at cap (%d).",
+ session_key,
+ self._BUSY_QUEUE_MAX_PENDING,
+ )
+ return
+
+ self._enqueue_fifo(session_key, event, adapter)
async def _handle_active_session_busy_message(self, event: MessageEvent, session_key: str) -> bool:
# --- Authorization gate (#17775) ---
diff --git a/tests/gateway/test_queue_consumption.py b/tests/gateway/test_queue_consumption.py
index 178d1965af9..792d7b7ea52 100644
--- a/tests/gateway/test_queue_consumption.py
+++ b/tests/gateway/test_queue_consumption.py
@@ -360,3 +360,95 @@ class TestQueueConsumptionAfterCompletion:
e.text for e in runner._queued_events[session_key]
]
assert collected == texts
+
+
+class TestBusyInputModeQueueFifo:
+ """Regression coverage for issue #28503.
+
+ ``busy_input_mode: queue`` rapid follow-ups used to silently overwrite
+ a single pending slot, losing every message except the last. The
+ runner's busy/queue/steer-fallback entry point now routes through
+ the same FIFO infrastructure as ``/queue``, so each follow-up gets
+ its own turn in arrival order.
+ """
+
+ def _make_runner_and_adapter(self):
+ from gateway.run import GatewayRunner
+
+ runner = GatewayRunner.__new__(GatewayRunner)
+ runner._queued_events = {}
+ adapter = _StubAdapter()
+ runner.adapters = {Platform.TELEGRAM: adapter}
+ return runner, adapter
+
+ def _text_event(self, text: str) -> MessageEvent:
+ source = MagicMock(chat_id="c1", platform=Platform.TELEGRAM)
+ return MessageEvent(
+ text=text,
+ message_type=MessageType.TEXT,
+ source=source,
+ message_id=f"m-{text}",
+ )
+
+ def test_rapid_text_followups_are_queued_in_fifo_order(self):
+ """Five rapid texts in queue mode must all survive (none silently dropped)."""
+ runner, adapter = self._make_runner_and_adapter()
+ session_key = "telegram:user:fifo"
+
+ texts = ["one", "two", "three", "four", "five"]
+ for text in texts:
+ runner._queue_or_replace_pending_event(session_key, self._text_event(text))
+
+ # Head slot keeps the first; overflow keeps the rest in order.
+ assert adapter._pending_messages[session_key].text == "one"
+ assert [e.text for e in runner._queued_events[session_key]] == [
+ "two",
+ "three",
+ "four",
+ "five",
+ ]
+ assert runner._queue_depth(session_key, adapter=adapter) == len(texts)
+
+ def test_queue_respects_bounded_cap(self):
+ """Beyond the per-session cap, follow-ups are dropped (with a warning)."""
+ from gateway.run import GatewayRunner
+
+ runner, adapter = self._make_runner_and_adapter()
+ session_key = "telegram:user:cap"
+
+ cap = GatewayRunner._BUSY_QUEUE_MAX_PENDING
+ for i in range(cap + 5):
+ runner._queue_or_replace_pending_event(
+ session_key, self._text_event(f"msg-{i:03d}")
+ )
+
+ # Exactly ``cap`` follow-ups retained (head + cap-1 in overflow).
+ assert runner._queue_depth(session_key, adapter=adapter) == cap
+ assert adapter._pending_messages[session_key].text == "msg-000"
+ # The last accepted overflow item is msg-{cap-1}.
+ assert runner._queued_events[session_key][-1].text == f"msg-{cap - 1:03d}"
+
+ def test_photo_burst_still_merges_in_head_slot(self):
+ """Photo bursts must keep album-merge semantics, not split into N turns."""
+ runner, adapter = self._make_runner_and_adapter()
+ session_key = "telegram:user:burst"
+
+ source = MagicMock(chat_id="c1", platform=Platform.TELEGRAM)
+ for i in range(3):
+ runner._queue_or_replace_pending_event(
+ session_key,
+ MessageEvent(
+ text="",
+ message_type=MessageType.PHOTO,
+ source=source,
+ message_id=f"p-{i}",
+ media_urls=[f"http://example.com/{i}.jpg"],
+ media_types=["image/jpeg"],
+ ),
+ )
+
+ # Single merged head event with all three media URLs.
+ assert session_key not in runner._queued_events or not runner._queued_events[session_key]
+ head = adapter._pending_messages[session_key]
+ assert head.message_type == MessageType.PHOTO
+ assert len(head.media_urls) == 3
From fec5ca71d8cabb7e770cc6e4a96317a64b970180 Mon Sep 17 00:00:00 2001
From: Cornna <96944678+ymylive@users.noreply.github.com>
Date: Wed, 3 Jun 2026 16:24:33 +0800
Subject: [PATCH 002/174] fix: preserve telegram queue fifo during grace window
---
gateway/run.py | 15 +++++---
tests/gateway/test_busy_session_ack.py | 52 ++++++++++++++++++++++++++
2 files changed, 61 insertions(+), 6 deletions(-)
diff --git a/gateway/run.py b/gateway/run.py
index 917ce2a28cc..bd91061d148 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -7902,12 +7902,15 @@ class GatewayRunner:
)
adapter = self.adapters.get(source.platform)
if adapter:
- merge_pending_message_event(
- adapter._pending_messages,
- _quick_key,
- event,
- merge_text=True,
- )
+ if self._busy_input_mode == "queue":
+ self._enqueue_fifo(_quick_key, event, adapter)
+ else:
+ merge_pending_message_event(
+ adapter._pending_messages,
+ _quick_key,
+ event,
+ merge_text=True,
+ )
return None
running_agent = self._running_agents.get(_quick_key)
diff --git a/tests/gateway/test_busy_session_ack.py b/tests/gateway/test_busy_session_ack.py
index 7fb3d3210c0..c5517c5f638 100644
--- a/tests/gateway/test_busy_session_ack.py
+++ b/tests/gateway/test_busy_session_ack.py
@@ -27,6 +27,7 @@ sys.modules.setdefault("telegram.ext", types.ModuleType("telegram.ext"))
from gateway.platforms.base import (
MessageEvent,
MessageType,
+ Platform,
SessionSource,
build_session_key,
)
@@ -66,6 +67,8 @@ def _make_runner():
runner._busy_text_mode = "interrupt"
runner.adapters = {}
runner.config = MagicMock()
+ runner.config.group_sessions_per_user = True
+ runner.config.thread_sessions_per_user = False
runner.session_store = None
runner.hooks = MagicMock()
runner.hooks.emit = AsyncMock()
@@ -119,6 +122,55 @@ class TestBusySessionAck:
assert sk not in runner._pending_messages
running_agent.interrupt.assert_not_called()
+ @pytest.mark.asyncio
+ async def test_telegram_grace_followups_respect_queue_fifo(self, monkeypatch):
+ """Rapid Telegram text follow-ups in queue mode must not merge."""
+ from gateway.run import GatewayRunner
+
+ monkeypatch.setenv("HERMES_TELEGRAM_FOLLOWUP_GRACE_SECONDS", "3.0")
+
+ runner, _sentinel = _make_runner()
+ runner._busy_input_mode = "queue"
+ runner._queued_events = {}
+ adapter = _make_adapter()
+
+ source = SessionSource(
+ platform=Platform.TELEGRAM,
+ chat_id="123",
+ chat_type="dm",
+ user_id="user1",
+ )
+ sk = build_session_key(source)
+ runner.adapters[source.platform] = adapter
+
+ agent = MagicMock()
+ agent.get_activity_summary.return_value = {
+ "seconds_since_activity": 0.0,
+ }
+ runner._running_agents[sk] = agent
+ runner._running_agents_ts[sk] = time.time()
+
+ events = [
+ MessageEvent(
+ text=text,
+ message_type=MessageType.TEXT,
+ source=source,
+ message_id=f"m-{idx}",
+ )
+ for idx, text in enumerate(("first", "second", "third"), start=1)
+ ]
+
+ for event in events:
+ result = await GatewayRunner._handle_message(runner, event)
+ assert result is None
+
+ assert adapter._pending_messages[sk].text == "first"
+ assert [event.text for event in runner._queued_events[sk]] == [
+ "second",
+ "third",
+ ]
+ agent.interrupt.assert_not_called()
+
@pytest.mark.asyncio
async def test_sends_ack_when_agent_running(self):
"""First message during busy session should get a status ack."""
From ccacfdbd6d92c6cd0aceb585ae9b49d9b57fcd22 Mon Sep 17 00:00:00 2001
From: islam666
Date: Sun, 7 Jun 2026 08:02:11 +0000
Subject: [PATCH 003/174] fix(plugins): discover nested category plugins in
'plugins list' (issue #41066)
_discover_all_plugins() previously did a flat iterdir() scan, missing
all category-namespaced plugins (web/*, image_gen/*, browser/*, video_gen/*).
Now recurses up to 2 levels deep, matching PluginManager._scan_directory_level().
Also fixes _plugin_status() to check both manifest name AND path-derived
key against enabled/disabled sets, so category plugins like 'web/tavily'
show correct status when enabled via config.
---
hermes_cli/plugins_cmd.py | 135 ++++---
.../test_plugins_cmd_category_discovery.py | 355 ++++++++++++++++++
2 files changed, 439 insertions(+), 51 deletions(-)
create mode 100644 tests/hermes_cli/test_plugins_cmd_category_discovery.py
diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py
index ddbd0402f2a..7f6a3314ecf 100644
--- a/hermes_cli/plugins_cmd.py
+++ b/hermes_cli/plugins_cmd.py
@@ -728,64 +728,97 @@ def _plugin_exists(name: str) -> bool:
return False
-def _discover_all_plugins() -> list:
- """Return a list of (name, version, description, source, dir_path) for
- every plugin the loader can see — user + bundled + project.
+def _read_manifest_info(d: Path, prefix: str):
+ """Read a plugin.yaml manifest and return (name, version, description, key).
- Matches the ordering/dedup of ``PluginManager.discover_and_load``:
- bundled first, then user, then project; user overrides bundled on
- name collision.
+ Returns None if no manifest file exists.
"""
+ manifest_file = d / "plugin.yaml"
+ if not manifest_file.exists():
+ manifest_file = d / "plugin.yml"
+ if not manifest_file.exists():
+ return None
try:
import yaml
except ImportError:
yaml = None
+ name = d.name
+ version = ""
+ description = ""
+ if yaml:
+ try:
+ with open(manifest_file, encoding="utf-8") as f:
+ manifest = yaml.safe_load(f) or {}
+ name = manifest.get("name", d.name)
+ version = manifest.get("version", "")
+ description = manifest.get("description", "")
+ except Exception:
+ pass
+ key = f"{prefix}/{d.name}" if prefix else name
+ return name, version, description, key
- seen: dict = {} # name -> (name, version, description, source, path)
- # Bundled (/plugins//), excluding memory/ and context_engine/
- from hermes_cli.plugins import get_bundled_plugins_dir
- repo_plugins = get_bundled_plugins_dir()
- for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
- if not base.is_dir():
+def _scan_level(
+ base: Path,
+ source: str,
+ skip_names: set,
+ prefix: str,
+ depth: int,
+ seen: dict,
+) -> None:
+ """Recursive directory scan matching PluginManager._scan_directory_level.
+
+ Populates *seen* with key -> (name, version, description, source, dir, key).
+ """
+ if not base.is_dir():
+ return
+ for d in sorted(base.iterdir()):
+ if not d.is_dir():
continue
- for d in sorted(base.iterdir()):
- if not d.is_dir():
- continue
- if source == "bundled" and d.name in {"memory", "context_engine"}:
- continue
- manifest_file = d / "plugin.yaml"
- if not manifest_file.exists():
- manifest_file = d / "plugin.yml"
- if not manifest_file.exists():
- continue
- name = d.name
- version = ""
- description = ""
- if yaml:
- try:
- with open(manifest_file, encoding="utf-8") as f:
- manifest = yaml.safe_load(f) or {}
- name = manifest.get("name", d.name)
- version = manifest.get("version", "")
- description = manifest.get("description", "")
- except Exception:
- pass
- # User plugins override bundled on name collision.
- if name in seen and source == "bundled":
+ if depth == 0 and skip_names and d.name in skip_names:
+ continue
+ info = _read_manifest_info(d, prefix)
+ if info is not None:
+ name, version, description, key = info
+ if key in seen and source == "bundled":
continue
src_label = source
if source == "user" and (d / ".git").exists():
src_label = "git"
- seen[name] = (name, version, description, src_label, d)
+ seen[key] = (name, version, description, src_label, d, key)
+ continue
+ if depth >= 1:
+ continue
+ sub_prefix = f"{prefix}/{d.name}" if prefix else d.name
+ _scan_level(d, source, set(), sub_prefix, depth + 1, seen)
+
+
+def _discover_all_plugins() -> list:
+ """Return a list of (name, version, description, source, dir_path, key) for
+ every plugin the loader can see — user + bundled + project.
+
+ Matches the ordering/dedup of ``PluginManager.discover_and_load``:
+ bundled first, then user, then project; user overrides bundled on
+ key collision.
+ """
+ seen: dict = {} # key -> (name, version, description, source, path, key)
+
+ # Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
+ from hermes_cli.plugins import get_bundled_plugins_dir
+ repo_plugins = get_bundled_plugins_dir()
+ for base, source, skip in (
+ (repo_plugins, "bundled", {"memory", "context_engine"}),
+ (_plugins_dir(), "user", set()),
+ ):
+ _scan_level(base, source, skip, "", 0, seen)
return list(seen.values())
-def _plugin_status(name: str, enabled: set, disabled: set) -> str:
- """Return the user-facing activation state for a plugin name."""
- if name in disabled:
+def _plugin_status(name: str, enabled: set, disabled: set, key: str = "") -> str:
+ """Return the user-facing activation state for a plugin name or key."""
+ if name in disabled or key in disabled:
return "disabled"
- if name in enabled:
+ if name in enabled or key in enabled:
return "enabled"
return "not enabled"
@@ -798,7 +831,7 @@ def _filter_plugin_entries(entries: list, args: Any, enabled: set, disabled: set
if getattr(args, "enabled", False):
filtered = [
entry for entry in filtered
- if _plugin_status(entry[0], enabled, disabled) == "enabled"
+ if _plugin_status(entry[0], enabled, disabled, key=entry[5]) == "enabled"
]
return filtered
@@ -823,19 +856,19 @@ def cmd_list(args: Any | None = None) -> None:
payload = [
{
"name": name,
- "status": _plugin_status(name, enabled, disabled),
+ "status": _plugin_status(name, enabled, disabled, key=key),
"version": str(version),
"description": description,
"source": source,
}
- for name, version, description, source, _dir in entries
+ for name, version, description, source, _dir, key in entries
]
print(json.dumps(payload, indent=2))
return
if getattr(args, "plain", False):
- for name, version, _description, source, _dir in entries:
- status = _plugin_status(name, enabled, disabled)
+ for name, version, _description, source, _dir, key in entries:
+ status = _plugin_status(name, enabled, disabled, key=key)
print(f"{status:12} {source:8} {str(version):8} {name}")
return
@@ -850,8 +883,8 @@ def cmd_list(args: Any | None = None) -> None:
table.add_column("Description")
table.add_column("Source", style="dim")
- for name, version, description, source, _dir in entries:
- status_name = _plugin_status(name, enabled, disabled)
+ for name, version, description, source, _dir, key in entries:
+ status_name = _plugin_status(name, enabled, disabled, key=key)
if status_name == "disabled":
status = "[red]disabled[/red]"
elif status_name == "enabled":
@@ -1051,14 +1084,14 @@ def cmd_toggle() -> None:
plugin_labels = []
plugin_selected = set()
- for i, (name, _version, description, source, _d) in enumerate(entries):
+ for i, (name, _version, description, source, _d, key) in enumerate(entries):
label = f"{name} \u2014 {description}" if description else name
if source == "bundled":
label = f"{label} [bundled]"
plugin_names.append(name)
plugin_labels.append(label)
# Selected (enabled) when in enabled-set AND not in disabled-set
- if name in enabled_set and name not in disabled_set:
+ if (name in enabled_set or key in enabled_set) and name not in disabled_set and key not in disabled_set:
plugin_selected.add(i)
# -- Provider categories --
@@ -1641,7 +1674,7 @@ def _git_pull_plugin_dir(target: Path) -> tuple[bool, str]:
def dashboard_remove_user_plugin(name: str) -> dict[str, Any]:
"""Delete a plugin tree under ``~/.hermes/plugins/`` only."""
plugins_dir = _plugins_dir()
- for n, _ver, _d, src, _path in _discover_all_plugins():
+ for n, _ver, _d, src, _path, _key in _discover_all_plugins():
if n == name and src == "bundled":
return {"ok": False, "error": "Bundled plugins cannot be removed from the dashboard."}
diff --git a/tests/hermes_cli/test_plugins_cmd_category_discovery.py b/tests/hermes_cli/test_plugins_cmd_category_discovery.py
new file mode 100644
index 00000000000..c86462e5ded
--- /dev/null
+++ b/tests/hermes_cli/test_plugins_cmd_category_discovery.py
@@ -0,0 +1,355 @@
+"""Tests for the nested category plugin discovery fix (issue #41066).
+
+Verifies that _discover_all_plugins() recurses into category directories
+(up to 2 levels deep) and that _plugin_status() checks both manifest name
+and path-derived key against the enabled/disabled sets.
+"""
+
+import json
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_plugin_dir(parent: Path, name: str, manifest: dict) -> Path:
+ """Create a minimal plugin directory with a plugin.yaml."""
+ d = parent / name
+ d.mkdir(parents=True, exist_ok=True)
+ import yaml
+ (d / "plugin.yaml").write_text(yaml.dump(manifest), encoding="utf-8")
+ (d / "__init__.py").write_text("def register(ctx): pass\n", encoding="utf-8")
+ return d
+
+
+def _make_category_plugin(
+ parent: Path, category: str, name: str, manifest: dict
+) -> Path:
+ """Create a category-namespaced plugin: <parent>/<category>/<name>/plugin.yaml."""
+ return _make_plugin_dir(parent / category, name, manifest)
+
+
+# ---------------------------------------------------------------------------
+# _read_manifest_info
+# ---------------------------------------------------------------------------
+
+
+class TestReadManifestInfo:
+ def test_flat_plugin(self, tmp_path):
+ from hermes_cli.plugins_cmd import _read_manifest_info
+
+ d = _make_plugin_dir(tmp_path, "my-plugin", {
+ "name": "my-plugin", "version": "1.0.0", "description": "test"
+ })
+ result = _read_manifest_info(d, "")
+ assert result is not None
+ name, version, description, key = result
+ assert name == "my-plugin"
+ assert version == "1.0.0"
+ assert description == "test"
+ assert key == "my-plugin" # flat: key == name
+
+ def test_category_plugin(self, tmp_path):
+ from hermes_cli.plugins_cmd import _read_manifest_info
+
+ d = _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "2.0.0", "description": "search"
+ })
+ result = _read_manifest_info(d, "web")
+ assert result is not None
+ name, version, description, key = result
+ assert name == "web-tavily" # manifest name
+ assert key == "web/tavily" # path-derived key
+
+ def test_no_manifest(self, tmp_path):
+ from hermes_cli.plugins_cmd import _read_manifest_info
+
+ d = tmp_path / "empty-dir"
+ d.mkdir()
+ assert _read_manifest_info(d, "") is None
+
+ def test_yml_extension(self, tmp_path):
+ from hermes_cli.plugins_cmd import _read_manifest_info
+
+ d = tmp_path / "my-plugin"
+ d.mkdir()
+ import yaml
+ (d / "plugin.yml").write_text(yaml.dump({"name": "my-plugin"}), encoding="utf-8")
+ result = _read_manifest_info(d, "")
+ assert result is not None
+ assert result[0] == "my-plugin"
+
+
+# ---------------------------------------------------------------------------
+# _discover_all_plugins — recursive discovery
+# ---------------------------------------------------------------------------
+
+
+class TestDiscoverAllPlugins:
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_flat_plugins_still_discovered(self, mock_user_dir, mock_bundled_dir, tmp_path):
+ from hermes_cli.plugins_cmd import _discover_all_plugins
+
+ _make_plugin_dir(tmp_path, "disk-cleanup", {
+ "name": "disk-cleanup", "version": "1.0.0"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ entries = _discover_all_plugins()
+ keys = [e[5] for e in entries]
+ assert "disk-cleanup" in keys
+
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_category_plugins_discovered(self, mock_user_dir, mock_bundled_dir, tmp_path):
+ from hermes_cli.plugins_cmd import _discover_all_plugins
+
+ _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "1.0.0"
+ })
+ _make_category_plugin(tmp_path, "image_gen", "openai", {
+ "name": "image-gen-openai", "version": "2.0.0"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ entries = _discover_all_plugins()
+ keys = [e[5] for e in entries]
+ assert "web/tavily" in keys
+ assert "image_gen/openai" in keys
+
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_mixed_flat_and_category(self, mock_user_dir, mock_bundled_dir, tmp_path):
+ from hermes_cli.plugins_cmd import _discover_all_plugins
+
+ _make_plugin_dir(tmp_path, "disk-cleanup", {
+ "name": "disk-cleanup", "version": "1.0.0"
+ })
+ _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "1.0.0"
+ })
+ _make_category_plugin(tmp_path, "web", "exa", {
+ "name": "web-exa", "version": "1.0.0"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ entries = _discover_all_plugins()
+ keys = [e[5] for e in entries]
+ assert "disk-cleanup" in keys
+ assert "web/tavily" in keys
+ assert "web/exa" in keys
+ assert len(entries) == 3
+
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_depth_cap_at_two(self, mock_user_dir, mock_bundled_dir, tmp_path):
+ """Plugins nested 3 levels deep should NOT be discovered."""
+ from hermes_cli.plugins_cmd import _discover_all_plugins
+
+ # 2 levels: should be found
+ _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "1.0.0"
+ })
+ # 3 levels: should NOT be found
+ deep = tmp_path / "a" / "b" / "c"
+ deep.mkdir(parents=True)
+ import yaml
+ (deep / "plugin.yaml").write_text(
+ yaml.dump({"name": "too-deep"}), encoding="utf-8"
+ )
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ entries = _discover_all_plugins()
+ keys = [e[5] for e in entries]
+ assert "web/tavily" in keys
+ assert "a/b/c" not in keys
+
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_tuple_has_six_elements(self, mock_user_dir, mock_bundled_dir, tmp_path):
+ from hermes_cli.plugins_cmd import _discover_all_plugins
+
+ _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "1.0.0", "description": "search"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ entries = _discover_all_plugins()
+ assert len(entries) == 1
+ entry = entries[0]
+ assert len(entry) == 6
+ name, version, description, source, dir_path, key = entry
+ assert name == "web-tavily"
+ assert key == "web/tavily"
+ assert source == "user"
+
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_user_overrides_bundled_on_key_collision(self, mock_user_dir, mock_bundled_dir, tmp_path):
+ """User plugin with same key as bundled should win."""
+ from hermes_cli.plugins_cmd import _discover_all_plugins
+
+ # Simulate a bundled plugin
+ bundled_dir = tmp_path / "bundled"
+ bundled_dir.mkdir()
+ _make_plugin_dir(bundled_dir, "my-plugin", {
+ "name": "my-plugin", "version": "1.0.0"
+ })
+ # User plugin with same key
+ _make_plugin_dir(tmp_path, "my-plugin", {
+ "name": "my-plugin", "version": "2.0.0"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = bundled_dir
+
+ entries = _discover_all_plugins()
+ keys = [e[5] for e in entries]
+ assert keys.count("my-plugin") == 1
+ # User version should win
+ entry = [e for e in entries if e[5] == "my-plugin"][0]
+ assert entry[1] == "2.0.0"
+
+
+# ---------------------------------------------------------------------------
+# _plugin_status — key-aware status
+# ---------------------------------------------------------------------------
+
+
+class TestPluginStatus:
+ def test_name_in_enabled(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("my-plugin", {"my-plugin"}, set()) == "enabled"
+
+ def test_key_in_enabled(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("web-tavily", {"web/tavily"}, set(), key="web/tavily") == "enabled"
+
+ def test_name_in_disabled(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("my-plugin", set(), {"my-plugin"}) == "disabled"
+
+ def test_key_in_disabled(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("web-tavily", set(), {"web/tavily"}, key="web/tavily") == "disabled"
+
+ def test_neither_name_nor_key(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("unknown", {"other"}, set(), key="cat/unknown") == "not enabled"
+
+ def test_disabled_takes_precedence_over_enabled(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("my-plugin", {"my-plugin"}, {"my-plugin"}) == "disabled"
+
+ def test_key_disabled_takes_precedence(self):
+ from hermes_cli.plugins_cmd import _plugin_status
+ assert _plugin_status("web-tavily", {"web/tavily"}, {"web/tavily"}, key="web/tavily") == "disabled"
+
+
+# ---------------------------------------------------------------------------
+# Integration: _filter_plugin_entries with category plugins
+# ---------------------------------------------------------------------------
+
+
+class TestFilterPluginEntries:
+ def test_enabled_filter_uses_key(self):
+ from hermes_cli.plugins_cmd import _filter_plugin_entries
+
+ entries = [
+ ("web-tavily", "1.0.0", "search", "user", Path("/tmp"), "web/tavily"),
+ ("disk-cleanup", "1.0.0", "cleanup", "bundled", Path("/tmp"), "disk-cleanup"),
+ ]
+ args = MagicMock()
+ args.no_bundled = False
+ args.user = False
+ args.enabled = True
+
+ result = _filter_plugin_entries(entries, args, {"web/tavily"}, set())
+ assert len(result) == 1
+ assert result[0][5] == "web/tavily"
+
+ def test_enabled_filter_by_name_still_works(self):
+ from hermes_cli.plugins_cmd import _filter_plugin_entries
+
+ entries = [
+ ("disk-cleanup", "1.0.0", "cleanup", "bundled", Path("/tmp"), "disk-cleanup"),
+ ]
+ args = MagicMock()
+ args.no_bundled = False
+ args.user = False
+ args.enabled = True
+
+ result = _filter_plugin_entries(entries, args, {"disk-cleanup"}, set())
+ assert len(result) == 1
+
+
+# ---------------------------------------------------------------------------
+# Integration: cmd_list JSON output includes category plugins
+# ---------------------------------------------------------------------------
+
+
+class TestCmdListJson:
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_json_output_includes_category_plugins(self, mock_user_dir, mock_bundled_dir, tmp_path, capsys):
+ from hermes_cli.plugins_cmd import cmd_list
+
+ _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "1.0.0", "description": "search"
+ })
+ _make_plugin_dir(tmp_path, "disk-cleanup", {
+ "name": "disk-cleanup", "version": "2.0.0", "description": "cleanup"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ args = MagicMock()
+ args.json = True
+ args.plain = False
+ args.no_bundled = False
+ args.user = False
+ args.enabled = False
+
+ cmd_list(args)
+ captured = capsys.readouterr()
+ payload = json.loads(captured.out)
+ names = [p["name"] for p in payload]
+ assert "web-tavily" in names
+ assert "disk-cleanup" in names
+
+ @patch("hermes_cli.plugins.get_bundled_plugins_dir")
+ @patch("hermes_cli.plugins_cmd._plugins_dir")
+ def test_json_status_uses_key(self, mock_user_dir, mock_bundled_dir, tmp_path, capsys):
+ from hermes_cli.plugins_cmd import cmd_list
+
+ _make_category_plugin(tmp_path, "web", "tavily", {
+ "name": "web-tavily", "version": "1.0.0"
+ })
+ mock_user_dir.return_value = tmp_path
+ mock_bundled_dir.return_value = tmp_path / "nonexistent"
+
+ # Patch config to return web/tavily as enabled
+ with patch("hermes_cli.plugins_cmd._get_enabled_set", return_value={"web/tavily"}):
+ args = MagicMock()
+ args.json = True
+ args.plain = False
+ args.no_bundled = False
+ args.user = False
+ args.enabled = False
+
+ cmd_list(args)
+ captured = capsys.readouterr()
+ payload = json.loads(captured.out)
+ assert len(payload) == 1
+ assert payload[0]["status"] == "enabled"
From 8e71b5136be81741277b17550f53a6d6937e26a7 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Sun, 7 Jun 2026 15:38:36 +0530
Subject: [PATCH 004/174] fix(cli): paint approval/clarify/sudo/secret modal
prompts directly, not via the throttle (#41098)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In classic CLI mode the dangerous-command approval prompt (and the clarify,
sudo, and secret-capture prompts) could fail to render: the user saw
'⏱ Timeout — denying command' after 60s without ever seeing the panel,
making approvals.mode: manual unusable.
Root cause. These prompts run their wait loop on the agent/background thread:
they set modal state that a ConditionalContainer's filter reads, then call
self._invalidate() to repaint so the panel appears. _invalidate() is a
THROTTLED wrapper built for high-frequency background repaints (spinner frames,
streaming) — it (a) returns early while a SIGWINCH resize-recovery is pending,
and (b) otherwise only repaints if 250ms elapsed since the last paint. Under
either condition the modal's entry paint is silently dropped, the
ConditionalContainer never re-evaluates, and the prompt times out unseen.
The throttle never belonged on these paths. Originally the callbacks painted
with a direct self._app.invalidate() and worked; a throttle PR blanket-replaced
every invalidate (including these rare, one-shot, user-blocking modal paints)
with the throttled _invalidate(); a later commit removed an idle 1Hz repaint
that had been masking dropped modal paints, surfacing the bug. Notably the
modal KEY-BINDING handlers (↑/↓/Enter) already paint with a direct
event.app.invalidate(), never the throttle — the background-thread callbacks
were the inconsistent ones.
Fix. Add a small _paint_now() helper that paints directly (guarded for a
missing _app, exception-safe) and route the four modal paths' entry, response,
countdown, and teardown paints through it — matching the key-handler idiom.
This covers approval, clarify, sudo, and the secret-capture teardown
(_submit_secret_response, which previously used the throttled _invalidate() so
its panel could linger after submit). _invalidate() is left untouched and its
docstring now states it is for high-frequency background repaints only;
modal/interactive paints must use _paint_now()/_app.invalidate() directly. This
also fixes the resize-recovery edge case for free (a direct paint never
consults the resize guard) without a throttle-bypass flag that could be
cargo-culted onto hot paths. Countdown refresh cadence tightened 5s->1s so the
timer stays visible while waiting, and a copy-pasted duplicate countdown block
in _clarify_callback is removed.
Tests: TestModalPaintNow drives all three wait-loop callbacks on a background
thread with BOTH gates active (_resize_recovery_pending=True + a recent
_last_invalidate in the throttle window) and asserts the panel paints on entry
AND repaints on teardown; plus a secret-teardown test, a direct
_paint_now-vs-_invalidate gate test, and a no-_app safety test. Each modal test
fails if its paint is reverted to _invalidate(). 17 in-file tests pass; full
tests/cli suite green (900).
Diagnosis credit: the throttle-drop root cause was identified by @sanidhyasin
in #41116; @islam666 independently reached the same direct-invalidate approach
in #41166; original report #41098 by @jodonnel.
---
cli.py | 93 ++++++++++++++++--------
tests/cli/test_cli_approval_ui.py | 117 ++++++++++++++++++++++++++++++
2 files changed, 181 insertions(+), 29 deletions(-)
diff --git a/cli.py b/cli.py
index 000778b750f..9d51059af7b 100644
--- a/cli.py
+++ b/cli.py
@@ -3479,7 +3479,22 @@ class HermesCLI:
self._background_task_counter = 0
def _invalidate(self, min_interval: float = 0.25) -> None:
- """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
+ """Throttled UI repaint for high-frequency background updates.
+
+ Use this for spinner frames, streaming token flushes, and other
+ repaints that can fire many times per second — the throttle prevents
+ terminal blinking on slow/SSH connections, and the resize-recovery
+ guard avoids stamping footer/status-bar chrome into scrollback while a
+ SIGWINCH reflow is in flight.
+
+ Do NOT use this for user-blocking modal prompts (approval / clarify /
+ sudo). Those are rare, one-shot, user-blocking events that must paint
+ immediately; route them through ``self._app.invalidate()`` directly, the
+ same way the modal key-binding handlers already do. Sending a modal's
+ entry paint through this throttle lets an unrelated background repaint
+ within the 250ms window — or an in-flight resize — silently drop it, so
+ the prompt never renders and times out unseen (#41098).
+ """
if getattr(self, "_resize_recovery_pending", False):
return
now = time.monotonic()
@@ -3487,6 +3502,24 @@ class HermesCLI:
self._last_invalidate = now
self._app.invalidate()
+ def _paint_now(self) -> None:
+ """Immediate, unthrottled repaint for user-blocking modal prompts.
+
+ Background-thread callbacks (approval / clarify / sudo) set their modal
+ state then call this to make the panel visible at once. It deliberately
+ bypasses the ``_invalidate`` throttle and resize-recovery guard — a
+ modal the user is actively waiting on must never be dropped — mirroring
+ the direct ``event.app.invalidate()`` the modal key-binding handlers
+ already use. See ``_invalidate`` for why the throttle must not gate
+ these paints (#41098).
+ """
+ app = getattr(self, "_app", None)
+ if app is not None:
+ try:
+ app.invalidate()
+ except Exception:
+ pass
+
def _force_full_redraw(self) -> None:
"""Force a clean full-screen repaint of the prompt_toolkit UI.
@@ -11801,18 +11834,15 @@ class HermesCLI:
# Open-ended questions skip straight to freetext input
self._clarify_freetext = is_open_ended
- # Trigger prompt_toolkit repaint from this (non-main) thread
- self._invalidate()
+ # Trigger an immediate prompt_toolkit repaint from this (non-main)
+ # thread. Modal prompts must paint at once and must not be gated by the
+ # _invalidate throttle / resize guard — see _paint_now / _invalidate (#41098).
+ self._paint_now()
- # Poll for the user's response. The countdown in the hint line
- # updates on each invalidate — but frequent repaints cause visible
- # flicker in some terminals (Kitty, ghostty). We only refresh the
- # countdown every 5 s; selection changes (↑/↓) trigger instant
- # Poll for the user's response. The countdown in the hint line
- # updates on each invalidate — but frequent repaints cause visible
- # flicker in some terminals (Kitty, ghostty). We only refresh the
- # countdown every 5 s; selection changes (↑/↓) trigger instant
- # repaints via the key bindings.
+ # Poll for the user's response. The countdown in the hint line updates
+ # on each repaint; refresh it once a second so the timer stays visible
+ # while we wait. Selection changes (↑/↓) trigger instant repaints via
+ # the key bindings.
_last_countdown_refresh = _time.monotonic()
while True:
try:
@@ -11823,20 +11853,16 @@ class HermesCLI:
remaining = self._clarify_deadline - _time.monotonic()
if remaining <= 0:
break
- # Only repaint every 5 s for the countdown — avoids flicker
now = _time.monotonic()
- if now - _last_countdown_refresh >= 5.0:
+ if now - _last_countdown_refresh >= 1.0:
_last_countdown_refresh = now
- self._invalidate()
- if now - _last_countdown_refresh >= 5.0:
- _last_countdown_refresh = now
- self._invalidate()
+ self._paint_now()
# Timed out — tear down the UI and let the agent decide
self._clarify_state = None
self._clarify_freetext = False
self._clarify_deadline = 0
- self._invalidate()
+ self._paint_now()
_cprint(f"\n{_DIM}(clarify timed out after {timeout}s — agent will decide){_RST}")
return (
"The user did not provide a response within the time limit. "
@@ -11862,7 +11888,9 @@ class HermesCLI:
}
self._sudo_deadline = _time.monotonic() + timeout
- self._invalidate()
+ # Modal prompt — paint immediately, bypassing the throttle/resize guard
+ # so the prompt can't be dropped and time out unseen (#41098).
+ self._paint_now()
while True:
try:
@@ -11870,7 +11898,7 @@ class HermesCLI:
self._sudo_state = None
self._sudo_deadline = 0
self._restore_modal_input_snapshot()
- self._invalidate()
+ self._paint_now()
if result:
_cprint(f"\n{_DIM} ✓ Password received (cached for session){_RST}")
else:
@@ -11880,12 +11908,12 @@ class HermesCLI:
remaining = self._sudo_deadline - _time.monotonic()
if remaining <= 0:
break
- self._invalidate()
+ self._paint_now()
self._sudo_state = None
self._sudo_deadline = 0
self._restore_modal_input_snapshot()
- self._invalidate()
+ self._paint_now()
_cprint(f"\n{_DIM} ⏱ Timeout — continuing without sudo{_RST}")
return ""
@@ -11919,7 +11947,12 @@ class HermesCLI:
}
self._approval_deadline = _time.monotonic() + timeout
- self._invalidate()
+ # Modal prompt — paint immediately, bypassing the throttle/resize
+ # guard. A throttled paint here can be silently dropped (250ms
+ # window collision or in-flight resize), leaving the panel unseen so
+ # the command is denied on timeout without the user ever seeing it
+ # (#41098). The countdown refreshes below paint the same way.
+ self._paint_now()
_last_countdown_refresh = _time.monotonic()
while True:
@@ -11927,20 +11960,20 @@ class HermesCLI:
result = response_queue.get(timeout=1)
self._approval_state = None
self._approval_deadline = 0
- self._invalidate()
+ self._paint_now()
return result
except queue.Empty:
remaining = self._approval_deadline - _time.monotonic()
if remaining <= 0:
break
now = _time.monotonic()
- if now - _last_countdown_refresh >= 5.0:
+ if now - _last_countdown_refresh >= 1.0:
_last_countdown_refresh = now
- self._invalidate()
+ self._paint_now()
self._approval_state = None
self._approval_deadline = 0
- self._invalidate()
+ self._paint_now()
_cprint(f"\n{_DIM} ⏱ Timeout — denying command{_RST}")
return "deny"
@@ -12198,7 +12231,9 @@ class HermesCLI:
self._secret_state["response_queue"].put(value)
self._secret_state = None
self._secret_deadline = 0
- self._invalidate()
+ # Modal teardown — paint directly so the secret panel clears at once and
+ # isn't held by the _invalidate throttle/resize guard (#41098).
+ self._paint_now()
def _cancel_secret_capture(self) -> None:
self._submit_secret_response("")
diff --git a/tests/cli/test_cli_approval_ui.py b/tests/cli/test_cli_approval_ui.py
index f086f27a9b6..df7c06a2d00 100644
--- a/tests/cli/test_cli_approval_ui.py
+++ b/tests/cli/test_cli_approval_ui.py
@@ -339,6 +339,123 @@ class TestCliApprovalUi:
assert not cli._background_tasks
+def _make_real_paint_cli_stub():
+ """A stub whose modal repaint path runs the REAL _paint_now / _invalidate.
+
+ Both gates are set adversarially: _resize_recovery_pending=True and a recent
+ _last_invalidate inside the throttle window. A throttled _invalidate() would
+ be dropped under these conditions — _paint_now must paint regardless.
+ """
+ cli = HermesCLI.__new__(HermesCLI)
+ cli._approval_state = None
+ cli._approval_deadline = 0
+ cli._approval_lock = threading.Lock()
+ cli._sudo_state = None
+ cli._sudo_deadline = 0
+ cli._clarify_state = None
+ cli._clarify_freetext = False
+ cli._clarify_deadline = 0
+ cli._modal_input_snapshot = None
+ # Real methods, not mocks.
+ cli._paint_now = HermesCLI._paint_now.__get__(cli, HermesCLI)
+ cli._invalidate = HermesCLI._invalidate.__get__(cli, HermesCLI)
+ cli._resize_recovery_pending = True # gate 1: resize in flight
+ cli._last_invalidate = time.monotonic() # gate 2: inside throttle window
+ cli._app = SimpleNamespace(invalidate=MagicMock(), current_buffer=_FakeBuffer())
+ return cli
+
+
+class TestModalPaintNow:
+ """Regression for #41098 — modal prompts must paint immediately.
+
+ The dangerous-command approval, clarify, and sudo prompts run their wait
+ loop on a background thread, set modal state a ConditionalContainer reads,
+ then must repaint so the panel becomes visible. They used the throttled
+ _invalidate(), whose paint is silently dropped on a 250ms window collision
+ or while a resize is pending — so the prompt timed out unseen. They now use
+ _paint_now(), which paints directly like the modal key-binding handlers.
+ """
+
+ def test_paint_now_bypasses_throttle_and_resize_guard(self):
+ cli = _make_real_paint_cli_stub()
+ # A bare _invalidate() is suppressed under both gates...
+ cli._invalidate()
+ assert not cli._app.invalidate.called
+ # ...but _paint_now() always paints.
+ cli._paint_now()
+ assert cli._app.invalidate.called
+
+ def test_paint_now_no_app_is_safe(self):
+ cli = HermesCLI.__new__(HermesCLI)
+ cli._app = None
+ cli._paint_now() # must not raise
+
+ def _drive(self, cli, target, state_attr):
+ result = {}
+
+ def _run():
+ result["value"] = target()
+
+ with patch.object(cli_module, "_cprint"):
+ thread = threading.Thread(target=_run, daemon=True)
+ thread.start()
+ deadline = time.time() + 2
+ while getattr(cli, state_attr) is None and time.time() < deadline:
+ time.sleep(0.01)
+ assert getattr(cli, state_attr) is not None
+ assert cli._app.invalidate.called, (
+ f"{state_attr} panel was not painted despite throttle + resize gates"
+ )
+ # Reset so we can prove the response-received teardown also repaints
+ # (the panel must clear at once, not be held by the throttle).
+ cli._app.invalidate.reset_mock()
+ getattr(cli, state_attr)["response_queue"].put(
+ "deny" if state_attr == "_approval_state" else
+ ("a" if state_attr == "_clarify_state" else "pw")
+ )
+ thread.join(timeout=2)
+ # clarify returns immediately on a response (no teardown repaint);
+ # approval and sudo repaint to tear the panel down.
+ if state_attr != "_clarify_state":
+ assert cli._app.invalidate.called, (
+ f"{state_attr} panel was not repainted on teardown"
+ )
+ assert not thread.is_alive()
+ return result["value"]
+
+ def test_approval_prompt_paints_under_both_gates(self):
+ cli = _make_real_paint_cli_stub()
+ value = self._drive(
+ cli, lambda: cli._approval_callback("rm -rf /tmp/scratch", "danger"),
+ "_approval_state",
+ )
+ assert value == "deny"
+
+ def test_clarify_prompt_paints_under_both_gates(self):
+ cli = _make_real_paint_cli_stub()
+ value = self._drive(
+ cli, lambda: cli._clarify_callback("Pick one", ["a", "b"]),
+ "_clarify_state",
+ )
+ assert value == "a"
+
+ def test_sudo_prompt_paints_under_both_gates(self):
+ cli = _make_real_paint_cli_stub()
+ value = self._drive(cli, cli._sudo_password_callback, "_sudo_state")
+ assert value == "pw"
+
+ def test_secret_response_teardown_paints(self):
+ """_submit_secret_response tears the secret panel down via _paint_now,
+ so the panel clears immediately rather than being held by the throttle."""
+ cli = _make_real_paint_cli_stub()
+ cli._secret_state = {"response_queue": queue.Queue()}
+ cli._secret_deadline = 0
+ cli._submit_secret_response("hunter2")
+ assert cli._secret_state is None
+ assert cli._app.invalidate.called
+ assert cli._secret_state is None # cleared
+
+
class TestApprovalCallbackThreadLocalWiring:
"""Regression guard for the thread-local callback freeze (#13617 / #13618).
From cadb74adad3cf7ba2e77258b9094e244d9de4a49 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 07:57:26 -0700
Subject: [PATCH 005/174] fix(desktop): recover chat after sleep/wake by
revalidating a stale remote backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After sleep/wake, a remote (global-remote) primary backend can become
unreachable, but it has no child process whose 'exit' clears the main
process's cached connectionPromise. The renderer then re-dials the same
dead remote forever and the composer stays stuck on "Starting Hermes…";
only a quit+reopen recovered.
Fix: the renderer's existing backoff-paced reconnect loop now asks the
main process to revalidate the cached connection before re-dialing. The
main process liveness-probes the cached REMOTE backend's public
/api/status and, if unreachable, drops the cache (resetHermesConnection
only nulls connectionPromise for a remote — no child to SIGTERM) so the
next getConnection() rebuilds a reachable descriptor. Local backends are
never touched here; they self-heal via the child 'exit' handler. The
renderer's loop already provides retry pacing and rides out transient
blips, so no streak/episode bookkeeping is needed in the main process.
The boot hook dismisses the boot-progress overlay on the post-rebuild
'open' so an in-place rebuild can't leave it stuck at ~94%.
Reimplements #40135 by @AlchemistChaos on a smaller, more interpretable
path (63 added lines vs 555): no extracted helper module, no
failure-streak / episode-window state, the renderer's backoff loop is
the retry mechanism. Original diagnosis and fix by @AlchemistChaos.
Co-authored-by: AlchemistChaos <alchemistchaos@protonmail.com>
---
apps/desktop/electron/main.cjs | 39 +++++++++++++++++++
apps/desktop/electron/preload.cjs | 1 +
.../src/app/gateway/hooks/use-gateway-boot.ts | 16 ++++++++
apps/desktop/src/global.d.ts | 7 ++++
4 files changed, 63 insertions(+)
diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs
index d874d7991d9..32634e3ac41 100644
--- a/apps/desktop/electron/main.cjs
+++ b/apps/desktop/electron/main.cjs
@@ -4737,6 +4737,45 @@ function createWindow() {
}
ipcMain.handle('hermes:connection', async (_event, profile) => ensureBackend(profile))
+// Reconnect-after-wake recovery. A REMOTE primary backend has no child process,
+// so the 'exit'/'error' handlers that would clear a dead connectionPromise never
+// fire — once the remote becomes unreachable across a sleep/wake the renderer
+// re-dials the same dead descriptor forever and the composer stays stuck on
+// "Starting Hermes…". Before the renderer's backoff loop reconnects, it asks us
+// to confirm the cached PRIMARY backend is still reachable; if a remote one is
+// not, we drop the cache so the next getConnection() rebuilds it. Local backends
+// self-heal via their child 'exit' handler, so we never touch them here.
+ipcMain.handle('hermes:connection:revalidate', async () => {
+ if (!connectionPromise) {
+ return { ok: true, rebuilt: false }
+ }
+
+ let conn = null
+ try {
+ conn = await connectionPromise
+ } catch {
+ // The cached boot already rejected (its own catch nulls connectionPromise);
+ // nothing to revalidate — the next getConnection() builds fresh.
+ return { ok: true, rebuilt: false }
+ }
+
+ if (!conn || conn.mode !== 'remote' || !conn.baseUrl) {
+ return { ok: true, rebuilt: false }
+ }
+
+ const base = conn.baseUrl.replace(/\/+$/, '')
+ try {
+ await fetchPublicJson(`${base}/api/status`, { timeoutMs: 2_500 })
+ return { ok: true, rebuilt: false }
+ } catch {
+ // Unreachable remote: drop the stale cache so the renderer's next reconnect
+ // tick rebuilds a fresh, reachable descriptor. resetHermesConnection only
+ // nulls connectionPromise for a remote (no child to SIGTERM).
+ rememberLog('Cached remote Hermes backend failed liveness probe; dropping stale connection.')
+ resetHermesConnection()
+ return { ok: true, rebuilt: true }
+ }
+})
ipcMain.handle('hermes:backend:touch', async (_event, profile) => {
touchPoolBackend(profile)
return { ok: true }
diff --git a/apps/desktop/electron/preload.cjs b/apps/desktop/electron/preload.cjs
index 27bc1b20b53..cf094e751c3 100644
--- a/apps/desktop/electron/preload.cjs
+++ b/apps/desktop/electron/preload.cjs
@@ -2,6 +2,7 @@ const { contextBridge, ipcRenderer, webUtils } = require('electron')
contextBridge.exposeInMainWorld('hermesDesktop', {
getConnection: profile => ipcRenderer.invoke('hermes:connection', profile),
+ revalidateConnection: () => ipcRenderer.invoke('hermes:connection:revalidate'),
touchBackend: profile => ipcRenderer.invoke('hermes:backend:touch', profile),
getGatewayWsUrl: profile => ipcRenderer.invoke('hermes:gateway:ws-url', profile),
getBootProgress: () => ipcRenderer.invoke('hermes:boot-progress:get'),
diff --git a/apps/desktop/src/app/gateway/hooks/use-gateway-boot.ts b/apps/desktop/src/app/gateway/hooks/use-gateway-boot.ts
index db43c41a89f..b9bfbf021e9 100644
--- a/apps/desktop/src/app/gateway/hooks/use-gateway-boot.ts
+++ b/apps/desktop/src/app/gateway/hooks/use-gateway-boot.ts
@@ -120,6 +120,13 @@ export function useGatewayBoot({
reconnecting = true
try {
+ // Drop a stale REMOTE backend cache before re-dialing. After sleep/wake a
+ // remote backend can become unreachable, but it has no child process
+ // whose 'exit' would clear the main process's cached descriptor — without
+ // this the renderer re-dials the same dead endpoint forever and stays on
+ // "Starting Hermes…". The probe is a no-op for a healthy or local backend.
+ await desktop.revalidateConnection?.().catch(() => undefined)
+
const conn = await desktop.getConnection($activeGatewayProfile.get())
if (cancelled) {
@@ -218,6 +225,15 @@ export function useGatewayBoot({
reconnectAttempt = 0
reauthNotified = false
clearReconnectTimer()
+
+ // A revalidate-driven reconnect can rebuild the backend in place when the
+ // cached remote was found dead, which re-drives the boot-progress overlay.
+ // Unlike the initial boot, nothing calls completeDesktopBoot() afterwards,
+ // so dismiss it here once we're open again — otherwise the overlay sticks
+ // at ~94%. A no-op on a normal (non-rebuild) reconnect.
+ if (bootCompleted) {
+ completeDesktopBoot()
+ }
} else if (bootCompleted && (st === 'closed' || st === 'error')) {
// The socket dropped after a healthy boot (typically sleep/wake). Try
// to bring it back instead of leaving the composer stuck disabled.
diff --git a/apps/desktop/src/global.d.ts b/apps/desktop/src/global.d.ts
index aff578ac502..213fe5c08d5 100644
--- a/apps/desktop/src/global.d.ts
+++ b/apps/desktop/src/global.d.ts
@@ -7,6 +7,13 @@ declare global {
// the window's backend; pass a named profile to lazily spawn/reuse that
// profile's backend from the pool.
getConnection: (profile?: string | null) => Promise
+ // Reconnect-after-wake recovery: liveness-probe the cached PRIMARY backend
+ // and drop it if a remote one has gone unreachable, so the next
+ // getConnection() rebuilds a reachable descriptor instead of the renderer
+ // re-dialing a dead remote forever. No-op for local backends (they
+ // self-heal via the child 'exit' handler). `rebuilt` is true when a stale
+ // remote cache was dropped.
+ revalidateConnection: () => Promise<{ ok: boolean; rebuilt: boolean }>
// Keepalive: mark a pool profile backend as recently used so the idle
// reaper spares it while its chat is active.
touchBackend: (profile?: string | null) => Promise<{ ok: boolean }>
From 1c7ae46f0eb1551acf9b4974d2e8daef453080db Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 07:58:09 -0700
Subject: [PATCH 006/174] chore(release): map AlchemistChaos co-author email
for #40135 salvage
---
scripts/release.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/scripts/release.py b/scripts/release.py
index 08fe0b04741..40c4e33e69e 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
+ "alchemistchaos@protonmail.com": "AlchemistChaos", # co-author only
"yusufalweshdemir@gmail.com": "Dusk1e",
"804436395@qq.com": "LaPhilosophie",
"maxmitcham@mac.home": "maxtrigify",
From e029b7597bdfa8d0445e6e59584386363e4c5a55 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:29:32 -0700
Subject: [PATCH 007/174] feat(desktop): stop the chat viewport from following
streaming output (#41414)
The desktop chat GUI pinned the viewport to the bottom on every content
growth while a turn streamed, so the window chased tokens as they arrived.
Remove that follow behavior: once a turn is running the viewport stays
exactly where the user left it.
- Delete the streaming ResizeObserver re-pin loop in useThreadScrollAnchor.
- Delete the post-run bottom lock (kept pinning ~1.2s after completion).
- Keep the one-time jump-to-bottom on user submit / new turn / session
change so a freshly submitted message still lands in view.
- Update streaming.test.tsx to assert the viewport no longer follows
streaming growth or snaps down on final code-highlight remeasure.
---
.../assistant-ui/streaming.test.tsx | 15 ++-
.../assistant-ui/thread-virtualizer.tsx | 120 ++++--------------
2 files changed, 38 insertions(+), 97 deletions(-)
diff --git a/apps/desktop/src/components/assistant-ui/streaming.test.tsx b/apps/desktop/src/components/assistant-ui/streaming.test.tsx
index 2c4095eb741..c15b4696a21 100644
--- a/apps/desktop/src/components/assistant-ui/streaming.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/streaming.test.tsx
@@ -489,7 +489,7 @@ describe('assistant-ui streaming renderer', () => {
expect(viewport.scrollTop).toBe(420)
})
- it('keeps sticky-bottom armed through viewport height changes during streaming', async () => {
+ it('does not follow streaming content growth even while parked at the bottom', async () => {
const { container } = render()
const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
@@ -508,6 +508,7 @@ describe('assistant-ui streaming renderer', () => {
await wait(80)
+ // Park the user at the bottom of the current content.
await act(async () => {
viewport.scrollTop = 800
fireEvent.scroll(viewport)
@@ -520,6 +521,9 @@ describe('assistant-ui streaming renderer', () => {
fireEvent.scroll(viewport)
})
+ // Content grows as tokens stream in. Streaming auto-follow is removed, so
+ // the viewport must NOT chase the new bottom — it stays where the user
+ // last left it.
scrollHeight = 1_200
await act(async () => {
@@ -529,7 +533,7 @@ describe('assistant-ui streaming renderer', () => {
})
await wait(0)
- expect(viewport.scrollTop).toBe(1_200)
+ expect(viewport.scrollTop).toBe(760)
})
it('honors the first upward wheel scroll even when a programmatic bottom-pin scroll event is still pending', async () => {
@@ -566,7 +570,7 @@ describe('assistant-ui streaming renderer', () => {
expect(viewport.scrollTop).toBe(420)
})
- it('keeps following final code-highlight growth when a run completes at bottom', async () => {
+ it('does not snap to the bottom on final code-highlight growth after a run completes', async () => {
const { container } = render()
const content = container.querySelector('[data-slot="aui_thread-content"]') as HTMLDivElement
@@ -588,10 +592,13 @@ describe('assistant-ui streaming renderer', () => {
await wait(650)
+ // Completion re-measures (Shiki highlight) and grows the content. The
+ // post-run bottom lock is removed, so the viewport stays put instead of
+ // snapping to the new bottom.
scrollHeight = 1_700
await wait(0)
- expect(viewport.scrollTop).toBe(1_700)
+ expect(viewport.scrollTop).toBe(800)
})
it('does not restart bottom-follow after completion when the user scrolled up', async () => {
diff --git a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
index 7922c3870db..e0c6df42937 100644
--- a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
@@ -19,7 +19,6 @@ import { setThreadScrolledUp } from '@/store/thread-scroll'
const ESTIMATED_ITEM_HEIGHT = 220
const OVERSCAN = 4
const AT_BOTTOM_THRESHOLD = 4
-const POST_RUN_BOTTOM_LOCK_MS = 1_200
type ThreadMessageComponents = ComponentProps['components']
@@ -369,51 +368,15 @@ function useThreadScrollAnchor({
}
}, [scrollerRef, stickyBottomRef])
- // Follow content growth (streaming, item measurements, loading indicator)
- // while armed. During fast streaming the ResizeObserver can fire many
- // times per frame as Streamdown re-tokenizes; coalesce to one pin per
- // animation frame so we don't run the scroll-event/re-pin chain
- // (~20+ ms self in `Virtualizer.getMaxScrollOffset`) several times per
- // token.
- useEffect(() => {
- if (!enabled || !isRunning) {
- return undefined
- }
-
- const el = scrollerRef.current
-
- if (!el) {
- return undefined
- }
-
- let pinRafScheduled = false
-
- const schedulePin = () => {
- if (pinRafScheduled || !stickyBottomRef.current) {
- return
- }
-
- pinRafScheduled = true
- requestAnimationFrame(() => {
- pinRafScheduled = false
-
- if (stickyBottomRef.current) {
- pinToBottom()
- }
- })
- }
-
- const observer = new ResizeObserver(schedulePin)
-
- // Observe ONLY the content (firstElementChild), not the scroller `el`
- // itself. Resizes of the viewport/scroller (window resize, devtools
- // panel toggle) shouldn't trigger a pin — only content growth should.
- if (el.firstElementChild) {
- observer.observe(el.firstElementChild)
- }
-
- return () => observer.disconnect()
- }, [enabled, isRunning, pinToBottom, scrollerRef, stickyBottomRef])
+ // Intentionally NO streaming auto-follow. Earlier builds ran a
+ // ResizeObserver here that re-pinned the viewport to the bottom on every
+ // content growth while a turn was running, so the chat tracked tokens as
+ // they streamed. That behavior is removed by request: once a turn is in
+ // flight the viewport stays exactly where the user left it. The viewport
+ // is still moved to the bottom ONCE per user submit / new turn / session
+ // change (see the layout effect and the session-change effect below) so a
+ // freshly submitted message lands in view — but it does not chase the
+ // stream afterward.
// Jump to bottom on session change OR when an empty thread first gets
// content. Both share the same intent and the same effect.
@@ -429,22 +392,21 @@ function useThreadScrollAnchor({
}
}, [enabled, groupCount, jumpToBottom, sessionKey])
- // Pre-paint pin: when groupCount increases while armed (optimistic user
- // message insert, streaming assistant turn arriving, etc.), pin BEFORE
- // the browser commits the layout to screen. Using useLayoutEffect rather
- // than useEffect so this runs synchronously after React commits the DOM
- // mutation but before the browser paints. Without this, there's a ~50ms
- // visual window where the new message sits below the fold while we wait
- // for the ResizeObserver / scroll event chain to fire and re-pin.
+ // Pre-paint pin: when groupCount increases while armed (a new turn arriving
+ // from the user submit or assistant turn start), pin BEFORE the browser
+ // commits the layout to screen. Using useLayoutEffect rather than useEffect
+ // so this runs synchronously after React commits the DOM mutation but before
+ // the browser paints. Without this, there's a ~50ms visual window where the
+ // new message sits below the fold.
//
// We pin TWICE in this critical path — once synchronously, then once on
// the next rAF. The second pin catches the case where React mounts the
// new message in the second commit (after our layout effect ran), which
// grows scrollHeight again; without the rAF pin the user briefly sees a
- // ~15 px gap below the new message until the RO catches up. Streaming
- // tokens use the rate-limited RO path only; only the group-count change
- // (which fires once per user submit / new turn arrival) pays for the
- // extra pin.
+ // ~15 px gap below the new message. This fires once per user submit / new
+ // turn arrival — it is NOT streaming-token follow (that path is removed
+ // above), so a turn that streams a long response after this initial jump
+ // will not chase the bottom.
const prevGroupCountForLayoutRef = useRef(groupCount)
useLayoutEffect(() => {
if (!enabled) {
@@ -468,45 +430,17 @@ function useThreadScrollAnchor({
prevGroupCountForLayoutRef.current = groupCount
}, [enabled, groupCount, pinToBottom, stickyBottomRef])
- // Completion swaps streaming placeholders/plain code for final rendered DOM
- // (notably Shiki-highlighted code). Keep following the bottom briefly after
- // `isRunning` flips false so that final measurement pass cannot strand the
- // viewport near the top of a large code block.
+ // Intentionally NO post-run bottom lock. Earlier builds kept pinning to
+ // the bottom for POST_RUN_BOTTOM_LOCK_MS after `isRunning` flipped false to
+ // chase final Shiki re-highlight measurement. With streaming follow gone,
+ // re-pinning at completion would yank the viewport back to the bottom even
+ // though the user is reading earlier content — the opposite of what's
+ // wanted. The one-time submit / new-turn jump already covers landing a
+ // fresh message in view.
const prevIsRunningForLayoutRef = useRef(isRunning)
useLayoutEffect(() => {
- const finishedRun = prevIsRunningForLayoutRef.current && !isRunning
prevIsRunningForLayoutRef.current = isRunning
-
- if (!enabled || !finishedRun || !stickyBottomRef.current) {
- return undefined
- }
-
- const lockUntil = performance.now() + POST_RUN_BOTTOM_LOCK_MS
- let lockRaf: number | null = null
-
- const lockFrame = () => {
- lockRaf = null
-
- if (!stickyBottomRef.current) {
- return
- }
-
- pinToBottom()
-
- if (performance.now() < lockUntil) {
- lockRaf = requestAnimationFrame(lockFrame)
- }
- }
-
- pinToBottom()
- lockRaf = requestAnimationFrame(lockFrame)
-
- return () => {
- if (lockRaf !== null) {
- cancelAnimationFrame(lockRaf)
- }
- }
- }, [enabled, isRunning, pinToBottom, stickyBottomRef])
+ }, [isRunning])
useAuiEvent('thread.runStart', jumpToBottom)
}
From dde9c0d19d1609cb4d70dadc89c76659a1004e08 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:29:55 -0700
Subject: [PATCH 008/174] feat(gateway): render terminal tool calls as native
bash code blocks on markdown platforms (#41215)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Tool-progress now shows a terminal command in a ```bash fenced block —
full command, no surrounding quotes, no label, no 40-char truncation —
instead of the noisy `terminal: "cmd…"` line, on every platform that
renders markdown code blocks (Telegram, Slack, Matrix, WhatsApp, Feishu,
Weixin, Discord). Plain-text platforms keep the compact preview line.
Gated on a new `BasePlatformAdapter.supports_code_blocks` capability
(default False) rather than a hardcoded platform list, so plugin adapters
(Discord lives in plugins/platforms/) opt in by setting the flag. Applies
to both all/new and verbose progress modes, with a safe fallback when the
command arg is missing or blank.
---
gateway/platforms/base.py | 9 ++++++++-
gateway/platforms/feishu.py | 2 ++
gateway/platforms/matrix.py | 2 ++
gateway/platforms/slack.py | 1 +
gateway/platforms/telegram.py | 1 +
gateway/platforms/weixin.py | 2 ++
gateway/platforms/whatsapp.py | 1 +
gateway/run.py | 28 ++++++++++++++++++++++++++--
plugins/platforms/discord/adapter.py | 1 +
9 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 0ddcc1e8cb6..adac5fad2a7 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1792,7 +1792,14 @@ class BasePlatformAdapter(ABC):
- Sending messages/responses
- Handling media
"""
-
+
+ # Whether this platform renders triple-backtick fenced code blocks (i.e.
+ # ``format_message`` translates/preserves markdown fences into a real code
+ # block). Drives presentation choices like rendering a ``terminal`` tool
+ # call's command as a ```bash block instead of a flat preview line.
+ # Default False (plain-text platforms); markdown-rendering adapters set True.
+ supports_code_blocks: bool = False
+
def __init__(self, config: PlatformConfig, platform: Platform):
self.config = config
self.platform = platform
diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py
index b361ebc8cfc..4814107bacd 100644
--- a/gateway/platforms/feishu.py
+++ b/gateway/platforms/feishu.py
@@ -1409,6 +1409,8 @@ def check_feishu_requirements() -> bool:
class FeishuAdapter(BasePlatformAdapter):
"""Feishu/Lark bot adapter."""
+ supports_code_blocks = True # Feishu renders fenced code blocks
+
MAX_MESSAGE_LENGTH = 8000
# Max distinct chat IDs retained in _chat_locks before LRU eviction kicks in.
CHAT_LOCK_MAX_SIZE: int = 1000
diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py
index a649bb91e59..e885afc9337 100644
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -420,6 +420,8 @@ class _CryptoStateStore:
class MatrixAdapter(BasePlatformAdapter):
"""Gateway adapter for Matrix (any homeserver)."""
+ supports_code_blocks = True # Matrix renders fenced code blocks (HTML/markdown)
+
# Threshold for detecting Matrix client-side message splits.
# When a chunk is near the ~4000-char practical limit, a continuation
# is almost certain.
diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py
index 46068ca20ea..6754e21fb75 100644
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@@ -317,6 +317,7 @@ class SlackAdapter(BasePlatformAdapter):
"""
MAX_MESSAGE_LENGTH = 39000 # Slack API allows 40,000 chars; leave margin
+ supports_code_blocks = True # Slack mrkdwn renders fenced code blocks
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.SLACK)
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index d2b425b52b9..ea19bba8016 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -344,6 +344,7 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram message limits
MAX_MESSAGE_LENGTH = 4096
+ supports_code_blocks = True # Telegram MarkdownV2 renders fenced code blocks
# Threshold for detecting Telegram client-side message splits.
# When a chunk is near this limit, a continuation is almost certain.
_SPLIT_THRESHOLD = 4000
diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py
index 73e9e68ea70..adb6d21a0e0 100644
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@@ -1138,6 +1138,8 @@ async def qr_login(
class WeixinAdapter(BasePlatformAdapter):
"""Native Hermes adapter for Weixin personal accounts."""
+ supports_code_blocks = True # Weixin renders fenced code blocks
+
MAX_MESSAGE_LENGTH = 2000
# WeChat does not support editing sent messages — streaming must use the
diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 7ece37dbca5..59392201150 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -242,6 +242,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
# WhatsApp message limits — practical UX limit, not protocol max.
# WhatsApp allows ~65K but long messages are unreadable on mobile.
MAX_MESSAGE_LENGTH = 4096
+ supports_code_blocks = True # WhatsApp renders fenced code blocks (monospace)
DEFAULT_REPLY_PREFIX = "⚕ *Hermes Agent*\n────────────\n"
# Default bridge location relative to the hermes-agent install
diff --git a/gateway/run.py b/gateway/run.py
index 14dc362a4da..08c6a35cda5 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -17339,10 +17339,32 @@ class GatewayRunner:
# Build progress message with primary argument preview
from agent.display import get_tool_emoji
emoji = get_tool_emoji(tool_name, default="⚙️")
+
+ # Markdown-capable platforms render a terminal command as a native
+ # ```bash fenced block (full command, no quotes, no label, no
+ # truncation) instead of the noisy `terminal: "cmd…"` line. Gated
+ # on the adapter's ``supports_code_blocks`` capability so every
+ # markdown-rendering platform (and plugin adapters that opt in) gets
+ # it, while plain-text platforms keep the compact line.
+ _bash_block = None
+ try:
+ _progress_adapter = self.adapters.get(source.platform)
+ except Exception:
+ _progress_adapter = None
+ if (
+ getattr(_progress_adapter, "supports_code_blocks", False)
+ and tool_name == "terminal"
+ and isinstance(args, dict)
+ and isinstance(args.get("command"), str)
+ and args["command"].strip()
+ ):
+ _bash_block = f"```bash\n{args['command'].rstrip()}\n```"
# Verbose mode: show detailed arguments, respects tool_preview_length
if progress_mode == "verbose":
- if args:
+ if _bash_block is not None:
+ msg = _bash_block
+ elif args:
from agent.display import get_tool_preview_max_len
_pl = get_tool_preview_max_len()
args_str = json.dumps(args, ensure_ascii=False, default=str)
@@ -17362,7 +17384,9 @@ class GatewayRunner:
# "all" / "new" modes: short preview, respects tool_preview_length
# config (defaults to 40 chars when unset to keep gateway messages
# compact — unlike CLI spinners, these persist as permanent messages).
- if preview:
+ if _bash_block is not None:
+ msg = _bash_block
+ elif preview:
from agent.display import get_tool_preview_max_len
_pl = get_tool_preview_max_len()
_cap = _pl if _pl > 0 else 40
diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py
index 3d97274ea48..1cf33020e7b 100644
--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@@ -573,6 +573,7 @@ class DiscordAdapter(BasePlatformAdapter):
# Discord message limits
MAX_MESSAGE_LENGTH = 2000
_SPLIT_THRESHOLD = 1900 # near the 2000-char split point
+ supports_code_blocks = True # Discord markdown renders fenced code blocks natively
# Auto-disconnect from voice channel after this many seconds of inactivity
VOICE_TIMEOUT = 300
From 09d66037f8f7bc5bd879ed8128273fb6780a009f Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:41:10 -0700
Subject: [PATCH 009/174] fix(hindsight): send only new-turn delta on append
retains instead of whole session (#40605)
Closes #40503.
Salvaged from #40519; re-verified on main, tightened, tested.
Co-authored-by: skylarbpayne
---
plugins/memory/hindsight/__init__.py | 36 ++++++++++---
.../plugins/memory/test_hindsight_provider.py | 53 +++++++++++++++++--
2 files changed, 80 insertions(+), 9 deletions(-)
diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index 2f94c08da38..53f422b2d7c 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -575,6 +575,10 @@ class HindsightMemoryProvider(MemoryProvider):
self._retain_context = "conversation between Hermes Agent and the User"
self._turn_counter = 0
self._session_turns: list[str] = [] # accumulates ALL turns for the session
+ # How many turns the last append-mode retain already shipped. Used to
+ # send only the new delta on subsequent retains when the API supports
+ # update_mode='append' (legacy/overwrite path still sends everything).
+ self._last_retained_turn_count = 0
# Recall controls
self._auto_recall = True
@@ -1119,6 +1123,7 @@ class HindsightMemoryProvider(MemoryProvider):
self._agent_workspace = str(kwargs.get("agent_workspace") or "").strip()
self._turn_index = 0
self._session_turns = []
+ self._last_retained_turn_count = 0
self._mode = self._config.get("mode", "cloud")
# Read timeout from config or env var, fall back to default
self._timeout = _parse_int_setting(
@@ -1461,9 +1466,24 @@ class HindsightMemoryProvider(MemoryProvider):
self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns))
return
- logger.debug("sync_turn: retaining %d turns, total session content %d chars",
- len(self._session_turns), sum(len(t) for t in self._session_turns))
- content = "[" + ",".join(self._session_turns) + "]"
+ document_id, update_mode = self._resolve_retain_target(self._document_id)
+
+ # On append-capable APIs each retain only needs to ship the turns
+ # accumulated since the last retain — the server appends them to the
+ # existing document. On legacy/overwrite APIs we must resend the whole
+ # session because each retain replaces the document.
+ if update_mode == "append":
+ turns_to_retain = self._session_turns[self._last_retained_turn_count:]
+ if not turns_to_retain:
+ logger.debug("sync_turn: skipped append retain; no new turns since last retain")
+ return
+ else:
+ turns_to_retain = list(self._session_turns)
+
+ logger.debug("sync_turn: retaining %d/%d turns, payload %d chars",
+ len(turns_to_retain), len(self._session_turns),
+ sum(len(t) for t in turns_to_retain))
+ content = "[" + ",".join(turns_to_retain) + "]"
lineage_tags: list[str] = []
if self._session_id:
@@ -1474,11 +1494,10 @@ class HindsightMemoryProvider(MemoryProvider):
# Snapshot the state needed for the retain. The writer may run after
# _session_turns / _turn_index are mutated by a later sync_turn().
metadata_snapshot = self._build_metadata(
- message_count=len(self._session_turns) * 2,
+ message_count=len(turns_to_retain) * 2,
turn_index=self._turn_index,
)
- num_turns = len(self._session_turns)
- document_id, update_mode = self._resolve_retain_target(self._document_id)
+ num_turns = len(turns_to_retain)
bank_id = self._bank_id
retain_async_flag = self._retain_async
retain_context = self._retain_context
@@ -1509,6 +1528,10 @@ class HindsightMemoryProvider(MemoryProvider):
self._ensure_writer()
self._register_atexit()
self._retain_queue.put(_do_retain)
+ # Advance the append watermark only after the delta is queued, so a
+ # later retain doesn't re-ship turns we've already handed to the writer.
+ if update_mode == "append":
+ self._last_retained_turn_count = len(self._session_turns)
def get_tool_schemas(self) -> List[Dict[str, Any]]:
if self._memory_mode == "context":
@@ -1706,6 +1729,7 @@ class HindsightMemoryProvider(MemoryProvider):
self._session_turns = []
self._turn_counter = 0
self._turn_index = 0
+ self._last_retained_turn_count = 0
logger.debug(
"Hindsight on_session_switch: new_session=%s parent=%s reset=%s doc=%s",
self._session_id, self._parent_session_id, reset, self._document_id,
diff --git a/tests/plugins/memory/test_hindsight_provider.py b/tests/plugins/memory/test_hindsight_provider.py
index f49c227611a..a7ca66f73f4 100644
--- a/tests/plugins/memory/test_hindsight_provider.py
+++ b/tests/plugins/memory/test_hindsight_provider.py
@@ -780,8 +780,8 @@ class TestSyncTurn:
assert item["metadata"]["turn_index"] == "3"
assert item["metadata"]["message_count"] == "6"
- def test_sync_turn_accumulates_full_session(self, provider_with_config):
- """Each retain sends the ENTIRE session, not just the latest batch."""
+ def test_sync_turn_accumulates_full_session_without_append_support(self, provider_with_config):
+ """Legacy/overwrite APIs (no update_mode=append) resend the ENTIRE session each retain."""
p = provider_with_config(retain_every_n_turns=2)
p.sync_turn("turn1-user", "turn1-asst")
@@ -795,12 +795,59 @@ class TestSyncTurn:
p._retain_queue.join()
content = p._client.aretain_batch.call_args.kwargs["items"][0]["content"]
- # Should contain ALL turns from the session
+ # Without append support the document is overwritten, so it must
+ # contain ALL turns from the session.
assert "turn1-user" in content
assert "turn2-user" in content
assert "turn3-user" in content
assert "turn4-user" in content
+ def test_sync_turn_appends_only_delta_when_append_supported(self, provider_with_config, monkeypatch):
+ """On append-capable APIs each retain ships only the new turns, not the whole session."""
+ monkeypatch.setattr(
+ "plugins.memory.hindsight._fetch_hindsight_api_version",
+ lambda *a, **kw: "0.5.6",
+ )
+ from plugins.memory.hindsight import _append_capability_cache, _append_capability_lock
+ # Clear before AND after: the capability cache is module-global and keyed
+ # per api_url, so a stale entry would leak into other tests.
+ with _append_capability_lock:
+ _append_capability_cache.clear()
+ try:
+ p = provider_with_config(retain_every_n_turns=2)
+
+ p.sync_turn("turn1-user", "turn1-asst")
+ p.sync_turn("turn2-user", "turn2-asst")
+ p._retain_queue.join()
+
+ first = p._client.aretain_batch.call_args.kwargs
+ first_item = first["items"][0]
+ assert first["document_id"] == "test-session"
+ assert first_item["update_mode"] == "append"
+ assert "turn1-user" in first_item["content"]
+ assert "turn2-user" in first_item["content"]
+
+ p._client.aretain_batch.reset_mock()
+
+ p.sync_turn("turn3-user", "turn3-asst")
+ p.sync_turn("turn4-user", "turn4-asst")
+ p._retain_queue.join()
+
+ second = p._client.aretain_batch.call_args.kwargs
+ second_item = second["items"][0]
+ assert second["document_id"] == "test-session"
+ assert second_item["update_mode"] == "append"
+ # Only the delta — the already-retained turns must NOT be resent.
+ assert "turn1-user" not in second_item["content"]
+ assert "turn2-user" not in second_item["content"]
+ assert "turn3-user" in second_item["content"]
+ assert "turn4-user" in second_item["content"]
+ # message_count reflects only the delta (2 turns -> 4 messages).
+ assert second_item["metadata"]["message_count"] == "4"
+ finally:
+ with _append_capability_lock:
+ _append_capability_cache.clear()
+
def test_sync_turn_passes_document_id(self, provider):
"""sync_turn should pass document_id (session_id + per-startup ts)."""
provider.sync_turn("hello", "hi")
From 2b119baac137b9348a0cf812b03c96ed8cee8296 Mon Sep 17 00:00:00 2001
From: AMIK
Date: Mon, 8 Jun 2026 05:45:27 +0500
Subject: [PATCH 010/174] docs: add Urdu translation of README (#40578)
Co-authored-by: AMIK-coorporations
---
README.md | 1 +
README.ur-pk.md | 261 ++++++++++++++++++++++++++++++++++++++++++++++++
README.zh-CN.md | 1 +
3 files changed, 263 insertions(+)
create mode 100644 README.ur-pk.md
diff --git a/README.md b/README.md
index b8fe2117147..2c587b81ac5 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
+
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
diff --git a/README.ur-pk.md b/README.ur-pk.md
new file mode 100644
index 00000000000..100b7461a02
--- /dev/null
+++ b/README.ur-pk.md
@@ -0,0 +1,261 @@
+
+
+
+
+
+
+# ہرمیس ایجنٹ ☤ (Hermes Agent)
+
+
+
+
+
+
+
+
+
+
+**[نوس ریسرچ (Nous Research)](https://nousresearch.com) کا تیار کردہ خود کو بہتر بنانے والا اے آئی (AI) ایجنٹ۔** یہ واحد ایجنٹ ہے جس میں سیکھنے کا عمل (learning loop) پہلے سے موجود ہے — یہ اپنے تجربات سے نئی مہارتیں (skills) بناتا ہے، استعمال کے دوران ان کو بہتر کرتا ہے، معلومات کو محفوظ رکھنے کے لیے خود کو یاد دہانی کرواتا ہے، اپنی پرانی بات چیت کو تلاش کر سکتا ہے، اور مختلف سیشنز کے دوران آپ کے بارے میں ایک گہری سمجھ پیدا کرتا ہے۔ اسے $5 والے VPS پر چلائیں، GPU کلسٹر پر، یا سرور لیس (serverless) انفراسٹرکچر پر جس کی قیمت استعمال نہ ہونے پر تقریباً صفر ہے۔ یہ آپ کے لیپ ٹاپ تک محدود نہیں ہے — آپ ٹیلی گرام (Telegram) سے اس کے ساتھ بات چیت کر سکتے ہیں جبکہ یہ کلاؤڈ VM پر کام کر رہا ہو۔
+
+آپ اپنی مرضی کا کوئی بھی ماڈل استعمال کر سکتے ہیں — [Nous Portal](https://portal.nousresearch.com)، [OpenRouter](https://openrouter.ai) (200 سے زائد ماڈلز)، [NovitaAI](https://novita.ai) (ماڈل API، ایجنٹ سینڈ باکس، اور GPU کلاؤڈ کے لیے اے آئی مقامی کلاؤڈ)، [NVIDIA NIM](https://build.nvidia.com) (Nemotron)، [Xiaomi MiMo](https://platform.xiaomimimo.com)، [z.ai/GLM](https://z.ai)، [Kimi/Moonshot](https://platform.moonshot.ai)، [MiniMax](https://www.minimax.io)، [Hugging Face](https://huggingface.co)، OpenAI، یا اپنا حسب ضرورت اینڈ پوائنٹ (endpoint) استعمال کریں۔ ماڈل تبدیل کرنے کے لیے صرف `hermes model` استعمال کریں — کسی کوڈ کو تبدیل کرنے کی ضرورت نہیں، کوئی پابندی نہیں۔
+
+
+
حقیقی ٹرمینل انٹرفیس
مکمل TUI جس میں ملٹی لائن ایڈیٹنگ، سلیش-کمانڈ آٹو کمپلیٹ، بات چیت کی ہسٹری، انٹرپٹ اور ری ڈائریکٹ، اور سٹریمنگ ٹول آؤٹ پٹ شامل ہے۔
+
یہ وہاں موجود ہے جہاں آپ ہیں
ٹیلی گرام، ڈسکارڈ (Discord)، سلیک (Slack)، واٹس ایپ (WhatsApp)، سگنل (Signal)، اور CLI — سب ایک ہی گیٹ وے پروسیس سے کام کرتے ہیں۔ وائس میمو (Voice memo) ٹرانسکرپشن، کراس پلیٹ فارم بات چیت کا تسلسل۔
+
سیکھنے کا ایک مکمل عمل
ایجنٹ کی اپنی ترتیب دی گئی میموری، جس میں وہ خود کو وقتاً فوقتاً یاد دہانی کرواتا ہے۔ پیچیدہ کاموں کے بعد خود کار طریقے سے مہارت (skill) کی تخلیق۔ استعمال کے دوران مہارتوں میں بہتری۔ LLM سمرائزیشن کے ساتھ FTS5 سیشن سرچ تاکہ پرانے سیشنز کی یاددہانی کی جا سکے۔ Honcho کے ذریعے صارف کی ماڈلنگ۔ agentskills.io اوپن سٹینڈرڈ کے ساتھ مکمل مطابقت۔
+
شیڈول کی گئی خودکار کارروائیاں
بلٹ ان (Built-in) کرون (cron) شیڈیولر جو کسی بھی پلیٹ فارم پر ڈیلیوری کے لیے استعمال ہو سکتا ہے۔ روزانہ کی رپورٹس، رات کے بیک اپس، ہفتہ وار آڈٹس — یہ سب کچھ قدرتی زبان (natural language) میں اور بغیر کسی نگرانی کے کام کرتا ہے۔
+
کام کی تقسیم اور متوازی عمل
متوازی (parallel) کاموں کے لیے الگ سے ذیلی ایجنٹس (subagents) بنائیں۔ پائتھون (Python) سکرپٹس لکھیں جو RPC کے ذریعے ٹولز کو استعمال کریں، تاکہ کئی مراحل پر مشتمل کاموں کو بغیر کسی سیاق و سباق (context) کے خرچ کے، ایک ہی باری میں انجام دیا جا سکے۔
+
کہیں بھی چلائیں، صرف اپنے لیپ ٹاپ پر نہیں
چھ (Six) ٹرمینل بیک اینڈز — لوکل، Docker، SSH، Singularity، Modal، اور Daytona۔ ڈیٹونا (Daytona) اور موڈل (Modal) سرور لیس (serverless) فعالیت پیش کرتے ہیں — جب آپ کا ایجنٹ فارغ ہوتا ہے تو اس کا ماحول سلیپ (hibernate) ہو جاتا ہے اور ضرورت پڑنے پر خود بخود جاگ جاتا ہے، جس کی وجہ سے سیشنز کے درمیان لاگت تقریباً صفر رہتی ہے۔ اسے $5 والے VPS یا GPU کلسٹر پر چلائیں۔
+
تحقیق کے لیے تیار
بیچ (Batch) ٹریجیکٹری (trajectory) جنریشن، اگلی نسل کے ٹول کالنگ ماڈلز کی تربیت کے لیے ٹریجیکٹری کمپریشن۔
+
+
+---
+
+## فوری انسٹالیشن (Quick Install)
+
+### لینکس (Linux)، میک او ایس (macOS)، ڈبلیو ایس ایل ٹو (WSL2)، ٹرمکس (Termux)
+
+
+
+### ونڈوز (نیٹو، پاور شیل)
+
+> **توجہ فرمائیں:** مقامی ونڈوز (Native Windows) پر ہرمیس بغیر WSL کے چلتا ہے — CLI، گیٹ وے، TUI، اور ٹولز سب مقامی طور پر کام کرتے ہیں۔ اگر آپ WSL2 استعمال کرنا پسند کرتے ہیں، تو اوپر دی گئی لینکس/میک او ایس کی کمانڈ وہاں بھی کام کرے گی۔ کوئی مسئلہ نظر آیا؟ براہ کرم [مسائل (issues) درج کریں](https://github.com/NousResearch/hermes-agent/issues)۔
+
+اسے پاور شیل (PowerShell) میں چلائیں:
+
+
+
+انسٹالر سب کچھ خود سنبھالتا ہے: uv، Python 3.11، Node.js، ripgrep، ffmpeg، **اور ایک پورٹ ایبل (portable) گٹ بیش (Git Bash)** (یعنی MinGit، جو `%LOCALAPPDATA%\hermes\git` میں ان پیک ہوتا ہے — اس کے لیے ایڈمن کی اجازت درکار نہیں، اور یہ سسٹم کے کسی بھی گٹ انسٹال سے بالکل الگ ہے)۔ ہرمیس اس بنڈل شدہ گٹ بیش کو شیل کمانڈز چلانے کے لیے استعمال کرتا ہے۔
+
+اگر آپ کے پاس پہلے سے گٹ (Git) انسٹال ہے، تو انسٹالر اسے شناخت کر لیتا ہے اور اسے ہی استعمال کرتا ہے۔ بصورت دیگر آپ کو صرف ~45MB کے MinGit ڈاؤنلوڈ کی ضرورت ہوگی — یہ آپ کے سسٹم کے گٹ پر کوئی اثر نہیں ڈالے گا۔
+
+> **اینڈرائیڈ (Android) / ٹرمکس (Termux):** ٹیسٹ کیا گیا مینوئل طریقہ [Termux گائیڈ](https://hermes-agent.nousresearch.com/docs/getting-started/termux) میں موجود ہے۔ ٹرمکس پر ہرمیس ایک مخصوص `.[termux]` ایکسٹرا انسٹال کرتا ہے کیونکہ مکمل `.[all]` ایکسٹرا میں ایسی وائس ڈیپینڈینسیز شامل ہیں جو اینڈرائیڈ کے ساتھ مطابقت نہیں رکھتیں۔
+>
+> **ونڈوز (Windows):** مقامی ونڈوز کی مکمل سپورٹ موجود ہے — اوپر دی گئی پاور شیل کی کمانڈ سب کچھ انسٹال کر دیتی ہے۔ اگر آپ WSL2 استعمال کرنا چاہتے ہیں، تو لینکس کی کمانڈ وہاں کام کرتی ہے۔ مقامی ونڈوز میں انسٹالیشن `%LOCALAPPDATA%\hermes` میں ہوتی ہے؛ جبکہ WSL2 میں لینکس کی طرح `~/.hermes` میں ہوتی ہے۔ ہرمیس کا وہ واحد فیچر جسے فی الحال خاص طور پر WSL2 کی ضرورت ہے وہ براؤزر پر مبنی ڈیش بورڈ چیٹ پین ہے (یہ POSIX PTY استعمال کرتا ہے — کلاسک CLI اور گیٹ وے دونوں مقامی طور پر چلتے ہیں)۔
+
+انسٹالیشن کے بعد:
+
+
+
+```bash
+source ~/.bashrc # شیل کو ری لوڈ کریں (یا: source ~/.zshrc)
+hermes # بات چیت شروع کریں!
+```
+
+
+
+---
+
+## آغاز کریں (Getting Started)
+
+
+
+```bash
+hermes # انٹرایکٹو CLI — بات چیت شروع کریں
+hermes model # اپنا LLM پرووائیڈر اور ماڈل منتخب کریں
+hermes tools # کنفیگر کریں کہ کون سے ٹولز ایکٹو ہیں
+hermes config set # انفرادی کنفگ (config) ویلیوز سیٹ کریں
+hermes gateway # میسجنگ گیٹ وے شروع کریں (ٹیلی گرام، ڈسکارڈ، وغیرہ)
+hermes setup # مکمل سیٹ اپ وزرڈ چلائیں (یہ سب کچھ ایک ساتھ کنفیگر کر دے گا)
+hermes claw migrate # OpenClaw سے مائیگریٹ کریں (اگر آپ OpenClaw سے آ رہے ہیں)
+hermes update # لیٹسٹ ورژن پر اپ ڈیٹ کریں
+hermes doctor # کسی بھی مسئلے کی تشخیص کریں
+```
+
+
+
+📖 **[مکمل دستاویزات →](https://hermes-agent.nousresearch.com/docs/)**
+
+---
+
+## API-کیز اکٹھی کرنے سے بچیں — Nous Portal
+
+ہرمیس آپ کے پسندیدہ پرووائیڈر کے ساتھ کام کرتا ہے — یہ چیز تبدیل نہیں ہو رہی۔ لیکن اگر آپ ماڈل، ویب سرچ، امیج جنریشن، TTS، اور کلاؤڈ براؤزر کے لیے پانچ الگ الگ API کیز جمع نہیں کرنا چاہتے، تو **[Nous Portal](https://portal.nousresearch.com)** ان سب کو ایک ہی سبسکرپشن کے تحت کور کرتا ہے:
+
+- **300+ ماڈلز** — ان میں سے کوئی بھی ماڈل `/model <name>` کے ذریعے منتخب کریں
+- **ٹول گیٹ وے (Tool Gateway)** — ویب سرچ (Firecrawl)، امیج جنریشن (FAL)، ٹیکسٹ ٹو سپیچ (OpenAI)، کلاؤڈ براؤزر (Browser Use)، یہ سب آپ کی سبسکرپشن کے ذریعے چلتے ہیں۔ کسی اضافی اکاؤنٹ کی ضرورت نہیں۔
+
+نئی انسٹالیشن کے بعد بس ایک کمانڈ کی ضرورت ہے:
+
+
+
+```bash
+hermes setup --portal
+```
+
+
+
+یہ آپ کو OAuth کے ذریعے لاگ ان کرواتا ہے، Nous کو آپ کا پرووائیڈر مقرر کرتا ہے، اور ٹول گیٹ وے کو آن کر دیتا ہے۔ `hermes portal info` کمانڈ استعمال کر کے آپ کسی بھی وقت چیک کر سکتے ہیں کہ کون کون سی سروسز منسلک ہیں۔ مکمل تفصیلات [Tool Gateway دستاویزات کے صفحے](https://hermes-agent.nousresearch.com/docs/user-guide/features/tool-gateway) پر موجود ہیں۔
+
+آپ اب بھی کسی بھی ٹول کے لیے اپنی مرضی کی API کیز استعمال کر سکتے ہیں — گیٹ وے ہر سروس کے لیے الگ الگ کام کرتا ہے، ایسا نہیں کہ یا تو سب کچھ استعمال کریں یا کچھ بھی نہیں۔
+
+---
+
+## CLI بمقابلہ میسجنگ فوری حوالہ
+
+ہرمیس کے دو بنیادی انٹر فیس ہیں: آپ ٹرمینل UI کو `hermes` کے ساتھ شروع کریں، یا گیٹ وے چلا کر اس کے ساتھ ٹیلی گرام، ڈسکارڈ، سلیک، واٹس ایپ، سگنل، یا ای میل کے ذریعے بات کریں۔ جب آپ کسی بات چیت میں ہوتے ہیں، تو بہت سی سلیش (slash) کمانڈز دونوں انٹرفیسز میں ایک جیسی ہوتی ہیں۔
+
+
+
+| کارروائی (Action) | سی ایل آئی (CLI) | میسجنگ پلیٹ فارمز (Messaging platforms) |
+| --------------------------------------- | --------------------------------------------- | -------------------------------------------------------------------------------- |
+| بات چیت شروع کریں | `hermes` | `hermes gateway setup` اور `hermes gateway start` چلائیں، پھر بوٹ کو میسج بھیجیں |
+| نئی بات چیت شروع کریں | `/new` یا `/reset` | `/new` یا `/reset` |
+| ماڈل تبدیل کریں | `/model [provider:model]` | `/model [provider:model]` |
+| پرسنلٹی (Personality) سیٹ کریں | `/personality [name]` | `/personality [name]` |
+| پچھلی باری کو دوبارہ یا منسوخ (undo) کریں | `/retry`، `/undo` | `/retry`، `/undo` |
+| کانٹیکسٹ (context) کمپریس کریں / استعمال چیک کریں | `/compress`، `/usage`، `/insights [--days N]` | `/compress`، `/usage`، `/insights [days]` |
+| مہارتیں (Skills) براؤز کریں | `/skills` یا `/<skill-name>` | `/<skill-name>` |
+| موجودہ کام کو روکیں | `Ctrl+C` دبائیں یا نیا میسج بھیجیں | `/stop` یا نیا میسج بھیجیں |
+| پلیٹ فارم کے لحاظ سے سٹیٹس | `/platforms` | `/status`، `/sethome` |
+
+
+
+مکمل کمانڈ لسٹ کے لیے، [CLI گائیڈ](https://hermes-agent.nousresearch.com/docs/user-guide/cli) اور [میسجنگ گیٹ وے گائیڈ](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) دیکھیں۔
+
+---
+
+## دستاویزات (Documentation)
+
+تمام دستاویزات **[hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)** پر موجود ہیں:
+
+
+
+| سیکشن (Section) | تفصیل (What's Covered) |
+| --------------------------------------------------------------------------------------------------- | ---------------------------------------------------------- |
+| [فوری آغاز (Quickstart)](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | انسٹالیشن → سیٹ اپ → 2 منٹ میں پہلی بات چیت شروع کریں |
+| [CLI کا استعمال](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | کمانڈز، کی بائنڈنگز (keybindings)، پرسنلٹیز (personalities)، سیشنز |
+| [کنفیگریشن (Configuration)](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | کنفگ فائل، پرووائیڈرز، ماڈلز، اور تمام آپشنز |
+| [میسجنگ گیٹ وے](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | ٹیلی گرام، ڈسکارڈ، سلیک، واٹس ایپ، سگنل، ہوم اسسٹنٹ |
+| [سیکیورٹی (Security)](https://hermes-agent.nousresearch.com/docs/user-guide/security) | کمانڈ کی منظوری، DM پیئرنگ (pairing)، کنٹینر آئسولیشن |
+| [ٹولز اور ٹول سیٹس](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40 سے زائد ٹولز، ٹول سیٹ سسٹم، ٹرمینل بیک اینڈز |
+| [مہارتوں کا سسٹم (Skills System)](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills)| پروسیجرل (Procedural) میموری، سکلز ہب، نئی مہارتیں بنانا |
+| [میموری (Memory)](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory) | مستقل میموری، یوزر پروفائلز، بہترین طریقہ کار |
+| [MCP انضمام (Integration)](https://hermes-agent.nousresearch.com/docs/user-guide/features/mcp) | صلاحیتوں کو بڑھانے کے لیے کسی بھی MCP سرور کو جوڑیں |
+| [کرون (Cron) شیڈیولنگ](https://hermes-agent.nousresearch.com/docs/user-guide/features/cron) | پلیٹ فارم ڈیلیوری کے ساتھ شیڈول کیے گئے کام |
+| [کانٹیکسٹ (Context) فائلز](https://hermes-agent.nousresearch.com/docs/user-guide/features/context-files)| پروجیکٹ کا سیاق و سباق (context) جو ہر بات چیت پر اثر انداز ہوتا ہے |
+| [آرکیٹیکچر (Architecture)](https://hermes-agent.nousresearch.com/docs/developer-guide/architecture) | پروجیکٹ کا ڈھانچہ، ایجنٹ لوپ، اہم کلاسز |
+| [تعاون (Contributing)](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) | ڈیویلپمنٹ سیٹ اپ، PR کا طریقہ کار، کوڈنگ کا انداز |
+| [CLI حوالہ جات (Reference)](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) | تمام کمانڈز اور فلیگز (flags) |
+| [انوائرمنٹ ویری ایبلز](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) | مکمل انوائرمنٹ ویری ایبل حوالہ جات |
+
+
+
+---
+
+## OpenClaw سے منتقلی
+
+اگر آپ OpenClaw سے منتقل ہو رہے ہیں، تو ہرمیس آپ کی سیٹنگز، یادیں (memories)، مہارتیں (skills)، اور API کیز کو خود بخود امپورٹ کر سکتا ہے۔
+
+**پہلی بار سیٹ اپ کے دوران:** سیٹ اپ وزرڈ (`hermes setup`) خود بخود `~/.openclaw` کو پہچان لیتا ہے اور کنفیگریشن شروع ہونے سے پہلے مائیگریٹ (migrate) کرنے کا آپشن دیتا ہے۔
+
+**انسٹالیشن کے بعد کسی بھی وقت:**
+
+
+
+```bash
+hermes claw migrate # انٹرایکٹو مائیگریشن (مکمل پری سیٹ)
+hermes claw migrate --dry-run # جائزہ لیں کہ کیا کیا مائیگریٹ ہوگا
+hermes claw migrate --preset user-data # حساس معلومات (secrets) کے بغیر مائیگریٹ کریں
+hermes claw migrate --overwrite # موجودہ متصادم فائلوں کو اوور رائٹ کریں
+```
+
+
+
+جو چیزیں امپورٹ ہوتی ہیں:
+
+- **SOUL.md** — پرسونا (persona) فائل
+- **میموریز (Memories)** — MEMORY.md اور USER.md کی اندراجات
+- **مہارتیں (Skills)** — صارف کی بنائی گئی مہارتیں → `~/.hermes/skills/openclaw-imports/`
+- **کمانڈ الاؤ لسٹ (allowlist)** — منظوری کے پیٹرنز (approval patterns)
+- **میسجنگ سیٹنگز** — پلیٹ فارم کنفیگریشنز، اجازت یافتہ صارفین، ورکنگ ڈائریکٹری
+- **API کیز** — الاؤ لسٹ شدہ حساس معلومات (ٹیلی گرام، OpenRouter، OpenAI، Anthropic، ElevenLabs)
+- **TTS اثاثے** — ورک اسپیس کی آڈیو فائلیں
+- **ورک اسپیس کی ہدایات** — AGENTS.md (`--workspace-target` کے ساتھ)
+
+تمام آپشنز دیکھنے کے لیے `hermes claw migrate --help` استعمال کریں، یا انٹرایکٹو ایجنٹ کی مدد سے مائیگریٹ کرنے کے لیے `openclaw-migration` سکل کا استعمال کریں (جس میں ڈرائی رن (dry-run) پریویوز شامل ہیں)۔
+
+---
+
+## تعاون کریں (Contributing)
+
+ہم آپ کے تعاون کا خیرمقدم کرتے ہیں! ڈیویلپمنٹ سیٹ اپ، کوڈ کے انداز اور PR کے طریقہ کار کے لیے براہ کرم ہماری [Contributing گائیڈ](https://hermes-agent.nousresearch.com/docs/developer-guide/contributing) دیکھیں۔
+
+معاونین (contributors) کے لیے فوری آغاز — کلون (clone) کریں اور `setup-hermes.sh` چلائیں:
+
+
+
+```bash
+git clone https://github.com/NousResearch/hermes-agent.git
+cd hermes-agent
+./setup-hermes.sh # uv کو انسٹال کرتا ہے، venv بناتا ہے، .[all] کو انسٹال کرتا ہے، اور ~/.local/bin/hermes کا سیم لنک (symlink) بناتا ہے
+./hermes # خود بخود venv کی شناخت کرتا ہے، پہلے `source` کرنے کی ضرورت نہیں
+```
+
+
+
+---
+
+## کمیونٹی (Community)
+
+- 💬 [ڈسکارڈ (Discord)](https://discord.gg/NousResearch)
+- 📚 [سکلز ہب (Skills Hub)](https://agentskills.io)
+- 🐛 [مسائل (Issues)](https://github.com/NousResearch/hermes-agent/issues)
+- 🔌 [computer-use-linux](https://github.com/avifenesh/computer-use-linux) — ہرمیس اور دیگر MCP ہوسٹس کے لیے لینکس (Linux) ڈیسک ٹاپ کنٹرول MCP سرور، جس میں AT-SPI ایکسیسیبلٹی ٹریز، Wayland/X11 ان پٹ، سکرین شاٹس، اور کمپوزیٹر ونڈو ٹارگیٹنگ شامل ہے۔
+- 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — کمیونٹی وی چیٹ (WeChat) برج: ہرمیس ایجنٹ اور OpenClaw کو ایک ہی وی چیٹ اکاؤنٹ پر چلائیں۔
+
+---
+
+## لائسنس (License)
+
+MIT — تفصیلات کے لیے [LICENSE](LICENSE) دیکھیں۔
+
+[نوس ریسرچ (Nous Research)](https://nousresearch.com) کی جانب سے تیار کردہ۔
+
+
diff --git a/README.zh-CN.md b/README.zh-CN.md
index e40b65990f0..59b1268f81b 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -10,6 +10,7 @@
+
**由 [Nous Research](https://nousresearch.com) 构建的自进化 AI 代理。** 它是唯一内置学习闭环的智能代理——从经验中创建技能,在使用中改进技能,主动持久化知识,搜索过往对话,并在跨会话中逐步构建对你的深度理解。可以在 $5 的 VPS 上运行,也可以在 GPU 集群上运行,或者使用几乎零成本的 Serverless 基础设施。它不绑定你的笔记本——你可以在 Telegram 上与它对话,而它在云端 VM 上工作。
From cb83149dc67bbf9f12979ca4e991b8a33e359f76 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:49:38 -0700
Subject: [PATCH 011/174] fix(yuanbao): bound ws.close() so an idle server
can't stall shutdown ~5s (#40607)
Salvaged from #40421; re-verified on main, tightened, tested.
Co-authored-by: maxmilian
---
gateway/platforms/yuanbao.py | 24 ++++++-
tests/test_yuanbao_shutdown.py | 117 +++++++++++++++++++++++++++++++++
2 files changed, 139 insertions(+), 2 deletions(-)
create mode 100644 tests/test_yuanbao_shutdown.py
diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py
index 6dc54dbcd50..7c34f1453cb 100644
--- a/gateway/platforms/yuanbao.py
+++ b/gateway/platforms/yuanbao.py
@@ -120,6 +120,16 @@ AUTH_TIMEOUT_SECONDS = 10.0
MAX_RECONNECT_ATTEMPTS = 100
DEFAULT_SEND_TIMEOUT = 30.0 # WS biz request timeout
+# Upper bound on the WS close handshake during teardown (#40383). The
+# websockets connection's own close_timeout (5s) blocks until the server
+# echoes the close frame; an idle/unresponsive server never replies, stalling
+# gateway shutdown by the full timeout. Bounding the close await here keeps
+# teardown fast — a responsive server completes the handshake in well under a
+# second, so this only caps the pathological hang. Also bounds the reconnect /
+# connect-failure cleanup paths that reuse _cleanup_ws(), where a graceful
+# close is unnecessary anyway (the socket is being discarded to redial).
+WS_CLOSE_TIMEOUT_S = 1.0
+
# Close codes that indicate permanent errors — do NOT reconnect.
NO_RECONNECT_CLOSE_CODES = {4012, 4013, 4014, 4018, 4019, 4021}
@@ -3445,12 +3455,22 @@ class ConnectionManager:
return False
async def _cleanup_ws(self) -> None:
- """Close and clear the WebSocket connection."""
+ """Close and clear the WebSocket connection, bounded by
+ ``WS_CLOSE_TIMEOUT_S`` so an unresponsive server can't stall teardown
+ (see the constant's definition for the full rationale)."""
ws = self._ws
self._ws = None
if ws is not None:
try:
- await ws.close()
+ await asyncio.wait_for(ws.close(), timeout=WS_CLOSE_TIMEOUT_S)
+ except asyncio.TimeoutError:
+ # Server never echoed the close frame within the bound; drop the
+ # connection. websockets force-closes the transport on cancel,
+ # and at shutdown the loop is tearing down anyway.
+ logger.debug(
+ "[%s] WS close handshake exceeded %.1fs — dropping connection",
+ self._adapter.name, WS_CLOSE_TIMEOUT_S,
+ )
except Exception:
pass
diff --git a/tests/test_yuanbao_shutdown.py b/tests/test_yuanbao_shutdown.py
new file mode 100644
index 00000000000..be535f46c70
--- /dev/null
+++ b/tests/test_yuanbao_shutdown.py
@@ -0,0 +1,117 @@
+"""test_yuanbao_shutdown.py - Yuanbao adapter shutdown teardown timing.
+
+Regression coverage for #40383: a non-responsive Yuanbao WS server must not
+stall gateway shutdown. ``websockets`` ``ws.close()`` blocks up to the
+connection's ``close_timeout`` (5s) waiting for the server's close-frame echo;
+on an idle shutdown the server never replies, so ``_cleanup_ws`` used to wait
+the full ~5s. The cleanup path now bounds the close await so a hung server
+cannot stall teardown.
+
+These tests assert the *bounding/timing* contract of ``_cleanup_ws`` using
+lightweight fakes; force-closing the underlying TCP transport on cancellation
+is ``websockets``' responsibility (and harmless at shutdown, where the loop is
+tearing down regardless), so it is intentionally out of scope here.
+"""
+
+import sys
+import os
+import asyncio
+
+_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if _REPO_ROOT not in sys.path:
+ sys.path.insert(0, _REPO_ROOT)
+
+import pytest
+from gateway.config import PlatformConfig
+from gateway.platforms.yuanbao import (
+ YuanbaoAdapter,
+ ConnectionManager,
+ WS_CLOSE_TIMEOUT_S,
+)
+
+
+def make_config(**kwargs):
+ extra = kwargs.pop("extra", {})
+ extra.setdefault("app_id", "test_key")
+ extra.setdefault("app_secret", "test_secret")
+ extra.setdefault("ws_url", "wss://test.example.com/ws")
+ extra.setdefault("api_domain", "https://test.example.com")
+ return PlatformConfig(extra=extra, **kwargs)
+
+
+class _HangingWS:
+ """Fake WS whose close() never gets a server echo — sleeps past the bound."""
+
+ def __init__(self, sleep_s: float):
+ self._sleep_s = sleep_s
+ self.close_called = False
+
+ async def close(self):
+ self.close_called = True
+ await asyncio.sleep(self._sleep_s)
+
+
+class _FastWS:
+ """Fake WS whose close() returns promptly (responsive server)."""
+
+ def __init__(self):
+ self.close_called = False
+
+ async def close(self):
+ self.close_called = True
+
+
+class _RaisingWS:
+ async def close(self):
+ raise RuntimeError("connection already reset")
+
+
+def _connection() -> ConnectionManager:
+ return YuanbaoAdapter(make_config())._connection
+
+
+@pytest.mark.asyncio
+async def test_cleanup_ws_does_not_stall_on_hung_server():
+ """A server that never echoes the close frame must not stall teardown."""
+ cm = _connection()
+ hung = _HangingWS(sleep_s=WS_CLOSE_TIMEOUT_S + 4.0)
+ cm._ws = hung
+
+ loop = asyncio.get_running_loop()
+ start = loop.time()
+ await cm._cleanup_ws()
+ elapsed = loop.time() - start
+
+ assert hung.close_called
+ assert cm._ws is None
+ # Bounded by WS_CLOSE_TIMEOUT_S (+ small scheduling slack), not the 5s
+ # close_timeout the server would otherwise hold us to.
+ assert elapsed < WS_CLOSE_TIMEOUT_S + 1.0
+
+
+@pytest.mark.asyncio
+async def test_cleanup_ws_fast_path_returns_immediately():
+ """A responsive server completes the handshake well under the bound."""
+ cm = _connection()
+ fast = _FastWS()
+ cm._ws = fast
+
+ loop = asyncio.get_running_loop()
+ start = loop.time()
+ await cm._cleanup_ws()
+ elapsed = loop.time() - start
+
+ assert fast.close_called
+ assert cm._ws is None
+ assert elapsed < 1.0
+
+
+@pytest.mark.asyncio
+async def test_cleanup_ws_swallows_close_errors():
+ """A close() that raises must still clear the ws reference."""
+ cm = _connection()
+ cm._ws = _RaisingWS()
+
+ await cm._cleanup_ws()
+
+ assert cm._ws is None
From ae82eed2b194a5708bfecbc153637e434fc15ddb Mon Sep 17 00:00:00 2001
From: Gilad Bauman <gilad@smiti.ai>
Date: Tue, 5 May 2026 11:28:27 +0000
Subject: [PATCH 012/174] fix(gateway): use OGG for Telegram auto TTS
---
gateway/run.py | 7 +-
tests/gateway/test_auto_voice_reply_format.py | 100 ++++++++++++++++++
2 files changed, 104 insertions(+), 3 deletions(-)
create mode 100644 tests/gateway/test_auto_voice_reply_format.py
diff --git a/gateway/run.py b/gateway/run.py
index 08c6a35cda5..f643eadf4a7 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -12422,11 +12422,12 @@ class GatewayRunner:
if not tts_text:
return
- # Use .mp3 extension so edge-tts conversion to opus works correctly.
- # The TTS tool may convert to .ogg — use file_path from result.
+ # Telegram's adapter only sends native voice bubbles for OGG/Opus.
+ # Other platforms keep the existing MP3 default.
+ audio_ext = "ogg" if event.source.platform == Platform.TELEGRAM else "mp3"
audio_path = os.path.join(
tempfile.gettempdir(), "hermes_voice",
- f"tts_reply_{_uuid.uuid4().hex[:12]}.mp3",
+ f"tts_reply_{_uuid.uuid4().hex[:12]}.{audio_ext}",
)
os.makedirs(os.path.dirname(audio_path), exist_ok=True)
diff --git a/tests/gateway/test_auto_voice_reply_format.py b/tests/gateway/test_auto_voice_reply_format.py
new file mode 100644
index 00000000000..eeb39ab60e7
--- /dev/null
+++ b/tests/gateway/test_auto_voice_reply_format.py
@@ -0,0 +1,100 @@
+"""Tests for gateway auto-TTS voice reply audio format selection."""
+
+import json
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent
+from gateway.run import GatewayRunner
+from gateway.session import SessionSource
+
+
+class TestAutoVoiceReplyFormat:
+ @pytest.mark.asyncio
+ async def test_telegram_auto_voice_reply_requests_ogg_for_native_voice_bubble(self):
+ """Telegram auto-TTS should request OGG/Opus so send_voice sends a voice bubble."""
+ runner = _make_runner()
+ adapter = _make_adapter(Platform.TELEGRAM)
+ runner.adapters[Platform.TELEGRAM] = adapter
+ event = _make_event(Platform.TELEGRAM)
+ requested_paths = []
+
+ def fake_tts(*, text, output_path):
+ requested_paths.append(output_path)
+ assert output_path.endswith(".ogg")
+ Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+ Path(output_path).write_bytes(b"fake ogg opus")
+ return json.dumps({
+ "success": True,
+ "file_path": output_path,
+ "provider": "gemini",
+ "voice_compatible": True,
+ })
+
+ with patch("tools.tts_tool.text_to_speech_tool", side_effect=fake_tts):
+ await runner._send_voice_reply(event, "hello from auto tts")
+
+ assert requested_paths
+ assert requested_paths[0].endswith(".ogg")
+ adapter.send_voice.assert_awaited_once()
+ assert adapter.send_voice.await_args.kwargs["audio_path"].endswith(".ogg")
+
+ @pytest.mark.asyncio
+ async def test_non_telegram_auto_voice_reply_keeps_mp3_default(self):
+ """Non-Telegram platforms should keep the current MP3 default."""
+ runner = _make_runner()
+ adapter = _make_adapter(Platform.SLACK)
+ runner.adapters[Platform.SLACK] = adapter
+ event = _make_event(Platform.SLACK)
+ requested_paths = []
+
+ def fake_tts(*, text, output_path):
+ requested_paths.append(output_path)
+ assert output_path.endswith(".mp3")
+ Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+ Path(output_path).write_bytes(b"fake mp3")
+ return json.dumps({
+ "success": True,
+ "file_path": output_path,
+ "provider": "gemini",
+ "voice_compatible": False,
+ })
+
+ with patch("tools.tts_tool.text_to_speech_tool", side_effect=fake_tts):
+ await runner._send_voice_reply(event, "hello from auto tts")
+
+ assert requested_paths
+ assert requested_paths[0].endswith(".mp3")
+ adapter.send_voice.assert_awaited_once()
+ assert adapter.send_voice.await_args.kwargs["audio_path"].endswith(".mp3")
+
+
+def _make_runner() -> GatewayRunner:
+ with patch("gateway.run.GatewayRunner._load_voice_modes", return_value={}):
+ runner = GatewayRunner.__new__(GatewayRunner)
+ runner._voice_mode = {}
+ runner.adapters = {}
+ return runner
+
+
+def _make_adapter(platform: Platform) -> MagicMock:
+ adapter = MagicMock()
+ adapter.platform = platform
+ adapter.send_voice = AsyncMock()
+ return adapter
+
+
+def _make_event(platform: Platform) -> MessageEvent:
+ return MessageEvent(
+ text="trigger",
+ source=SessionSource(
+ platform=platform,
+ chat_id="123",
+ user_id="u1",
+ user_name="User",
+ ),
+ message_id="456",
+ )
From 9c5d1afbe956ab4dc75393e7db86d686318e49b2 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:41:55 -0700
Subject: [PATCH 013/174] chore: add giladbau to AUTHOR_MAP for salvaged PR
#20182
---
scripts/release.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/scripts/release.py b/scripts/release.py
index 40c4e33e69e..35ab90229e5 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -46,6 +46,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"alchemistchaos@protonmail.com": "AlchemistChaos", # co-author only
+ "gilad@smiti.ai": "giladbau",
"yusufalweshdemir@gmail.com": "Dusk1e",
"804436395@qq.com": "LaPhilosophie",
"maxmitcham@mac.home": "maxtrigify",
From 69a293b419393c1c560ab9dab43b4d66e0e31230 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 17:52:45 -0700
Subject: [PATCH 014/174] hardening(todo): bound TodoStore item content length
and count
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The todo list is re-injected into the model's context after every
context-compression event (TodoStore.format_for_injection), so an oversized
todo item or an unbounded number of items defeats the compression it is meant
to ride through. TodoStore.write/_validate previously enforced no size or count
bounds, so a single 50KB item produced a ~50KB re-injection block on every
subsequent turn.
Add two caps:
- MAX_TODO_CONTENT_CHARS (4000): per-item content is truncated with a marker.
Routed through a shared _cap_content() so the merge-update path (which writes
content directly, bypassing _validate) is capped too.
- MAX_TODO_ITEMS (256): total list length is bounded, keeping the
highest-priority head (list order is priority).
Both caps are generous relative to real plans — a todo item is a short task
description and active lists are a handful of items.
NOT a security fix. Raised externally via GHSA-5g4g-6jrg-mw3g, which framed a
caller-supplied conversation_history on the authenticated API server replaying
into _hydrate_todo_store as a DoS. That path is authenticated (the API server
refuses to start without API_SERVER_KEY) and self-scoped (the caller supplies
their own entire history and can only inflate their own response chain — forged
role=tool entries are never persisted to the session DB), so it is out of scope
as a vulnerability under SECURITY.md 3.2. These bounds are footgun containment
that also applies to the trusted agent path, where the model itself authors the
todos. Credit to the reporter for the observation.
Co-authored-by: YLChen-007 <30854794+YLChen-007@users.noreply.github.com>
---
tests/tools/test_todo_tool.py | 58 +++++++++++++++++++++++++++++++++++
tools/todo_tool.py | 33 +++++++++++++++++++-
2 files changed, 90 insertions(+), 1 deletion(-)
diff --git a/tests/tools/test_todo_tool.py b/tests/tools/test_todo_tool.py
index 6215078525c..dbb64e80ee6 100644
--- a/tests/tools/test_todo_tool.py
+++ b/tests/tools/test_todo_tool.py
@@ -117,3 +117,61 @@ class TestTodoToolFunction:
def test_no_store_returns_error(self):
result = json.loads(todo_tool())
assert "error" in result
+
+
+class TestTodoStoreBounds:
+ """Bounds on persisted todo state (GHSA-5g4g-6jrg-mw3g hardening).
+
+ The todo list is re-injected into context after every compression event,
+ so an unbounded item — whether authored by the model or replayed from
+ caller-supplied history on the API server's _hydrate_todo_store path —
+ would defeat the compression it rides through. These pin the caps.
+ Not a security boundary (the API surface is authenticated and the caller
+ supplies their own history); this is footgun containment / parity.
+ """
+
+ def test_oversized_content_is_truncated(self):
+ from tools.todo_tool import MAX_TODO_CONTENT_CHARS
+ store = TodoStore()
+ store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
+ item = store.read()[0]
+ assert len(item["content"]) <= MAX_TODO_CONTENT_CHARS
+ assert item["content"].endswith("… [truncated]")
+
+ def test_injection_block_is_bounded(self):
+ from tools.todo_tool import MAX_TODO_CONTENT_CHARS
+ store = TodoStore()
+ store.write([{"id": "1", "content": "A" * 50001, "status": "pending"}])
+ inj = store.format_for_injection()
+ # Before the fix this was ~50085 chars; now it tracks the cap.
+ assert len(inj) < MAX_TODO_CONTENT_CHARS + 200
+
+ def test_merge_update_content_is_capped(self):
+ """The merge path updates content directly, bypassing _validate —
+ verify it is capped too."""
+ from tools.todo_tool import MAX_TODO_CONTENT_CHARS
+ store = TodoStore()
+ store.write([{"id": "1", "content": "short", "status": "pending"}])
+ store.write([{"id": "1", "content": "B" * 50001}], merge=True)
+ assert len(store.read()[0]["content"]) <= MAX_TODO_CONTENT_CHARS
+
+ def test_item_count_is_bounded(self):
+ from tools.todo_tool import MAX_TODO_ITEMS
+ store = TodoStore()
+ store.write([
+ {"id": str(i), "content": f"task {i}", "status": "pending"}
+ for i in range(5000)
+ ])
+ assert len(store.read()) == MAX_TODO_ITEMS
+
+ def test_normal_list_is_unchanged(self):
+ """No regression: ordinary plans pass through untouched (no marker,
+ same content, same order)."""
+ store = TodoStore()
+ store.write([
+ {"id": "1", "content": "write the report", "status": "in_progress"},
+ {"id": "2", "content": "review PR", "status": "pending"},
+ ])
+ items = store.read()
+ assert [i["content"] for i in items] == ["write the report", "review PR"]
+ assert "[truncated]" not in items[0]["content"]
diff --git a/tools/todo_tool.py b/tools/todo_tool.py
index 99d9ffe8515..960dab66603 100644
--- a/tools/todo_tool.py
+++ b/tools/todo_tool.py
@@ -21,6 +21,17 @@ from typing import Dict, Any, List, Optional
# Valid status values for todo items
VALID_STATUSES = {"pending", "in_progress", "completed", "cancelled"}
+# Bounds on persisted todo state. The todo list is a planning aid the model
+# re-reads after every context-compression event (see format_for_injection),
+# so unbounded item content or count defeats the compression it rides through.
+# These caps keep a single oversized item (whether authored by the model or
+# replayed from caller-supplied history on the API server) from inflating the
+# re-injection block. Generous relative to real plans — a todo item is a short
+# task description, and active lists are a handful of items, not hundreds.
+MAX_TODO_CONTENT_CHARS = 4000
+MAX_TODO_ITEMS = 256
+_TRUNCATION_MARKER = "… [truncated]"
+
class TodoStore:
"""
@@ -58,7 +69,7 @@ class TodoStore:
if item_id in existing:
# Update only the fields the LLM actually provided
if "content" in t and t["content"]:
- existing[item_id]["content"] = str(t["content"]).strip()
+ existing[item_id]["content"] = self._cap_content(str(t["content"]).strip())
if "status" in t and t["status"]:
status = str(t["status"]).strip().lower()
if status in VALID_STATUSES:
@@ -77,6 +88,11 @@ class TodoStore:
rebuilt.append(current)
seen.add(current["id"])
self._items = rebuilt
+ # Bound total item count so a replayed/oversized list can't grow the
+ # re-injection block without limit. Keep the highest-priority head
+ # (list order is priority).
+ if len(self._items) > MAX_TODO_ITEMS:
+ self._items = self._items[:MAX_TODO_ITEMS]
return self.read()
def read(self) -> List[Dict[str, str]]:
@@ -121,6 +137,19 @@ class TodoStore:
return "\n".join(lines)
+ @staticmethod
+ def _cap_content(content: str) -> str:
+ """Truncate oversized todo content to MAX_TODO_CONTENT_CHARS.
+
+ A single huge item would otherwise inflate the post-compression
+ re-injection block (format_for_injection) without bound. Keep the
+ head — the actionable part of a task description — plus a marker.
+ """
+ if len(content) > MAX_TODO_CONTENT_CHARS:
+ keep = MAX_TODO_CONTENT_CHARS - len(_TRUNCATION_MARKER)
+ return content[:keep] + _TRUNCATION_MARKER
+ return content
+
@staticmethod
def _validate(item: Dict[str, Any]) -> Dict[str, str]:
"""
@@ -136,6 +165,8 @@ class TodoStore:
content = str(item.get("content", "")).strip()
if not content:
content = "(no description)"
+ else:
+ content = TodoStore._cap_content(content)
status = str(item.get("status", "pending")).strip().lower()
if status not in VALID_STATUSES:
From c50fb560ef046797fbeea5e01e33c98c94cf9288 Mon Sep 17 00:00:00 2001
From: xxxigm <54813621+xxxigm@users.noreply.github.com>
Date: Mon, 8 Jun 2026 08:09:55 +0700
Subject: [PATCH 015/174] Merge pull request #40433 from
xxxigm/fix/desktop-chat-autoscroll
fix(desktop): stop chat transcript from jumping/flickering while reading (#37549)
---
.../session/hooks/use-session-state-cache.ts | 37 ++++++++++++++++++-
.../assistant-ui/thread-virtualizer.tsx | 21 ++++++++++-
2 files changed, 56 insertions(+), 2 deletions(-)
diff --git a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts
index c0a78da300e..bc5d8f2bb32 100644
--- a/apps/desktop/src/app/session/hooks/use-session-state-cache.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-state-cache.ts
@@ -9,6 +9,28 @@ import { $busy, $messages, noteSessionActivity, setSessionAttention, setSessionW
import type { ClientSessionState } from '../../types'
+// Shallow per-message identity check. When a flush carries no transcript
+// changes, `preserveLocalAssistantErrors` returns the same message objects in
+// the same order, so reference equality per slot is enough to detect "nothing
+// to publish" and avoid a needless `$messages` churn.
+function sameMessageList(a: ChatMessage[], b: ChatMessage[]): boolean {
+ if (a === b) {
+ return true
+ }
+
+ if (a.length !== b.length) {
+ return false
+ }
+
+ for (let index = 0; index < a.length; index += 1) {
+ if (a[index] !== b[index]) {
+ return false
+ }
+ }
+
+ return true
+}
+
interface SessionStateCacheOptions {
activeSessionId: string | null
busyRef: MutableRefObject
@@ -88,7 +110,20 @@ export function useSessionStateCache({
return
}
- setMessages(preserveLocalAssistantErrors(pending.state.messages, $messages.get()))
+ // `preserveLocalAssistantErrors` always returns a fresh array, so publishing
+ // it unconditionally puts a new `$messages` reference on the store every
+ // flush — including the periodic `session.info` heartbeats that don't touch
+ // the transcript. That churns ChatView → runtimeMessageRepository → the
+ // assistant-ui runtime → the virtualizer, which re-measures and visibly
+ // jerks the scroll position while the user is reading. Skip the publish when
+ // the merged result holds the same message objects, in the same order, as the store.
+ const currentMessages = $messages.get()
+ const nextMessages = preserveLocalAssistantErrors(pending.state.messages, currentMessages)
+
+ if (!sameMessageList(nextMessages, currentMessages)) {
+ setMessages(nextMessages)
+ }
+
setBusy(pending.state.busy)
setMutableRef(busyRef, pending.state.busy)
setAwaitingResponse(pending.state.awaitingResponse)
diff --git a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
index e0c6df42937..506319e89f5 100644
--- a/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread-virtualizer.tsx
@@ -264,8 +264,27 @@ function useThreadScrollAnchor({
return
}
+ // Already parked at the bottom: writing `scrollTop` is a no-op and the
+ // browser fires NO scroll event, so arming the programmatic gate here would
+ // leave it permanently set. Repeated pins (streaming heartbeats, the
+ // post-run lock loop) then accumulate the gate, and the next genuine user
+ // scroll-up is misread as one of our programmatic scrolls — re-arming
+ // sticky-bottom and yanking the viewport back down. Refresh trackers, bail.
+ const distFromBottom = el.scrollHeight - (el.scrollTop + el.clientHeight)
+
+ if (distFromBottom <= AT_BOTTOM_THRESHOLD) {
+ lastTopRef.current = el.scrollTop
+ lastHeightRef.current = el.scrollHeight
+ lastClientHeightRef.current = el.clientHeight
+
+ return
+ }
+
// Hold the disarm gate across the scroll event the next line will fire.
- programmaticScrollPendingRef.current += 1
+ // Set to 1 rather than incrementing: coalesced writes within a frame fire a
+ // single scroll event, so a counter > 1 can never drain and would swallow a
+ // later real user scroll.
+ programmaticScrollPendingRef.current = 1
scrollElementToBottom(el)
lastTopRef.current = el.scrollTop
lastHeightRef.current = el.scrollHeight
From 628780b4f32249709e8753b5de90f9a6711e11bc Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:10:32 -0700
Subject: [PATCH 016/174] fix(desktop): pin empty PostCSS config so Vite stops
walking up the home tree (#40609)
Salvaged from #40526; re-verified on main, tightened, tested.
Co-authored-by: xxxigm
---
apps/desktop/vite.config.ts | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/apps/desktop/vite.config.ts b/apps/desktop/vite.config.ts
index 0512c6c759e..4401868eb8b 100644
--- a/apps/desktop/vite.config.ts
+++ b/apps/desktop/vite.config.ts
@@ -6,6 +6,19 @@ import path from 'path'
export default defineConfig({
base: './',
plugins: [react(), tailwindcss()],
+ css: {
+ // Pin an explicit (empty) PostCSS config. Tailwind is handled entirely by
+ // `@tailwindcss/vite`, so the renderer needs no PostCSS plugins — and
+ // without this, Vite's `postcss-load-config` walks UP the filesystem
+ // looking for a stray `postcss.config.*` / `tailwind.config.*`. The desktop
+ // build runs from inside the user's home tree (e.g.
+ // `C:\Users\<user>\AppData\Local\hermes\hermes-agent\apps\desktop`), so an
+ // unrelated Tailwind v3 config higher up the tree gets picked up and
+ // reprocesses our v4 stylesheet, failing the build with
+ // "`@layer base` is used but no matching `@tailwind base` directive is
+ // present." Pinning the config makes the build hermetic.
+ postcss: { plugins: [] }
+ },
build: {
// Keep desktop packaging stable: Shiki ships many dynamic chunks by
// default, and electron-builder can OOM scanning thousands of files.
From 6bdc4c02314acf76e5e4949d3d385afe555e48c9 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:21:03 -0700
Subject: [PATCH 017/174] test: skip curses tests on Windows where _curses is
unavailable (#40611)
Salvaged from #40447; re-verified on main, tightened, tested.
Co-authored-by: Ganesh0690
---
tests/hermes_cli/test_curses_arrow_keys.py | 7 +++++++
tests/hermes_cli/test_curses_color_compat.py | 7 +++++++
2 files changed, 14 insertions(+)
diff --git a/tests/hermes_cli/test_curses_arrow_keys.py b/tests/hermes_cli/test_curses_arrow_keys.py
index c1bafbd8c3d..8fe60b7410c 100644
--- a/tests/hermes_cli/test_curses_arrow_keys.py
+++ b/tests/hermes_cli/test_curses_arrow_keys.py
@@ -7,6 +7,13 @@ used to treat the leading ``27`` as ESC/cancel, which dumped the setup wizard's
provider/model picker into its numbered "Select [1-N]" fallback the instant a
user pressed up or down.
"""
+import sys
+
+import pytest
+
+# curses (and its _curses C extension) is Unix-only; skip the whole module on Windows.
+if sys.platform == "win32":
+ pytest.skip("curses is not available on Windows", allow_module_level=True)
import curses
from hermes_cli.curses_ui import (
diff --git a/tests/hermes_cli/test_curses_color_compat.py b/tests/hermes_cli/test_curses_color_compat.py
index 2416ded1230..5b9ed954ea7 100644
--- a/tests/hermes_cli/test_curses_color_compat.py
+++ b/tests/hermes_cli/test_curses_color_compat.py
@@ -8,6 +8,13 @@ The bug was ``curses.init_pair(4, 8, -1)`` using raw color 8 ("bright
black" / dim gray) which does not exist on 8-color terminals. The fix
clamps with ``min(8, curses.COLORS - 1)``.
"""
+import sys
+
+import pytest
+
+# curses (and its _curses C extension) is Unix-only; skip the whole module on Windows.
+if sys.platform == "win32":
+ pytest.skip("curses is not available on Windows", allow_module_level=True)
import curses
import re
From 4ce9caed0415fba0f489ffe1645d97bd571cf376 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:23:42 -0700
Subject: [PATCH 018/174] fix(tui): type execFileNoThrow stdio/ChildProcess and
make memoryMonitor critical test heap-independent (#40612)
Salvaged from #40415; re-verified on main, tightened, tested.
Co-authored-by: psionic73
---
.../packages/hermes-ink/src/utils/execFileNoThrow.ts | 10 +++++-----
ui-tui/src/__tests__/memoryMonitor.test.ts | 2 +-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/ui-tui/packages/hermes-ink/src/utils/execFileNoThrow.ts b/ui-tui/packages/hermes-ink/src/utils/execFileNoThrow.ts
index 13780c8027c..a4e32ed14b3 100644
--- a/ui-tui/packages/hermes-ink/src/utils/execFileNoThrow.ts
+++ b/ui-tui/packages/hermes-ink/src/utils/execFileNoThrow.ts
@@ -1,4 +1,4 @@
-import { spawn } from 'child_process'
+import { spawn, type ChildProcess, type StdioOptions } from 'child_process'
type ExecFileOptions = {
input?: string
timeout?: number
@@ -32,11 +32,11 @@ export function execFileNoThrow(
// doesn't inherit those pipe FDs — prevents handle leaks that can
// keep the parent process alive. No output data is collected in
// this mode; both stdout and stderr will be empty strings.
- const stdioConfig = options.resolveOnExit
- ? ['pipe', 'ignore', 'ignore'] as const
- : 'pipe' as const
+ const stdioConfig: StdioOptions = options.resolveOnExit
+ ? ['pipe', 'ignore', 'ignore']
+ : 'pipe'
- const child = spawn(file, args, {
+ const child: ChildProcess = spawn(file, args, {
cwd: options.useCwd ? process.cwd() : undefined,
env: options.env,
stdio: stdioConfig
diff --git a/ui-tui/src/__tests__/memoryMonitor.test.ts b/ui-tui/src/__tests__/memoryMonitor.test.ts
index f79d7aa9d4c..0a8d853398f 100644
--- a/ui-tui/src/__tests__/memoryMonitor.test.ts
+++ b/ui-tui/src/__tests__/memoryMonitor.test.ts
@@ -42,7 +42,7 @@ describe('startMemoryMonitor thresholds (#34095)', () => {
// ceiling. With relative thresholds (~88%), 2.5GB is well within normal.
const onCritical = vi.fn()
withHeap(2.5 * GB)
- stop = startMemoryMonitor({ intervalMs: 1, onCritical })
+ stop = startMemoryMonitor({ criticalBytes: 7 * GB, highBytes: 5 * GB, intervalMs: 1, onCritical })
await vi.advanceTimersByTimeAsync(5)
From 2aa316ec9c0406d4e8a057f04297215353ba38d0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:28:23 -0700
Subject: [PATCH 019/174] docs(windows): fix Get-Command PATH guidance to
venv\Scripts\hermes.exe (#40613)
Closes #40464.
Salvaged from #40488; re-verified on main, tightened, tested.
Co-authored-by: gauravsaxena1997
---
tests/hermes_cli/test_windows_native_docs.py | 10 ++
website/docs/user-guide/windows-native.md | 110 ++++++++++---------
2 files changed, 67 insertions(+), 53 deletions(-)
create mode 100644 tests/hermes_cli/test_windows_native_docs.py
diff --git a/tests/hermes_cli/test_windows_native_docs.py b/tests/hermes_cli/test_windows_native_docs.py
new file mode 100644
index 00000000000..10d52394b99
--- /dev/null
+++ b/tests/hermes_cli/test_windows_native_docs.py
@@ -0,0 +1,10 @@
+from pathlib import Path
+
+
+def test_windows_native_install_path_docs_match_installer() -> None:
+ doc = Path("website/docs/user-guide/windows-native.md").read_text()
+ install = Path("scripts/install.ps1").read_text()
+
+ assert "%LOCALAPPDATA%\\hermes\\hermes-agent\\venv\\Scripts" in doc
+ assert "Get-Command hermes # should print C:\\Users\\\\AppData\\Local\\hermes\\hermes-agent\\venv\\Scripts\\hermes.exe" in doc
+ assert '$hermesBin = "$InstallDir\\venv\\Scripts"' in install
diff --git a/website/docs/user-guide/windows-native.md b/website/docs/user-guide/windows-native.md
index d15711fa740..ad9b233c412 100644
--- a/website/docs/user-guide/windows-native.md
+++ b/website/docs/user-guide/windows-native.md
@@ -17,12 +17,10 @@ If you prefer a real POSIX environment (for the dashboard's embedded terminal, `
## Quick install
-[Download the Hermes Desktop installer](https://hermes-agent.nousresearch.com/desktop) from our website and run it.
-
-Or, for a command-line only install, open **PowerShell** (or Windows Terminal) and run:
+Open **PowerShell** (or Windows Terminal) and run:
```powershell
-iex (irm https://hermes-agent.nousresearch.com/install.ps1)
+iex (irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1)
```
No admin rights required. The installer goes to `%LOCALAPPDATA%\hermes\` and adds `hermes` to your **User PATH** — open a new terminal after it finishes.
@@ -30,32 +28,38 @@ No admin rights required. The installer goes to `%LOCALAPPDATA%\hermes\` and add
**Installer options** (requires the scriptblock form to pass parameters):
```powershell
-& ([scriptblock]::Create((irm https://hermes-agent.nousresearch.com/install.ps1))) -NoVenv -SkipSetup -Branch main
+& ([scriptblock]::Create((irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.ps1))) -NoVenv -SkipSetup -Branch main
```
-| Parameter | Default | Purpose |
-| ------------- | ------------------------------------ | ---------------------------------------------------------- |
-| `-Branch` | `main` | Clone a specific branch (useful for testing PRs) |
-| `-Commit` | unset | Pin install to a specific commit SHA (overrides `-Branch`) |
-| `-Tag` | unset | Pin install to a specific git tag (e.g. `v0.14.0`) |
-| `-NoVenv` | off | Skip venv creation (advanced — you manage Python yourself) |
-| `-SkipSetup` | off | Skip the post-install `hermes setup` wizard |
-| `-HermesHome` | `%LOCALAPPDATA%\hermes` | Override data directory |
-| `-InstallDir` | `%LOCALAPPDATA%\hermes\hermes-agent` | Override code location |
+| Parameter | Default | Purpose |
+|---|---|---|
+| `-Branch` | `main` | Clone a specific branch (useful for testing PRs) |
+| `-Commit` | unset | Pin install to a specific commit SHA (overrides `-Branch`) |
+| `-Tag` | unset | Pin install to a specific git tag (e.g. `v0.14.0`) |
+| `-NoVenv` | off | Skip venv creation (advanced — you manage Python yourself) |
+| `-SkipSetup` | off | Skip the post-install `hermes setup` wizard |
+| `-HermesHome` | `%LOCALAPPDATA%\hermes` | Override data directory |
+| `-InstallDir` | `%LOCALAPPDATA%\hermes\hermes-agent` | Override code location |
The installer auto-retries flaky git fetches and strips BOM from any downloaded `install.ps1` payload, so a UTF-8 BOM picked up during HTTP transit no longer breaks the `[scriptblock]::Create((irm ...))` form.
+### Desktop installer (alternative)
+
+A thin GUI installer is also available — useful if you'd rather double-click an `.exe` than open PowerShell. Download Hermes Desktop, run the installer, and on first launch the GUI calls `install.ps1` under the hood to provision Python (via `uv`), Node, PortableGit, and the rest of the dependency bootstrap described below. After the first run, the desktop app and the PowerShell-installed `hermes` CLI share the same `%LOCALAPPDATA%\hermes\hermes-agent` install and `%USERPROFILE%\.hermes` data directory — switch between the GUI and the CLI freely.
+
+Use the desktop installer when you want a familiar Windows install experience or you're handing Hermes to a non-developer; use the PowerShell one-liner when you're already in a terminal.
+
### Dependency bootstrap (`dep_ensure`)
On first launch (and on demand when a missing tool is detected), Hermes runs a small Python bootstrapper — `hermes_cli/dep_ensure.py` — that checks for and lazily installs the non-Python dependencies it needs. On Windows, the relevant ones are:
-| Dependency | Why Hermes needs it |
-| ---------------- | ---------------------------------------------------------------------------------------------------------------------------- |
-| **PortableGit** | Provides `bash.exe` for the terminal tool and `git` for in-session clones. Provisioned at install time, not by `dep_ensure`. |
-| **Node.js 22** | Required for the browser tool (`agent-browser`), the TUI's web bridge, and the WhatsApp bridge. |
-| **ffmpeg** | Audio format conversion for TTS / voice messages. |
-| **ripgrep** | Fast file search — falls back to `grep` if unavailable. |
-| **npm packages** | `agent-browser`, Playwright Chromium, and any per-toolset Node deps are installed once at first browser-tool use. |
+| Dependency | Why Hermes needs it |
+|---|---|
+| **PortableGit** | Provides `bash.exe` for the terminal tool and `git` for in-session clones. Provisioned at install time, not by `dep_ensure`. |
+| **Node.js 22** | Required for the browser tool (`agent-browser`), the TUI's web bridge, and the WhatsApp bridge. |
+| **ffmpeg** | Audio format conversion for TTS / voice messages. |
+| **ripgrep** | Fast file search — falls back to `grep` if unavailable. |
+| **npm packages** | `agent-browser`, Playwright Chromium, and any per-toolset Node deps are installed once at first browser-tool use. |
Each dep has a `shutil.which(...)`-style check; if a binary is missing and the run is interactive, `dep_ensure` offers to install it (deferring to `scripts\install.ps1 -ensure <dep>` for the actual install logic). Non-interactive runs (gateway, cron, headless desktop launches) skip the prompt and surface a clear `this feature needs <dep>` error instead.
@@ -82,18 +86,18 @@ On Windows, per-tool API key setup (Firecrawl, FAL, Browser Use, OpenAI TTS) is
Everything except the dashboard's embedded terminal pane runs natively on Windows.
-| Feature | Native Windows | WSL2 |
-| --------------------------------------------------------------------- | ------------------- | ---------------------- |
-| CLI (`hermes chat`, `hermes setup`, `hermes gateway`, …) | ✓ | ✓ |
-| Interactive TUI (`hermes --tui`) | ✓ | ✓ |
-| Messaging gateway (Telegram, Discord, Slack, WhatsApp, 15+ platforms) | ✓ | ✓ |
-| Cron scheduler | ✓ | ✓ |
-| Browser tool (Chromium via Node) | ✓ | ✓ |
-| MCP servers (stdio and HTTP) | ✓ | ✓ |
-| Local Ollama / LM Studio / llama-server | ✓ | ✓ (via WSL networking) |
-| Web dashboard (sessions, jobs, metrics, config) | ✓ | ✓ |
-| Dashboard `/chat` embedded terminal pane | ✗ (needs POSIX PTY) | ✓ |
-| Auto-start at login | ✓ (schtasks) | ✓ (systemd) |
+| Feature | Native Windows | WSL2 |
+|---|---|---|
+| CLI (`hermes chat`, `hermes setup`, `hermes gateway`, …) | ✓ | ✓ |
+| Interactive TUI (`hermes --tui`) | ✓ | ✓ |
+| Messaging gateway (Telegram, Discord, Slack, WhatsApp, 15+ platforms) | ✓ | ✓ |
+| Cron scheduler | ✓ | ✓ |
+| Browser tool (Chromium via Node) | ✓ | ✓ |
+| MCP servers (stdio and HTTP) | ✓ | ✓ |
+| Local Ollama / LM Studio / llama-server | ✓ | ✓ (via WSL networking) |
+| Web dashboard (sessions, jobs, metrics, config) | ✓ | ✓ |
+| Dashboard `/chat` embedded terminal pane | ✗ (needs POSIX PTY) | ✓ |
+| Auto-start at login | ✓ (schtasks) | ✓ (systemd) |
The dashboard's `/chat` tab embeds a real terminal via a POSIX PTY (`ptyprocess`). Native Windows has no equivalent primitive; Python's `pywinpty` / Windows ConPTY would work but is a separate implementation — treat as future work. **The rest of the dashboard works natively** — only that one tab shows a "use WSL2 for this" banner.
@@ -136,12 +140,12 @@ Hermes's Windows stdio shim now sets `EDITOR=notepad` as a default. Notepad ship
**User overrides still win** (they're checked before the setdefault):
-| Editor | PowerShell command |
-| --------- | ---------------------------------------------------------------------------------- |
-| VS Code | `$env:EDITOR = "code --wait"` |
+| Editor | PowerShell command |
+|---|---|
+| VS Code | `$env:EDITOR = "code --wait"` |
| Notepad++ | `$env:EDITOR = "'C:\Program Files\Notepad++\notepad++.exe' -multiInst -nosession"` |
-| Neovim | `$env:EDITOR = "nvim"` |
-| Helix | `$env:EDITOR = "hx"` |
+| Neovim | `$env:EDITOR = "nvim"` |
+| Helix | `$env:EDITOR = "hx"` |
The `--wait` flag on VS Code is critical — without it the editor returns immediately and Hermes gets a blank buffer back.
@@ -196,13 +200,13 @@ Services require admin rights to install and tie the gateway's lifecycle to mach
## Data layout
-| Path | Contents |
-| ------------------------------------- | ------------------------------------------------------------------- |
-| `%LOCALAPPDATA%\hermes\hermes-agent\` | Git checkout + venv. Safe to `Remove-Item -Recurse` and reinstall. |
-| `%LOCALAPPDATA%\hermes\git\` | PortableGit (only if the installer provisioned it). |
-| `%LOCALAPPDATA%\hermes\node\` | Portable Node.js (only if the installer provisioned it). |
-| `%LOCALAPPDATA%\hermes\bin\` | `hermes.cmd` shim, added to User PATH. |
-| `%USERPROFILE%\.hermes\` | Your config, auth, skills, sessions, logs. **Survives reinstalls.** |
+| Path | Contents |
+|---|---|
+| `%LOCALAPPDATA%\hermes\hermes-agent\` | Git checkout + venv. Safe to `Remove-Item -Recurse` and reinstall. |
+| `%LOCALAPPDATA%\hermes\git\` | PortableGit (only if the installer provisioned it). |
+| `%LOCALAPPDATA%\hermes\node\` | Portable Node.js (only if the installer provisioned it). |
+| `%LOCALAPPDATA%\hermes\bin\` | `hermes.cmd` shim, added to User PATH. |
+| `%USERPROFILE%\.hermes\` | Your config, auth, skills, sessions, logs. **Survives reinstalls.** |
The split is deliberate: `%LOCALAPPDATA%\hermes` is disposable infrastructure (you can blow it away and the one-liner restores it). `%USERPROFILE%\.hermes` is your data — config, memory, skills, session history — and is identical in shape to a Linux install. Mirror it between machines and your Hermes moves with you.
@@ -220,12 +224,12 @@ The browser tool uses `agent-browser` (a Node helper) to drive Chromium. On Wind
### PATH after install
-The installer adds `%LOCALAPPDATA%\hermes\bin` to your **User PATH** via `[Environment]::SetEnvironmentVariable`. Existing terminals don't pick this up — open a new PowerShell window (or Windows Terminal tab) after installation. Close-and-reopen, don't `$env:PATH += …` by hand unless you know what you're doing.
+The installer adds `%LOCALAPPDATA%\hermes\hermes-agent\venv\Scripts` to your **User PATH** via `[Environment]::SetEnvironmentVariable`. Existing terminals don't pick this up — open a new PowerShell window (or Windows Terminal tab) after installation. Close-and-reopen, don't `$env:PATH += …` by hand unless you know what you're doing.
Verify:
```powershell
-Get-Command hermes # should print C:\Users\<you>\AppData\Local\hermes\bin\hermes.cmd
+Get-Command hermes # should print C:\Users\<you>\AppData\Local\hermes\hermes-agent\venv\Scripts\hermes.exe
hermes --version
```
@@ -244,11 +248,11 @@ Don't put secrets in User environment variables unless you specifically want eve
These only affect native Windows installs:
-| Variable | Effect |
-| ----------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `HERMES_GIT_BASH_PATH` | Override bash.exe discovery. Point at any bash — full Git-for-Windows, WSL bash via symlink, MSYS2, Cygwin. The installer sets this automatically. |
-| `HERMES_DISABLE_WINDOWS_UTF8` | Set to `1` to disable the UTF-8 stdio shim and fall back to the locale code page. Useful for bisecting an encoding bug. |
-| `EDITOR` / `VISUAL` | Your editor for `/edit` and `Ctrl-X Ctrl-E`. Hermes defaults to `notepad` if both are unset. |
+| Variable | Effect |
+|---|---|
+| `HERMES_GIT_BASH_PATH` | Override bash.exe discovery. Point at any bash — full Git-for-Windows, WSL bash via symlink, MSYS2, Cygwin. The installer sets this automatically. |
+| `HERMES_DISABLE_WINDOWS_UTF8` | Set to `1` to disable the UTF-8 stdio shim and fall back to the locale code page. Useful for bisecting an encoding bug. |
+| `EDITOR` / `VISUAL` | Your editor for `/edit` and `Ctrl-X Ctrl-E`. Hermes defaults to `notepad` if both are unset. |
## Uninstall
@@ -283,7 +287,7 @@ Consequence: any codepath that said "check if this PID is alive" via `os.kill(pi
## Common pitfalls
**`hermes: command not found` right after install.**
-Open a new PowerShell window. The installer added `%LOCALAPPDATA%\hermes\bin` to User PATH, but existing shells need to be restarted to pick it up.
+Open a new PowerShell window. The installer added `%LOCALAPPDATA%\hermes\bin` to User PATH, but existing shells need to be restarted to pick it up. In the meantime you can run `& "$env:LOCALAPPDATA\hermes\bin\hermes.cmd"`.
**`WinError 193: %1 is not a valid Win32 application` when running a tool.**
You hit a shebang-script invocation that bypassed the `.cmd` shim. Hermes resolves commands through `shutil.which(cmd, path=local_bin)` so PATHEXT picks up `.CMD` — if you're invoking the tool via a hardcoded path instead, switch to the `.cmd` variant (e.g., `npx.cmd`, not `npx`).
From ad399b922918d88fdef1e00a5094c0d1137a7445 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:29:56 -0700
Subject: [PATCH 020/174] docs(update): document updates.* config keys
(pre_update_backup, backup_keep, non_interactive_local_changes) (#40617)
Salvaged from #40540; re-verified on main, tightened, tested.
Co-authored-by: jiangkoumo
---
cli-config.yaml.example | 2 +-
website/docs/reference/cli-commands.md | 16 ----------------
website/docs/user-guide/configuration.md | 4 ++--
3 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 588f30a7d30..a843998a213 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -885,7 +885,7 @@ delegation:
max_iterations: 50 # Max tool-calling turns per child (default: 50)
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
# WARNING: values above 10 multiply API cost linearly.
- # max_spawn_depth: 1 # Delegation tree depth (floor 1, no ceiling; default: 1 = flat).
+ # max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
# Raise to 2 to allow workers to spawn their own subagents.
# Requires role="orchestrator" on intermediate agents.
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 790b4bd35bb..6d99ce6a0b6 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -1361,22 +1361,6 @@ hermes dashboard
hermes dashboard --port 8080 --no-open
```
-### `hermes dashboard register`
-
-Register this install as a self-hosted dashboard with your Nous Portal account, so the dashboard's OAuth (Nous) auth gate can be used. Resolves your existing Nous login (run `hermes setup` first if you're not logged in), creates an OAuth client, writes `HERMES_DASHBOARD_OAUTH_CLIENT_ID` into `~/.hermes/.env`, and prints how to engage the login gate. You can also register, name, and revoke dashboards from the Portal [`/local-dashboards`](https://portal.nousresearch.com/local-dashboards) page.
-
-| Option | Default | Description |
-|--------|---------|-------------|
-| `--name` | auto-generated | Human-readable label for the dashboard |
-| `--redirect-uri` | — | Public HTTPS OAuth redirect URI for an internet-facing host, e.g. `https://hermes.example.com/auth/callback`. Omit for localhost-only use. |
-
-```bash
-hermes dashboard register
-# ✓ Registered dashboard "swift_falcon"
-# …writes HERMES_DASHBOARD_OAUTH_CLIENT_ID to ~/.hermes/.env
-```
-
-
## `hermes profile`
```bash
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index 907e2d90ea7..d4b4fdb1c05 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -1691,7 +1691,7 @@ delegation:
# api_key: "local-key" # API key for base_url (falls back to OPENAI_API_KEY)
# api_mode: "" # Wire protocol for base_url: "chat_completions", "codex_responses", or "anthropic_messages". Empty = auto-detect from URL (e.g. /anthropic suffix → anthropic_messages). Set explicitly for non-standard endpoints the heuristic can't detect.
max_concurrent_children: 3 # Parallel children per batch (floor 1, no ceiling). Also via DELEGATION_MAX_CONCURRENT_CHILDREN env var.
- max_spawn_depth: 1 # Delegation tree depth (floor 1, no ceiling). 1 = flat (default): parent spawns leaves that cannot delegate. 2 = orchestrator children can spawn leaf grandchildren. 3+ = deeper trees.
+ max_spawn_depth: 1 # Delegation tree depth cap (1-3, clamped). 1 = flat (default): parent spawns leaves that cannot delegate. 2 = orchestrator children can spawn leaf grandchildren. 3 = three levels.
orchestrator_enabled: true # Global kill switch. When false, role="orchestrator" is ignored and every child is forced to leaf regardless of max_spawn_depth.
```
@@ -1705,7 +1705,7 @@ The delegation provider uses the same credential resolution as CLI/gateway start
**Precedence:** `delegation.base_url` in config → `delegation.provider` in config → parent provider (inherited). `delegation.model` in config → parent model (inherited). Setting just `model` without `provider` changes only the model name while keeping the parent's credentials (useful for switching models within the same provider like OpenRouter).
-**Width and depth:** `max_concurrent_children` caps how many subagents run in parallel per batch (default `3`, floor of 1, no ceiling). Can also be set via the `DELEGATION_MAX_CONCURRENT_CHILDREN` env var. When the model submits a `tasks` array longer than the cap, `delegate_task` returns a tool error explaining the limit rather than silently truncating. `max_spawn_depth` controls the delegation tree depth (floor of 1, no upper ceiling). At the default `1`, delegation is flat: children cannot spawn grandchildren, and passing `role="orchestrator"` silently degrades to `leaf`. Raise to `2` so orchestrator children can spawn leaf grandchildren; `3` for three-level trees, and higher for deeper ones. The agent opts into orchestration per call via `role="orchestrator"`; `orchestrator_enabled: false` forces every child back to leaf regardless. Cost scales multiplicatively — at `max_spawn_depth: 3` with `max_concurrent_children: 3`, the tree can reach 3×3×3 = 27 concurrent leaf agents. See [Subagent Delegation → Depth Limit and Nested Orchestration](features/delegation.md#depth-limit-and-nested-orchestration) for usage patterns.
+**Width and depth:** `max_concurrent_children` caps how many subagents run in parallel per batch (default `3`, floor of 1, no ceiling). Can also be set via the `DELEGATION_MAX_CONCURRENT_CHILDREN` env var. When the model submits a `tasks` array longer than the cap, `delegate_task` returns a tool error explaining the limit rather than silently truncating. `max_spawn_depth` controls the delegation tree depth (clamped to 1-3). At the default `1`, delegation is flat: children cannot spawn grandchildren, and passing `role="orchestrator"` silently degrades to `leaf`. Raise to `2` so orchestrator children can spawn leaf grandchildren; `3` for three-level trees. The agent opts into orchestration per call via `role="orchestrator"`; `orchestrator_enabled: false` forces every child back to leaf regardless. Cost scales multiplicatively — at `max_spawn_depth: 3` with `max_concurrent_children: 3`, the tree can reach 3×3×3 = 27 concurrent leaf agents. See [Subagent Delegation → Depth Limit and Nested Orchestration](features/delegation.md#depth-limit-and-nested-orchestration) for usage patterns.
## Clarify
From b97cd81c789927c0380ac0b8cd196f42c2781235 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:33:20 -0700
Subject: [PATCH 021/174] refactor(insights): drop dead pricing/duration
wrappers, call usage_pricing directly (#40618)
Salvaged from #40527; re-verified on main, tightened, tested.
Co-authored-by: HeLLGURD
---
agent/insights.py | 25 ++++++++-----------------
tests/agent/test_insights.py | 6 ++++--
2 files changed, 12 insertions(+), 19 deletions(-)
diff --git a/agent/insights.py b/agent/insights.py
index 70907b4f3d5..9977010549c 100644
--- a/agent/insights.py
+++ b/agent/insights.py
@@ -20,23 +20,17 @@ import json
import time
from collections import Counter, defaultdict
from datetime import datetime
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
from agent.usage_pricing import (
CanonicalUsage,
- DEFAULT_PRICING,
estimate_usage_cost,
format_duration_compact,
has_known_pricing,
)
-_DEFAULT_PRICING = DEFAULT_PRICING
-def _has_known_pricing(model_name: str, provider: str = None, base_url: str = None) -> bool:
- """Check if a model has known pricing (vs unknown/custom endpoint)."""
- return has_known_pricing(model_name, provider=provider, base_url=base_url)
-
def _estimate_cost(
session_or_model: Dict[str, Any] | str,
@@ -45,8 +39,8 @@ def _estimate_cost(
*,
cache_read_tokens: int = 0,
cache_write_tokens: int = 0,
- provider: str = None,
- base_url: str = None,
+ provider: Optional[str] = None,
+ base_url: Optional[str] = None,
) -> tuple[float, str]:
"""Estimate the USD cost for a session row or a model/token tuple."""
if isinstance(session_or_model, dict):
@@ -77,9 +71,6 @@ def _estimate_cost(
return float(result.amount_usd or 0.0), result.status
-def _format_duration(seconds: float) -> str:
- """Format seconds into a human-readable duration string."""
- return format_duration_compact(seconds)
def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
@@ -435,7 +426,7 @@ class InsightsEngine:
included_cost_sessions += 1
elif status == "unknown":
unknown_cost_sessions += 1
- if _has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url")):
+ if has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url")):
models_with_pricing.add(display)
else:
models_without_pricing.add(display)
@@ -508,7 +499,7 @@ class InsightsEngine:
d["tool_calls"] += s.get("tool_call_count") or 0
estimate, status = _estimate_cost(s)
d["cost"] += estimate
- d["has_pricing"] = _has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url"))
+ d["has_pricing"] = has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url"))
d["cost_status"] = status
result = [
@@ -679,7 +670,7 @@ class InsightsEngine:
top.append({
"label": "Longest session",
"session_id": longest["id"][:16],
- "value": _format_duration(dur),
+ "value": format_duration_compact(dur),
"date": datetime.fromtimestamp(longest["started_at"]).strftime("%b %d"),
})
@@ -764,7 +755,7 @@ class InsightsEngine:
lines.append(f" Input tokens: {o['total_input_tokens']:<12,} Output tokens: {o['total_output_tokens']:,}")
lines.append(f" Total tokens: {o['total_tokens']:,}")
if o["total_hours"] > 0:
- lines.append(f" Active time: ~{_format_duration(o['total_hours'] * 3600):<11} Avg session: ~{_format_duration(o['avg_session_duration'])}")
+ lines.append(f" Active time: ~{format_duration_compact(o['total_hours'] * 3600):<11} Avg session: ~{format_duration_compact(o['avg_session_duration'])}")
lines.append(f" Avg msgs/session: {o['avg_messages_per_session']:.1f}")
lines.append("")
@@ -879,7 +870,7 @@ class InsightsEngine:
lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
if o["total_hours"] > 0:
- lines.append(f"**Active time:** ~{_format_duration(o['total_hours'] * 3600)} | **Avg session:** ~{_format_duration(o['avg_session_duration'])}")
+ lines.append(f"**Active time:** ~{format_duration_compact(o['total_hours'] * 3600)} | **Avg session:** ~{format_duration_compact(o['avg_session_duration'])}")
lines.append("")
# Models (top 5)
diff --git a/tests/agent/test_insights.py b/tests/agent/test_insights.py
index 723a40da4fb..e0aad522227 100644
--- a/tests/agent/test_insights.py
+++ b/tests/agent/test_insights.py
@@ -7,9 +7,11 @@ from hermes_state import SessionDB
from agent.insights import (
InsightsEngine,
_estimate_cost,
- _format_duration,
_bar_chart,
- _has_known_pricing,
+)
+from agent.usage_pricing import (
+ format_duration_compact as _format_duration,
+ has_known_pricing as _has_known_pricing,
)
From d3b670e63e1622560d665ff432193e4f2daf063b Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:36:18 -0700
Subject: [PATCH 022/174] docs(codex): document --sandbox danger-full-access
for gateway bubblewrap failures (#40619)
Salvaged from #40435; re-verified on main, tightened, tested.
Co-authored-by: ziwon
---
skills/autonomous-ai-agents/codex/SKILL.md | 19 +++++++++++++++++++
.../autonomous-ai-agents-codex.md | 19 +++++++++++++++++++
2 files changed, 38 insertions(+)
diff --git a/skills/autonomous-ai-agents/codex/SKILL.md b/skills/autonomous-ai-agents/codex/SKILL.md
index a796852b754..87b5666fcda 100644
--- a/skills/autonomous-ai-agents/codex/SKILL.md
+++ b/skills/autonomous-ai-agents/codex/SKILL.md
@@ -74,6 +74,25 @@ process(action="kill", session_id="")
| `exec "prompt"` | One-shot execution, exits when done |
| `--full-auto` | Sandboxed but auto-approves file changes in workspace |
| `--yolo` | No sandbox, no approvals (fastest, most dangerous) |
+| `--sandbox danger-full-access` | No Codex sandbox; useful when the host service context breaks bubblewrap |
+
+## Hermes Gateway Caveat
+
+When invoking the Codex CLI from a Hermes gateway/service context (for example,
+Telegram-driven agent sessions), Codex `workspace-write` sandboxing may fail even
+when the same command works in the user's interactive shell. A typical symptom is
+bubblewrap/user-namespace errors such as `setting up uid map: Permission denied`
+or `loopback: Failed RTM_NEWADDR: Operation not permitted`.
+
+In that context, prefer:
+
+```
+codex exec --sandbox danger-full-access "<prompt>"
+```
+
+Use process boundaries as the safety layer instead: explicit `workdir`, clean git
+status before launch, narrow task prompts, `git diff` review, targeted tests, and
+human/agent confirmation before committing broad changes.
## PR Reviews
diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md
index 3482f2303c1..eb84c50d1e7 100644
--- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md
+++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-codex.md
@@ -92,6 +92,25 @@ process(action="kill", session_id="")
| `exec "prompt"` | One-shot execution, exits when done |
| `--full-auto` | Sandboxed but auto-approves file changes in workspace |
| `--yolo` | No sandbox, no approvals (fastest, most dangerous) |
+| `--sandbox danger-full-access` | No Codex sandbox; useful when the host service context breaks bubblewrap |
+
+## Hermes Gateway Caveat
+
+When invoking the Codex CLI from a Hermes gateway/service context (for example,
+Telegram-driven agent sessions), Codex `workspace-write` sandboxing may fail even
+when the same command works in the user's interactive shell. A typical symptom is
+bubblewrap/user-namespace errors such as `setting up uid map: Permission denied`
+or `loopback: Failed RTM_NEWADDR: Operation not permitted`.
+
+In that context, prefer:
+
+```
+codex exec --sandbox danger-full-access "<prompt>"
+```
+
+Use process boundaries as the safety layer instead: explicit `workdir`, clean git
+status before launch, narrow task prompts, `git diff` review, targeted tests, and
+human/agent confirmation before committing broad changes.
## PR Reviews
From 30c7913617a63773c15a11900d24ac362b7609c8 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Sun, 7 Jun 2026 18:38:54 -0700
Subject: [PATCH 023/174] fix(api_server): report hermes version on /health and
/health/detailed (#40620)
Salvaged from #40479; re-verified on main, tightened, tested.
Co-authored-by: tfournet
---
gateway/platforms/api_server.py | 28 +++++++++++++++++++++++++++-
tests/gateway/test_api_server.py | 15 +++++++++++++++
2 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index 13e97f4bd36..fb23664f017 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -61,6 +61,29 @@ from gateway.platforms.base import (
logger = logging.getLogger(__name__)
+
+def _hermes_version() -> str:
+ """Return the hermes-agent version string, or "dev" if it can't be resolved.
+
+ Tries the installed package metadata first (authoritative for a pip/uv
+ install), then the in-tree ``hermes_cli.__version__`` (covers editable /
+ source checkouts where metadata may be stale or absent). Never raises —
+ a version probe must not be able to break the health endpoint.
+ """
+ try:
+ from importlib.metadata import version
+
+ return version("hermes-agent")
+ except Exception:
+ pass
+ try:
+ from hermes_cli import __version__
+
+ return __version__
+ except Exception:
+ return "dev"
+
+
# Default settings
DEFAULT_HOST = "127.0.0.1"
DEFAULT_PORT = 8642
@@ -1047,7 +1070,9 @@ class APIServerAdapter(BasePlatformAdapter):
async def _handle_health(self, request: "web.Request") -> "web.Response":
"""GET /health — simple health check."""
- return web.json_response({"status": "ok", "platform": "hermes-agent"})
+ return web.json_response(
+ {"status": "ok", "platform": "hermes-agent", "version": _hermes_version()}
+ )
async def _handle_health_detailed(self, request: "web.Request") -> "web.Response":
"""GET /health/detailed — rich status for cross-container dashboard probing.
@@ -1062,6 +1087,7 @@ class APIServerAdapter(BasePlatformAdapter):
return web.json_response({
"status": "ok",
"platform": "hermes-agent",
+ "version": _hermes_version(),
"gateway_state": runtime.get("gateway_state"),
"platforms": runtime.get("platforms", {}),
"active_agents": runtime.get("active_agents", 0),
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index c042fd556c6..95d49d8b4f1 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -497,6 +497,20 @@ class TestHealthEndpoint:
assert data["status"] == "ok"
assert data["platform"] == "hermes-agent"
+ @pytest.mark.asyncio
+ async def test_health_reports_version(self, adapter):
+ """GET /health must expose a non-empty version so orchestrators (e.g.
+ AgentOS) can read the gateway version without scraping. Regression
+ guard for the missing-version gap."""
+ app = _create_app(adapter)
+ async with TestClient(TestServer(app)) as cli:
+ resp = await cli.get("/health")
+ assert resp.status == 200
+ data = await resp.json()
+ assert "version" in data
+ assert isinstance(data["version"], str)
+ assert data["version"] != ""
+
@pytest.mark.asyncio
async def test_v1_health_alias_returns_ok(self, adapter):
"""GET /v1/health should return the same response as /health."""
@@ -507,6 +521,7 @@ class TestHealthEndpoint:
data = await resp.json()
assert data["status"] == "ok"
assert data["platform"] == "hermes-agent"
+ assert data.get("version")
# ---------------------------------------------------------------------------
From fa42ac094dca23c6ae6d05e1487f3e0c7daa29ad Mon Sep 17 00:00:00 2001
From: brooklyn!
Date: Sun, 7 Jun 2026 20:57:08 -0500
Subject: [PATCH 024/174] feat(desktop): Shift+click the status-bar zap to
toggle YOLO globally (#41666)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The status-bar zap currently toggles per-session approval bypass (the same
scope as the TUI's Shift+Tab). This adds a global escape hatch: Shift+clicking
the zap flips the persistent approvals.mode in config.yaml between "off"
(bypass on) and "manual" (bypass off), affecting every session, the CLI, the
TUI, and cron — and it survives restarts.
- statusbar-controls: thread the click's shiftKey through onSelect via a new
StatusbarSelectModifiers arg.
- yolo-session: add setGlobalYolo() that calls config.set with scope="global".
- use-statusbar-items: branch toggleYolo on modifiers.shiftKey; plain click
stays per-session, Shift+click goes global.
- tui_gateway config.set "yolo" key: add scope="global" that reads/writes
approvals.mode through the gateway's own (mtime-cached) config view, honors
an explicit value, and re-emits session.info to every live session so each
window's zap reflects the flip immediately.
- i18n: tooltip copy in en/ja/zh/zh-hant notes Shift+click toggles globally.
Tests: two new tui_gateway tests cover the global toggle and explicit-value
paths; existing session/process-scope yolo tests still pass.
---
.../app/shell/hooks/use-statusbar-items.tsx | 51 ++++++++----
.../src/app/shell/statusbar-controls.tsx | 10 ++-
apps/desktop/src/i18n/en.ts | 4 +-
apps/desktop/src/i18n/ja.ts | 4 +-
apps/desktop/src/i18n/zh-hant.ts | 4 +-
apps/desktop/src/i18n/zh.ts | 4 +-
apps/desktop/src/lib/yolo-session.ts | 24 ++++++
tests/test_tui_gateway_server.py | 60 ++++++++++++++
tui_gateway/server.py | 79 +++++++++++++------
9 files changed, 188 insertions(+), 52 deletions(-)
diff --git a/apps/desktop/src/app/shell/hooks/use-statusbar-items.tsx b/apps/desktop/src/app/shell/hooks/use-statusbar-items.tsx
index c700cb51019..80843a00f09 100644
--- a/apps/desktop/src/app/shell/hooks/use-statusbar-items.tsx
+++ b/apps/desktop/src/app/shell/hooks/use-statusbar-items.tsx
@@ -4,6 +4,7 @@ import { useCallback, useMemo } from 'react'
import type { CommandCenterSection } from '@/app/command-center'
import { GatewayMenuPanel } from '@/app/shell/gateway-menu-panel'
+import { useI18n } from '@/i18n'
import {
Activity,
AlertCircle,
@@ -16,12 +17,11 @@ import {
Zap,
ZapFilled
} from '@/lib/icons'
-import { useI18n } from '@/i18n'
import { formatModelStatusLabel } from '@/lib/model-status-label'
import type { RuntimeReadinessResult } from '@/lib/runtime-readiness'
import { contextBarLabel, LiveDuration, usageContextLabel } from '@/lib/statusbar'
import { cn } from '@/lib/utils'
-import { setSessionYolo } from '@/lib/yolo-session'
+import { setGlobalYolo, setSessionYolo } from '@/lib/yolo-session'
import { $desktopActionTasks } from '@/store/activity'
import { $previewServerRestartStatus } from '@/store/preview'
import {
@@ -44,7 +44,7 @@ import { $desktopVersion, $updateApply, $updateStatus, setUpdateOverlayOpen } fr
import type { StatusResponse } from '@/types/hermes'
import { CRON_ROUTE } from '../../routes'
-import type { StatusbarItem } from '../statusbar-controls'
+import type { StatusbarItem, StatusbarSelectModifiers } from '../statusbar-controls'
interface StatusbarItemsOptions {
agentsOpen: boolean
@@ -105,22 +105,39 @@ export function useStatusbarItems({
// Per-session approval bypass (same scope as the TUI's Shift+Tab). On a
// new-chat draft (no runtime session yet) we arm locally; the session-create
// path applies it once the backend session exists.
- const toggleYolo = useCallback(async () => {
- const next = !$yoloActive.get()
- const sid = $activeSessionId.get()
+ //
+ // Shift+click flips the GLOBAL approvals.mode instead — a persistent,
+ // all-sessions/CLI/TUI/cron bypass that survives restarts.
+ const toggleYolo = useCallback(
+ async (modifiers?: StatusbarSelectModifiers) => {
+ const next = !$yoloActive.get()
- setYoloActive(next)
+ setYoloActive(next)
- if (!sid) {
- return
- }
+ if (modifiers?.shiftKey) {
+ try {
+ await setGlobalYolo(requestGateway, next)
+ } catch {
+ setYoloActive(!next)
+ }
- try {
- await setSessionYolo(requestGateway, sid, next)
- } catch {
- setYoloActive(!next)
- }
- }, [requestGateway])
+ return
+ }
+
+ const sid = $activeSessionId.get()
+
+ if (!sid) {
+ return
+ }
+
+ try {
+ await setSessionYolo(requestGateway, sid, next)
+ } catch {
+ setYoloActive(!next)
+ }
+ },
+ [requestGateway]
+ )
const showYoloToggle = gatewayState === 'open' && (!!activeSessionId || freshDraftReady)
@@ -333,7 +350,7 @@ export function useStatusbarItems({
),
id: 'yolo',
- onSelect: () => void toggleYolo(),
+ onSelect: modifiers => void toggleYolo(modifiers),
title: yoloActive ? copy.yoloOn : copy.yoloOff,
variant: 'action'
},
diff --git a/apps/desktop/src/app/shell/statusbar-controls.tsx b/apps/desktop/src/app/shell/statusbar-controls.tsx
index 6a103160e65..dc3a4d77382 100644
--- a/apps/desktop/src/app/shell/statusbar-controls.tsx
+++ b/apps/desktop/src/app/shell/statusbar-controls.tsx
@@ -35,12 +35,16 @@ export interface StatusbarItem {
menuClassName?: string
menuContent?: ReactNode
menuItems?: readonly StatusbarMenuItem[]
- onSelect?: () => void
+ onSelect?: (modifiers: StatusbarSelectModifiers) => void
title?: string
to?: string
variant?: 'action' | 'link' | 'menu' | 'text'
}
+export interface StatusbarSelectModifiers {
+ shiftKey: boolean
+}
+
export type StatusbarItemSide = 'left' | 'right'
export type SetStatusbarItemGroup = (id: string, items: readonly StatusbarItem[], side?: StatusbarItemSide) => void
@@ -170,12 +174,12 @@ function StatusbarItemView({ item, navigate }: { item: StatusbarItem; navigate: