Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui

This commit is contained in:
Brooklyn Nicholson 2026-05-03 12:40:12 -05:00
commit 9ca5ea1375
109 changed files with 11761 additions and 479 deletions

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import asyncio
import contextvars
import json
import logging
import os
from collections import defaultdict, deque
@ -47,6 +48,7 @@ from acp.schema import (
TextContentBlock,
UnstructuredCommandInput,
Usage,
UsageUpdate,
UserMessageChunk,
)
@ -65,6 +67,7 @@ from acp_adapter.events import (
)
from acp_adapter.permissions import make_approval_callback
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
from acp_adapter.tools import build_tool_complete, build_tool_start
logger = logging.getLogger(__name__)
@ -315,6 +318,66 @@ class HermesACPAgent(acp.Agent):
return target_provider, new_model
@staticmethod
def _build_usage_update(state: SessionState) -> UsageUpdate | None:
    """Assemble the ACP ``usage_update`` payload for a session.

    ``size`` is taken from the agent's context compressor
    (``context_length``); ``used`` is a rough token estimate over the
    session history, the cached system prompt and the tool schemas. If the
    estimator cannot be imported or raises, ``used`` falls back to the
    compressor's ``last_prompt_tokens``.

    Returns:
        A ``UsageUpdate``, or ``None`` when no positive context length is
        available.
    """
    hermes_agent = state.agent
    ctx_compressor = getattr(hermes_agent, "context_compressor", None)
    window = int(getattr(ctx_compressor, "context_length", 0) or 0)
    if window <= 0:
        return None

    try:
        # Imported lazily; an import failure takes the same fallback path
        # as an estimator failure.
        from agent.model_metadata import estimate_request_tokens_rough

        cached_prompt = getattr(hermes_agent, "_cached_system_prompt", "") or ""
        tool_schemas = getattr(hermes_agent, "tools", None) or None
        used = estimate_request_tokens_rough(
            state.history,
            system_prompt=cached_prompt,
            tools=tool_schemas,
        )
    except Exception:
        logger.debug("Could not estimate ACP native context usage", exc_info=True)
        used = int(getattr(ctx_compressor, "last_prompt_tokens", 0) or 0)

    # ``window`` is strictly positive here, so only ``used`` needs clamping.
    return UsageUpdate(
        session_update="usage_update",
        size=window,
        used=max(used, 0),
    )
async def _send_usage_update(self, state: SessionState) -> None:
    """Push the current context-usage snapshot to the connected client.

    Does nothing when there is no connection or when no usage payload can
    be built. Delivery failures are logged as warnings and not re-raised.
    """
    if not self._conn:
        return

    payload = self._build_usage_update(state)
    if payload is not None:
        try:
            await self._conn.session_update(
                session_id=state.session_id,
                update=payload,
            )
        except Exception:
            logger.warning(
                "Failed to send ACP usage update for session %s",
                state.session_id,
                exc_info=True,
            )
def _schedule_usage_update(self, state: SessionState) -> None:
"""Schedule native context indicator refresh after ACP responses."""
if not self._conn:
return
loop = asyncio.get_running_loop()
loop.call_soon(asyncio.create_task, self._send_usage_update(state))
async def _register_session_mcp_servers(
self,
state: SessionState,
@ -485,37 +548,99 @@ class HermesACPAgent(acp.Agent):
)
return None
@staticmethod
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
"""Extract function name/arguments from an OpenAI-style tool_call."""
function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
name = str(function.get("name") or tool_call.get("name") or "unknown_tool")
raw_args = function.get("arguments") or tool_call.get("arguments") or tool_call.get("args") or {}
if isinstance(raw_args, str):
try:
parsed = json.loads(raw_args)
except Exception:
parsed = {"raw": raw_args}
raw_args = parsed
if not isinstance(raw_args, dict):
raw_args = {}
return name, raw_args
@staticmethod
def _history_tool_call_id(tool_call: dict[str, Any]) -> str:
"""Return the stable provider tool call id for ACP history replay."""
return str(
tool_call.get("id")
or tool_call.get("call_id")
or tool_call.get("tool_call_id")
or ""
).strip()
async def _replay_session_history(self, state: SessionState) -> None:
    """Send persisted user/assistant history to clients during session/load.

    Zed's ACP history UI calls ``session/load`` after the user picks an item
    from the Agents sidebar. The agent must then replay the full conversation
    as user/assistant chunks plus reconstructed tool-call start/completion
    notifications; merely restoring server-side state makes Hermes remember
    context, but leaves the editor looking like a clean thread.

    Replay stops at the first update that fails to send; the failure is
    logged as a warning and not re-raised.
    """
    if not self._conn or not state.history:
        return

    # Tool calls announced by assistant messages and not yet completed,
    # keyed by tool call id -> (tool name, parsed arguments).
    active_tool_calls: dict[str, tuple[str, dict[str, Any]]] = {}

    async def _send(update: Any) -> bool:
        """Deliver one update; return False (after logging) on failure."""
        try:
            await self._conn.session_update(session_id=state.session_id, update=update)
            return True
        except Exception:
            logger.warning(
                "Failed to replay ACP history for session %s",
                state.session_id,
                exc_info=True,
            )
            return False

    for message in state.history:
        role = str(message.get("role") or "")

        if role in {"user", "assistant"}:
            text = self._history_message_text(message)
            if text:
                update = self._history_message_update(role=role, text=text)
                if update is not None and not await _send(update):
                    return

            if role == "assistant" and isinstance(message.get("tool_calls"), list):
                for tool_call in message["tool_calls"]:
                    if not isinstance(tool_call, dict):
                        continue
                    tool_call_id = self._history_tool_call_id(tool_call)
                    if not tool_call_id:
                        # Without an id the completion cannot be matched later.
                        continue
                    tool_name, args = self._history_tool_call_name_args(tool_call)
                    active_tool_calls[tool_call_id] = (tool_name, args)
                    if not await _send(build_tool_start(tool_call_id, tool_name, args)):
                        return
            continue

        if role == "tool":
            tool_call_id = str(message.get("tool_call_id") or "").strip()
            tool_name = str(message.get("tool_name") or "").strip()
            function_args: dict[str, Any] | None = None
            # Prefer the name/arguments recorded when the call was started.
            if tool_call_id in active_tool_calls:
                tool_name, function_args = active_tool_calls.pop(tool_call_id)
            if not tool_call_id or not tool_name:
                continue

            result = message.get("content")
            if not await _send(
                build_tool_complete(
                    tool_call_id,
                    tool_name,
                    result=result if isinstance(result, str) else None,
                    function_args=function_args,
                )
            ):
                return
async def new_session(
self,
@ -527,11 +652,24 @@ class HermesACPAgent(acp.Agent):
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
self._schedule_available_commands_update(state.session_id)
self._schedule_usage_update(state)
return NewSessionResponse(
session_id=state.session_id,
models=self._build_model_state(state),
)
def _schedule_history_replay(self, state: SessionState) -> None:
"""Replay persisted history after session/load or session/resume returns.
Zed only attaches streamed transcript/tool updates once the load/resume
response has completed. Sending replay notifications while the request is
still in-flight can make the server look correct in logs while the editor
drops or fails to attach the tool-call history.
"""
loop = asyncio.get_running_loop()
replay_coro = self._replay_session_history(state)
loop.call_soon(asyncio.create_task, replay_coro)
async def load_session(
self,
cwd: str,
@ -545,8 +683,9 @@ class HermesACPAgent(acp.Agent):
return None
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Loaded session %s", session_id)
await self._replay_session_history(state)
self._schedule_history_replay(state)
self._schedule_available_commands_update(session_id)
self._schedule_usage_update(state)
return LoadSessionResponse(models=self._build_model_state(state))
async def resume_session(
@ -562,8 +701,9 @@ class HermesACPAgent(acp.Agent):
state = self.session_manager.create_session(cwd=cwd)
await self._register_session_mcp_servers(state, mcp_servers)
logger.info("Resumed session %s", state.session_id)
await self._replay_session_history(state)
self._schedule_history_replay(state)
self._schedule_available_commands_update(state.session_id)
self._schedule_usage_update(state)
return ResumeSessionResponse(models=self._build_model_state(state))
async def cancel(self, session_id: str, **kwargs: Any) -> None:
@ -712,6 +852,7 @@ class HermesACPAgent(acp.Agent):
if self._conn:
update = acp.update_agent_message_text(response_text)
await self._conn.session_update(session_id, update)
await self._send_usage_update(state)
return PromptResponse(stop_reason="end_turn")
# If Zed sends another regular prompt while the same ACP session is
@ -744,24 +885,37 @@ class HermesACPAgent(acp.Agent):
tool_call_meta: dict[str, dict[str, Any]] = {}
previous_approval_cb = None
streamed_message = False
if conn:
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
thinking_cb = make_thinking_cb(conn, session_id, loop)
reasoning_cb = make_thinking_cb(conn, session_id, loop)
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
message_cb = make_message_cb(conn, session_id, loop)
def stream_delta_cb(text: str) -> None:
nonlocal streamed_message
if text:
streamed_message = True
message_cb(text)
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
else:
tool_progress_cb = None
thinking_cb = None
reasoning_cb = None
step_cb = None
message_cb = None
stream_delta_cb = None
approval_cb = None
agent = state.agent
agent.tool_progress_callback = tool_progress_cb
agent.thinking_callback = thinking_cb
# ACP thought panes should not receive Hermes' local kawaii waiting/status
# updates. Route provider/model reasoning deltas instead; if the provider
# emits no reasoning, Zed should not get a fake "thinking" accordion.
agent.thinking_callback = None
agent.reasoning_callback = reasoning_cb
agent.step_callback = step_cb
agent.message_callback = message_cb
agent.stream_delta_callback = stream_delta_cb
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
@ -867,7 +1021,7 @@ class HermesACPAgent(acp.Agent):
)
except Exception:
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
if final_response and conn:
if final_response and conn and not streamed_message:
update = acp.update_agent_message_text(final_response)
await conn.session_update(session_id, update)
@ -903,6 +1057,8 @@ class HermesACPAgent(acp.Agent):
cached_read_tokens=result.get("cache_read_tokens"),
)
await self._send_usage_update(state)
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
return PromptResponse(stop_reason=stop_reason, usage=usage)
@ -1035,22 +1191,84 @@ class HermesACPAgent(acp.Agent):
return f"Could not list tools: {e}"
def _cmd_context(self, args: str, state: SessionState) -> str:
    """Show ACP session context pressure and compression guidance.

    Reports message counts by role, model and provider, an approximate
    token usage against the context window, and the distance to the
    compression threshold. ``args`` is accepted for the command interface
    and not used.

    Returns:
        A newline-joined, human-readable report.
    """
    n_messages = len(state.history)

    # Count by role.
    roles: dict[str, int] = {}
    for msg in state.history:
        role = msg.get("role", "unknown")
        roles[role] = roles.get(role, 0) + 1

    agent = state.agent
    model = state.model or getattr(agent, "model", "")
    provider = getattr(agent, "provider", None) or "auto"
    compressor = getattr(agent, "context_compressor", None)
    context_length = int(getattr(compressor, "context_length", 0) or 0)
    threshold_tokens = int(getattr(compressor, "threshold_tokens", 0) or 0)

    try:
        from agent.model_metadata import estimate_request_tokens_rough

        system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
        tools = getattr(agent, "tools", None) or None
        approx_tokens = estimate_request_tokens_rough(
            state.history,
            system_prompt=system_prompt,
            tools=tools,
        )
    except Exception:
        logger.debug("Could not estimate ACP context usage", exc_info=True)
        approx_tokens = 0

    # No explicit threshold configured: assume 80% of the context window.
    if threshold_tokens <= 0 and context_length > 0:
        threshold_tokens = int(context_length * 0.80)

    lines = [
        f"Conversation: {n_messages} messages"
        if n_messages
        else "Conversation is empty (no messages yet).",
        f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
        f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
    ]
    if model:
        lines.append(f"Model: {model}")
    lines.append(f"Provider: {provider}")

    if approx_tokens > 0:
        if context_length > 0:
            usage_pct = (approx_tokens / context_length) * 100
            lines.append(
                f"Context usage: ~{approx_tokens:,} / {context_length:,} tokens ({usage_pct:.1f}%)"
            )
        else:
            lines.append(f"Context usage: ~{approx_tokens:,} tokens")

    if threshold_tokens > 0:
        if approx_tokens > 0:
            threshold_pct = (threshold_tokens / context_length) * 100 if context_length > 0 else 0
            pct_note = f", {threshold_pct:.0f}%" if threshold_pct else ""
            remaining = max(threshold_tokens - approx_tokens, 0)
            if approx_tokens >= threshold_tokens:
                lines.append(
                    f"Compression: due now (threshold ~{threshold_tokens:,}"
                    + pct_note
                    + "). Run /compact."
                )
            else:
                lines.append(
                    f"Compression: ~{remaining:,} tokens until threshold "
                    f"(~{threshold_tokens:,}"
                    + pct_note
                    + ")."
                )
        else:
            lines.append(f"Compression threshold: ~{threshold_tokens:,} tokens")

    if getattr(agent, "compression_enabled", True) is False:
        lines.append("Compression is disabled for this agent.")
    else:
        lines.append("Tip: run /compact to compress manually before the threshold.")

    return "\n".join(lines)
def _cmd_reset(self, args: str, state: SessionState) -> str:

View file

@ -28,6 +28,11 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
"terminal": "execute",
"process": "execute",
"execute_code": "execute",
# Session/meta tools
"todo": "other",
"skill_view": "read",
"skills_list": "read",
"skill_manage": "edit",
# Web / fetch
"web_search": "fetch",
"web_extract": "fetch",
@ -51,6 +56,28 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
}
# Set of Hermes tool names, grouped by area in the comments below.
# NOTE(review): presumably the tools that receive dedicated ACP title/result
# rendering in this module — its use is not visible here, confirm against
# the code that reads it.
_POLISHED_TOOLS = {
    # Core operator loop
    "todo", "memory", "session_search", "delegate_task",
    # Files / execution
    "read_file", "write_file", "patch", "search_files", "terminal", "process", "execute_code",
    # Skills / web / browser / media
    "skill_view", "skills_list", "skill_manage", "web_search", "web_extract",
    "browser_navigate", "browser_click", "browser_type", "browser_press", "browser_scroll",
    "browser_back", "browser_snapshot", "browser_console", "browser_get_images", "browser_vision",
    "vision_analyze", "image_generate", "text_to_speech",
    # Schedulers / platform integrations
    "cronjob", "send_message", "clarify", "discord", "discord_admin",
    "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
    "feishu_doc_read", "feishu_drive_list_comments", "feishu_drive_list_comment_replies",
    "feishu_drive_reply_comment", "feishu_drive_add_comment",
    "kanban_create", "kanban_show", "kanban_comment", "kanban_complete",
    "kanban_block", "kanban_link", "kanban_heartbeat",
    "yb_query_group_info", "yb_query_group_members", "yb_search_sticker",
    "yb_send_dm", "yb_send_sticker", "mixture_of_agents",
}
def get_tool_kind(tool_name: str) -> ToolKind:
    """Look up the ACP ToolKind for a hermes tool; unmapped tools are 'other'."""
    if tool_name in TOOL_KIND_MAP:
        return TOOL_KIND_MAP[tool_name]
    return "other"
@ -85,18 +112,645 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
if urls:
return f"extract: {urls[0]}" + (f" (+{len(urls)-1})" if len(urls) > 1 else "")
return "web extract"
if tool_name == "process":
action = str(args.get("action") or "").strip() or "manage"
sid = str(args.get("session_id") or "").strip()
return f"process {action}: {sid}" if sid else f"process {action}"
if tool_name == "delegate_task":
tasks = args.get("tasks")
if isinstance(tasks, list) and tasks:
return f"delegate batch ({len(tasks)} tasks)"
goal = args.get("goal", "")
if goal and len(goal) > 60:
goal = goal[:57] + "..."
return f"delegate: {goal}" if goal else "delegate task"
if tool_name == "session_search":
query = str(args.get("query") or "").strip()
return f"session search: {query}" if query else "recent sessions"
if tool_name == "memory":
action = str(args.get("action") or "manage").strip() or "manage"
target = str(args.get("target") or "memory").strip() or "memory"
return f"memory {action}: {target}"
if tool_name == "execute_code":
return "execute code"
code = str(args.get("code") or "").strip()
first_line = next((line.strip() for line in code.splitlines() if line.strip()), "")
if first_line:
if len(first_line) > 70:
first_line = first_line[:67] + "..."
return f"python: {first_line}"
return "python code"
if tool_name == "todo":
items = args.get("todos")
if isinstance(items, list):
return f"todo ({len(items)} item{'s' if len(items) != 1 else ''})"
return "todo"
if tool_name == "skill_view":
name = str(args.get("name") or "?").strip() or "?"
file_path = str(args.get("file_path") or "").strip()
suffix = f"/{file_path}" if file_path else ""
return f"skill view ({name}{suffix})"
if tool_name == "skills_list":
category = str(args.get("category") or "").strip()
return f"skills list ({category})" if category else "skills list"
if tool_name == "skill_manage":
action = str(args.get("action") or "manage").strip() or "manage"
name = str(args.get("name") or "?").strip() or "?"
file_path = str(args.get("file_path") or "").strip()
target = f"{name}/{file_path}" if file_path else name
if len(target) > 64:
target = target[:61] + "..."
return f"skill {action}: {target}"
if tool_name == "browser_navigate":
return f"navigate: {args.get('url', '?')}"
if tool_name == "browser_snapshot":
return "browser snapshot"
if tool_name == "browser_vision":
return f"browser vision: {str(args.get('question', '?'))[:50]}"
if tool_name == "browser_get_images":
return "browser images"
if tool_name == "vision_analyze":
return f"analyze image: {args.get('question', '?')[:50]}"
return f"analyze image: {str(args.get('question', '?'))[:50]}"
if tool_name == "image_generate":
prompt = str(args.get("prompt") or args.get("description") or "").strip()
return f"generate image: {prompt[:50]}" if prompt else "generate image"
if tool_name == "cronjob":
action = str(args.get("action") or "manage").strip() or "manage"
job_id = str(args.get("job_id") or args.get("id") or "").strip()
return f"cron {action}: {job_id}" if job_id else f"cron {action}"
return tool_name
def _text(content: str) -> Any:
    """Wrap *content* in an ACP text block and return it as tool content."""
    block = acp.text_block(content)
    return acp.tool_content(block)
def _json_loads_maybe(value: Optional[str]) -> Any:
if not isinstance(value, str):
return value
try:
return json.loads(value)
except Exception:
pass
# Some Hermes tools append a human hint after a JSON payload, e.g.
# ``{...}\n\n[Hint: Results truncated...]``. Keep the structured rendering path
# by decoding the first JSON value instead of falling back to raw text.
try:
decoded, _ = json.JSONDecoder().raw_decode(value.lstrip())
return decoded
except Exception:
return None
def _truncate_text(text: str, limit: int = 5000) -> str:
if len(text) <= limit:
return text
return text[: max(0, limit - 100)] + f"\n... ({len(text)} chars total, truncated)"
def _fenced_text(text: str, language: str = "") -> str:
"""Return a Markdown fence that cannot be broken by backticks in text."""
longest = max((len(run) for run in text.split("`")[1::2]), default=0)
fence = "`" * max(3, longest + 1)
return f"{fence}{language}\n{text}\n{fence}"
def _format_todo_result(result: Optional[str]) -> Optional[str]:
    """Render a ``todo`` tool result as a Markdown checklist.

    Returns ``None`` when *result* does not decode to a dict containing a
    ``todos`` list. Each item is prefixed with a status marker so completed,
    in-progress, pending and cancelled entries can be told apart; items with
    an unrecognised status are listed without a marker.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict) or not isinstance(data.get("todos"), list):
        return None
    summary = data.get("summary") if isinstance(data.get("summary"), dict) else {}
    icon = {
        "completed": "✅",
        "in_progress": "🔄",
        "pending": "⏳",
        "cancelled": "✗",
    }
    lines = ["**Todo list**", ""]
    for item in data["todos"]:
        if not isinstance(item, dict):
            continue
        status = str(item.get("status") or "pending")
        content = str(item.get("content") or item.get("id") or "").strip()
        if not content:
            continue
        marker = icon.get(status, "")
        # Avoid a doubled space after the bullet when there is no marker.
        lines.append(f"- {marker} {content}" if marker else f"- {content}")
    if summary:
        cancelled = summary.get("cancelled", 0)
        lines.extend([
            "",
            "**Progress:** "
            f"{summary.get('completed', 0)} completed, "
            f"{summary.get('in_progress', 0)} in progress, "
            f"{summary.get('pending', 0)} pending"
            + (f", {cancelled} cancelled" if cancelled else ""),
        ])
    return "\n".join(lines)
def _format_read_file_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a ``read_file`` tool result as a header plus a fenced body.

    Returns ``None`` when *result* is not a JSON object or carries no string
    ``content``; returns a short failure line when it carries only an error.

    Args:
        result: Raw tool result string.
        args: Arguments the tool was called with (``path``, ``offset``,
            ``limit`` are read when present).
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("error") and not data.get("content"):
        return f"Read failed: {data.get('error')}"
    content = data.get("content")
    if not isinstance(content, str):
        return None

    call_args = args or {}
    path = str(call_args.get("path") or data.get("path") or "file").strip()
    offset = call_args.get("offset")
    limit = call_args.get("limit")
    range_bits = []
    if offset:
        range_bits.append(f"from line {offset}")
    if limit:
        range_bits.append(f"limit {limit}")
    suffix = f" ({', '.join(range_bits)})" if range_bits else ""
    header = f"Read {path}{suffix}"
    total_lines = data.get("total_lines")
    if total_lines is not None:
        header += f" — {total_lines} total lines"

    # Hermes read_file output is line-numbered with `|`. If we send it as raw
    # Markdown, Zed can interpret pipes as tables and collapse the layout.
    # Fence the payload so file lines stay readable and literal.
    # Truncate before fencing: truncating the fenced text would cut off the
    # closing fence and leave the code block unterminated.
    body = _truncate_text(content)
    return f"{header}\n\n{_fenced_text(body)}"
def _format_search_files_result(result: Optional[str]) -> Optional[str]:
    """Summarise a ``search_files`` result as a short list of locations.

    Returns ``None`` unless *result* decodes to a dict with a ``matches``
    list. At most 12 matches are listed, each with an optional one-line
    snippet, followed by a narrowing hint when results were cut.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None
    hits = payload.get("matches")
    if not isinstance(hits, list):
        return None

    total = payload.get("total_count", len(hits))
    shown = min(len(hits), 12)
    truncated = bool(payload.get("truncated")) or len(hits) > shown
    plural = "es" if total != 1 else ""

    out = [
        "Search results",
        f"Found {total} match{plural}; showing {shown}.",
        "",
    ]
    for hit in hits[:shown]:
        if isinstance(hit, dict):
            path = str(hit.get("path") or hit.get("file") or hit.get("filename") or "?")
            line_no = hit.get("line") or hit.get("line_number")
            out.append(f"- {path}:{line_no}" if line_no else f"- {path}")
            content = str(hit.get("content") or hit.get("text") or "").strip()
            if content:
                # Collapse internal whitespace so each snippet stays on one line.
                snippet = _truncate_text(" ".join(content.split()), 300)
                out.append(f" {snippet}")
        else:
            out.append(f"- {hit}")

    if truncated:
        out += [
            "",
            "Results truncated. Narrow the search, add file_glob, or use offset to page.",
        ]
    return _truncate_text("\n".join(out), limit=7000)
def _format_execute_code_result(result: Optional[str]) -> Optional[str]:
    """Render an ``execute_code`` result as exit code, output and error.

    When *result* is not a JSON object, a non-blank raw string is passed
    through unchanged and anything else yields ``None``.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None

    stdout = str(payload.get("output") or "")
    stderr = str(payload.get("error") or "")
    exit_code = payload.get("exit_code")

    report = ["Execution complete" if exit_code is None else f"Exit code: {exit_code}"]
    if stdout:
        report += ["", "Output:", stdout]
    if stderr:
        report += ["", "Error:", stderr]
    return _truncate_text("\n".join(report))
def _extract_markdown_headings(content: str, limit: int = 8) -> list[str]:
headings: list[str] = []
for line in content.splitlines():
stripped = line.strip()
if stripped.startswith("#"):
heading = stripped.lstrip("#").strip()
if heading:
headings.append(heading)
if len(headings) >= limit:
break
return headings
def _format_skill_view_result(result: Optional[str]) -> Optional[str]:
    """Summarise a ``skill_view`` result without echoing the skill body.

    Returns ``None`` when *result* is not a JSON object, and a one-line
    failure message when it reports ``success`` as ``False``. Otherwise
    lists name, file, description, content size, linked-file count and the
    section headings found in the content.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None
    if payload.get("success") is False:
        return f"Skill view failed: {payload.get('error', 'unknown error')}"

    skill_name = str(payload.get("name") or "skill")
    skill_file = str(payload.get("file") or payload.get("path") or "SKILL.md")
    description = str(payload.get("description") or "").strip()
    body = str(payload.get("content") or "")
    linked_raw = payload.get("linked_files")
    linked = linked_raw if isinstance(linked_raw, dict) else None

    out = [
        "**Skill loaded**",
        "",
        f"- **Name:** `{skill_name}`",
        f"- **File:** `{skill_file}`",
    ]
    if description:
        out.append(f"- **Description:** {description}")
    if body:
        out.append(f"- **Content:** {len(body):,} chars loaded into agent context")
    if linked:
        # Only list-valued groups contribute to the linked-file count.
        linked_total = 0
        for entries in linked.values():
            if isinstance(entries, list):
                linked_total += len(entries)
        out.append(f"- **Linked files:** {linked_total}")

    sections = _extract_markdown_headings(body)
    if sections:
        out += ["", "**Sections**"]
        out += [f"- {section}" for section in sections]

    out += [
        "",
        "_Full skill content is available to the agent but hidden here to keep ACP readable._",
    ]
    return "\n".join(out)
def _format_skill_manage_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Summarise a ``skill_manage`` result as a short Markdown status list.

    Returns ``None`` when *result* is not a JSON object. Call arguments take
    precedence over values in the result for action, skill name and file.

    Args:
        result: Raw tool result string.
        args: Arguments the tool was called with.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    call_args = args or {}
    action = str(call_args.get("action") or "manage").strip() or "manage"
    name = str(call_args.get("name") or data.get("name") or "skill").strip() or "skill"
    file_path = str(call_args.get("file_path") or data.get("file_path") or "SKILL.md").strip() or "SKILL.md"
    success = data.get("success")
    # Only an explicit False counts as failure; a missing flag is success.
    status = "✅ Skill updated" if success is not False else "✗ Skill update failed"
    lines = [f"**{status}**", "", f"- **Action:** `{action}`", f"- **Skill:** `{name}`"]
    if action not in {"delete"}:
        lines.append(f"- **File:** `{file_path}`")
    message = str(data.get("message") or data.get("error") or "").strip()
    if message:
        lines.append(f"- **Result:** {message}")
    # Fall back to the alternate key only when the first is absent, so a
    # legitimate count of 0 is still reported.
    replacements = data.get("replacements")
    if replacements is None:
        replacements = data.get("replacement_count")
    if replacements is not None:
        lines.append(f"- **Replacements:** {replacements}")
    path = str(data.get("path") or "").strip()
    if path:
        lines.append(f"- **Path:** `{path}`")
    return "\n".join(lines)
def _format_web_search_result(result: Optional[str]) -> Optional[str]:
    """Render ``web_search`` hits as a bulleted list of title, URL and blurb.

    The hit list is read from ``data.web`` when ``data`` is a dict, otherwise
    from a top-level ``web`` key. Returns ``None`` when *result* is not a
    JSON object or no hit list is found. At most 10 hits are listed.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    web = data.get("data", {}).get("web") if isinstance(data.get("data"), dict) else data.get("web")
    if not isinstance(web, list):
        return None
    lines = [f"Web results: {len(web)}"]
    for item in web[:10]:
        if not isinstance(item, dict):
            continue
        title = str(item.get("title") or item.get("url") or "result").strip()
        url = str(item.get("url") or "").strip()
        desc = str(item.get("description") or "").strip()
        # Separate title and URL explicitly; concatenating them directly
        # produced unreadable "Titlehttps://..." lines.
        lines.append(f"- {title}" + (f" — {url}" if url else ""))
        if desc:
            lines.append(f"  {desc}")
    return _truncate_text("\n".join(lines))
def _format_web_extract_result(result: Optional[str]) -> Optional[str]:
    """Return only web_extract errors for ACP; success stays compact via title.

    Returns ``None`` when *result* is not a JSON object, has no ``results``
    list, or none of the first 10 results carries an error.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("success") is False and data.get("error"):
        return f"Web extract failed: {data.get('error')}"
    results = data.get("results")
    if not isinstance(results, list):
        return None

    failures: list[str] = []
    for item in results[:10]:
        if not isinstance(item, dict):
            continue
        error = str(item.get("error") or "").strip()
        # Stringified nulls are treated as "no error".
        if not error or error in {"None", "null"}:
            continue
        url = str(item.get("url") or "").strip()
        title = str(item.get("title") or url or "Untitled").strip()
        # Separate title and URL explicitly so they do not run together.
        location = f" — {url}" if url and url != title else ""
        failures.append(
            f"- {title}{location}\n Error: {_truncate_text(error, limit=500)}"
        )

    if not failures:
        return None
    lines = [f"Web extract failed for {len(failures)} URL{'s' if len(failures) != 1 else ''}"]
    lines.extend(failures)
    return "\n".join(lines)
def _format_process_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a ``process`` tool result: a process listing or a single status.

    When *result* is not a JSON object, a non-blank raw string is passed
    through unchanged and anything else yields ``None``. A result with a
    ``processes`` list is rendered as a listing of at most 20 entries;
    otherwise a single-process summary with optional output/error is built.

    Args:
        result: Raw tool result string.
        args: Arguments the tool was called with (``action`` and
            ``session_id`` are read when present).
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return result if isinstance(result, str) and result.strip() else None
    if data.get("success") is False and data.get("error"):
        return f"Process error: {data.get('error')}"

    call_args = args or {}
    action = str(call_args.get("action") or "process").strip() or "process"

    if isinstance(data.get("processes"), list):
        processes = data["processes"]
        lines = [f"Processes: {len(processes)}"]
        for proc in processes[:20]:
            if not isinstance(proc, dict):
                lines.append(f"- {proc}")
                continue
            sid = str(proc.get("session_id") or proc.get("id") or "?")
            status = str(proc.get("status") or ("exited" if proc.get("exited") else "running"))
            cmd = str(proc.get("command") or "").strip()
            pid = proc.get("pid")
            code = proc.get("exit_code")
            bits = [status]
            if pid is not None:
                bits.append(f"pid {pid}")
            if code is not None:
                bits.append(f"exit {code}")
            # Separate the command from the status bits so they do not run together.
            lines.append(f"- `{sid}` — {', '.join(bits)}" + (f": {cmd[:120]}" if cmd else ""))
        if len(processes) > 20:
            lines.append(f"... {len(processes) - 20} more process(es)")
        return "\n".join(lines)

    status = str(data.get("status") or data.get("state") or action).strip()
    sid = str(data.get("session_id") or call_args.get("session_id") or "").strip()
    lines = [f"Process {action}: {status}" + (f" (`{sid}`)" if sid else "")]
    for key, label in (("command", "Command"), ("pid", "PID"), ("exit_code", "Exit code"), ("returncode", "Exit code"), ("lines", "Lines")):
        if data.get(key) is not None:
            lines.append(f"- **{label}:** {data.get(key)}")
    output = data.get("output") or data.get("new_output") or data.get("log") or data.get("stdout")
    error = data.get("error") or data.get("stderr")
    if output:
        lines.extend(["", "Output:", _truncate_text(str(output), limit=5000)])
    if error:
        lines.extend(["", "Error:", _truncate_text(str(error), limit=2000)])
    msg = data.get("message")
    # The message is shown only when there is no output or error to show.
    if msg and not output and not error:
        lines.append(str(msg))
    return _truncate_text("\n".join(lines), limit=7000)
def _format_delegate_result(result: Optional[str]) -> Optional[str]:
    """Render ``delegate_task`` results as one section per delegated task.

    Returns ``None`` when *result* is not a JSON object or has no ``results``
    list, and a one-line failure message when it carries an error without a
    results list. Each task shows status, optional model/role/duration, a
    truncated summary and error, and the names of up to 12 tools it used.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("error") and not isinstance(data.get("results"), list):
        return f"Delegation failed: {data.get('error')}"
    results = data.get("results")
    if not isinstance(results, list):
        return None

    total = data.get("total_duration_seconds")
    lines = [f"Delegation results: {len(results)} task{'s' if len(results) != 1 else ''}" + (f" in {total}s" if total is not None else "")]
    icon = {"completed": "✅", "failed": "✗", "error": "✗", "timeout": "⏱", "interrupted": "⏹"}
    for item in results:
        if not isinstance(item, dict):
            lines.append(f"- {item}")
            continue
        idx = item.get("task_index")
        status = str(item.get("status") or "unknown")
        model = item.get("model")
        dur = item.get("duration_seconds")
        role = item.get("_child_role")
        marker = icon.get(status, "")
        # Task numbers are shown 1-based; a non-integer index shows as "?".
        task_no = idx + 1 if isinstance(idx, int) else "?"
        # No leading space when the status has no marker.
        header = (f"{marker} " if marker else "") + f"Task {task_no}: {status}"
        bits = []
        if model:
            bits.append(str(model))
        if role:
            bits.append(f"role={role}")
        if dur is not None:
            bits.append(f"{dur}s")
        if bits:
            header += " (" + ", ".join(bits) + ")"
        lines.extend(["", header])
        summary = str(item.get("summary") or "").strip()
        error = str(item.get("error") or "").strip()
        if summary:
            lines.append(_truncate_text(summary, limit=1200))
        if error:
            lines.append("Error: " + _truncate_text(error, limit=800))
        trace = item.get("tool_trace")
        if isinstance(trace, list) and trace:
            names = [str(t.get("tool") or "?") for t in trace if isinstance(t, dict)]
            if names:
                lines.append("Tools: " + ", ".join(names[:12]) + (f" (+{len(names)-12})" if len(names) > 12 else ""))
    return _truncate_text("\n".join(lines), limit=8000)
def _format_session_search_result(result: Optional[str]) -> Optional[str]:
    """Render ``session_search`` results as a list of sessions with previews.

    Returns ``None`` when *result* is not a JSON object or has no ``results``
    list, and a one-line failure message when ``success`` is ``False``. An
    empty result list yields the title plus the tool's message (or a default
    "no matches" line).
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("success") is False:
        return f"Session search failed: {data.get('error', 'unknown error')}"
    results = data.get("results")
    if not isinstance(results, list):
        return None

    mode = data.get("mode") or "search"
    query = data.get("query")
    if mode == "recent":
        heading = "Recent sessions"
    else:
        heading = "Session search results" + (f" for `{query}`" if query else "")
    lines = [heading]
    if not results:
        lines.append(str(data.get("message") or "No matching sessions found."))
        return "\n".join(lines)

    for item in results:
        if not isinstance(item, dict):
            continue
        sid = str(item.get("session_id") or "?")
        title = str(item.get("title") or item.get("when") or "Untitled session").strip()
        when = str(item.get("last_active") or item.get("started_at") or item.get("when") or "").strip()
        count = item.get("message_count")
        source = str(item.get("source") or "").strip()
        meta = ", ".join(str(x) for x in [when, source, f"{count} msgs" if count is not None else ""] if x)
        # Separate the metadata from the session id so they do not run together.
        lines.append(f"- **{title}** (`{sid}`)" + (f" — {meta}" if meta else ""))
        summary = str(item.get("summary") or item.get("preview") or "").strip()
        if summary:
            # Collapse whitespace so each preview stays on one line.
            lines.append(" " + _truncate_text(" ".join(summary.split()), limit=500))
    return _truncate_text("\n".join(lines), limit=7000)
def _format_memory_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Summarise a ``memory`` tool result without dumping the stored entries.

    Args:
        result: Raw tool output; must decode to a JSON object.
        args: Arguments of the tool call. ``action``, ``target``, ``content``
            and ``old_text`` are read from it when present.

    Returns:
        A multi-line status string, or ``None`` when ``result`` does not
        decode to a JSON object.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None

    call_args = args or {}
    action = str(call_args.get("action") or "memory").strip() or "memory"
    target = str(payload.get("target") or call_args.get("target") or "memory")

    if payload.get("success") is False:
        out = [
            f"✗ Memory {action} failed ({target})",
            str(payload.get("error") or "unknown error"),
        ]
        matches = payload.get("matches")
        if isinstance(matches, list) and matches:
            out.append("Matches:")
            # Only the first five candidates are listed.
            for match in matches[:5]:
                out.append(f"- {_truncate_text(str(match), 160)}")
        return "\n".join(out)

    out = [f"✅ Memory {action} saved ({target})"]
    message = payload.get("message")
    if message:
        out.append(str(message))
    entry_count = payload.get("entry_count")
    if entry_count is not None:
        out.append(f"Entries: {entry_count}")
    usage = payload.get("usage")
    if usage:
        out.append(f"Usage: {usage}")

    # Show only the value supplied in the call, never the full memory store.
    preview = str(call_args.get("content") or call_args.get("old_text") or "").strip()
    if preview:
        out.append("Preview: " + _truncate_text(preview, limit=300))
    return "\n".join(out)
def _format_edit_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Describe the outcome of a file-editing tool call.

    Args:
        tool_name: Tool name, used as the prefix of every message.
        result: Raw tool output; a JSON object gets a structured summary,
            any other non-blank string is returned truncated.
        args: Arguments of the tool call; only ``path`` is read.

    Returns:
        A status string. This function always returns text.
    """
    target_path = str((args or {}).get("path") or "file").strip()
    headline = f"{tool_name} completed"
    if target_path:
        headline += f" for `{target_path}`"

    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        if isinstance(result, str) and result.strip():
            return _truncate_text(result, limit=3000)
        return headline

    if payload.get("success") is False or payload.get("error"):
        return f"{tool_name} failed for {target_path}: {payload.get('error', 'unknown error')}"

    out = [headline]
    note = str(payload.get("message") or "").strip()
    if note:
        out.append(note)
    swapped = payload.get("replacements") or payload.get("replacement_count")
    if swapped is not None:
        out.append(f"Replacements: {swapped}")
    touched = payload.get("files_modified")
    if touched and isinstance(touched, list):
        # At most eight modified files are listed.
        out.append("Files: " + ", ".join(f"`{f}`" for f in touched[:8]))
    return "\n".join(out)
def _format_browser_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Summarise the result of a browser tool call.

    Args:
        tool_name: Name of the browser tool. ``browser_get_images`` gets an
            image listing; every tool uses the name in failure messages and
            as the last-resort title.
        result: Raw tool output, ideally a JSON object.
        args: Arguments of the tool call. Not read by this function.

    Returns:
        Formatted text; the raw ``result`` unchanged when it is non-blank but
        not a JSON object; ``None`` when there is nothing to show.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return result if isinstance(result, str) and result.strip() else None
    if data.get("success") is False or data.get("error"):
        return f"{tool_name} failed: {data.get('error', 'unknown error')}"

    if tool_name == "browser_get_images":
        images = data.get("images") or data.get("data")
        if isinstance(images, list):
            lines = [f"Images found: {len(images)}"]
            for img in images[:12]:
                if not isinstance(img, dict):
                    continue
                alt = str(img.get("alt") or "").strip()
                url = str(img.get("url") or img.get("src") or "").strip()
                # Put a dash between the label and the URL; previously they
                # were glued together ("- imagehttps://...").
                lines.append(f"- {alt or 'image'}" + (f" — {url}" if url else ""))
            return _truncate_text("\n".join(lines), limit=5000)
        # No image list in the payload: fall through to the generic summary.

    title = str(data.get("title") or data.get("url") or data.get("status") or tool_name)
    text = str(
        data.get("text")
        or data.get("content")
        or data.get("snapshot")
        or data.get("analysis")
        or data.get("message")
        or ""
    ).strip()
    lines = [title]
    # Skip the URL line when the URL already served as the title.
    if data.get("url") and data.get("url") != title:
        lines.append(str(data.get("url")))
    if text:
        lines.extend(["", _truncate_text(text, limit=5000)])
    return _truncate_text("\n".join(lines), limit=7000)
def _format_media_or_cron_result(tool_name: str, result: Optional[str]) -> Optional[str]:
    """Summarise a media or cron tool result as a short bullet list.

    Args:
        tool_name: Tool name, used as the message prefix.
        result: Raw tool output, ideally a JSON object.

    Returns:
        A bullet list of the well-known fields that are present and truthy;
        the raw ``result`` when it is non-blank but not a JSON object;
        ``None`` otherwise.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None

    if payload.get("success") is False or payload.get("error"):
        return f"{tool_name} failed: {payload.get('error', 'unknown error')}"

    shown_fields = (
        "file_path", "path", "url", "image_url", "job_id",
        "id", "status", "message", "next_run",
    )
    out = [f"{tool_name} completed"]
    out.extend(
        f"- **{field}:** {payload.get(field)}"
        for field in shown_fields
        if payload.get(field)
    )
    return "\n".join(out)
def _format_generic_structured_result(tool_name: str, result: Optional[str]) -> Optional[str]:
    """Fallback formatter for tools that return arbitrary JSON.

    A JSON list becomes a counted bullet list of its first twelve items. A
    JSON object becomes a bullet list that shows a fixed set of priority keys
    first and then the remaining keys, followed by any ``content`` text.

    Args:
        tool_name: Tool name, used as the message prefix.
        result: Raw tool output.

    Returns:
        Formatted text; the raw ``result`` when it is non-blank but neither a
        JSON object nor a JSON list; ``None`` otherwise.
    """
    parsed = _json_loads_maybe(result)

    if isinstance(parsed, list):
        plural = "" if len(parsed) == 1 else "s"
        out = [f"{tool_name}: {len(parsed)} item{plural}"]
        out.extend(f"- {_truncate_text(str(entry), limit=240)}" for entry in parsed[:12])
        return _truncate_text("\n".join(out), limit=5000)

    if not isinstance(parsed, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None

    if parsed.get("success") is False or parsed.get("error"):
        return f"{tool_name} failed: {parsed.get('error', 'unknown error')}"

    if parsed.get("success") is True:
        out = [f"{tool_name} completed"]
    else:
        out = [f"{tool_name} result"]

    blank = (None, "", [], {})
    skipped = {"success", "raw", "content", "entries"}
    ordered_first = (
        "message", "status", "id", "task_id", "issue_id", "title", "name", "entity_id",
        "state", "service", "url", "path", "file_path", "count", "total", "next_run",
    )

    shown = set()
    for field in ordered_first:
        field_value = parsed.get(field)
        if field_value in blank:
            continue
        shown.add(field)
        out.append(f"- **{field}:** {_truncate_text(str(field_value), limit=500)}")

    for field, field_value in parsed.items():
        if field in shown or field in skipped or field_value in blank:
            continue
        if isinstance(field_value, (dict, list)):
            rendered = json.dumps(field_value, ensure_ascii=False, default=str)
        else:
            rendered = str(field_value)
        out.append(f"- **{field}:** {_truncate_text(rendered, limit=500)}")
        # The cap is checked after appending, so one extra line is still
        # added when the priority keys alone already reached the limit.
        if len(out) >= 14:
            break

    body = parsed.get("content")
    if isinstance(body, str) and body.strip():
        out.extend(["", _truncate_text(body.strip(), limit=1500)])
    return _truncate_text("\n".join(out), limit=7000)
def _build_polished_completion_content(
    tool_name: str,
    result: Optional[str],
    function_args: Optional[Dict[str, Any]],
) -> Optional[List[Any]]:
    """Build human-readable completion content for a finished tool call.

    Picks the formatter that matches ``tool_name``; tools listed in
    ``_POLISHED_TOOLS`` without a dedicated formatter use the generic
    structured formatter.

    Args:
        tool_name: Name of the tool that completed.
        result: Raw tool output.
        function_args: Arguments the tool was called with, forwarded to the
            formatters that use them.

    Returns:
        A one-element list wrapping the formatted text via ``_text``, or
        ``None`` when no formatter applies or the formatter produced nothing.
    """
    if tool_name == "todo":
        text = _format_todo_result(result)
    elif tool_name == "read_file":
        text = _format_read_file_result(result, function_args)
    elif tool_name in ("write_file", "patch"):
        text = _format_edit_result(tool_name, result, function_args)
    elif tool_name == "search_files":
        text = _format_search_files_result(result)
    elif tool_name == "execute_code":
        text = _format_execute_code_result(result)
    elif tool_name == "process":
        text = _format_process_result(result, function_args)
    elif tool_name == "delegate_task":
        text = _format_delegate_result(result)
    elif tool_name == "session_search":
        text = _format_session_search_result(result)
    elif tool_name == "memory":
        text = _format_memory_result(result, function_args)
    elif tool_name == "skill_view":
        text = _format_skill_view_result(result)
    elif tool_name == "skill_manage":
        text = _format_skill_manage_result(result, function_args)
    elif tool_name == "web_search":
        text = _format_web_search_result(result)
    elif tool_name == "web_extract":
        text = _format_web_extract_result(result)
    elif tool_name in ("browser_navigate", "browser_snapshot", "browser_vision", "browser_get_images"):
        text = _format_browser_result(tool_name, result, function_args)
    elif tool_name in ("vision_analyze", "image_generate", "cronjob"):
        text = _format_media_or_cron_result(tool_name, result)
    elif tool_name in _POLISHED_TOOLS:
        # Polished tool without a dedicated formatter.
        text = _format_generic_structured_result(tool_name, result)
    else:
        return None

    if not text:
        return None
    return [_text(text)]
def _build_patch_mode_content(patch_text: str) -> List[Any]:
"""Parse V4A patch mode input into ACP diff blocks when possible."""
if not patch_text:
@ -258,7 +912,11 @@ def _build_tool_complete_content(
except Exception:
pass
return [acp.tool_content(acp.text_block(display_result))]
polished_content = _build_polished_completion_content(tool_name, result, function_args)
if polished_content:
return polished_content
return [_text(display_result)]
# ---------------------------------------------------------------------------
@ -288,7 +946,6 @@ def build_tool_start(
content = _build_patch_mode_content(patch_text)
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "write_file":
@ -297,32 +954,172 @@ def build_tool_start(
content = [acp.tool_diff_content(path=path, new_text=file_content)]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "terminal":
command = arguments.get("command", "")
content = [acp.tool_content(acp.text_block(f"$ {command}"))]
content = [_text(f"$ {command}")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
if tool_name == "read_file":
path = arguments.get("path", "")
content = [acp.tool_content(acp.text_block(f"Reading {path}"))]
# The title and location already identify the file. Sending a synthetic
# "Reading ..." content block makes Zed render an unhelpful Output
# section before the real file contents arrive on completion.
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
tool_call_id, title, kind=kind, content=None, locations=locations,
)
if tool_name == "search_files":
pattern = arguments.get("pattern", "")
target = arguments.get("target", "content")
content = [acp.tool_content(acp.text_block(f"Searching for '{pattern}' ({target})"))]
search_path = arguments.get("path")
where = f" in {search_path}" if search_path else ""
content = [_text(f"Searching for '{pattern}' ({target}){where}")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "todo":
items = arguments.get("todos")
if isinstance(items, list):
preview_lines = ["Updating todo list", ""]
for item in items[:8]:
if isinstance(item, dict):
preview_lines.append(f"- {item.get('status', 'pending')}: {item.get('content', item.get('id', ''))}")
if len(items) > 8:
preview_lines.append(f"... {len(items) - 8} more")
content = [_text("\n".join(preview_lines))]
else:
content = [_text("Reading todo list")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "skill_view":
name = str(arguments.get("name") or "?").strip() or "?"
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
content = [_text(f"Loading skill '{name}' ({file_path})")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "skill_manage":
action = str(arguments.get("action") or "manage").strip() or "manage"
name = str(arguments.get("name") or "?").strip() or "?"
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
path = f"skills/{name}/{file_path}" if file_path else f"skills/{name}"
if action == "patch":
old = str(arguments.get("old_string") or "")
new = str(arguments.get("new_string") or "")
content = [acp.tool_diff_content(path=path, old_text=old or None, new_text=new)]
elif action in {"edit", "create"}:
content = [
acp.tool_diff_content(
path=path,
new_text=str(arguments.get("content") or ""),
)
]
elif action == "write_file":
target = str(arguments.get("file_path") or "file")
content = [
acp.tool_diff_content(
path=f"skills/{name}/{target}",
new_text=str(arguments.get("file_content") or ""),
)
]
elif action in {"delete", "remove_file"}:
target = str(arguments.get("file_path") or file_path or name)
content = [_text(f"Removing {target} from skill '{name}'")]
else:
content = [_text(f"Running skill_manage action '{action}' on skill '{name}' ({file_path})")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "execute_code":
code = str(arguments.get("code") or "").strip()
preview = code[:1200] + (f"\n... ({len(code)} chars total, truncated)" if len(code) > 1200 else "")
content = [_text(f"Running Python helper script:\n\n```python\n{preview}\n```" if preview else "Running Python helper script")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "web_search":
query = str(arguments.get("query") or "").strip()
content = [_text(f"Searching the web for: {query}" if query else "Searching the web")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "web_extract":
# The title identifies the URL(s). Avoid a duplicate content block so
# Zed renders this like read_file: compact start, concise completion.
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=None, locations=locations,
)
if tool_name == "process":
action = str(arguments.get("action") or "").strip() or "manage"
sid = str(arguments.get("session_id") or "").strip()
data_preview = str(arguments.get("data") or "").strip()
text = f"Process action: {action}" + (f"\nSession: {sid}" if sid else "")
if data_preview:
text += "\nInput: " + _truncate_text(data_preview, limit=500)
content = [_text(text)]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "delegate_task":
tasks = arguments.get("tasks")
if isinstance(tasks, list) and tasks:
lines = [f"Delegating {len(tasks)} tasks", ""]
for i, task in enumerate(tasks[:8], 1):
if isinstance(task, dict):
goal = str(task.get("goal") or "").strip()
role = str(task.get("role") or "").strip()
lines.append(f"{i}. " + _truncate_text(goal, limit=160) + (f" ({role})" if role else ""))
if len(tasks) > 8:
lines.append(f"... {len(tasks) - 8} more")
content = [_text("\n".join(lines))]
else:
goal = str(arguments.get("goal") or "").strip()
content = [_text("Delegating task" + (f":\n{_truncate_text(goal, limit=800)}" if goal else ""))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "session_search":
query = str(arguments.get("query") or "").strip()
content = [_text(f"Searching past sessions for: {query}" if query else "Loading recent sessions")]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name == "memory":
action = str(arguments.get("action") or "manage").strip() or "manage"
target = str(arguments.get("target") or "memory").strip() or "memory"
preview = str(arguments.get("content") or arguments.get("old_text") or "").strip()
text = f"Memory {action} ({target})"
if preview:
text += "\nPreview: " + _truncate_text(preview, limit=500)
content = [_text(text)]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
)
if tool_name in _POLISHED_TOOLS:
try:
args_text = json.dumps(arguments, indent=2, default=str)
except (TypeError, ValueError):
args_text = str(arguments)
content = [_text(_truncate_text(args_text, limit=1200))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
)
# Generic fallback
@ -334,7 +1131,7 @@ def build_tool_start(
content = [acp.tool_content(acp.text_block(args_text))]
return acp.start_tool_call(
tool_call_id, title, kind=kind, content=content, locations=locations,
raw_input=arguments,
raw_input=None if tool_name in _POLISHED_TOOLS else arguments,
)
@ -347,18 +1144,22 @@ def build_tool_complete(
) -> ToolCallProgress:
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
kind = get_tool_kind(tool_name)
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
if tool_name == "web_extract":
error_text = _format_web_extract_result(result)
content = [_text(error_text)] if error_text else None
else:
content = _build_tool_complete_content(
tool_name,
result,
function_args=function_args,
snapshot=snapshot,
)
return acp.update_tool_call(
tool_call_id,
kind=kind,
status="completed",
content=content,
raw_output=result,
raw_output=None if tool_name in _POLISHED_TOOLS else result,
)

View file

@ -1241,10 +1241,24 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
if not tools:
return []
result = []
seen_names: set = set()
for t in tools:
fn = t.get("function", {})
name = fn.get("name", "")
# Defensive dedup: Anthropic rejects requests with duplicate tool
# names. Upstream injection paths already dedup, but this guard
# converts a hard API failure into a warning. See: #18478
if name and name in seen_names:
logger.warning(
"convert_tools_to_anthropic: duplicate tool name '%s' "
"— dropping second occurrence",
name,
)
continue
if name:
seen_names.add(name)
result.append({
"name": fn.get("name", ""),
"name": name,
"description": fn.get("description", ""),
"input_schema": _normalize_tool_input_schema(
fn.get("parameters", {"type": "object", "properties": {}})

View file

@ -259,13 +259,68 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
"kimi-coding-cn",
})
# OpenRouter app attribution headers
_OR_HEADERS = {
# OpenRouter app attribution headers (base — always sent)
_OR_HEADERS_BASE = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Strings that count as "enabled" when parsing a boolean env var.
# build_or_headers() strips and lower-cases the raw value before testing
# membership; any other non-empty value is treated as disabled.
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
def build_or_headers(or_config: dict | None = None) -> dict:
    """Build OpenRouter headers, optionally including response-cache headers.

    Precedence for the response cache: env var > *or_config* > ``False``.
    When ``response_cache`` is absent from *or_config* the cache headers are
    not sent. NOTE(review): ``load_config()`` may inject its own default for
    this key; that is not visible from here.

    Environment variables:
        ``HERMES_OPENROUTER_CACHE``
            Truthy (``1``/``true``/``yes``/``on``) enables caching; any other
            non-empty value (e.g. ``0``/``false``/``no``/``off``) disables it.
            Overrides ``openrouter.response_cache`` in config.yaml.
        ``HERMES_OPENROUTER_CACHE_TTL``
            Integer seconds (1-86400). Overrides
            ``openrouter.response_cache_ttl`` in config.yaml. When set but
            invalid or out of range, no TTL header is sent and the config
            value is not consulted.

    Args:
        or_config: The ``openrouter`` section from config.yaml. When ``None``,
            it is read from disk via ``load_config()``. Anything that is not
            a dict (for example a null ``openrouter:`` key) is treated as
            empty.

    Returns:
        A new dict with the base attribution headers, plus
        ``X-OpenRouter-Cache`` and possibly ``X-OpenRouter-Cache-TTL`` when
        caching is enabled.
    """
    headers = dict(_OR_HEADERS_BASE)

    # Resolve config from disk if not provided. Best-effort by design: header
    # construction must not fail because the config cannot be loaded.
    if or_config is None:
        try:
            from hermes_cli.config import load_config
            or_config = load_config().get("openrouter", {})
        except Exception:
            or_config = {}
    # A null or malformed section would otherwise crash on ``.get`` below.
    if not isinstance(or_config, dict):
        or_config = {}

    # Determine cache enabled: env var overrides config.
    env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
    if env_cache:
        cache_enabled = env_cache in _TRUTHY_ENV_VALUES
    else:
        cache_enabled = or_config.get("response_cache", False)
    if not cache_enabled:
        return headers
    headers["X-OpenRouter-Cache"] = "true"

    # Determine TTL: env var overrides config.
    env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
    if env_ttl:
        # ``str.isdigit`` alone accepts characters such as "²" that ``int``
        # rejects, so restrict to ASCII digits. ``int`` can still raise on
        # absurdly long digit strings, hence the guard.
        if env_ttl.isascii() and env_ttl.isdigit():
            try:
                ttl = int(env_ttl)
            except ValueError:
                ttl = 0
            if 1 <= ttl <= 86400:
                headers["X-OpenRouter-Cache-TTL"] = str(ttl)
    else:
        ttl = or_config.get("response_cache_ttl", 300)
        # ``bool`` is a subclass of ``int``; ``response_cache_ttl: true`` must
        # not be read as a one-second TTL.
        is_number = isinstance(ttl, (int, float)) and not isinstance(ttl, bool)
        if is_number and 1 <= ttl <= 86400:
            headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))
    return headers
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
@ -1149,23 +1204,23 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = _pool_runtime_api_key(entry)
or_key = explicit_api_key or _pool_runtime_api_key(entry)
if not or_key:
return None, None
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
default_headers=build_or_headers()), _OPENROUTER_MODEL
or_key = os.getenv("OPENROUTER_API_KEY")
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
if not or_key:
return None, None
logger.debug("Auxiliary client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
default_headers=build_or_headers()), _OPENROUTER_MODEL
def _describe_openrouter_unavailable() -> str:
@ -1911,7 +1966,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
}
sync_base_url = str(sync_client.base_url)
if base_url_host_matches(sync_base_url, "openrouter.ai"):
async_kwargs["default_headers"] = dict(_OR_HEADERS)
async_kwargs["default_headers"] = build_or_headers()
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
@ -2053,9 +2108,9 @@ def resolve_provider_client(
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── OpenRouter ───────────────────────────────────────────────────
# ── OpenRouter ───────────────────────────────────────────
if provider == "openrouter":
client, default = _try_openrouter()
client, default = _try_openrouter(explicit_api_key=explicit_api_key)
if client is None:
logger.warning(
"resolve_provider_client: openrouter requested but %s",
@ -3237,7 +3292,26 @@ def _build_call_kwargs(
kwargs["max_tokens"] = max_tokens
if tools:
kwargs["tools"] = tools
# Defensive dedup: providers like Google Vertex, Azure, and Bedrock
# reject requests with duplicate tool names (HTTP 400). The upstream
# injection paths (run_agent.py) already dedup, but this guard
# converts a hard API failure into a warning if an upstream regression
# reintroduces duplicates. See: #18478
_seen: set = set()
_deduped: list = []
for _t in tools:
_tname = (_t.get("function") or {}).get("name", "")
if _tname and _tname in _seen:
logger.warning(
"_build_call_kwargs: duplicate tool name '%s' removed "
"(provider=%s model=%s)",
_tname, provider, model,
)
continue
if _tname:
_seen.add(_tname)
_deduped.append(_t)
kwargs["tools"] = _deduped
# Provider-specific extra_body
merged_extra = dict(extra_body or {})

View file

@ -3,6 +3,7 @@
from __future__ import annotations
import logging
import os
import random
import threading
import time
@ -13,7 +14,7 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.config import get_env_value
from hermes_cli.config import get_env_value, load_env
import hermes_cli.auth as auth_mod
from hermes_cli.auth import (
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@ -1380,6 +1381,16 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Prefer ~/.hermes/.env over os.environ — the user's config file is the
# authoritative source for Hermes credentials. Stale env vars from parent
# processes (Codex CLI, test scripts, etc.) should not override deliberate
# changes to the .env file.
def _get_env_prefer_dotenv(key: str) -> str:
    """Return *key* from ``load_env()`` when set there, else from ``os.environ``.

    Each candidate is stripped before deciding whether to fall back, so a
    blank or whitespace-only entry in the env file does not mask a real value
    in the process environment. Returns ``""`` when neither source has a
    usable value.
    """
    file_value = (load_env().get(key) or "").strip()
    if file_value:
        return file_value
    return (os.environ.get(key) or "").strip()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
@ -1391,8 +1402,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
# Prefer ~/.hermes/.env over os.environ
token = _get_env_prefer_dotenv("OPENROUTER_API_KEY")
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
@ -1418,7 +1429,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
env_url = ""
if pconfig.base_url_env_var:
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")
env_vars = list(pconfig.api_key_env_vars)
if provider == "anthropic":
@ -1429,8 +1440,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
]
for env_var in env_vars:
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value(env_var) or "").strip()
# Prefer ~/.hermes/.env over os.environ
token = _get_env_prefer_dotenv(env_var)
if not token:
continue
source = f"env:{env_var}"

View file

@ -387,6 +387,11 @@ CURATOR_REVIEW_PROMPT = (
" - skill_manage action=write_file — add a references/, templates/, "
"or scripts/ file under an existing skill (the skill must already "
"exist)\n"
" - skill_manage action=delete — archive a skill. MUST pass "
"`absorbed_into=<umbrella>` when you've merged its content into another "
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
"forwarding target. This drives cron-job skill-reference migration — "
"guessing from your YAML summary after the fact is fragile.\n"
" - terminal — mv a sibling into the archive "
"OR move its content into a support subfile\n\n"
"'keep' is a legitimate decision ONLY when the skill is already a "
@ -637,15 +642,76 @@ def _parse_structured_summary(
return out
def _extract_absorbed_into_declarations(
    tool_calls: List[Dict[str, Any]],
) -> Dict[str, Dict[str, Any]]:
    """Collect the ``absorbed_into`` targets declared on skill deletions.

    Scans the run's tool calls for ``skill_manage`` calls whose ``action`` is
    ``"delete"`` and records what the model declared in ``absorbed_into``: a
    non-empty umbrella name for a consolidation, or ``""`` for an explicit
    prune.

    Args:
        tool_calls: Tool-call records. Each is expected to be a dict with a
            ``name`` and an ``arguments`` value that is either a dict or a
            JSON-encoded string. Anything else is ignored.

    Returns:
        ``{skill_name: {"into": "<umbrella>" | "", "declared": True}}`` with
        names and targets stripped of surrounding whitespace. Deletions that
        omit ``absorbed_into``, or pass a non-string value for it, are left
        out so the caller can fall back to its other signals. When a skill
        appears more than once, the last declaration wins.
    """
    declarations: Dict[str, Dict[str, Any]] = {}
    for call in tool_calls or []:
        if not isinstance(call, dict) or call.get("name") != "skill_manage":
            continue

        payload = call.get("arguments") or ""
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except Exception:
                # Unparseable arguments carry no usable declaration.
                continue
        if not isinstance(payload, dict) or payload.get("action") != "delete":
            continue

        skill = payload.get("name")
        if not (isinstance(skill, str) and skill.strip()):
            continue

        # A missing key and an explicit null both come back as None here;
        # only a real string (including "") counts as a declaration.
        umbrella = payload.get("absorbed_into")
        if not isinstance(umbrella, str):
            continue

        declarations[skill.strip()] = {"into": umbrella.strip(), "declared": True}
    return declarations
def _reconcile_classification(
removed: List[str],
heuristic: Dict[str, List[Dict[str, Any]]],
model_block: Dict[str, List[Dict[str, str]]],
destinations: Set[str],
absorbed_declarations: Optional[Dict[str, Dict[str, Any]]] = None,
) -> Dict[str, List[Dict[str, Any]]]:
"""Merge heuristic (tool-call evidence) with the model's structured block.
Rules:
Rules (evaluated in order; first match wins):
- **Model-declared `absorbed_into` at delete time is authoritative.** Any
entry in ``absorbed_declarations`` beats every other signal. This is
the model telling us directly, at the moment of deletion, what it did.
``into != ""`` and target exists consolidated. ``into == ""``
pruned. ``into != ""`` but target doesn't exist → hallucination; fall
through to the usual signals.
- Model-declared consolidation wins when its ``into`` target exists
in ``destinations`` (survived or newly-created). This gives the
model authority over intent + rationale.
@ -666,6 +732,8 @@ def _reconcile_classification(
model_cons = {e["from"]: e for e in model_block.get("consolidations", [])}
model_pruned = {e["name"]: e for e in model_block.get("prunings", [])}
declared = absorbed_declarations or {}
consolidated: List[Dict[str, Any]] = []
pruned: List[Dict[str, Any]] = []
@ -673,6 +741,36 @@ def _reconcile_classification(
mc = model_cons.get(name)
mp = model_pruned.get(name)
hc = heur_cons.get(name)
dec = declared.get(name)
# Authoritative: model declared `absorbed_into` at the delete call.
if dec is not None:
into_claim = dec.get("into", "")
if into_claim and into_claim in destinations:
entry: Dict[str, Any] = {
"name": name,
"into": into_claim,
"source": "absorbed_into (model-declared at delete)",
"reason": (mc.get("reason") or "") if mc else "",
}
if hc and hc.get("evidence"):
entry["evidence"] = hc["evidence"]
consolidated.append(entry)
continue
if into_claim == "":
# Explicit prune declaration
pruned.append({
"name": name,
"source": "absorbed_into=\"\" (model-declared prune)",
"reason": (mp.get("reason") or "") if mp else "",
})
continue
# into_claim is non-empty but target doesn't exist: the model
# named a nonexistent umbrella at delete time. The tool already
# rejects this at the skill_manage layer, so we shouldn't see it
# in practice — but if it slips through (e.g. the umbrella was
# deleted LATER in the same run), fall through to the usual
# signals rather than trusting a broken reference.
# Model says consolidated — trust it if the destination is real.
if mc and mc.get("into") in destinations:
@ -808,11 +906,20 @@ def _write_run_report(
)
model_block = _parse_structured_summary(llm_meta.get("final", "") or "")
destinations = set(after_names) | set(added or [])
# Authoritative signal: extract per-delete `absorbed_into` declarations
# from this run's tool calls. These beat both the YAML summary block and
# the substring heuristic — the model is telling us directly, at the
# moment of deletion, whether each archived skill was consolidated
# (into=<umbrella>) or pruned (into="").
absorbed_declarations = _extract_absorbed_into_declarations(
llm_meta.get("tool_calls", []) or []
)
classification = _reconcile_classification(
removed=removed,
heuristic=heuristic,
model_block=model_block,
destinations=destinations,
absorbed_declarations=absorbed_declarations,
)
consolidated = classification["consolidated"]
pruned = classification["pruned"]

View file

@ -21,6 +21,18 @@ It DOES include:
pointer otherwise the curator would immediately re-fire on the next
tick)
- ``.bundled_manifest`` (so protection markers stay consistent)
Alongside the skills tarball, each snapshot also captures a copy of
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
jobs reference skills by name in their ``skills``/``skill`` fields; the
curator's consolidation pass rewrites those in place via
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
rolling back the skills tree would leave cron jobs pointing at the
umbrella skills even though the narrow skills they were originally
configured with have been restored. We store the whole jobs.json for
fidelity, but rollback only touches the ``skills``/``skill`` fields; the
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
we leave it alone.
"""
from __future__ import annotations
@ -63,6 +75,60 @@ def _skills_dir() -> Path:
return get_hermes_home() / "skills"
def _cron_jobs_file() -> Path:
    """Return the path of the live cron jobs store, ``cron/jobs.json`` under the Hermes home directory."""
    cron_dir = get_hermes_home() / "cron"
    return cron_dir / "jobs.json"
# File name under which _backup_cron_jobs_into() stores the copy of the cron
# jobs file inside its destination directory.
CRON_JOBS_FILENAME = "cron-jobs.json"
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
    """Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.

    Returns a small dict describing what was captured so the caller can
    fold it into the manifest:

    - ``backed_up`` (bool): True only when the copy was written.
    - ``jobs_count`` (int): number of jobs found, 0 when unknown.
    - ``reason`` (str, optional): why nothing was captured.
    - ``parse_warning`` (str, optional): set when the file was copied but
      was not valid JSON at snapshot time.

    Never raises for a missing, unreadable, or undecodable cron file: the
    return dict then has ``backed_up=False`` plus the reason, and the
    snapshot proceeds without cron data (it is still useful for rolling
    back skills).
    """
    src = _cron_jobs_file()
    info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}

    try:
        # The existence probe lives inside the ``try`` so an OSError from
        # stat() is reported as a read error instead of escaping.
        if not src.exists():
            info["reason"] = "no cron/jobs.json present"
            return info
        raw = src.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without
        # catching it a jobs.json holding invalid UTF-8 would propagate
        # out of this helper and abort the whole snapshot.
        logger.debug("Failed to read cron/jobs.json for backup: %s", e)
        info["reason"] = f"read error: {e}"
        return info

    # Count jobs as a nice diagnostic — but don't fail the snapshot if the
    # file is unparseable; just store the raw text and let rollback deal
    # with it (or not, if it's corrupted). jobs.json wraps the list as
    # `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
    # fall back to bare-list shape just in case the format ever changes.
    try:
        parsed = json.loads(raw)
        if isinstance(parsed, dict):
            inner = parsed.get("jobs")
            if isinstance(inner, list):
                info["jobs_count"] = len(inner)
        elif isinstance(parsed, list):
            info["jobs_count"] = len(parsed)
    except (json.JSONDecodeError, TypeError):
        info["jobs_count"] = 0
        info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"

    try:
        (dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
    except OSError as e:
        logger.debug("Failed to write cron backup file: %s", e)
        info["reason"] = f"write error: {e}"
        return info

    info["backed_up"] = True
    return info
def _utc_id(now: Optional[datetime] = None) -> str:
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
if now is None:
@ -116,7 +182,8 @@ def _count_skill_files(base: Path) -> int:
def _write_manifest(dest: Path, reason: str, archive_path: Path,
skills_counted: int) -> None:
skills_counted: int,
cron_info: Optional[Dict[str, Any]] = None) -> None:
manifest = {
"id": dest.name,
"reason": reason,
@ -125,6 +192,15 @@ def _write_manifest(dest: Path, reason: str, archive_path: Path,
"archive_bytes": archive_path.stat().st_size,
"skill_files": skills_counted,
}
if cron_info is not None:
manifest["cron_jobs"] = {
"backed_up": bool(cron_info.get("backed_up", False)),
"jobs_count": int(cron_info.get("jobs_count", 0)),
}
if not cron_info.get("backed_up"):
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
if cron_info.get("parse_warning"):
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
(dest / "manifest.json").write_text(
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
)
@ -181,7 +257,14 @@ def snapshot_skills(reason: str = "manual") -> Optional[Path]:
# arcname: store paths relative to skills/ so extraction
# drops cleanly back into the skills dir.
tf.add(str(entry), arcname=entry.name, recursive=True)
_write_manifest(dest, reason, archive, _count_skill_files(skills))
# Capture cron/jobs.json alongside the tarball. Never fails the
# snapshot — the skills side is the core guarantee; cron is
# additive. We still record in the manifest whether it was
# captured so rollback can surface "no cron data in this snapshot".
cron_info = _backup_cron_jobs_into(dest)
_write_manifest(dest, reason, archive,
_count_skill_files(skills),
cron_info=cron_info)
except (OSError, tarfile.TarError) as e:
logger.debug("Curator snapshot failed: %s", e, exc_info=True)
# Clean up partial snapshot
@ -298,6 +381,149 @@ def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
return candidates[0] if candidates else None
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
    """Reconcile backed-up cron skill links into the live ``cron/jobs.json``.

    We do NOT overwrite the whole cron file. Only the ``skills`` and
    ``skill`` fields are restored, and only on jobs that still exist in the
    current file (matched by ``id``). Every other key on a live job is left
    exactly as found, so changes made since the snapshot are not regressed.

    Rules:

    - Job in backup AND live, skill fields differ: fields restored.
    - Job in backup AND live, skill fields equal: counted as unchanged.
    - Job in backup but absent from live: skipped and listed under
      ``skipped_missing``.
    - Job in live but absent from backup: left untouched.

    Returns a report dict with keys ``attempted`` (bool), ``restored``
    (list of per-job before/after records), ``skipped_missing`` (list),
    ``unchanged`` (int) and ``error`` (str or None).

    Never raises; failures are captured in ``error``. The live file is
    read and written via ``cron.jobs.load_jobs`` / ``save_jobs`` while
    holding ``cron.jobs._jobs_file_lock``.
    """
    report: Dict[str, Any] = {
        "attempted": False,
        "restored": [],
        "skipped_missing": [],
        "unchanged": 0,
        "error": None,
    }

    backup_file = snapshot_dir / CRON_JOBS_FILENAME
    if not backup_file.exists():
        report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
        return report

    try:
        backup_text = backup_file.read_text(encoding="utf-8")
        backup_parsed = json.loads(backup_text)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that read_text() raises on invalid UTF-8;
        # catching only JSONDecodeError let the latter escape and crash
        # the caller after the skills tree had already been restored.
        report["error"] = f"failed to load backed-up jobs: {e}"
        return report

    # jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
    # that shape and a bare list for forward compat.
    if isinstance(backup_parsed, dict):
        backup_jobs = backup_parsed.get("jobs")
    elif isinstance(backup_parsed, list):
        backup_jobs = backup_parsed
    else:
        backup_jobs = None
    if not isinstance(backup_jobs, list):
        report["error"] = "backed-up cron-jobs.json has no jobs list"
        return report

    # Build a lookup of the backed-up skill state keyed by job id.
    # We only need the two skill-ish fields (legacy single and modern list).
    backup_by_id: Dict[str, Dict[str, Any]] = {}
    for job in backup_jobs:
        if not isinstance(job, dict):
            continue
        jid = job.get("id")
        if not isinstance(jid, str) or not jid:
            continue
        backup_by_id[jid] = {
            "skills": job.get("skills"),
            "skill": job.get("skill"),
            "name": job.get("name") or jid,
        }

    if not backup_by_id:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

    # Load and rewrite the live jobs under the cron module's lock.
    try:
        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
        with _jobs_file_lock:
            live_jobs = load_jobs()
            changed = False
            live_ids = set()

            for live in live_jobs:
                if not isinstance(live, dict):
                    continue
                jid = live.get("id")
                if not isinstance(jid, str) or not jid:
                    continue
                live_ids.add(jid)

                backup = backup_by_id.get(jid)
                if backup is None:
                    continue  # live job didn't exist at snapshot time

                cur_skills = live.get("skills")
                cur_skill = live.get("skill")
                bkp_skills = backup.get("skills")
                bkp_skill = backup.get("skill")

                if cur_skills == bkp_skills and cur_skill == bkp_skill:
                    report["unchanged"] += 1
                    continue

                # Restore. Preserve absence (don't force the key to appear
                # if the backup didn't have it either).
                if bkp_skills is None:
                    live.pop("skills", None)
                else:
                    live["skills"] = bkp_skills
                if bkp_skill is None:
                    live.pop("skill", None)
                else:
                    live["skill"] = bkp_skill

                report["restored"].append({
                    "job_id": jid,
                    "job_name": backup.get("name") or jid,
                    "from": {"skills": cur_skills, "skill": cur_skill},
                    "to": {"skills": bkp_skills, "skill": bkp_skill},
                })
                changed = True

            # Jobs in backup but not in live = user deleted them after snapshot
            for jid, backup in backup_by_id.items():
                if jid not in live_ids:
                    report["skipped_missing"].append({
                        "job_id": jid,
                        "job_name": backup.get("name") or jid,
                    })

            # Only touch the file when at least one job actually changed.
            if changed:
                save_jobs(live_jobs)
    except Exception as e:  # noqa: BLE001 — rollback must not die mid-restore
        logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
        report["error"] = f"restore failed mid-flight: {e}"

    return report
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
"""Restore ``~/.hermes/skills/`` from a snapshot.
@ -408,8 +634,35 @@ def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]
except OSError:
pass
logger.info("Curator rollback: restored from %s", target.name)
return (True, f"restored from snapshot {target.name}", target)
# Reconcile cron skill-links. Surgical: only the skills/skill fields
# on jobs matched by id. Everything else in jobs.json is live state
# (schedule, next_run_at, enabled, prompt, etc.) and we leave it
# alone. Failures here don't fail the overall rollback — the skills
# tree is already restored, which is the main guarantee.
cron_report = _restore_cron_skill_links(target)
summary_bits = [f"restored from snapshot {target.name}"]
if cron_report.get("attempted"):
restored_n = len(cron_report.get("restored") or [])
skipped_n = len(cron_report.get("skipped_missing") or [])
if cron_report.get("error"):
summary_bits.append(f"cron links: error — {cron_report['error']}")
elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
# Attempted but nothing matched — empty snapshot or no overlapping ids.
pass
else:
parts = []
if restored_n:
parts.append(f"{restored_n} job(s) had skill links restored")
if skipped_n:
parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
if cron_report.get("unchanged"):
parts.append(f"{cron_report['unchanged']} already matched")
summary_bits.append("cron links: " + ", ".join(parts))
logger.info("Curator rollback: restored from %s (cron_report=%s)",
target.name, cron_report)
return (True, "; ".join(summary_bits), target)
# ---------------------------------------------------------------------------

View file

@ -6,6 +6,7 @@ can invoke skills via /skill-name commands.
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, Optional
@ -20,10 +21,35 @@ from agent.skill_preprocessing import (
logger = logging.getLogger(__name__)
_skill_commands: Dict[str, Dict[str, Any]] = {}
_skill_commands_platform: Optional[str] = None
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
def _resolve_skill_commands_platform() -> Optional[str]:
"""Return the current platform scope used for disabled-skill filtering.
Used to detect when the active platform has shifted so
:func:`get_skill_commands` can drop a stale cache that was populated
for a different platform's ``skills.platform_disabled`` view (#14536).
Resolves from (in order) ``HERMES_PLATFORM`` env var and
``HERMES_SESSION_PLATFORM`` from the gateway session context. Returns
``None`` when no platform scope is active (e.g. classic CLI, RL
rollouts, standalone scripts).
"""
try:
from gateway.session_context import get_session_env
resolved_platform = (
os.getenv("HERMES_PLATFORM")
or get_session_env("HERMES_SESSION_PLATFORM")
)
except Exception:
resolved_platform = os.getenv("HERMES_PLATFORM")
return resolved_platform or None
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
raw_identifier = (skill_identifier or "").strip()
@ -218,7 +244,8 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
Returns:
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
"""
global _skill_commands
global _skill_commands, _skill_commands_platform
_skill_commands_platform = _resolve_skill_commands_platform()
_skill_commands = {}
try:
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
@ -278,8 +305,16 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
"""Return the current skill commands mapping (scan first if empty)."""
if not _skill_commands:
"""Return the current skill commands mapping (scan first if empty).
Rescans when the active platform scope changes (e.g. a gateway
process serving Telegram and Discord concurrently) so each platform
sees its own ``skills.platform_disabled`` view (#14536).
"""
if (
not _skill_commands
or _skill_commands_platform != _resolve_skill_commands_platform()
):
scan_skill_commands()
return _skill_commands

View file

@ -121,6 +121,18 @@ model:
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
# # data_collection: "deny"
# =============================================================================
# OpenRouter Response Caching (only applies when using OpenRouter)
# =============================================================================
# Cache identical API responses at the OpenRouter edge for free instant replays.
# When enabled, identical requests (same model, messages, parameters) return
# cached responses with zero billing. Separate from Anthropic prompt caching.
# See: https://openrouter.ai/docs/guides/features/response-caching
#
# openrouter:
# response_cache: true # Enable response caching (default: true)
# response_cache_ttl: 300 # Cache TTL in seconds, 1-86400 (default: 300)
# =============================================================================
# Git Worktree Isolation
# =============================================================================

11
cli.py
View file

@ -2928,7 +2928,14 @@ class HermesCLI:
def _expand_ref(match):
path = Path(match.group(1))
return path.read_text(encoding="utf-8") if path.exists() else match.group(0)
# Use try/except instead of path.exists() to avoid TOCTOU race:
# the paste file may be deleted between check and read, causing
# the input to be silently dropped (#17666).
try:
return path.read_text(encoding="utf-8")
except (OSError, IOError):
logger.warning("Paste file gone or unreadable, returning placeholder: %s", path)
return match.group(0)
return paste_ref_re.sub(_expand_ref, text)
@ -11584,7 +11591,7 @@ class HermesCLI:
pass # Non-fatal — don't break the main loop
except Exception as e:
print(f"Error: {e}")
logger.warning("process_loop unhandled error (msg may be lost): %s", e)
# Start processing thread
process_thread = threading.Thread(target=process_loop, daemon=True)

View file

@ -123,9 +123,19 @@ _LOCK_FILE = _LOCK_DIR / ".tick.lock"
def _resolve_origin(job: dict) -> Optional[dict]:
"""Extract origin info from a job, preserving any extra routing metadata."""
"""Extract origin info from a job, preserving any extra routing metadata.
Treats non-dict origins (free-form provenance strings, ints, lists from
migration scripts or hand-edited jobs.json) as missing instead of
crashing with ``AttributeError`` on ``origin.get(...)``. Without this
guard, a job tagged with e.g. ``"combined-digest-replaces-x-and-y"``
crashed every fire attempt with
``'str' object has no attribute 'get'``. ``mark_job_run`` recorded the
failure, but the next tick re-loaded the same poisoned origin and
crashed identically until the field was patched manually (#18722).
"""
origin = job.get("origin")
if not origin:
if not isinstance(origin, dict):
return None
platform = origin.get("platform")
chat_id = origin.get("chat_id")
@ -147,6 +157,19 @@ def _get_home_target_chat_id(platform_name: str) -> str:
return value
def _get_home_target_thread_id(platform_name: str) -> Optional[str]:
    """Look up the optional thread/topic ID configured for a home target.

    The platform's home-target env var name (from
    ``_HOME_TARGET_ENV_VARS``, keyed by lower-cased platform name) is
    suffixed with ``_THREAD_ID`` and read from the environment. When that
    is empty, the legacy variable name from
    ``_LEGACY_HOME_TARGET_ENV_VARS`` gets the same treatment. Returns
    ``None`` when the platform is unknown or neither variable holds a
    non-blank value.
    """
    primary_var = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
    if not primary_var:
        return None

    thread_id = os.getenv(f"{primary_var}_THREAD_ID", "").strip()
    if thread_id:
        return thread_id

    # Primary variable unset or blank: try the legacy name, if one exists.
    legacy_var = _LEGACY_HOME_TARGET_ENV_VARS.get(primary_var)
    if not legacy_var:
        return None
    return os.getenv(f"{legacy_var}_THREAD_ID", "").strip() or None
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
"""Resolve one concrete auto-delivery target for a cron job."""
@ -175,7 +198,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
return {
"platform": platform_name,
"chat_id": chat_id,
"thread_id": None,
"thread_id": _get_home_target_thread_id(platform_name),
}
return None
@ -229,7 +252,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
return {
"platform": platform_name,
"chat_id": chat_id,
"thread_id": None,
"thread_id": _get_home_target_thread_id(platform_name),
}

View file

@ -186,18 +186,24 @@ class HomeChannel:
Default destination for a platform.
When a cron job specifies deliver="telegram" without a specific chat ID,
messages are sent to this home channel.
messages are sent to this home channel. Thread-aware platforms may also
store a thread/topic ID so the bare platform target routes to the exact
conversation where /sethome was run.
"""
platform: Platform
chat_id: str
name: str # Human-readable name for display
thread_id: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
return {
result = {
"platform": self.platform.value,
"chat_id": self.chat_id,
"name": self.name,
}
if self.thread_id:
result["thread_id"] = self.thread_id
return result
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "HomeChannel":
@ -205,6 +211,7 @@ class HomeChannel:
platform=Platform(data["platform"]),
chat_id=str(data["chat_id"]),
name=data.get("name", "Home"),
thread_id=str(data["thread_id"]) if data.get("thread_id") else None,
)
@ -1071,6 +1078,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.TELEGRAM,
chat_id=telegram_home,
name=os.getenv("TELEGRAM_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("TELEGRAM_HOME_CHANNEL_THREAD_ID") or None,
)
# Discord
@ -1087,6 +1095,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.DISCORD,
chat_id=discord_home,
name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("DISCORD_HOME_CHANNEL_THREAD_ID") or None,
)
# Reply threading mode for Discord (off/first/all)
@ -1108,6 +1117,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.WHATSAPP,
chat_id=whatsapp_home,
name=os.getenv("WHATSAPP_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("WHATSAPP_HOME_CHANNEL_THREAD_ID") or None,
)
# Slack
@ -1135,6 +1145,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.SLACK,
chat_id=slack_home,
name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
thread_id=os.getenv("SLACK_HOME_CHANNEL_THREAD_ID") or None,
)
# Signal
@ -1155,6 +1166,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.SIGNAL,
chat_id=signal_home,
name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("SIGNAL_HOME_CHANNEL_THREAD_ID") or None,
)
# Mattermost
@ -1174,6 +1186,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.MATTERMOST,
chat_id=mattermost_home,
name=os.getenv("MATTERMOST_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("MATTERMOST_HOME_CHANNEL_THREAD_ID") or None,
)
# Matrix
@ -1205,6 +1218,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.MATRIX,
chat_id=matrix_home,
name=os.getenv("MATRIX_HOME_ROOM_NAME", "Home"),
thread_id=os.getenv("MATRIX_HOME_ROOM_THREAD_ID") or None,
)
# Home Assistant
@ -1238,6 +1252,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.EMAIL,
chat_id=email_home,
name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
thread_id=os.getenv("EMAIL_HOME_ADDRESS_THREAD_ID") or None,
)
# SMS (Twilio)
@ -1253,6 +1268,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.SMS,
chat_id=sms_home,
name=os.getenv("SMS_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("SMS_HOME_CHANNEL_THREAD_ID") or None,
)
# API Server
@ -1315,6 +1331,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.DINGTALK,
chat_id=dingtalk_home,
name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("DINGTALK_HOME_CHANNEL_THREAD_ID") or None,
)
# Feishu / Lark
@ -1342,6 +1359,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.FEISHU,
chat_id=feishu_home,
name=os.getenv("FEISHU_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("FEISHU_HOME_CHANNEL_THREAD_ID") or None,
)
# WeCom (Enterprise WeChat)
@ -1364,6 +1382,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.WECOM,
chat_id=wecom_home,
name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("WECOM_HOME_CHANNEL_THREAD_ID") or None,
)
# WeCom callback mode (self-built apps)
@ -1422,6 +1441,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.WEIXIN,
chat_id=weixin_home,
name=os.getenv("WEIXIN_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("WEIXIN_HOME_CHANNEL_THREAD_ID") or None,
)
# BlueBubbles (iMessage)
@ -1445,6 +1465,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.BLUEBUBBLES,
chat_id=bluebubbles_home,
name=os.getenv("BLUEBUBBLES_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("BLUEBUBBLES_HOME_CHANNEL_THREAD_ID") or None,
)
# QQ (Official Bot API v2)
@ -1482,6 +1503,11 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.QQBOT,
chat_id=qq_home,
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
thread_id=(
os.getenv("QQBOT_HOME_CHANNEL_THREAD_ID")
or os.getenv("QQ_HOME_CHANNEL_THREAD_ID")
or None
),
)
# Yuanbao — YUANBAO_APP_ID preferred
@ -1512,6 +1538,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
platform=Platform.YUANBAO,
chat_id=yuanbao_home,
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
thread_id=os.getenv("YUANBAO_HOME_CHANNEL_THREAD_ID") or None,
)
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
if yuanbao_dm_policy:

View file

@ -0,0 +1,84 @@
"""Shared HTTP client factory for long-lived platform adapters.
Gateway messaging platforms (QQ Bot, Feishu, WeCom, DingTalk, Signal,
BlueBubbles, WeCom-callback) keep a persistent ``httpx.AsyncClient``
alive for the adapter's lifetime. That amortises TLS/connection setup
across many API calls, but it also means the process's file-descriptor
pressure is sensitive to how aggressively the pool recycles idle keep-
alive connections.
httpx's default ``keepalive_expiry`` is 5 seconds. On macOS behind
Cloudflare Warp (and other transparent proxies), peer-initiated FIN can
sit in ``CLOSE_WAIT`` longer than that before the local socket actually
drains, which, multiplied across 7 long-lived adapters plus the LLM
client and MCP clients, walks straight into the default 256 fd limit.
See #18451.
``platform_httpx_limits()`` returns a tighter ``httpx.Limits`` the
adapter factories use instead of the httpx default. The values chosen:
* ``max_keepalive_connections=10``: plenty for any single adapter;
platform APIs rarely parallelise beyond this.
* ``keepalive_expiry=2.0``: close idle sockets aggressively so a
proxy's lingering CLOSE_WAIT window can't starve the process.
Override via ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` /
``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` env vars when tuning under load.
"""
from __future__ import annotations
import os
try:
import httpx
except ImportError: # pragma: no cover — optional dep
httpx = None # type: ignore[assignment]
_DEFAULT_KEEPALIVE_EXPIRY_S = 2.0
_DEFAULT_MAX_KEEPALIVE = 10
def platform_httpx_limits() -> "httpx.Limits | None":
    """Build the ``httpx.Limits`` used by persistent platform-adapter clients.

    Yields ``None`` when httpx could not be imported, letting callers fall
    back to httpx's own defaults. Otherwise the keep-alive expiry and the
    keep-alive pool size come from ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY``
    and ``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE``; a value that is blank,
    unparseable, or not strictly positive is replaced by the module default.
    """
    if httpx is None:
        return None

    def _positive_from_env(name, default, convert):
        # Shared parser: strip, convert, and accept only values > 0.
        text = os.environ.get(name, "").strip()
        if not text:
            return default
        try:
            parsed = convert(text)
        except (TypeError, ValueError):
            return default
        if parsed > 0:
            return parsed
        return default

    expiry_seconds = _positive_from_env(
        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY",
        _DEFAULT_KEEPALIVE_EXPIRY_S,
        float,
    )
    pool_size = _positive_from_env(
        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE",
        _DEFAULT_MAX_KEEPALIVE,
        int,
    )

    # max_connections is intentionally not passed, so httpx's default applies.
    return httpx.Limits(
        max_keepalive_connections=pool_size,
        keepalive_expiry=expiry_seconds,
    )

View file

@ -2489,15 +2489,20 @@ class BasePlatformAdapter(ABC):
try:
response = await self._message_handler(event)
# Old adapter task (if any) is cancelled AFTER the runner has
# fully handled the command — keeps ordering deterministic.
await self.cancel_session_processing(
session_key,
release_guard=False,
discard_pending=False,
)
_text, _eph_ttl = self._unwrap_ephemeral(response)
# Send the response BEFORE cancelling the old task so the send
# cannot be affected by task-cancellation side effects (race
# condition fix — issue #18912). Previously the send happened
# after cancel_session_processing, which could silently drop the
# "/new" confirmation when an agent was actively running.
if _text:
logger.info(
"[%s] Sending command '/%s' response (%d chars) to %s",
self.name,
cmd,
len(_text),
event.source.chat_id,
)
_r = await self._send_with_retry(
chat_id=event.source.chat_id,
content=_text,
@ -2510,6 +2515,13 @@ class BasePlatformAdapter(ABC):
message_id=_r.message_id,
ttl_seconds=_eph_ttl,
)
# Old adapter task (if any) is cancelled AFTER the response has
# been sent — keeps ordering deterministic and avoids the race.
await self.cancel_session_processing(
session_key,
release_guard=False,
discard_pending=False,
)
except Exception:
# On failure, restore the original guard if one still exists so
# we don't leave the session in a half-reset state.

View file

@ -162,7 +162,9 @@ class BlueBubblesAdapter(BasePlatformAdapter):
return False
from aiohttp import web
self.client = httpx.AsyncClient(timeout=30.0)
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
from gateway.platforms._http_client_limits import platform_httpx_limits
self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
try:
await self._api_get("/api/v1/ping")
info = await self._api_get("/api/v1/server/info")

View file

@ -228,7 +228,11 @@ class DingTalkAdapter(BasePlatformAdapter):
return False
try:
self._http_client = httpx.AsyncClient(timeout=30.0)
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
from gateway.platforms._http_client_limits import platform_httpx_limits
self._http_client = httpx.AsyncClient(
timeout=30.0, limits=platform_httpx_limits(),
)
credential = dingtalk_stream.Credential(
self._client_id, self._client_secret

View file

@ -497,6 +497,7 @@ class DiscordAdapter(BasePlatformAdapter):
self._ready_event = asyncio.Event()
self._allowed_user_ids: set = set() # For button approval authorization
self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering
self.gateway_runner = None # Set by gateway/run.py for cross-platform delivery
# Voice channel state (per-guild)
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
@ -613,6 +614,21 @@ class DiscordAdapter(BasePlatformAdapter):
# so LLM output or echoed user content can't ping the whole
# server; override per DISCORD_ALLOW_MENTION_* env vars or the
# discord.allow_mentions.* block in config.yaml.
# Close any existing client to prevent zombie websocket connections
# on reconnect (see #18187). Without this, the old client remains
# connected to Discord gateway and both fire on_message, causing
# double responses.
if self._client is not None:
try:
if not self._client.is_closed():
await self._client.close()
except Exception:
logger.debug("[%s] Failed to close previous Discord client", self.name)
finally:
self._client = None
self._ready_event.clear()
self._client = commands.Bot(
command_prefix="!", # Not really used, we handle raw messages
intents=intents,
@ -1914,6 +1930,225 @@ class DiscordAdapter(BasePlatformAdapter):
return True
return False
# ── Slash command authorization ─────────────────────────────────────
# Slash commands (``_run_simple_slash`` and ``_handle_thread_create_slash``)
# are a separate Discord interaction surface from regular messages and
# historically ran with NO authorization check — bypassing every gate
# ``on_message`` enforces (DISCORD_ALLOWED_USERS, DISCORD_ALLOWED_ROLES,
# DISCORD_ALLOWED_CHANNELS, DISCORD_IGNORED_CHANNELS). Any guild member
# could invoke ``/background``, ``/restart``, ``/sethome``, etc. as the
# operator. ``_check_slash_authorization`` mirrors the on_message gates
# one-for-one so the slash surface honors the same trust boundary.
#
# By design, this is a no-op for deployments with no allowlist env vars
# set — ``_is_allowed_user`` returns True and the channel checks early-out
# — preserving the existing "single-tenant, all guild members trusted"
# default. Deployments that DO set any DISCORD_ALLOWED_* var get slash
# parity with on_message.
def _evaluate_slash_authorization(
    self, interaction: "discord.Interaction",
) -> Tuple[bool, Optional[str]]:
    """Decide whether a slash interaction is authorized, without replying.

    Returns ``(allowed, reason)``; ``reason`` is a short string when
    ``allowed`` is False and ``None`` otherwise. Nothing is sent to
    Discord here, so callers that must stay silent (for example an
    autocomplete callback) can use the verdict directly.

    Checks, in order:

    1. Channel scope, skipped for DM channels. The interaction's channel
       id, plus the parent channel id when the channel is a thread, is
       tested against ``DISCORD_ALLOWED_CHANNELS`` and then
       ``DISCORD_IGNORED_CHANNELS``. A match on the ignore list rejects
       even if the allow list matched.
    2. User/role allowlist via ``self._is_allowed_user``.

    Malformed payloads fail closed: with a non-wildcard channel allowlist
    and no resolvable channel id, or with a user/role allowlist and no
    identifiable user, the interaction is rejected rather than waved
    through or allowed to raise ``AttributeError``.
    """
    channel = getattr(interaction, "channel", None)
    is_dm = channel is not None and isinstance(channel, discord.DMChannel)

    # ── Channel scope ── (DMs are not channel-gated)
    if not is_dm:
        raw_channel_id = getattr(interaction, "channel_id", None) or getattr(
            channel, "id", None,
        )
        candidate_ids: set = set() if raw_channel_id is None else {str(raw_channel_id)}

        # For threads, the parent channel counts too, so per-channel
        # allow/deny lists apply to threads beneath a listed channel.
        if isinstance(channel, discord.Thread):
            parent_channel_id = self._get_parent_channel_id(channel)
            if parent_channel_id:
                candidate_ids.add(str(parent_channel_id))

        allow_spec = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
        if allow_spec:
            allow_set = {part.strip() for part in allow_spec.split(",") if part.strip()}
            if "*" not in allow_set:
                if not candidate_ids:
                    # A channel policy exists but there is nothing to
                    # match against it: fail closed.
                    return (
                        False,
                        "channel id missing with DISCORD_ALLOWED_CHANNELS configured",
                    )
                if candidate_ids.isdisjoint(allow_set):
                    return (False, "channel not in DISCORD_ALLOWED_CHANNELS")

        # Ignored beats allowed: an ignore entry for the thread or its
        # parent rejects even when the allow list matched above.
        ignore_spec = os.getenv("DISCORD_IGNORED_CHANNELS", "")
        if ignore_spec and candidate_ids:
            ignore_set = {part.strip() for part in ignore_spec.split(",") if part.strip()}
            if "*" in ignore_set or not candidate_ids.isdisjoint(ignore_set):
                return (False, "channel in DISCORD_IGNORED_CHANNELS")

    # ── User / role allowlist ──
    user = getattr(interaction, "user", None)
    user_allowlist = getattr(self, "_allowed_user_ids", set()) or set()
    role_allowlist = getattr(self, "_allowed_role_ids", set()) or set()

    if user is None or getattr(user, "id", None) is None:
        # No identifiable user: reject only when some allowlist is
        # configured; with none configured everyone is allowed.
        if user_allowlist or role_allowlist:
            return (False, "missing interaction.user with allowlist configured")
        return (True, None)

    if self._is_allowed_user(str(user.id), author=user):
        return (True, None)
    return (
        False,
        "user not in DISCORD_ALLOWED_USERS / DISCORD_ALLOWED_ROLES",
    )
async def _check_slash_authorization(
self, interaction: "discord.Interaction", command_text: str,
) -> bool:
"""Mirror on_message's user/role/channel gates onto a slash invocation.
Returns True to proceed. Returns False *after* sending an ephemeral
rejection, logging a warning, and scheduling a cross-platform admin
alert the caller must stop on False (the interaction has already
been responded to).
"""
allowed, reason = self._evaluate_slash_authorization(interaction)
if allowed:
return True
return await self._reject_slash(
interaction, command_text, reason=reason or "unauthorized",
)
async def _reject_slash(
    self, interaction: "discord.Interaction", command_text: str, *, reason: str,
) -> bool:
    """Reject an unauthorized slash invocation; always returns False.

    Performs three best-effort steps, none of which may raise:

    1. log a warning describing the attempt,
    2. answer the interaction with an ephemeral rejection,
    3. schedule ``_notify_unauthorized_slash`` as a background task.

    Tolerates a missing ``interaction.user``: identity fields fall back
    to ``"?"`` instead of raising AttributeError before the ephemeral
    rejection can be sent.

    Args:
        interaction: The slash-command interaction being rejected.
        command_text: Command text to include in the log line and alert.
        reason: Short explanation of why the invocation was rejected.

    Returns:
        ``False``, so callers can ``return await self._reject_slash(...)``.
    """
    user = getattr(interaction, "user", None)
    if user is not None:
        user_id = str(getattr(user, "id", "?"))
        user_name = getattr(user, "name", "?")
    else:
        user_id = "?"
        user_name = "?"
    chan_id = getattr(interaction, "channel_id", None) or getattr(
        getattr(interaction, "channel", None), "id", None,
    )
    guild_id = getattr(interaction, "guild_id", None)
    logger.warning(
        "[Discord] Unauthorized slash attempt: user=%s id=%s channel=%s "
        "guild=%s cmd=%r reason=%r",
        user_name, user_id, chan_id, guild_id, command_text, reason,
    )
    try:
        await interaction.response.send_message(
            "You're not authorized to use this command.",
            ephemeral=True,
        )
    except Exception as e:
        # Interaction may already be responded to (e.g. caller deferred
        # before the auth check, or Discord retried). Best-effort only.
        logger.debug("[Discord] Could not send unauthorized ephemeral: %s", e)
    # Fire-and-forget: don't block the interaction handler on the
    # cross-platform alert I/O.
    notify_coro = None
    try:
        notify_coro = self._notify_unauthorized_slash(
            user_name, user_id, chan_id, guild_id, command_text, reason,
        )
        task = asyncio.create_task(notify_coro)
    except Exception as e:
        # The coroutine was never handed to a task; close it so it does
        # not surface later as a "coroutine was never awaited" warning.
        close = getattr(notify_coro, "close", None)
        if callable(close):
            close()
        logger.debug("[Discord] Could not schedule admin notify task: %s", e)
        return False
    try:
        # The event loop only keeps a weak reference to tasks, so an
        # un-referenced task can be garbage-collected before it finishes.
        # Hold a strong reference until the task completes.
        pending = getattr(self, "_unauthorized_notify_tasks", None)
        if pending is None:
            pending = self._unauthorized_notify_tasks = set()
        pending.add(task)
        task.add_done_callback(pending.discard)
    except Exception as e:
        logger.debug("[Discord] Could not track admin notify task: %s", e)
    return False
async def _notify_unauthorized_slash(
    self, user_name: str, user_id: str, chan_id, guild_id,
    command_text: str, reason: str,
) -> None:
    """Send a best-effort alert about a rejected slash attempt.

    Platforms are tried in order: TELEGRAM first, then SLACK. A platform
    is skipped when it has no adapter registered on the gateway runner or
    no home channel with a ``chat_id``. The method returns after the first
    send whose result does not explicitly report ``success is False``.

    Both soft failures (a result with ``success=False``) and hard failures
    (an exception) are logged at debug level and fall through to the next
    platform. If there is no gateway runner, or nothing could be
    delivered, the method returns silently.
    """
    gateway = getattr(self, "gateway_runner", None)
    if not gateway:
        return
    for platform in (Platform.TELEGRAM, Platform.SLACK):
        try:
            peer = gateway.adapters.get(platform)
            if not peer:
                continue
            home_channel = gateway.config.get_home_channel(platform)
            if not (home_channel and getattr(home_channel, "chat_id", None)):
                continue
            alert = "\n".join((
                "⚠️ Unauthorized Discord slash attempt",
                f"User: {user_name} ({user_id})",
                f"Channel: {chan_id} (guild {guild_id})",
                f"Command: {command_text}",
                f"Reason: {reason}",
            ))
            outcome = await peer.send(str(home_channel.chat_id), alert)
            if getattr(outcome, "success", None) is not False:
                # Anything other than an explicit success=False counts
                # as delivered; stop here.
                return
            logger.debug(
                "[Discord] Admin notify via %s returned success=False"
                " (error=%r); falling through",
                platform, getattr(outcome, "error", None),
            )
        except Exception as e:
            logger.debug("[Discord] Admin notify via %s failed: %s", platform, e)
async def send_image_file(
self,
chat_id: str,
@ -2301,6 +2536,11 @@ class DiscordAdapter(BasePlatformAdapter):
except Exception:
pass # logging must never block command dispatch
# Auth gate — must run before defer() so an ephemeral rejection can
# be delivered on the still-unresponded interaction.
if not await self._check_slash_authorization(interaction, command_text):
return
await interaction.response.defer(ephemeral=True)
event = self._build_slash_event(interaction, command_text)
await self.handle_message(event)
@ -2445,7 +2685,8 @@ class DiscordAdapter(BasePlatformAdapter):
message: str = "",
auto_archive_duration: int = 1440,
):
await interaction.response.defer(ephemeral=True)
# defer() is performed inside the handler *after* the auth gate
# so a rejected invoker can receive an ephemeral rejection.
await self._handle_thread_create_slash(interaction, name, message, auto_archive_duration)
@tree.command(name="queue", description="Queue a prompt for the next turn (doesn't interrupt)")
@ -2566,6 +2807,54 @@ class DiscordAdapter(BasePlatformAdapter):
# supporting up to 25 categories × 25 skills = 625 skills.
self._register_skill_group(tree)
# Optional defense-in-depth: hide every slash command from non-admin
# guild members in Discord's slash picker. Server-side authorization
# (``_check_slash_authorization``) is the actual gate; this is purely
# UX so users don't see commands they can't invoke. Off by default
# to preserve the slash UX for deployments that intentionally allow
# everyone in the guild.
if os.getenv("DISCORD_HIDE_SLASH_COMMANDS", "false").strip().lower() in (
"true", "1", "yes", "on",
):
self._apply_owner_only_visibility(tree)
def _apply_owner_only_visibility(self, tree) -> None:
    """Assign ``discord.Permissions(0)`` as the default permissions of
    every command returned by ``tree.get_commands()``.

    Per Discord's application-command permission model, a default member
    permission value of 0 disables a command for everyone except guild
    administrators. Server admins can re-grant access per user/role via
    Server Settings -> Integrations -> <bot> -> Permissions.

    This only affects visibility in the picker; it is not an
    authorization check. Failures are logged and never raised: if the
    permissions object cannot be built nothing is changed, and a command
    that rejects the assignment is skipped.
    """
    try:
        hidden_perms = discord.Permissions(0)
    except Exception as e:
        logger.warning(
            "[Discord] _apply_owner_only_visibility: cannot build Permissions(0): %s",
            e,
        )
        return
    hidden_total = 0
    for command in tree.get_commands():
        try:
            command.default_permissions = hidden_perms
        except Exception as e:
            logger.debug(
                "[Discord] Could not set default_permissions on %r: %s",
                getattr(command, "name", "?"), e,
            )
        else:
            # Count only the commands whose assignment succeeded.
            hidden_total += 1
    logger.info(
        "[Discord] Hid %d slash command(s) from non-admin guild members "
        "(opt-in defense in depth via DISCORD_HIDE_SLASH_COMMANDS).",
        hidden_total,
    )
def _register_skill_group(self, tree) -> None:
"""Register a single ``/skill`` command with autocomplete on the name.
@ -2584,40 +2873,32 @@ class DiscordAdapter(BasePlatformAdapter):
hidden skills. The slash picker also becomes more discoverable
Discord live-filters by the user's typed prefix against both the
skill name and its description.
The entries list and lookup dict are stored on ``self`` rather
than captured in closure variables so :meth:`refresh_skill_group`
can repopulate them when the user runs ``/reload-skills`` without
needing to touch the Discord slash-command tree or trigger a
``tree.sync()`` call.
"""
try:
from hermes_cli.commands import discord_skill_commands_by_category
existing_names = set()
try:
existing_names = {cmd.name for cmd in tree.get_commands()}
except Exception:
pass
# Reuse the existing collector for consistent filtering
# (per-platform disabled, hub-excluded, name clamping), then
# flatten — the category grouping was only useful for the
# nested layout.
categories, uncategorized, hidden = discord_skill_commands_by_category(
reserved_names=existing_names,
)
entries: list[tuple[str, str, str]] = list(uncategorized)
for cat_skills in categories.values():
entries.extend(cat_skills)
# Populate the instance-level entries/lookup so the
# autocomplete + handler callbacks below always read the
# freshest state. refresh_skill_group() re-runs the same
# collector and mutates these two attributes in place.
self._skill_entries: list[tuple[str, str, str]] = []
self._skill_lookup: dict[str, tuple[str, str]] = {}
self._skill_group_reserved_names: set[str] = set(existing_names)
self._refresh_skill_catalog_state()
if not entries:
if not self._skill_entries:
return
# Stable alphabetical order so the autocomplete suggestion
# list is predictable across restarts.
entries.sort(key=lambda t: t[0])
# name -> (description, cmd_key) — used by both the autocomplete
# callback and the handler for O(1) dispatch.
skill_lookup: dict[str, tuple[str, str]] = {
n: (d, k) for n, d, k in entries
}
async def _autocomplete_name(
interaction: "discord.Interaction", current: str,
) -> list:
@ -2627,10 +2908,29 @@ class DiscordAdapter(BasePlatformAdapter):
"/skill pdf" surfaces skills whose description mentions
PDFs even if the name doesn't. Discord caps this list at
25 entries per query.
Authorization: a quiet pre-check evaluates the slash
allowlists and returns ``[]`` for unauthorized users so
the installed skill catalog is not leaked to anyone who
can see the command in the picker. Returning a generic
empty list here is intentional sending a per-keystroke
ephemeral rejection would produce a barrage of error
popups during typing.
Reads ``self._skill_entries`` so a ``/reload-skills`` run
since process start shows up on the very next keystroke.
"""
try:
allowed, _reason = self._evaluate_slash_authorization(interaction)
except Exception:
# Defensive: never raise from autocomplete. Fail
# closed by returning an empty suggestion list.
return []
if not allowed:
return []
q = (current or "").strip().lower()
choices: list = []
for name, desc, _key in entries:
for name, desc, _key in self._skill_entries:
if not q or q in name.lower() or (desc and q in desc.lower()):
if desc:
label = f"{name}{desc}"
@ -2654,7 +2954,13 @@ class DiscordAdapter(BasePlatformAdapter):
async def _skill_handler(
interaction: "discord.Interaction", name: str, args: str = "",
):
entry = skill_lookup.get(name)
# Authorize BEFORE any skill lookup so that known and
# unknown skill names produce identical rejections for
# unauthorized users (no probing the installed catalog
# via "Unknown skill: <name>" responses).
if not await self._check_slash_authorization(interaction, "/skill"):
return
entry = self._skill_lookup.get(name)
if not entry:
await interaction.response.send_message(
f"Unknown skill: `{name}`. Start typing for "
@ -2676,16 +2982,74 @@ class DiscordAdapter(BasePlatformAdapter):
logger.info(
"[%s] Registered /skill command with %d skill(s) via autocomplete",
self.name, len(entries),
self.name, len(self._skill_entries),
)
if hidden:
if self._skill_group_hidden_count:
logger.info(
"[%s] %d skill(s) filtered out of /skill (name clamp / reserved)",
self.name, hidden,
self.name, self._skill_group_hidden_count,
)
except Exception as exc:
logger.warning("[%s] Failed to register /skill command: %s", self.name, exc)
def _refresh_skill_catalog_state(self) -> None:
    """Rebuild the skill catalog attributes stored on this instance.

    Calls ``discord_skill_commands_by_category`` with a copy of
    ``self._skill_group_reserved_names`` (an empty set when the attribute
    is missing), flattens the uncategorized and per-category results into
    one list sorted by skill name, and stores:

    - ``self._skill_entries``: the sorted ``(name, description, key)`` list
    - ``self._skill_lookup``: ``name -> (description, key)``
    - ``self._skill_group_hidden_count``: the collector's third return value

    The three attributes are rebound to fresh objects on every call.
    """
    from hermes_cli.commands import discord_skill_commands_by_category
    reserved_names = set(getattr(self, "_skill_group_reserved_names", set()))
    by_category, loose, hidden_count = discord_skill_commands_by_category(
        reserved_names=reserved_names,
    )
    flattened: list[tuple[str, str, str]] = [
        *loose,
        *(skill for group in by_category.values() for skill in group),
    ]
    # Stable sort by name keeps the suggestion order predictable.
    ordered = sorted(flattened, key=lambda item: item[0])
    self._skill_entries = ordered
    self._skill_lookup = {name: (desc, key) for name, desc, key in ordered}
    self._skill_group_hidden_count = hidden_count
def refresh_skill_group(self) -> tuple[int, int]:
    """Rescan skills and report the resulting catalog size.

    Delegates to ``_refresh_skill_catalog_state``, which repopulates the
    instance attributes holding the skill catalog.

    Returns:
        ``(available_count, hidden_count)``. If the rescan raises, a
        warning is logged and the method returns the size of whatever
        ``_skill_entries`` currently holds (0 if it was never set)
        together with a hidden count of 0.
    """
    try:
        self._refresh_skill_catalog_state()
    except Exception as exc:
        logger.warning(
            "[%s] Failed to refresh /skill autocomplete after reload: %s",
            self.name, exc,
        )
        stale_entries = getattr(self, "_skill_entries", [])
        return (len(stale_entries), 0)
    available = len(self._skill_entries)
    filtered = self._skill_group_hidden_count
    logger.info(
        "[%s] Refreshed /skill autocomplete: %d skill(s) available (%d filtered)",
        self.name,
        available,
        filtered,
    )
    return (available, filtered)
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
"""Build a MessageEvent from a Discord slash command interaction."""
is_dm = isinstance(interaction.channel, discord.DMChannel)
@ -2743,6 +3107,9 @@ class DiscordAdapter(BasePlatformAdapter):
auto_archive_duration: int = 1440,
) -> None:
"""Create a Discord thread from a slash command and start a session in it."""
if not await self._check_slash_authorization(interaction, "/thread"):
return
await interaction.response.defer(ephemeral=True)
result = await self._create_thread(
interaction,
name=name,
@ -3037,6 +3404,7 @@ class DiscordAdapter(BasePlatformAdapter):
view = ExecApprovalView(
session_key=session_key,
allowed_user_ids=self._allowed_user_ids,
allowed_role_ids=self._allowed_role_ids,
)
msg = await channel.send(embed=embed, view=view)
@ -3075,6 +3443,7 @@ class DiscordAdapter(BasePlatformAdapter):
session_key=session_key,
confirm_id=confirm_id,
allowed_user_ids=self._allowed_user_ids,
allowed_role_ids=self._allowed_role_ids,
)
msg = await channel.send(embed=embed, view=view)
@ -3109,6 +3478,7 @@ class DiscordAdapter(BasePlatformAdapter):
view = UpdatePromptView(
session_key=session_key,
allowed_user_ids=self._allowed_user_ids,
allowed_role_ids=self._allowed_role_ids,
)
msg = await channel.send(embed=embed, view=view)
return SendResult(success=True, message_id=str(msg.id))
@ -3166,6 +3536,7 @@ class DiscordAdapter(BasePlatformAdapter):
session_key=session_key,
on_model_selected=on_model_selected,
allowed_user_ids=self._allowed_user_ids,
allowed_role_ids=self._allowed_role_ids,
)
msg = await channel.send(embed=embed, view=view)
@ -3721,6 +4092,72 @@ class DiscordAdapter(BasePlatformAdapter):
# Discord UI Components (outside the adapter class)
# ---------------------------------------------------------------------------
def _component_check_auth(
interaction,
allowed_user_ids: Optional[set],
allowed_role_ids: Optional[set],
) -> bool:
"""Shared user-or-role OR semantics for component view button clicks.
Mirrors ``DiscordAdapter._is_allowed_user`` / the slash and on_message
gates so every Discord interaction surface honors the same trust
boundary. Component views (ExecApprovalView, SlashConfirmView,
UpdatePromptView, ModelPickerView) used to receive only
``allowed_user_ids``: in role-only deployments
(DISCORD_ALLOWED_ROLES set, DISCORD_ALLOWED_USERS empty) the user
set was empty and the legacy "no allowlist = allow everyone" branch
let any guild member click the buttons -- approving exec commands,
cancelling slash confirmations, switching the model.
Behavior:
- both allowlists empty -> allow (preserves existing no-allowlist
deployments, no regression)
- user is in user allowlist -> allow
- role allowlist set + user has a role in it -> allow
- role allowlist set + interaction.user has no resolvable
``roles`` attribute (e.g. DM context with a role policy active)
-> reject (fail closed)
- otherwise -> reject
"""
user_set = allowed_user_ids or set()
role_set = allowed_role_ids or set()
has_users = bool(user_set)
has_roles = bool(role_set)
if not has_users and not has_roles:
return True
user = getattr(interaction, "user", None)
if user is None:
return False
if has_users:
try:
uid = str(user.id)
except AttributeError:
uid = ""
if uid and uid in user_set:
return True
if has_roles:
roles_attr = getattr(user, "roles", None)
if roles_attr is None:
# Role policy is configured but the interaction doesn't
# carry role data (DM-context Member, raw User payload).
# Fail closed: a user without a resolvable role list cannot
# satisfy a role allowlist.
return False
try:
user_role_ids = {getattr(r, "id", None) for r in roles_attr}
except TypeError:
return False
if user_role_ids & role_set:
return True
return False
if DISCORD_AVAILABLE:
class ExecApprovalView(discord.ui.View):
@ -3733,17 +4170,23 @@ if DISCORD_AVAILABLE:
Only users in the allowed list can click. Times out after 5 minutes.
"""
def __init__(self, session_key: str, allowed_user_ids: set):
def __init__(
self,
session_key: str,
allowed_user_ids: set,
allowed_role_ids: Optional[set] = None,
):
super().__init__(timeout=300) # 5-minute timeout
self.session_key = session_key
self.allowed_user_ids = allowed_user_ids
self.allowed_role_ids = allowed_role_ids or set()
self.resolved = False
def _check_auth(self, interaction: discord.Interaction) -> bool:
"""Verify the user clicking is authorized."""
if not self.allowed_user_ids:
return True # No allowlist = anyone can approve
return str(interaction.user.id) in self.allowed_user_ids
return _component_check_auth(
interaction, self.allowed_user_ids, self.allowed_role_ids,
)
async def _resolve(
self, interaction: discord.Interaction, choice: str,
@ -3835,17 +4278,24 @@ if DISCORD_AVAILABLE:
5 minutes (matches the gateway primitive's timeout).
"""
def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
def __init__(
self,
session_key: str,
confirm_id: str,
allowed_user_ids: set,
allowed_role_ids: Optional[set] = None,
):
super().__init__(timeout=300)
self.session_key = session_key
self.confirm_id = confirm_id
self.allowed_user_ids = allowed_user_ids
self.allowed_role_ids = allowed_role_ids or set()
self.resolved = False
def _check_auth(self, interaction: discord.Interaction) -> bool:
if not self.allowed_user_ids:
return True
return str(interaction.user.id) in self.allowed_user_ids
return _component_check_auth(
interaction, self.allowed_user_ids, self.allowed_role_ids,
)
async def _resolve(
self, interaction: discord.Interaction, choice: str,
@ -3923,16 +4373,22 @@ if DISCORD_AVAILABLE:
5-minute timeout on its side).
"""
def __init__(self, session_key: str, allowed_user_ids: set):
def __init__(
self,
session_key: str,
allowed_user_ids: set,
allowed_role_ids: Optional[set] = None,
):
super().__init__(timeout=300)
self.session_key = session_key
self.allowed_user_ids = allowed_user_ids
self.allowed_role_ids = allowed_role_ids or set()
self.resolved = False
def _check_auth(self, interaction: discord.Interaction) -> bool:
if not self.allowed_user_ids:
return True
return str(interaction.user.id) in self.allowed_user_ids
return _component_check_auth(
interaction, self.allowed_user_ids, self.allowed_role_ids,
)
async def _respond(
self, interaction: discord.Interaction, answer: str,
@ -4009,6 +4465,7 @@ if DISCORD_AVAILABLE:
session_key: str,
on_model_selected,
allowed_user_ids: set,
allowed_role_ids: Optional[set] = None,
):
super().__init__(timeout=120)
self.providers = providers
@ -4017,15 +4474,16 @@ if DISCORD_AVAILABLE:
self.session_key = session_key
self.on_model_selected = on_model_selected
self.allowed_user_ids = allowed_user_ids
self.allowed_role_ids = allowed_role_ids or set()
self.resolved = False
self._selected_provider: str = ""
self._build_provider_select()
def _check_auth(self, interaction: discord.Interaction) -> bool:
if not self.allowed_user_ids:
return True
return str(interaction.user.id) in self.allowed_user_ids
return _component_check_auth(
interaction, self.allowed_user_ids, self.allowed_role_ids,
)
def _build_provider_select(self):
"""Build the provider dropdown menu."""

View file

@ -2922,13 +2922,18 @@ class FeishuAdapter(BasePlatformAdapter):
},
)
response.raise_for_status()
# Snapshot Content-Type and body while the client context is
# still active so pooled connections fully release on exit.
# See #18451.
content_type_hdr = str(response.headers.get("Content-Type", ""))
body = response.content
filename = self._derive_remote_filename(
file_url,
content_type=str(response.headers.get("Content-Type", "")),
content_type=content_type_hdr,
default_name=preferred_name,
default_ext=default_ext,
)
cached_path = cache_document_from_bytes(response.content, filename)
cached_path = cache_document_from_bytes(body, filename)
return cached_path, filename
@staticmethod

View file

@ -139,7 +139,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):
async def _ws_connect(self) -> bool:
"""Establish WebSocket connection and authenticate."""
ws_url = self._hass_url.replace("http://", "ws://").replace("https://", "wss://")
ws_url = self._hass_url.replace("https://", "wss://").replace("http://", "ws://")
ws_url = f"{ws_url}/api/websocket"
self._session = aiohttp.ClientSession(

View file

@ -243,10 +243,14 @@ class QQAdapter(BasePlatformAdapter):
return False
try:
# Tighter keepalive pool so idle CLOSE_WAIT sockets drain
# faster behind proxies like Cloudflare Warp (#18451).
from gateway.platforms._http_client_limits import platform_httpx_limits
self._http_client = httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
event_hooks={"response": [_ssrf_redirect_guard]},
limits=platform_httpx_limits(),
)
# 1. Get access token

View file

@ -248,7 +248,9 @@ class SignalAdapter(BasePlatformAdapter):
except Exception as e:
logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)
self.client = httpx.AsyncClient(timeout=30.0)
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
from gateway.platforms._http_client_limits import platform_httpx_limits
self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
try:
# Health check — verify signal-cli daemon is reachable
try:

View file

@ -528,6 +528,21 @@ class SlackAdapter(BasePlatformAdapter):
return False
lock_acquired = True
# Close any previous handler before creating a new one so that
# calling connect() a second time (e.g. during a gateway restart or
# in-process reconnect attempt) does not leave a zombie Socket Mode
# connection alive. Both the old and new connections would otherwise
# receive every Slack event and dispatch it twice, producing double
# responses — the same bug that affected DiscordAdapter (#18187).
if self._handler is not None:
try:
await self._handler.close_async()
except Exception:
logger.debug("[%s] Failed to close previous Slack handler", self.name)
finally:
self._handler = None
self._app = None
# First token is the primary — used for AsyncApp / Socket Mode
primary_token = bot_tokens[0]
self._app = AsyncApp(token=primary_token)

View file

@ -512,6 +512,17 @@ class TelegramAdapter(BasePlatformAdapter):
self.name, attempt,
)
self._polling_network_error_count = 0
# start_polling() returning is necessary but not sufficient:
# PTB's Updater can be left in a state where `running` is True
# but the underlying long-poll task is wedged on a stale httpx
# connection and never makes progress. No error_callback fires
# in that state, so the reconnect ladder won't advance on its
# own. Schedule a deferred probe to detect the wedge and
# re-enter the ladder if needed.
if not self.has_fatal_error:
probe = asyncio.ensure_future(self._verify_polling_after_reconnect())
self._background_tasks.add(probe)
probe.add_done_callback(self._background_tasks.discard)
except Exception as retry_err:
logger.warning("[%s] Telegram polling reconnect failed: %s", self.name, retry_err)
# start_polling failed — polling is dead and no further error
@ -523,6 +534,50 @@ class TelegramAdapter(BasePlatformAdapter):
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
async def _verify_polling_after_reconnect(self) -> None:
    """Deferred health probe run after a polling reconnect.

    Steps:

    1. Sleep 60 seconds.
    2. Return immediately if a fatal error has been recorded meanwhile.
    3. If the application, its updater, or the updater's ``running`` flag
       is missing/falsy, log a warning and pass a ``RuntimeError`` to
       ``_handle_polling_network_error``.
    4. Otherwise call ``bot.get_me()`` under a 10 second timeout; any
       exception (including the timeout) is logged and passed to
       ``_handle_polling_network_error``.

    The intent is to catch a polling loop that reports itself as running
    but has stopped making progress, a state in which no error callback
    would fire on its own.
    """
    probe_delay_s = 60
    probe_timeout_s = 10
    await asyncio.sleep(probe_delay_s)
    if self.has_fatal_error:
        return

    app = self._app
    updater = app.updater if app else None
    if not (updater and updater.running):
        logger.warning(
            "[%s] Updater not running %ds after reconnect — treating as wedged",
            self.name, probe_delay_s,
        )
        await self._handle_polling_network_error(
            RuntimeError("Updater not running after reconnect heartbeat")
        )
        return

    try:
        await asyncio.wait_for(app.bot.get_me(), timeout=probe_timeout_s)
    except Exception as probe_err:
        logger.warning(
            "[%s] Polling heartbeat probe failed %ds after reconnect: %s",
            self.name, probe_delay_s, probe_err,
        )
        await self._handle_polling_network_error(probe_err)
async def _handle_polling_conflict(self, error: Exception) -> None:
if self.has_fatal_error and self.fatal_error_code == "telegram_polling_conflict":
return

View file

@ -206,7 +206,11 @@ class WeComAdapter(BasePlatformAdapter):
return False
try:
self._http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True)
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
from gateway.platforms._http_client_limits import platform_httpx_limits
self._http_client = httpx.AsyncClient(
timeout=30.0, follow_redirects=True, limits=platform_httpx_limits(),
)
await self._open_connection()
self._mark_connected()
self._listen_task = asyncio.create_task(self._listen_loop())

View file

@ -119,7 +119,9 @@ class WecomCallbackAdapter(BasePlatformAdapter):
pass
try:
self._http_client = httpx.AsyncClient(timeout=20.0)
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
from gateway.platforms._http_client_limits import platform_httpx_limits
self._http_client = httpx.AsyncClient(timeout=20.0, limits=platform_httpx_limits())
self._app = web.Application()
self._app.router.add_get("/health", self._handle_health)
self._app.router.add_get(self._path, self._handle_verify)

View file

@ -2030,7 +2030,9 @@ async def send_weixin_direct(
live_adapter = _LIVE_ADAPTERS.get(resolved_token)
send_session = getattr(live_adapter, '_send_session', None)
if live_adapter is not None and send_session is not None and not send_session.closed:
if (live_adapter is not None and send_session is not None
and not send_session.closed
and send_session._loop is asyncio.get_running_loop()):
last_result: Optional[SendResult] = None
cleaned = live_adapter.format_message(message)
if cleaned:

View file

@ -185,6 +185,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
self._bridge_log: Optional[Path] = None
self._poll_task: Optional[asyncio.Task] = None
self._http_session: Optional["aiohttp.ClientSession"] = None
# Set to True by disconnect() before we SIGTERM our child bridge so
# _check_managed_bridge_exit() can distinguish an intentional
# shutdown-time exit (returncode -15 / -2 / 0) from a real crash.
# Without this, every graceful gateway shutdown/restart would log
# "Fatal whatsapp adapter error" plus dispatch a fatal-error
# notification before the normal "✓ whatsapp disconnected" fires.
self._shutting_down: bool = False
def _whatsapp_require_mention(self) -> bool:
configured = self.config.extra.get("require_mention")
@ -555,6 +562,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
if returncode is None:
return None
# Planned shutdown: disconnect() sets _shutting_down before it sends
# SIGTERM to the bridge, so a returncode of -15 (SIGTERM), -2 (SIGINT),
# or 0 (clean exit) at that point is expected, not a crash. Treat it
# as informational and skip the fatal-error path.
# getattr-with-default keeps tests that construct the adapter via
# ``WhatsAppAdapter.__new__`` (bypassing __init__) working without
# every _make_adapter() helper having to seed the attribute.
if getattr(self, "_shutting_down", False) and returncode in (0, -2, -15):
logger.info(
"[%s] Bridge exited during shutdown (code %d).",
self.name,
returncode,
)
return None
message = f"WhatsApp bridge process exited unexpectedly (code {returncode})."
if not self.has_fatal_error:
logger.error("[%s] %s", self.name, message)
@ -565,6 +587,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
async def disconnect(self) -> None:
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
# Flip the shutdown flag BEFORE signalling the child so the exit-check
# path (which runs from other tasks like send() and the poll loop)
# doesn't race us and report the intentional termination as fatal.
self._shutting_down = True
if self._bridge_process:
try:
try:
@ -876,11 +902,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
try:
import aiohttp
await self._http_session.post(
# Must wrap in `async with` — a bare `await session.post(...)`
# leaves the response object alive until GC, holding its TCP
# socket in CLOSE_WAIT. See #18451.
async with self._http_session.post(
f"http://127.0.0.1:{self._bridge_port}/typing",
json={"chatId": chat_id},
timeout=aiohttp.ClientTimeout(total=5)
)
):
pass
except Exception:
pass # Ignore typing indicator failures

View file

@ -15,6 +15,7 @@ Usage:
import asyncio
import dataclasses
import inspect
import json
import logging
import os
@ -282,6 +283,16 @@ def _home_target_env_var(platform_name: str) -> str:
)
def _home_thread_env_var(platform_name: str) -> str:
    """Return the env var name holding a platform's optional home thread/topic id.

    The name is the platform's home-target env var (as produced by
    ``_home_target_env_var``) with ``_THREAD_ID`` appended.
    """
    target_var = _home_target_env_var(platform_name)
    return f"{target_var}_THREAD_ID"
def _restart_notification_pending() -> bool:
    """Report whether the ``.restart_notify.json`` marker exists in the Hermes home.

    Returns True when the marker file is present, i.e. a /restart
    completion notice is still waiting to be delivered.
    """
    marker = _hermes_home / ".restart_notify.json"
    return marker.exists()
_ensure_ssl_certs()
# Add parent directory to path
@ -406,37 +417,37 @@ if _config_path.exists():
os.environ[_env_map["base_url"]] = _base_url
if _api_key:
os.environ[_env_map["api_key"]] = _api_key
# config.yaml is the documented, authoritative source for these
# settings — it unconditionally wins over .env values. Previously
# the guards below read `if X not in os.environ` and let stale
# .env entries (e.g. HERMES_MAX_ITERATIONS=60 written by an old
# `hermes setup` run) silently shadow the user's current config.
# See PR #18413 / the 60-vs-500 max_turns incident.
_agent_cfg = _cfg.get("agent", {})
if _agent_cfg and isinstance(_agent_cfg, dict):
if "max_turns" in _agent_cfg:
os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
# Bridge agent.gateway_timeout → HERMES_AGENT_TIMEOUT env var.
# Env var from .env takes precedence (already in os.environ).
if "gateway_timeout" in _agent_cfg and "HERMES_AGENT_TIMEOUT" not in os.environ:
if "gateway_timeout" in _agent_cfg:
os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
if "gateway_timeout_warning" in _agent_cfg:
os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
if "gateway_notify_interval" in _agent_cfg:
os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
if "restart_drain_timeout" in _agent_cfg:
os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
if (
"gateway_auto_continue_freshness" in _agent_cfg
and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ
):
if "gateway_auto_continue_freshness" in _agent_cfg:
os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
_agent_cfg["gateway_auto_continue_freshness"]
)
_display_cfg = _cfg.get("display", {})
if _display_cfg and isinstance(_display_cfg, dict):
if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
if "busy_input_mode" in _display_cfg:
os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
if "busy_ack_enabled" in _display_cfg and "HERMES_GATEWAY_BUSY_ACK_ENABLED" not in os.environ:
if "busy_ack_enabled" in _display_cfg:
os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
# HERMES_TIMEZONE from .env takes precedence (already in os.environ).
_tz_cfg = _cfg.get("timezone", "")
if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
if _tz_cfg and isinstance(_tz_cfg, str):
os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
# Security settings
_security_cfg = _cfg.get("security", {})
@ -444,8 +455,24 @@ if _config_path.exists():
_redact = _security_cfg.get("redact_secrets")
if _redact is not None:
os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
except Exception:
pass # Non-fatal; gateway can still run with .env values
except Exception as _bridge_err:
# Previously this was silent (`except Exception: pass`), which
# hid partial bridge failures and let .env defaults shadow
# config.yaml values — users observed max_turns=500 in config
# but a 60-iteration cap in practice. Surface the failure to
# stderr so operators see it even though `logger` is not yet
# initialized at module-import time (logger is defined further
# down this module).
print(
f" Warning: config.yaml → env bridge failed: "
f"{type(_bridge_err).__name__}: {_bridge_err}",
file=sys.stderr,
)
print(
" Gateway will fall back to .env values, which may not match "
"your current config.yaml. Run `hermes doctor` to investigate.",
file=sys.stderr,
)
# Apply IPv4 preference if configured (before any HTTP clients are created).
try:
@ -490,6 +517,8 @@ from gateway.config import (
Platform,
_BUILTIN_PLATFORM_VALUES,
GatewayConfig,
HomeChannel,
PlatformConfig,
load_gateway_config,
)
from gateway.session import (
@ -673,11 +702,69 @@ def _is_control_interrupt_message(message: Optional[str]) -> bool:
return normalized in _CONTROL_INTERRUPT_MESSAGES
def _skill_slug_from_frontmatter(skill_md: Path) -> tuple[str | None, str | None]:
"""Derive the /command slug and declared frontmatter name from a SKILL.md.
Matches the exact normalization used by
:func:`agent.skill_commands.scan_skill_commands` so the slug here is the
same string a user types after the leading ``/`` (e.g. a skill with
frontmatter ``name: Stable Diffusion Image Generation`` resolves to
``stable-diffusion-image-generation`` NOT the parent directory name,
which is commonly shorter/different, e.g. ``stable-diffusion``).
Using the directory name silently broke :func:`_check_unavailable_skill`
for every skill whose directory name drifted from its frontmatter name
(19 such skills on a standard install as of 2026-05), causing a generic
"unknown command" response where a "disabled — enable with …" or
"not installed — install with …" hint was expected.
Returns ``(slug, declared_name)`` or ``(None, None)`` when the file
can't be read or lacks a ``name:`` in its frontmatter.
"""
try:
content = skill_md.read_text(encoding="utf-8", errors="replace")
except Exception:
return None, None
if not content.startswith("---"):
return None, None
end = content.find("\n---", 3)
if end < 0:
return None, None
declared_name: str | None = None
for line in content[3:end].splitlines():
line = line.strip()
if line.startswith("name:"):
raw = line.split(":", 1)[1].strip()
# Strip YAML quote wrappers if present
if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
raw = raw[1:-1]
declared_name = raw.strip()
break
if not declared_name:
return None, None
slug = declared_name.lower().replace(" ", "-").replace("_", "-")
# Mirror _SKILL_INVALID_CHARS and _SKILL_MULTI_HYPHEN from skill_commands
import re as _re
slug = _re.sub(r"[^a-z0-9-]", "", slug)
slug = _re.sub(r"-{2,}", "-", slug).strip("-")
if not slug:
return None, declared_name
return slug, declared_name
def _check_unavailable_skill(command_name: str) -> str | None:
"""Check if a command matches a known-but-inactive skill.
Returns a helpful message if the skill exists but is disabled or only
available as an optional install. Returns None if no match found.
The slug for each on-disk skill is derived from its frontmatter ``name:``
(via :func:`_skill_slug_from_frontmatter`), NOT from its containing
directory name because the two can differ (e.g. directory
``stable-diffusion`` + frontmatter ``Stable Diffusion Image Generation``
yields slug ``stable-diffusion-image-generation``). Matching on
directory name would miss that slug entirely and fall through to the
generic "unknown command" path.
"""
# Normalize: command uses hyphens, skill names may use hyphens or underscores
normalized = command_name.lower().replace("_", "-")
@ -693,8 +780,12 @@ def _check_unavailable_skill(command_name: str) -> str | None:
for skill_md in skills_dir.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
continue
name = skill_md.parent.name.lower().replace("_", "-")
if name == normalized and name in disabled:
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
if not slug or not declared_name:
continue
# disabled is keyed by the declared frontmatter name (what
# skills.disabled / skills.platform_disabled store).
if slug == normalized and declared_name in disabled:
return (
f"The **{command_name}** skill is installed but disabled.\n"
f"Enable it with: `hermes skills config`"
@ -706,8 +797,10 @@ def _check_unavailable_skill(command_name: str) -> str | None:
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
if optional_dir.exists():
for skill_md in optional_dir.rglob("SKILL.md"):
name = skill_md.parent.name.lower().replace("_", "-")
if name == normalized:
slug, _declared = _skill_slug_from_frontmatter(skill_md)
if not slug:
continue
if slug == normalized:
# Build install path: official/<category>/<name>
rel = skill_md.parent.relative_to(optional_dir)
parts = list(rel.parts)
@ -2176,15 +2269,13 @@ class GatewayRunner:
logger.debug("Failed interrupting agent during shutdown: %s", e)
async def _notify_active_sessions_of_shutdown(self) -> None:
"""Send a notification to every chat with an active agent.
"""Send shutdown/restart notifications to active chats and home channels.
Called at the very start of stop() adapters are still connected so
messages can be delivered. Best-effort: individual send failures are
messages can be delivered. Best-effort: individual send failures are
logged and swallowed so they never block the shutdown sequence.
"""
active = self._snapshot_running_agents()
if not active:
return
action = "restarting" if self._restart_requested else "shutting down"
hint = (
@ -2195,7 +2286,7 @@ class GatewayRunner:
)
msg = f"⚠️ Gateway {action}{hint}"
notified: set = set()
notified: set[tuple[str, str, Optional[str]]] = set()
for session_key in active:
source = None
try:
@ -2212,7 +2303,7 @@ class GatewayRunner:
if source is not None:
platform_str = source.platform.value
chat_id = source.chat_id
chat_id = str(source.chat_id)
thread_id = source.thread_id
else:
# Fall back to parsing the session key when no persisted
@ -2224,9 +2315,10 @@ class GatewayRunner:
chat_id = _parsed["chat_id"]
thread_id = _parsed.get("thread_id")
# Deduplicate: one notification per chat, even if multiple
# sessions (different users/threads) share the same chat.
dedup_key = (platform_str, chat_id)
# Deduplicate only identical delivery targets. Thread/topic-aware
# platforms can share a parent chat while still routing to distinct
# destinations via metadata.
dedup_key = (platform_str, chat_id, str(thread_id) if thread_id else None)
if dedup_key in notified:
continue
@ -2240,10 +2332,19 @@ class GatewayRunner:
# correct forum topic / thread.
metadata = {"thread_id": thread_id} if thread_id else None
await adapter.send(chat_id, msg, metadata=metadata)
result = await adapter.send(chat_id, msg, metadata=metadata)
if result is not None and getattr(result, "success", True) is False:
logger.debug(
"Failed to send shutdown notification to %s:%s: %s",
platform_str,
chat_id,
getattr(result, "error", "send returned success=False"),
)
continue
notified.add(dedup_key)
logger.info(
"Sent shutdown notification to %s:%s",
"Sent shutdown notification to active chat %s:%s",
platform_str, chat_id,
)
except Exception as e:
@ -2252,6 +2353,44 @@ class GatewayRunner:
platform_str, chat_id, e,
)
for platform, adapter in self.adapters.items():
home = self.config.get_home_channel(platform)
if not home or not home.chat_id:
continue
dedup_key = (platform.value, str(home.chat_id), str(home.thread_id) if home.thread_id else None)
if dedup_key in notified:
continue
try:
metadata = {"thread_id": home.thread_id} if home.thread_id else None
if metadata:
result = await adapter.send(str(home.chat_id), msg, metadata=metadata)
else:
result = await adapter.send(str(home.chat_id), msg)
if result is not None and getattr(result, "success", True) is False:
logger.debug(
"Failed to send shutdown notification to home channel %s:%s: %s",
platform.value,
home.chat_id,
getattr(result, "error", "send returned success=False"),
)
continue
notified.add(dedup_key)
logger.info(
"Sent shutdown notification to home channel %s:%s",
platform.value,
home.chat_id,
)
except Exception as e:
logger.debug(
"Failed to send shutdown notification to home channel %s:%s: %s",
platform.value,
home.chat_id,
e,
)
def _finalize_shutdown_agents(self, active_agents: Dict[str, Any]) -> None:
for agent in active_agents.values():
try:
@ -2519,6 +2658,18 @@ class GatewayRunner:
"""
logger.info("Starting Hermes Gateway...")
logger.info("Session storage: %s", self.config.sessions_dir)
# Log the resolved max_iterations budget so operators can verify the
# config.yaml → env bridge did the right thing at a glance (instead
# of silently running at a stale .env value for weeks).
try:
_effective_max_iter = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
logger.info(
"Agent budget: max_iterations=%d (agent.max_turns from config.yaml, "
"or HERMES_MAX_ITERATIONS from .env, or default 90)",
_effective_max_iter,
)
except Exception:
pass
try:
from hermes_cli.profiles import get_active_profile_name
_profile = get_active_profile_name()
@ -2662,7 +2813,7 @@ class GatewayRunner:
try:
suspended = self.session_store.suspend_recently_active()
if suspended:
logger.info("Suspended %d in-flight session(s) from previous run", suspended)
logger.info("Marked %d in-flight session(s) as resumable from previous run", suspended)
except Exception as e:
logger.warning("Session suspension on startup failed: %s", e)
@ -2860,8 +3011,28 @@ class GatewayRunner:
):
self._schedule_update_notification_watch()
# Give freshly connected platform adapters a brief moment to settle
# before sending restart/startup lifecycle messages. In practice this
# helps Discord thread deliveries right after reconnect.
if connected_count > 0:
await asyncio.sleep(1.0)
# Notify the chat that initiated /restart that the gateway is back.
await self._send_restart_notification()
restart_notification_pending = _restart_notification_pending()
delivered_restart_target = await self._send_restart_notification()
# Broadcast a lightweight "gateway is back" message to configured
# home channels only when this startup is resuming from /restart. If a
# /restart requester already received a direct completion notice in the
# same chat, skip the generic broadcast there to avoid duplicates while
# still allowing a home-channel fallback when the direct send fails.
if restart_notification_pending or delivered_restart_target is not None:
skip_home_targets = (
{delivered_restart_target} if delivered_restart_target else None
)
await self._send_home_channel_startup_notifications(
skip_targets=skip_home_targets,
)
# Drain any recovered process watchers (from crash recovery checkpoint)
try:
@ -3889,7 +4060,9 @@ class GatewayRunner:
if not check_discord_requirements():
logger.warning("Discord: discord.py not installed")
return None
return DiscordAdapter(config)
adapter = DiscordAdapter(config)
adapter.gateway_runner = self # For cross-platform admin alerts on unauthorized slash
return adapter
elif platform == Platform.WHATSAPP:
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
@ -7792,24 +7965,33 @@ class GatewayRunner:
msg = decision.get("message") or ""
# Send the status line back to the user so they see the judge's
# verdict. Fire-and-forget via the adapter.
# verdict. Fire-and-forget via the adapter's ``send()`` method —
# adapters expose ``send(chat_id, content, reply_to, metadata)``,
# not a ``send_message(source, msg)`` wrapper, so an earlier
# ``hasattr(adapter, "send_message")`` gate here was dead code and
# users never saw ``✓ Goal achieved`` / ``⏸ budget exhausted``
# verdicts.
if msg and source is not None:
try:
adapter = self.adapters.get(source.platform)
if adapter and hasattr(adapter, "send_message"):
if adapter is not None and hasattr(adapter, "send"):
import asyncio as _asyncio
coro = adapter.send_message(source, msg)
thread_meta = (
{"thread_id": source.thread_id} if source.thread_id else None
)
coro = adapter.send(
chat_id=source.chat_id,
content=msg,
metadata=thread_meta,
)
if _asyncio.iscoroutine(coro):
try:
loop = _asyncio.get_event_loop()
if loop.is_running():
loop.create_task(coro)
else:
loop.run_until_complete(coro)
loop = _asyncio.get_running_loop()
loop.create_task(coro)
except RuntimeError:
# No event loop in this thread — schedule on the main one.
# No running loop in this thread — best effort.
try:
_asyncio.run_coroutine_threadsafe(coro, self._loop)
_asyncio.run(coro)
except Exception:
pass
except Exception as exc:
@ -7872,14 +8054,33 @@ class GatewayRunner:
chat_name = source.chat_name or chat_id
env_key = _home_target_env_var(platform_name)
thread_env_key = _home_thread_env_var(platform_name)
thread_id = source.thread_id
# Save to .env so it persists across restarts
try:
from hermes_cli.config import save_env_value
save_env_value(env_key, str(chat_id))
# Keep thread/topic routing explicit and clear stale values when
# /sethome is run from the parent chat instead of a thread.
save_env_value(thread_env_key, str(thread_id or ""))
except Exception as e:
return f"Failed to save home channel: {e}"
# Keep the running gateway config in sync too. The pre-restart
# notification path reads self.config before the process reloads env.
if source.platform:
platform_config = self.config.platforms.setdefault(
source.platform,
PlatformConfig(enabled=True),
)
platform_config.home_channel = HomeChannel(
platform=source.platform,
chat_id=str(chat_id),
name=chat_name,
thread_id=str(thread_id) if thread_id else None,
)
return (
f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
f"Cron jobs and cross-platform messages will be delivered here."
@ -9623,6 +9824,28 @@ class GatewayRunner:
removed = result.get("removed", []) # [{"name", "description"}, ...]
total = result.get("total", 0)
# Let each connected adapter refresh any platform-side state
# that cached the skill list at startup. Today that's the
# Discord /skill autocomplete (registered once per connect);
# without this call, new skills stay invisible in the
# dropdown and deleted skills error out when clicked. Other
# adapters that don't override refresh_skill_group (Telegram's
# BotCommand menu, Slack subcommand map, etc.) are silently
# skipped — the in-process reload above is enough for them.
for adapter in list(self.adapters.values()):
refresh = getattr(adapter, "refresh_skill_group", None)
if not callable(refresh):
continue
try:
maybe = refresh()
if inspect.isawaitable(maybe):
await maybe
except Exception as exc:
logger.warning(
"Adapter %s refresh_skill_group raised: %s",
getattr(adapter, "name", adapter), exc,
)
lines = ["🔄 **Skills Reloaded**\n"]
if not added and not removed:
lines.append("No new skills detected.")
@ -10341,11 +10564,11 @@ class GatewayRunner:
return True
async def _send_restart_notification(self) -> None:
async def _send_restart_notification(self) -> Optional[tuple[str, str, Optional[str]]]:
"""Notify the chat that initiated /restart that the gateway is back."""
notify_path = _hermes_home / ".restart_notify.json"
if not notify_path.exists():
return
return None
try:
data = json.loads(notify_path.read_text())
@ -10354,7 +10577,7 @@ class GatewayRunner:
thread_id = data.get("thread_id")
if not platform_str or not chat_id:
return
return None
platform = Platform(platform_str)
adapter = self.adapters.get(platform)
@ -10363,24 +10586,94 @@ class GatewayRunner:
"Restart notification skipped: %s adapter not connected",
platform_str,
)
return
return None
metadata = {"thread_id": thread_id} if thread_id else None
await adapter.send(
chat_id,
result = await adapter.send(
str(chat_id),
"♻ Gateway restarted successfully. Your session continues.",
metadata=metadata,
)
# adapter.send() catches provider errors (e.g. "Chat not found")
# and returns SendResult(success=False) rather than raising, so
# we must inspect the result before claiming success — otherwise
# the log line is misleading and hides real delivery failures.
if result is not None and getattr(result, "success", True) is False:
logger.warning(
"Restart notification to %s:%s was not delivered: %s",
platform_str,
chat_id,
getattr(result, "error", "send returned success=False"),
)
return None
logger.info(
"Sent restart notification to %s:%s",
platform_str,
chat_id,
)
return str(platform_str), str(chat_id), str(thread_id) if thread_id else None
except Exception as e:
logger.warning("Restart notification failed: %s", e)
return None
finally:
notify_path.unlink(missing_ok=True)
async def _send_home_channel_startup_notifications(
self,
*,
skip_targets: Optional[set[tuple[str, str, Optional[str]]]] = None,
) -> set[tuple[str, str, Optional[str]]]:
"""Notify configured home channels that the gateway is back online.
The notification is best-effort and sent once per connected platform
home channel. ``skip_targets`` lets startup avoid duplicate messages
when a more specific restart notification is queued for the same chat.
"""
delivered: set[tuple[str, str, Optional[str]]] = set()
skipped = skip_targets or set()
message = "♻️ Gateway online — Hermes is back and ready."
for platform, adapter in self.adapters.items():
home = self.config.get_home_channel(platform)
if not home or not home.chat_id:
continue
target = (platform.value, str(home.chat_id), str(home.thread_id) if home.thread_id else None)
if target in skipped or target in delivered:
continue
try:
metadata = {"thread_id": home.thread_id} if home.thread_id else None
if metadata:
result = await adapter.send(str(home.chat_id), message, metadata=metadata)
else:
result = await adapter.send(str(home.chat_id), message)
if result is not None and getattr(result, "success", True) is False:
logger.warning(
"Home-channel startup notification failed for %s:%s: %s",
platform.value,
home.chat_id,
getattr(result, "error", "send returned success=False"),
)
continue
delivered.add(target)
logger.info(
"Sent home-channel startup notification to %s:%s",
platform.value,
home.chat_id,
)
except Exception as exc:
logger.warning(
"Home-channel startup notification failed for %s:%s: %s",
platform.value,
home.chat_id,
exc,
)
return delivered
def _set_session_env(self, context: SessionContext) -> list:
"""Set session context variables for the current async task.

View file

@ -1086,19 +1086,22 @@ class SessionStore:
return len(removed_keys)
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
"""Mark recently-active sessions as suspended.
"""Mark recently-active sessions as resumable after an unexpected exit.
Called on gateway startup to prevent sessions that were likely
in-flight when the gateway last exited from being blindly resumed
(#7536). Only suspends sessions updated within *max_age_seconds*
to avoid resetting long-idle sessions that are harmless to resume.
Returns the number of sessions that were suspended.
Called on gateway startup after a crash or fast restart to preserve
in-flight sessions instead of destroying their conversation history
(#7536). Only marks sessions updated within *max_age_seconds* to
avoid touching long-idle sessions. Sets ``resume_pending=True`` so
the next incoming message on the same session_key auto-resumes from
the existing transcript.
Entries flagged ``resume_pending=True`` are skipped those were
marked intentionally by the drain-timeout path as recoverable.
Terminal escalation for genuinely stuck ``resume_pending`` sessions
is handled by the existing ``.restart_failure_counts`` stuck-loop
counter, which runs after this method on startup.
Entries already flagged ``resume_pending=True`` are skipped. Entries
explicitly ``suspended=True`` (from /stop or stuck-loop escalation)
are also skipped. Terminal escalation for genuinely stuck sessions
is still handled by the existing ``.restart_failure_counts`` counter
(threshold 3), which runs after this method and sets ``suspended=True``.
Returns the number of sessions marked resumable.
"""
from datetime import timedelta
@ -1110,7 +1113,9 @@ class SessionStore:
if entry.resume_pending:
continue
if not entry.suspended and entry.updated_at >= cutoff:
entry.suspended = True
entry.resume_pending = True
entry.resume_reason = "restart_interrupted"
entry.last_resume_marked_at = _now()
count += 1
if count:
self._save()

View file

@ -10,6 +10,7 @@ To add an alias: set ``aliases=("short",)`` on the existing ``CommandDef``.
from __future__ import annotations
import logging
import os
import re
import shutil
@ -21,6 +22,8 @@ from typing import Any
from utils import is_truthy_value
logger = logging.getLogger(__name__)
# prompt_toolkit is an optional CLI dependency — only needed for
# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test
# environments that lack it must still be able to import this module
@ -499,9 +502,9 @@ def _sanitize_telegram_name(raw: str) -> str:
def _clamp_command_names(
entries: list[tuple[str, str]],
entries: list[tuple[str, ...]],
reserved: set[str],
) -> list[tuple[str, str]]:
) -> list[tuple[str, ...]]:
"""Enforce 32-char command name limit with collision avoidance.
Both Telegram and Discord cap slash command names at 32 characters.
@ -509,10 +512,15 @@ def _clamp_command_names(
(against *reserved* names or earlier entries in the same batch), the name is
shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
If all 10 digit slots are taken the entry is silently dropped.
Accepts tuples of any length >= 2. Extra elements beyond ``(name, desc)``
(e.g. ``cmd_key``) are passed through unchanged, so callers can attach
metadata that survives the rename.
"""
used: set[str] = set(reserved)
result: list[tuple[str, str]] = []
for name, desc in entries:
result: list[tuple] = []
for entry in entries:
name, desc, *extra = entry
if len(name) > _CMD_NAME_LIMIT:
candidate = name[:_CMD_NAME_LIMIT]
if candidate in used:
@ -528,7 +536,7 @@ def _clamp_command_names(
if name in used:
continue
used.add(name)
result.append((name, desc))
result.append((name, desc, *extra))
return result
@ -611,13 +619,26 @@ def _collect_gateway_skill_entries(
try:
from agent.skill_commands import get_skill_commands
from tools.skills_tool import SKILLS_DIR
from agent.skill_utils import get_external_skills_dirs
_skills_dir = str(SKILLS_DIR.resolve())
_hub_dir = str((SKILLS_DIR / ".hub").resolve())
_hub_dir = str((SKILLS_DIR / ".hub").resolve()).rstrip("/") + "/"
# Build set of allowed directory prefixes: local skills dir + any
# user-configured ``skills.external_dirs``. Ensure each prefix ends
# with ``/`` so ``/my-skills`` does not also match ``/my-skills-extra``.
# Without this widening, external skills are visible in
# ``hermes skills list`` and the agent's ``/skill-name`` dispatch but
# silently excluded from gateway slash menus (#8110).
_allowed_prefixes = [_skills_dir.rstrip("/") + "/"]
_allowed_prefixes.extend(
str(d).rstrip("/") + "/" for d in get_external_skills_dirs()
)
skill_cmds = get_skill_commands()
for cmd_key in sorted(skill_cmds):
info = skill_cmds[cmd_key]
skill_path = info.get("skill_md_path", "")
if not skill_path.startswith(_skills_dir):
if not skill_path:
continue
if not any(skill_path.startswith(prefix) for prefix in _allowed_prefixes):
continue
if skill_path.startswith(_hub_dir):
continue
@ -635,17 +656,15 @@ def _collect_gateway_skill_entries(
except Exception:
pass
# Clamp names; _clamp_command_names works on (name, desc) pairs so we
# need to zip/unzip.
skill_pairs = [(n, d) for n, d, _ in skill_triples]
key_by_pair = {(n, d): k for n, d, k in skill_triples}
skill_pairs = _clamp_command_names(skill_pairs, reserved_names)
# Clamp names; cmd_key is passed through as extra payload so it survives
# any clamp-induced renames.
skill_triples = _clamp_command_names(skill_triples, reserved_names)
# Skills fill remaining slots — only tier that gets trimmed
remaining = max(0, max_slots - len(all_entries))
hidden_count = max(0, len(skill_pairs) - remaining)
for n, d in skill_pairs[:remaining]:
all_entries.append((n, d, key_by_pair.get((n, d), "")))
hidden_count = max(0, len(skill_triples) - remaining)
for n, d, k in skill_triples[:remaining]:
all_entries.append((n, d, k))
return all_entries[:max_slots], hidden_count
@ -721,24 +740,40 @@ def discord_skill_commands(
def discord_skill_commands_by_category(
reserved_names: set[str],
) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
"""Return skill entries organized by category for Discord ``/skill`` subcommand groups.
"""Return skill entries organized by category for Discord ``/skill`` autocomplete.
Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
Skills whose directory is nested at least 2 levels under a scan root
(e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
category. Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
*uncategorized* the caller should register them as direct subcommands
of the ``/skill`` group.
*uncategorized*.
The same filtering as :func:`discord_skill_commands` is applied: hub
skills excluded, per-platform disabled excluded, names clamped.
Scan roots include the local ``SKILLS_DIR`` **and** any configured
``skills.external_dirs`` matching the widened filter applied to the
flat ``discord_skill_commands()`` collector in #18741. Without this
parity, external-dir skills are visible via ``hermes skills list`` and
the agent's ``/skill-name`` dispatch but silently absent from Discord's
``/skill`` autocomplete.
Filtering mirrors :func:`discord_skill_commands`: hub skills excluded,
per-platform disabled excluded, names clamped to 32 chars, descriptions
clamped to 100 chars.
The legacy 25-group × 25-subcommand caps (from the old nested
``/skill <cat> <name>`` layout) are **not** applied the live caller
(``_register_skill_group`` in ``gateway/platforms/discord.py``, refactored
in PR #11580) flattens these results and feeds them into a single
autocomplete callback, which scales to thousands of entries without any
per-command payload concerns. ``hidden_count`` is retained in the return
tuple for backward compatibility and still reports skills dropped for
other reasons (32-char clamp collision vs a reserved name).
Returns:
``(categories, uncategorized, hidden_count)``
- *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
- *uncategorized*: ``[(name, description, cmd_key), ...]``
- *hidden_count*: skills dropped due to Discord group limits
(25 subcommand groups, 25 subcommands per group)
- *hidden_count*: skills dropped due to name clamp collisions
against already-registered command names.
"""
from pathlib import Path as _P
@ -752,14 +787,33 @@ def discord_skill_commands_by_category(
# Collect raw skill data --------------------------------------------------
categories: dict[str, list[tuple[str, str, str]]] = {}
uncategorized: list[tuple[str, str, str]] = []
_names_used: set[str] = set(reserved_names)
# Map clamped-32-char-name → what it came from, so we can emit an
# actionable warning on collision. Reserved (gateway-builtin) command
# names are marked with a sentinel so the warning distinguishes
# "skill collided with a reserved command" from "two skills collided
# on the 32-char clamp" — the latter is the rename-worthy case.
_names_used: dict[str, str] = {n: "<reserved>" for n in reserved_names}
hidden = 0
try:
from agent.skill_commands import get_skill_commands
from agent.skill_utils import get_external_skills_dirs
from tools.skills_tool import SKILLS_DIR
_skills_dir = SKILLS_DIR.resolve()
_hub_dir = (SKILLS_DIR / ".hub").resolve()
# Build list of (resolved_root, is_local) tuples. Each external dir
# becomes its own scan root for category derivation — a skill at
# ``<external>/mlops/foo/SKILL.md`` is still categorized as "mlops".
_scan_roots: list[_P] = [_skills_dir]
try:
for ext in get_external_skills_dirs():
try:
_scan_roots.append(_P(ext).resolve())
except Exception:
continue
except Exception:
pass
skill_cmds = get_skill_commands()
for cmd_key in sorted(skill_cmds):
@ -768,33 +822,72 @@ def discord_skill_commands_by_category(
if not skill_path:
continue
sp = _P(skill_path).resolve()
# Skip skills outside SKILLS_DIR or from the hub
if not str(sp).startswith(str(_skills_dir)):
continue
# Hub skills are loaded via the skill hub, not surfaced as
# slash commands.
if str(sp).startswith(str(_hub_dir)):
continue
# Accept skill if it lives under any scan root; record the
# matching root so we can derive the category correctly.
matched_root: _P | None = None
for root in _scan_roots:
try:
sp.relative_to(root)
except ValueError:
continue
matched_root = root
break
if matched_root is None:
continue
skill_name = info.get("name", "")
if skill_name in _platform_disabled:
continue
raw_name = cmd_key.lstrip("/")
# Clamp to 32 chars (Discord limit)
# Clamp to 32 chars (Discord per-command name limit)
discord_name = raw_name[:32]
if discord_name in _names_used:
# Two skills whose first 32 chars are identical. One wins
# (the first one seen, which is alphabetical because the
# caller iterates ``sorted(skill_cmds)``); the other is
# dropped from Discord's /skill autocomplete.
#
# Silently counting this as ``hidden`` (the old behavior)
# meant skill authors had no way to discover the drop —
# their skill just didn't appear in the picker. Emit a
# WARNING naming both sides so the author can rename the
# losing skill's frontmatter name to something with a
# distinct 32-char prefix.
prior = _names_used[discord_name]
if prior == "<reserved>":
logger.warning(
"Discord /skill: %r (from %r) collides on its 32-char "
"clamp with a reserved gateway command name %r — the "
"skill will not appear in the /skill autocomplete. "
"Rename the skill's frontmatter ``name:`` to differ "
"in its first 32 chars.",
discord_name, cmd_key, discord_name,
)
else:
logger.warning(
"Discord /skill: %r and %r both clamp to %r on "
"Discord's 32-char command-name limit — only %r "
"will appear in the /skill autocomplete. Rename "
"one skill's frontmatter ``name:`` to differ in "
"its first 32 chars.",
prior, cmd_key, discord_name, prior,
)
hidden += 1
continue
_names_used.add(discord_name)
_names_used[discord_name] = cmd_key
desc = info.get("description", "")
if len(desc) > 100:
desc = desc[:97] + "..."
# Determine category from the relative path within SKILLS_DIR.
# e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
try:
rel = sp.parent.relative_to(_skills_dir)
except ValueError:
continue
# Determine category from the relative path within the matched
# scan root. e.g. creative/ascii-art/SKILL.md → ("creative", ...)
rel = sp.parent.relative_to(matched_root)
parts = rel.parts
if len(parts) >= 2:
cat = parts[0]
@ -804,28 +897,7 @@ def discord_skill_commands_by_category(
except Exception:
pass
# Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
_MAX_GROUPS = 25
_MAX_PER_GROUP = 25
trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
group_count = 0
for cat in sorted(categories):
if group_count >= _MAX_GROUPS:
hidden += len(categories[cat])
continue
entries = categories[cat][:_MAX_PER_GROUP]
hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
trimmed_categories[cat] = entries
group_count += 1
# Uncategorized skills also count against the 25 top-level limit
remaining_slots = _MAX_GROUPS - group_count
if len(uncategorized) > remaining_slots:
hidden += len(uncategorized) - remaining_slots
uncategorized = uncategorized[:remaining_slots]
return trimmed_categories, uncategorized, hidden
return categories, uncategorized, hidden
# ---------------------------------------------------------------------------

View file

@ -400,7 +400,12 @@ DEFAULT_CONFIG = {
# The gateway stops accepting new work, waits for running agents
# to finish, then interrupts any remaining runs after the timeout.
# 0 = no drain, interrupt immediately.
"restart_drain_timeout": 60,
#
# 180s is calibrated for realistic in-flight agent turns: a typical
# coding conversation mid-reasoning runs 60–150s per call, so a 60s
# budget routinely interrupted legitimate work on /restart. Raise
# further in config.yaml if you run very-long-reasoning models.
"restart_drain_timeout": 180,
# Max app-level retry attempts for API errors (connection drops,
# provider timeouts, 5xx, etc.) before the agent surfaces the
# failure. The OpenAI SDK already does its own low-level retries
@ -639,6 +644,18 @@ DEFAULT_CONFIG = {
"cache_ttl": "5m",
},
# OpenRouter-specific settings.
# response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header).
# When enabled, identical requests return cached responses for free (zero billing).
# This is separate from Anthropic prompt caching and works alongside it.
# See: https://openrouter.ai/docs/guides/features/response-caching
# response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
# Default 300 (5 minutes). Only used when response_cache is enabled.
"openrouter": {
"response_cache": True,
"response_cache_ttl": 300,
},
# AWS Bedrock provider configuration.
# Only used when model.provider is "bedrock".
"bedrock": {
@ -825,7 +842,7 @@ DEFAULT_CONFIG = {
# Voices: alloy, echo, fable, onyx, nova, shimmer
},
"xai": {
"voice_id": "eve",
"voice_id": "eve", # or custom voice ID — see https://docs.x.ai/developers/model-capabilities/audio/custom-voices
"language": "en",
"sample_rate": 24000,
"bit_rate": 128000,

View file

@ -302,9 +302,21 @@ def _cmd_rollback(args) -> int:
print(f" reason: {manifest.get('reason', '?')}")
print(f" created_at: {manifest.get('created_at', '?')}")
print(f" skill files: {manifest.get('skill_files', '?')}")
cron = manifest.get("cron_jobs") or {}
if isinstance(cron, dict):
if cron.get("backed_up"):
print(
f" cron jobs: {cron.get('jobs_count', 0)} "
f"(will be restored for skill-link fields only)"
)
else:
reason = cron.get("reason", "not captured")
print(f" cron jobs: not in snapshot ({reason})")
print(
"\nThis will replace the current ~/.hermes/skills/ tree (a safety "
"snapshot of the current state is taken first so this is undoable)."
"snapshot of the current state is taken first so this is undoable). "
"Cron jobs that still exist will have their skills/skill fields "
"restored from the snapshot; all other cron fields are left alone."
)
if not getattr(args, "yes", False):

View file

@ -263,8 +263,11 @@ def run_doctor(args):
if env_path.exists():
check_ok(f"{_DHH}/.env file exists")
# Check for common issues
content = env_path.read_text()
# Check for common issues. Pin encoding to UTF-8 because .env files are
# written as UTF-8 everywhere in the codebase, while Path.read_text()
# defaults to the system locale — which crashes on non-UTF-8 Windows
# locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
content = env_path.read_text(encoding="utf-8")
if _has_provider_env_config(content):
check_ok("API key or custom endpoint configured")
else:

View file

@ -289,7 +289,7 @@ def _has_any_provider_configured() -> bool:
env_file = get_env_path()
if env_file.exists():
try:
for line in env_file.read_text().splitlines():
for line in env_file.read_text(encoding="utf-8").splitlines():
line = line.strip()
if line.startswith("#") or "=" not in line:
continue

View file

@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
existing_lines = []
if env_path.exists():
existing_lines = env_path.read_text().splitlines()
existing_lines = env_path.read_text(encoding="utf-8").splitlines()
updated_keys = set()
new_lines = []

View file

@ -1057,6 +1057,45 @@ def list_authenticated_providers(
if normed:
_builtin_endpoints.add(normed)
def _has_fast_aws_sdk_signal() -> bool:
"""Return True when explicit AWS auth config is present.
This intentionally avoids botocore's full credential chain. Provider
picker/model-switch discovery can run for non-Bedrock providers, and
botocore may otherwise probe EC2 IMDS (169.254.169.254) on local
machines before returning no credentials.
"""
if os.environ.get("AWS_BEARER_TOKEN_BEDROCK", "").strip():
return True
if (
os.environ.get("AWS_ACCESS_KEY_ID", "").strip()
and os.environ.get("AWS_SECRET_ACCESS_KEY", "").strip()
):
return True
return any(
os.environ.get(name, "").strip()
for name in (
"AWS_PROFILE",
"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
"AWS_CONTAINER_CREDENTIALS_FULL_URI",
"AWS_WEB_IDENTITY_TOKEN_FILE",
)
)
def _has_aws_sdk_creds_for_listing(slug: str) -> bool:
    """Decide whether an AWS-SDK provider should be listed as authenticated.

    This is the non-runtime discovery check: explicit environment signals
    are accepted for any provider, but the adapter's own credential check
    is consulted only for the provider that is currently selected.

    Args:
        slug: Provider slug being evaluated (compared case-insensitively,
            ignoring surrounding whitespace).

    Returns:
        True if credentials appear to be available, False otherwise —
        including when the Bedrock adapter cannot be imported or raises.
    """
    wanted = str(slug or "").strip().lower()
    active = str(current_provider or "").strip().lower()

    # Explicit env configuration is sufficient regardless of which provider
    # is currently active.
    if _has_fast_aws_sdk_signal():
        return True

    # Without an explicit signal, only the active provider justifies asking
    # the adapter.
    if wanted != active:
        return False

    try:
        from agent.bedrock_adapter import has_aws_credentials

        return bool(has_aws_credentials())
    except Exception:
        return False
data = fetch_models_dev()
# Build curated model lists keyed by hermes provider ID
@ -1184,7 +1223,9 @@ def list_authenticated_providers(
# Check if credentials exist
has_creds = False
if overlay.extra_env_vars:
if overlay.auth_type == "aws_sdk":
has_creds = _has_aws_sdk_creds_for_listing(hermes_slug)
elif overlay.extra_env_vars:
has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
# Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
if not has_creds and overlay.auth_type == "api_key":
@ -1324,11 +1365,7 @@ def list_authenticated_providers(
# credentials come from the boto3 credential chain (env vars,
# ~/.aws/credentials, instance roles, etc.)
if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
try:
from agent.bedrock_adapter import has_aws_credentials
_cp_has_creds = has_aws_credentials()
except Exception:
pass
_cp_has_creds = _has_aws_sdk_creds_for_listing(_cp.slug)
if not _cp_has_creds:
continue

View file

@ -1190,6 +1190,13 @@ def _setup_tts_provider(config: dict):
"Falling back to Edge TTS."
)
selected = "edge"
if selected == "xai":
print()
voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)")
if voice_id and voice_id.strip():
config.setdefault("tts", {}).setdefault("xai", {})["voice_id"] = voice_id.strip()
print_success(f"xAI voice_id set to: {voice_id.strip()}")
elif selected == "minimax":
existing = get_env_value("MINIMAX_API_KEY")
@ -1643,7 +1650,11 @@ def setup_terminal_backend(config: dict):
def _apply_default_agent_settings(config: dict):
"""Apply recommended defaults for all agent settings without prompting."""
config.setdefault("agent", {})["max_turns"] = 90
save_env_value("HERMES_MAX_ITERATIONS", "90")
# config.yaml is the authoritative source for max_turns; the gateway
# bridges it into HERMES_MAX_ITERATIONS at startup. We no longer write
# to .env to avoid the dual-source inconsistency that caused the
# 60-vs-500 bug (stale .env entry silently shadowing config.yaml).
remove_env_value("HERMES_MAX_ITERATIONS")
config.setdefault("display", {})["tool_progress"] = "all"
@ -1673,9 +1684,10 @@ def setup_agent_settings(config: dict):
print()
# ── Max Iterations ──
current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
cfg_get(config, "agent", "max_turns", default=90)
)
# config.yaml is authoritative; read from there. If a legacy .env
# entry is still around (from pre-PR#18413 setups), prefer the
# config value so we don't surface a stale number to the user.
current_max = str(cfg_get(config, "agent", "max_turns", default=90))
print_info("Maximum tool-calling iterations per conversation.")
print_info("Higher = more complex tasks, but costs more tokens.")
print_info(
@ -1686,9 +1698,13 @@ def setup_agent_settings(config: dict):
try:
max_iter = int(max_iter_str)
if max_iter > 0:
save_env_value("HERMES_MAX_ITERATIONS", str(max_iter))
# Write to config.yaml (authoritative) only. Also clean up any
# stale .env entry from earlier setup runs — the gateway's
# bridge in gateway/run.py now unconditionally derives
# HERMES_MAX_ITERATIONS from agent.max_turns at startup.
config.setdefault("agent", {})["max_turns"] = max_iter
config.pop("max_turns", None)
remove_env_value("HERMES_MAX_ITERATIONS")
print_success(f"Max iterations set to {max_iter}")
except ValueError:
print_warning("Invalid number, keeping current value")

View file

@ -1822,7 +1822,7 @@ def _reconfigure_tool(config: dict):
cat = TOOL_CATEGORIES.get(ts_key)
reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
if cat or reqs:
if _toolset_has_keys(ts_key, config):
if _toolset_has_keys(ts_key, config) or _toolset_enabled_for_reconfigure(ts_key, config):
configurable.append((ts_key, ts_label))
if not configurable:
@ -1848,6 +1848,28 @@ def _reconfigure_tool(config: dict):
save_config(config)
def _toolset_enabled_for_reconfigure(ts_key: str, config: dict) -> bool:
    """Report whether ``ts_key`` is enabled on at least one platform.

    Reconfigure has to offer enabled-but-unconfigured categories so users
    can finish provider/API-key setup without first disabling and then
    re-enabling the toolset.

    Args:
        ts_key: Toolset key to look for.
        config: Loaded configuration mapping passed through to the
            per-platform tool lookup.

    Returns:
        True as soon as one allowed platform lists the toolset as enabled;
        False if none does. Platforms whose lookup raises are skipped.
    """
    for platform in PLATFORMS:
        if _toolset_allowed_for_platform(ts_key, platform):
            try:
                active_tools = _get_platform_tools(
                    config,
                    platform,
                    include_default_mcp_servers=False,
                )
            except Exception:
                # Best-effort scan: a broken platform entry must not hide
                # the toolset on the remaining platforms.
                continue
            if ts_key in active_tools:
                return True
    return False
def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
"""Reconfigure a tool category - provider selection + API key update."""
icon = cat.get("icon", "")

View file

@ -8,14 +8,64 @@ import os
from pathlib import Path
# One-shot latch: flipped to True after the fallback warning has been written,
# so the message is emitted at most once per process.
_profile_fallback_warned: bool = False


def get_hermes_home() -> Path:
    """Return the Hermes home directory (default: ~/.hermes).

    Reads the ``HERMES_HOME`` env var and falls back to ``~/.hermes``.
    This is the single source of truth — all other copies should import this.

    When ``HERMES_HOME`` is unset but ``~/.hermes/active_profile`` names a
    non-default profile, a loud one-shot warning is written to stderr so
    cross-profile data corruption is diagnosable instead of silent.
    Behavior is unchanged otherwise — we still return ``~/.hermes`` because
    raising here would brick 30+ module-level callers that import this at
    load time. Subprocess spawners are expected to propagate ``HERMES_HOME``
    explicitly (see the systemd template in ``hermes_cli/gateway.py`` and
    the kanban dispatcher in ``hermes_cli/kanban_db.py``).
    See https://github.com/NousResearch/hermes-agent/issues/18594.

    Returns:
        ``Path(HERMES_HOME)`` when the variable holds a non-blank value,
        otherwise ``Path.home() / ".hermes"``.
    """
    global _profile_fallback_warned

    val = os.environ.get("HERMES_HOME", "").strip()
    if val:
        return Path(val)

    # Guard: if a non-default profile is sticky-active, warn once that
    # the fallback to the default profile is almost certainly wrong.
    if not _profile_fallback_warned:
        try:
            # Inline the default-root resolution from get_default_hermes_root()
            # to stay import-safe (this function is called from module scope
            # in 30+ files; we cannot afford to trigger logging setup here).
            active_path = Path.home() / ".hermes" / "active_profile"
            active = active_path.read_text().strip() if active_path.exists() else ""
        except (UnicodeDecodeError, OSError):
            # An unreadable or undecodable marker is treated as "no profile".
            active = ""
        if active and active != "default":
            _profile_fallback_warned = True
            # Write directly to stderr. We intentionally do NOT route this
            # through ``logging`` because (a) this function is called at
            # module-import time from 30+ sites, often before logging is
            # configured, and (b) root-logger propagation would double-emit
            # on consoles where a StreamHandler is already attached.
            import sys

            msg = (
                f"[HERMES_HOME fallback] HERMES_HOME is unset but active "
                f"profile is {active!r}. Falling back to ~/.hermes, which "
                f"is the DEFAULT profile — not {active!r}. Any data this "
                f"process writes will land in the wrong profile. The "
                f"subprocess spawner should pass HERMES_HOME explicitly "
                f"(see issue #18594)."
            )
            try:
                sys.stderr.write(msg + "\n")
                sys.stderr.flush()
            except Exception:
                # Deliberately best-effort: a closed or broken stderr must
                # never turn a diagnostic into a crash at import time.
                pass
    return Path.home() / ".hermes"
def get_default_hermes_root() -> Path:

View file

@ -0,0 +1,206 @@
---
name: kanban-video-orchestrator
description: Plan, set up, and monitor a multi-agent video production pipeline backed by Hermes Kanban. Use when the user wants to make ANY video — narrative film, product/marketing, music video, explainer, ASCII/terminal art, abstract/generative loop, comic, 3D, real-time/installation — and the work warrants decomposition into specialized profiles (writer, designer, animator, renderer, voice, editor, etc.) coordinated through a kanban board. Performs adaptive discovery to scope the brief, designs an appropriate team for the requested style, generates the setup script that creates Hermes profiles + initial kanban task, then helps monitor execution and intervene when tasks stall or fail. Routes scenes to whichever Hermes rendering / audio / design skill fits each beat (`ascii-video`, `manim-video`, `p5js`, `comfyui`, `touchdesigner-mcp`, `blender-mcp`, `pixel-art`, `baoyu-comic`, `claude-design`, `excalidraw`, `songsee`, `heartmula`, …) plus external APIs for TTS, image-gen, and image-to-video as needed.
version: 1.0.0
author: [SHL0MS, alt-glitch]
license: MIT
metadata:
hermes:
tags: [video, kanban, multi-agent, orchestration, production-pipeline]
related_skills: [kanban-orchestrator, kanban-worker, ascii-video, manim-video, p5js, comfyui, touchdesigner-mcp, blender-mcp, pixel-art, ascii-art, songwriting-and-ai-music, heartmula, songsee, spotify, youtube-content, claude-design, excalidraw, architecture-diagram, concept-diagrams, baoyu-comic, baoyu-infographic, humanizer, gif-search, meme-generation]
credits: |
The single-project workspace layout, profile-config patching pattern,
SOUL.md-per-profile model, TEAM.md task-graph convention, and
`--workspace dir:<path>` discipline are adapted from alt-glitch's
original multi-agent video pipeline at
https://github.com/NousResearch/kanban-video-pipeline.
---
# Kanban Video Orchestrator
Wrap any video request — from a 15-second product teaser to a 5-minute narrative
short to a music video to an ASCII loop — in a Hermes Kanban pipeline that
decomposes the work to specialized agent profiles.
This skill does **not** render anything itself. It is a meta-pipeline that:
1. **Scopes** the request through targeted discovery
2. **Designs** an appropriate team (which roles, which tools per role) based on the style
3. **Generates** a setup script that creates Hermes profiles, project workspace, and the initial kanban task
4. **Hands off** to the director profile, which decomposes via the kanban
5. **Monitors** execution, helps intervene when tasks stall or fail
The actual rendering happens inside the kanban once it's running, via whichever
existing skills + tools fit the scenes — `ascii-video`, `manim-video`, `p5js`,
`comfyui`, `touchdesigner-mcp`, `blender-mcp`, `songwriting-and-ai-music`,
`heartmula`, external APIs, or plain Python with PIL + ffmpeg.
## When NOT to use this skill
- The video is one continuous procedural project that needs no specialists. Just write the code directly.
- The user wants a quick one-shot conversion (e.g. "convert this mp4 to a GIF") — use ffmpeg directly.
- The output is a static image, GIF, or audio-only artifact — use the matching specific skill (`ascii-art`, `gifs`, `meme-generation`, `songwriting-and-ai-music`).
- The work fits a single existing skill cleanly (e.g. a pure ASCII video — just use `ascii-video`).
## Workflow
```
DISCOVER → BRIEF → TEAM DESIGN → SETUP → EXECUTE → MONITOR
```
### Step 1 — Discover (ask the right questions)
The discovery process is **adaptive**: ask only what is actually needed. Always
start with three questions to identify the broad shape:
- **What is the video?** (one-sentence brief)
- **How long?** (5-30s teaser / 30-90s short / 90s-3min explainer / 3-10min film / longer)
- **What aspect ratio + target platform?** (1:1 / 9:16 / 16:9; X, IG, YouTube, internal, etc.)
From the answer, classify the style category. The style determines which
follow-up questions to ask. **Do not ask all questions at once.** Ask 2-4 at a
time, listen, then proceed. Make reasonable assumptions whenever the user
implies an answer.
For complete intake patterns and per-style question banks, see
**[references/intake.md](references/intake.md)**.
### Step 2 — Brief
Once enough is known, produce a structured `brief.md` using the template in
`assets/brief.md.tmpl`. Stages:
1. **Concept** — the one-sentence pitch + emotional north star
2. **Scope** — duration, aspect, platform, deadline
3. **Style** — visual references, brand constraints, tone
4. **Scenes** — beat-by-beat breakdown (durations, content, target tool)
5. **Audio** — narration / music / SFX / silent (per scene if needed)
6. **Deliverables** — file format, resolution, optional alternates (vertical cut, GIF, etc.)
Show the brief to the user for confirmation before designing the team. **The
brief is the contract** — every downstream task references it.
### Step 3 — Team design
Pick role archetypes from the library that fit this video. **Compose, don't
clone.** Most videos need 4-7 profiles. The director is always present; the
rest are picked by what the brief actually requires.
For the role library and per-style team compositions, see
**[references/role-archetypes.md](references/role-archetypes.md)**.
For mapping role → which Hermes skills + toolsets it loads, see
**[references/tool-matrix.md](references/tool-matrix.md)**.
### Step 4 — Setup
Generate a setup script (`setup.sh`) and run it. The script:
1. Creates the project workspace (`~/projects/video-pipeline/<slug>/`)
2. Copies any provided assets into `taste/`, `audio/`, `assets/`
3. Creates each Hermes profile via `hermes profile create --clone`
4. Writes per-profile `SOUL.md` (personality + role definition)
5. Configures profile YAML (toolsets, always_load skills, cwd)
6. Writes `brief.md`, `TEAM.md`, and `taste/` content
7. Fires the initial `hermes kanban create` task assigned to the director
Use `scripts/bootstrap_pipeline.py` to generate setup.sh from a brief +
team-design JSON. See **[references/kanban-setup.md](references/kanban-setup.md)**
for the setup script structure, profile config patterns, and the critical
"shared workspace" rule.
### Step 5 — Execute
Run `setup.sh`. Then provide the user with monitoring commands:
```bash
hermes kanban watch --tenant <project-tenant> # live events
hermes kanban list --tenant <project-tenant> # board snapshot
hermes dashboard # visual board UI
```
The director profile takes over from here, decomposing the work and routing
tasks to specialist profiles via the kanban toolset.
### Step 6 — Monitor and intervene
Stay engaged — the kanban runs autonomously but a stuck task or bad output
needs human (or AI) judgment.
Monitoring patterns: poll `kanban list` periodically, inspect any RUNNING task
that exceeds its expected duration with `kanban show <id>`, and check
heartbeats. When a worker's output fails review, the standard interventions are:
1. Comment on the worker's task with specific feedback (`kanban_comment`)
2. Create a re-run task with the original as parent
3. Adjust the brief's scope and let the director re-decompose
For diagnostic patterns, intervention recipes, and the "task is stuck"
playbook, see **[references/monitoring.md](references/monitoring.md)**.
## Reference: worked examples
Six concrete pipelines covering very different video styles — narrative film,
product/marketing, music video, math/algorithm explainer, ASCII video, real-time
installation — showing how the same workflow yields very different teams and
task graphs. See **[references/examples.md](references/examples.md)**.
## Critical rules
1. **Discovery before action.** Never start generating a brief or team without
asking at least the three baseline questions. A bad brief cascades through
the entire pipeline.
2. **Match the team to the video.** Don't reuse the same 4-profile setup for
every job. A music video that doesn't have a beat-analysis profile will
misfire. A narrative film that doesn't have a writer profile will produce
incoherent scenes. See `references/role-archetypes.md`.
3. **One workspace per project.** All profiles for a given video share the same
`dir:` workspace. Tasks pass artifacts via shared filesystem and structured
handoffs. **Every** `kanban_create` call passes
`workspace_kind="dir"` + `workspace_path="<absolute project path>"`.
4. **Tenant every project.** Use a project-specific tenant
(`--tenant <project-slug>`). Keeps the dashboard scoped and prevents
cross-pollination with other ongoing kanbans.
5. **Respect existing skills.** When a scene fits an existing skill, the
relevant renderer should load that skill via `--skill <name>` on its task
or `always_load` in its profile. Do not re-derive what a skill already
provides.
6. **The director never executes.** Even with the full `kanban + terminal +
file` toolset, the director's `SOUL.md` rules forbid it from executing
work itself. It decomposes and routes only — every concrete task becomes
a `hermes kanban create` call to a specialist profile. The
`kanban-orchestrator` skill spells this out further.
7. **Don't over-decompose.** A 30-second product video does NOT need 20 tasks.
Aim for the smallest task graph that still parallelizes well and exposes the
right human-review gates.
8. **Verify API keys BEFORE firing.** External APIs (TTS, image-gen,
image-to-video) need keys in `~/.hermes/.env` or the user's secret store.
A worker that hits a missing-key error wastes a task slot. The setup
script's `check_key` helper aborts cleanly if a required key is missing.
## File map
```
SKILL.md ← this file (workflow + rules)
references/
intake.md ← discovery question banks per style
role-archetypes.md ← role library (writer, designer, animator, …)
tool-matrix.md ← skill + toolset mapping per role
kanban-setup.md ← setup script structure & profile config
monitoring.md ← watch + intervene patterns
examples.md ← six worked pipelines
assets/
brief.md.tmpl ← brief skeleton
setup.sh.tmpl ← setup script skeleton
soul.md.tmpl ← profile personality skeleton
scripts/
bootstrap_pipeline.py ← generate setup.sh from brief + team JSON
monitor.py ← polling + intervention helpers
```

View file

@ -0,0 +1,79 @@
# Video Brief — {{TITLE}}
> Slug: `{{SLUG}}` · Tenant: `{{TENANT}}` · Project workspace: `{{WORKSPACE}}`
## 1. Concept
**One-line pitch.** {{ONE_LINE_PITCH}}
**Emotional north star.** {{EMOTIONAL_NORTH_STAR}}
*(What should the viewer feel walking away?)*
## 2. Scope
| | |
|---|---|
| Duration | {{DURATION_S}} seconds |
| Aspect ratio | {{ASPECT}} |
| Resolution | {{RESOLUTION}} |
| Frame rate | {{FPS}} fps |
| Target platforms | {{PLATFORMS}} |
| Deadline | {{DEADLINE}} |
| Quality bar | {{QUALITY_BAR}} *(rough draft / polished / archival)* |
## 3. Style
**Visual references.** {{VISUAL_REFS}}
**Tone.** {{TONE}}
**Brand constraints.** {{BRAND_CONSTRAINTS}}
*(colors, typography, motion language; or "n/a")*
**Aesthetic rules.**
{{AESTHETIC_RULES}}
## 4. Scenes
Beat-by-beat breakdown. Each scene gets a row.
| # | Time | Content | Target tool / skill | Audio | Notes |
|---|------|---------|---------------------|-------|-------|
| 1 | 0:00–0:0X | {{SCENE_1_CONTENT}} | {{SCENE_1_TOOL}} | {{SCENE_1_AUDIO}} | {{SCENE_1_NOTES}} |
| 2 | 0:0X–0:0Y | ... | ... | ... | ... |
## 5. Audio
**Approach.** {{AUDIO_APPROACH}}
*(narration / music-only / synced to track / silent / mixed)*
**Voiceover.** {{VO_DETAILS}}
*(provider, voice, language, script source — "n/a" if no VO)*
**Music.** {{MUSIC_DETAILS}}
*(provided track path / commission via Suno / commission via heartmula /
license-free / "n/a")*
**SFX.** {{SFX_DETAILS}}
*(generated, library, or "n/a")*
## 6. Deliverables
| Format | Resolution | Notes |
|--------|-----------|-------|
| {{PRIMARY_FORMAT}} | {{PRIMARY_RES}} | The main output |
| {{ALT_FORMAT_1}} | {{ALT_RES_1}} | {{ALT_NOTES_1}} |
**Final filename.** `output/final.mp4`
*(plus optional `output/final-9x16.mp4`, `output/captions.srt`, etc.)*
## 7. Constraints
- API keys required: {{API_KEYS_REQUIRED}}
- External dependencies: {{EXT_DEPS}}
- Source assets to incorporate: {{SOURCE_ASSETS}}
---
**This brief is the contract. The director and every downstream profile read
it. If the brief changes, the kanban must be re-fired — don't edit live.**

View file

@ -0,0 +1,185 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════════════
# Video Pipeline Setup — {{TITLE}}
#
# Generated by kanban-video-orchestrator skill.
#
# Slug: {{SLUG}}
# Workspace: {{WORKSPACE}}
# Tenant: {{TENANT}}
# ═══════════════════════════════════════════════════════════════════════
set -euo pipefail
PROJECT_SLUG="{{SLUG}}"
WORKSPACE="$HOME/projects/video-pipeline/${PROJECT_SLUG}"
TENANT="{{TENANT}}"
# ─────────────────────────────────────────────────────────────────────
# 1. Verify required API keys
# ─────────────────────────────────────────────────────────────────────
echo "═══ Checking required API keys ═══"
check_key() {
  # Verify that a required API key is available.
  #
  #   $1  variable name to look for
  #   $2  Keychain account (default: hermes)
  #   $3  Keychain service (default: same as $1)
  #
  # Looks first for a non-empty `VAR=` entry in ~/.hermes/.env, then — only
  # when the `security` CLI is installed — for a matching generic password
  # in the Keychain. Returns 0 when found, 1 otherwise.
  local var="$1"
  local kc_account="${2:-hermes}"
  local kc_service="${3:-$1}"
  local env_file="$HOME/.hermes/.env"
  local env_value=""

  # `|| true` keeps a missing file or a missing key from aborting the
  # script under `set -e` + `pipefail`; an empty result simply means
  # "not configured here".
  env_value="$(grep "^${var}=" "$env_file" 2>/dev/null | cut -d= -f2- || true)"
  if [ -n "$env_value" ]; then
    echo " ✓ ${var} (env)"
    return 0
  fi

  if command -v security >/dev/null 2>&1; then
    if security find-generic-password -a "${kc_account}" -s "${kc_service}" -w >/dev/null 2>&1; then
      echo " ✓ ${var} (Keychain ${kc_account}/${kc_service})"
      return 0
    fi
  fi

  echo " ✗ ${var} not set in ~/.hermes/.env or Keychain (${kc_account}/${kc_service})"
  return 1
}
# Customize this list per project — only check keys actually used:
{{KEY_CHECKS}}
# ─────────────────────────────────────────────────────────────────────
# 2. Create project workspace
# ─────────────────────────────────────────────────────────────────────
echo "═══ Creating project workspace ═══"
mkdir -p "$WORKSPACE"/{taste,audio/{voiceover,sfx},assets,scenes,checkpoints,tools,output}
{{SCENE_DIRS}}
echo " ✓ $WORKSPACE"
# ─────────────────────────────────────────────────────────────────────
# 3. Create Hermes profiles
# ─────────────────────────────────────────────────────────────────────
echo "═══ Creating Hermes profiles ═══"
{{PROFILE_CREATE_COMMANDS}}
# ─────────────────────────────────────────────────────────────────────
# 4. Configure profiles (toolsets, skills, cwd)
# ─────────────────────────────────────────────────────────────────────
echo "═══ Configuring profiles ═══"
configure_profile() {
  # Patch one Hermes profile's config.yaml with its toolsets and
  # always-loaded skills, then verify the patch took effect.
  #
  #   $1  profile name (config at ~/.hermes/profiles/<name>/config.yaml)
  #   $2  toolsets as a JSON array string, e.g. '["kanban","terminal","file"]'
  #   $3  skills as a JSON array string,   e.g. '["kanban-worker","ascii-video"]'
  #
  # Exits the whole script with status 1 when the patch fails.
  local profile="$1"
  local toolsets_json="$2"
  local skills_json="$3"

  # The patcher runs as the condition of `if` on purpose: this script uses
  # `set -e`, under which a failing bare `python3` would abort the script
  # before a following `$?` check could print the error message below.
  if ! python3 - "$profile" "$toolsets_json" "$skills_json" "$WORKSPACE" <<'PY'
"""Patch a Hermes profile config.yaml using PyYAML so we don't depend on the
exact default-config string format. Validates the patch took effect and exits
non-zero if anything's off."""
import json
import os
import sys

try:
    import yaml
except ImportError:
    print("ERROR: PyYAML required. pip install pyyaml", file=sys.stderr)
    sys.exit(1)

# argv[4] (workspace) is accepted for call compatibility but not used here.
profile, toolsets_json, skills_json, workspace = sys.argv[1:5]
toolsets = json.loads(toolsets_json)
skills = json.loads(skills_json)

p = os.path.expanduser(f"~/.hermes/profiles/{profile}/config.yaml")
if not os.path.exists(p):
    print(f" ✗ profile config not found: {p}", file=sys.stderr)
    sys.exit(1)

# Pin UTF-8 so the result does not depend on the system locale.
with open(p, encoding="utf-8") as f:
    cfg = yaml.safe_load(f) or {}

# Apply our changes — only the keys we actually want to set.
cfg["toolsets"] = toolsets
# A bare `skills:` key loads as None; replace anything that is not a mapping.
if not isinstance(cfg.get("skills"), dict):
    cfg["skills"] = {}
cfg["skills"]["always_load"] = skills
# Note: we do NOT touch cfg["approvals"] — that's a security-sensitive
# setting (manual confirmation of tool calls). Workspace cwd is overridden
# per-task by `--workspace dir:<path>` on `hermes kanban create`, so we
# don't need to mutate cfg["terminal"]["cwd"] either.

with open(p, "w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)

# Validate by re-reading what was just written.
with open(p, encoding="utf-8") as f:
    after = yaml.safe_load(f) or {}

errors = []
if after.get("toolsets") != toolsets:
    errors.append(f"toolsets mismatch: {after.get('toolsets')!r}")
if after.get("skills", {}).get("always_load") != skills:
    errors.append(f"skills.always_load mismatch: {after.get('skills', {}).get('always_load')!r}")
if errors:
    print(f" ✗ {profile}: " + "; ".join(errors), file=sys.stderr)
    sys.exit(1)
PY
  then
    echo " ✗ failed to configure ${profile}" >&2
    exit 1
  fi
  echo " ✓ ${profile}"
}
{{PROFILE_CONFIG_COMMANDS}}
# ─────────────────────────────────────────────────────────────────────
# 5. Write SOUL.md per profile
# ─────────────────────────────────────────────────────────────────────
echo "═══ Writing profile personalities ═══"
{{SOUL_WRITES}}
# ─────────────────────────────────────────────────────────────────────
# 6. Copy brief, TEAM.md, and any provided assets
# ─────────────────────────────────────────────────────────────────────
echo "═══ Writing brief + taste ═══"
# The quoted heredoc delimiters ('BRIEF_EOF', 'TEAM_EOF') disable shell
# expansion, so the substituted text is written verbatim: `$` and backticks in
# the brief or team file are not interpreted by the shell.
cat > "$WORKSPACE/brief.md" <<'BRIEF_EOF'
{{BRIEF_CONTENTS}}
BRIEF_EOF
cat > "$WORKSPACE/TEAM.md" <<'TEAM_EOF'
{{TEAM_CONTENTS}}
TEAM_EOF
{{TASTE_WRITES}}
{{ASSET_COPIES}}
# ─────────────────────────────────────────────────────────────────────
# 7. Fire the initial kanban task
# ─────────────────────────────────────────────────────────────────────
echo "═══ Firing initial kanban task ═══"
# The --body heredoc uses an unquoted EOF, so $WORKSPACE and $TENANT are
# expanded when setup runs and the task body carries the concrete values.
hermes kanban create "Direct production of {{TITLE}}" \
--assignee director \
--workspace dir:"$WORKSPACE" \
--tenant "$TENANT" \
--priority 2 \
--max-runtime 4h \
--body "$(cat <<EOF
Read brief.md, TEAM.md, and taste/.
Decompose into the team graph defined in TEAM.md.
All child tasks MUST use:
workspace_kind="dir"
workspace_path="$WORKSPACE"
tenant="$TENANT"
Do not execute the work yourself — route every concrete subtask to the
appropriate profile via kanban_create.
EOF
)"
echo ""
echo "═══ Setup complete ═══"
echo ""
echo "Monitor with:"
echo " hermes kanban watch --tenant $TENANT"
echo " hermes kanban list --tenant $TENANT"
echo " hermes dashboard"
echo ""
echo "Workspace: $WORKSPACE"

View file

@ -0,0 +1,38 @@
# {{ROLE_NAME}}
You are the **{{ROLE_NAME}}** for this video production.
## Project context
- **Brief:** read `brief.md` in your CWD
- **Team graph:** read `TEAM.md` in your CWD
- **Style spec:** read `taste/brand-guide.md` and `taste/emotional-dna.md` in
your CWD
## What you do
{{ROLE_RESPONSIBILITIES}}
## Inputs you read
{{INPUTS_READ}}
## Outputs you produce
{{OUTPUTS_PRODUCED}}
## Tools and skills available
- **Toolsets:** {{TOOLSETS}}
- **Skills loaded:** {{SKILLS}}
- **External APIs / CLIs:** {{EXTERNAL_TOOLS}}
## Rules
{{ROLE_RULES}}
{{COMMON_RULES}}
## Common reference commands
{{COMMON_COMMANDS}}

View file

@ -0,0 +1,227 @@
# Worked Examples
Six concrete pipelines covering different video styles. Each shows the team
composition, task graph, and skill/tool choices the orchestrator would make
for that brief. **These are illustrative, not templates** — adapt to the
actual brief.
## Example 1 — Narrative short film (text-to-image → image-to-video → cut)
**Brief:** A 90-second noir-style short. A detective walks through a rainy
city. Voiceover narration. AI-generated visuals.
**Team:**
- `director` — vision, decomposition, approval
- `writer` — script + voiceover copy (loads `humanizer` for natural voice)
- `storyboarder` — beat-by-beat shot list (loads `excalidraw`)
- `image-generator` — generates each shot's still via local ComfyUI workflows
(loads `comfyui`)
- `image-to-video-generator` — animates each still (Runway/Kling, OR
ComfyUI's AnimateDiff/WAN workflows via `comfyui`)
- `voice-talent` — narration via ElevenLabs
- `audio-mixer` — VO + ambient pad
- `editor` — assembly + transitions
- `reviewer` — final QA
**Task graph:**
```
T0 director decompose
T1 writer script + voiceover.md (parent: T0)
T2 storyboarder shot list with framing per beat (parent: T1)
T3 image-generator one still per shot (~12 shots) (parent: T2)
T4 image-to-video animate each still (parent: T3)
T5 voice-talent generate narration audio (parent: T1)
T6 audio-mixer mix VO + ambient (parent: T5)
T7 editor cut + transitions + audio mux (parents: T4, T6)
T8 reviewer final QA (parent: T7)
```
**Key choices:**
- Local ComfyUI via `comfyui` skill is preferred over external API for
cost/control — but external APIs are fine if ComfyUI isn't installed
- `editor` profile is ffmpeg-only, no Hermes skill required beyond
`kanban-worker`
- Storyboarder produces `storyboard.excalidraw` alongside the markdown
## Example 2 — Product / marketing teaser
**Brief:** A 30-second product teaser for a developer tool. Shows code +
terminal + UI screen recordings, voiceover, CTA at end. Square 1:1.
**Team:**
- `director`
- `copywriter` — taglines, voiceover script, CTA (loads `humanizer`)
- `concept-artist` — style frames (loads `claude-design` for UI mockups)
- `renderer-motion-graphics` — animated UI sequences (Remotion CLI)
- `renderer-ascii` — terminal-style demo scenes (loads `ascii-video`)
- `voice-talent` — VO via ElevenLabs
- `editor` — assembly + brand-color treatment
- `audio-mixer` — VO + light music bed
- `captioner` — burned subtitles for muted-autoplay platforms
- `masterer` — produces 1:1 + 9:16 + 16:9 variants
**Task graph:**
```
T0 director decompose
T1 copywriter copy.md + cta + vo script (parent: T0)
T2 concept-artist visual-spec.md + style frames (parent: T1)
T3a renderer-motion-graphics scene 1: UI sequence (parent: T2)
T3b renderer-ascii scene 2: terminal demo (parent: T2)
T3c renderer-motion-graphics scene 3: feature highlight (parent: T2)
T3d renderer-motion-graphics scene 4: CTA card (parent: T2)
T4 voice-talent narration (parent: T1)
T5 audio-mixer VO + music bed (parent: T4)
T6 editor cut + transitions (parents: T3*, T5)
T7 captioner SRT + burned subtitles (parent: T6)
T8 masterer 1:1, 9:16, 16:9 variants (parent: T7)
```
**Key choices:**
- Multiple specialized renderers (motion-graphics + ASCII) coexist
- Captioner is included because muted autoplay is the norm on social
- `claude-design` skill for UI mockups maps directly to the product video idiom
## Example 3 — Music video (synced to provided track)
**Brief:** A 3-minute music video for a provided lo-fi hip-hop track. Visuals
should pulse with the beat. Generative + ASCII hybrid. Vertical 9:16.
**Team:**
- `director`
- `music-supervisor` — analyze track, emit `audio/beats.json` (loads `songsee`)
- `storyboarder` — beat-aligned shot list (loads `excalidraw`)
- `renderer-ascii` — ASCII scenes synced to bass kicks (loads `ascii-video`)
- `renderer-p5js` — generative particle scenes synced to highs (loads `p5js`)
- `editor` — beat-cut assembly using `beats.json`
- `reviewer` — sync QA
**Task graph:**
```
T0 director decompose
T1 music-supervisor analyze track → beats.json + spectrogram (parent: T0)
T2 storyboarder shot list aligned to beats (parents: T1, T0)
T3a renderer-ascii scene 1: bass-driven ASCII (parent: T2)
T3b renderer-p5js scene 2: high-end particle field (parent: T2)
... (more scenes)
T4 editor cut to beats + mux track (parents: T3*, T1)
T5 reviewer sync QA + final approval (parent: T4)
```
**Key choices:**
- `music-supervisor` runs FIRST — `beats.json` gates the renderers
- `editor` uses `beats.json` directly to align cuts to bass kicks
- No voice-talent — music is the audio
- Two specialized renderers (`ascii-video` + `p5js`) for visual variety
## Example 4 — Math/algorithm explainer
**Brief:** A 2-minute explainer of an algorithm. 3Blue1Brown-style. Animated
diagrams, equations, narration. Square 1:1.
**Team:**
- `director`
- `writer` — narration script (loads `humanizer`)
- `cinematographer` — visual spec (loads `manim-video`)
- `renderer-manim` — all animated scenes (loads `manim-video`)
- `voice-talent` — narration via ElevenLabs
- `editor` — assembly + audio mux
- `captioner` — burned subtitles
**Task graph:**
```
T0 director decompose
T1 writer script + narration (parent: T0)
T2 cinematographer visual spec for all scenes (parent: T1)
T3a-Tn renderer-manim scenes 1..N (parents: T2)
T4 voice-talent narration audio (parent: T1)
T5 editor cut + mux (parents: T3*, T4)
T6 captioner SRT + burn (parent: T5)
```
**Key choices:**
- `manim-video` skill drives both the cinematographer (visual language) and
the renderer (actual scene production)
- The `manim-video` skill's reference docs (animation-design-thinking,
scene-planning, equations) auto-load when needed via the renderer's pinned skill
## Example 5 — ASCII video, music-track-only
**Brief:** A 60-second pure-ASCII video reactive to an existing track. No
voiceover, no other tools. Square 1:1.
**Team:**
- `director`
- `music-supervisor` — track analysis (loads `songsee`)
- `renderer-ascii` — all visuals (loads `ascii-video`)
- `editor` — assembly + audio mux
**Task graph:**
```
T0 director decompose
T1 music-supervisor analyze track (parent: T0)
T2a renderer-ascii scene 1 (parents: T1, T0)
T2b renderer-ascii scene 2 (parents: T1, T0)
T2c renderer-ascii scene 3 (parents: T1, T0)
T3 editor stitch + mux audio (parents: T2*)
```
**Key choices:**
- Minimal team (4 profiles) for a focused single-tool project
- No reviewer — short experimental piece, director approves directly
- All scenes run through one `renderer-ascii` profile because the `ascii-video`
skill covers everything
This example illustrates the rule: **don't over-decompose**. Three scenes
through one renderer is fine. Don't spawn three renderer profiles.
## Example 6 — Real-time / installation art
**Brief:** A 2-minute audio-reactive visual for a gallery installation. Driven
by an audio input feed. TouchDesigner-based. 16:9 4K.
**Team:**
- `director`
- `cinematographer` — visual language spec (loads `touchdesigner-mcp`)
- `renderer-touchdesigner` — all visuals + record-to-disk
(loads `touchdesigner-mcp`)
- `audio-mixer` — final loudness pass on the captured audio (optional if
pre-mixed source)
- `editor` — assemble final clip from TouchDesigner recording
- `reviewer` — visual QA
**Task graph:**
```
T0 director decompose
T1 cinematographer TD operator graph spec (parent: T0)
T2 renderer-touchdesigner build TD network + record output (parent: T1)
T3 editor trim + audio mux (parent: T2)
T4 reviewer final QA (parent: T3)
```
**Key choices:**
- `touchdesigner-mcp` controls a running TouchDesigner instance — the
cinematographer designs the operator graph, renderer builds it
- Output is a recording from the running TD network, not a render-to-frames
process; editor mostly just trims
## Pattern recognition
When the user describes a video, look for these signals to map to an example:
- **Plot, characters, scripted dialogue** → Example 1 (narrative)
- **Specific product, CTA, brand colors, voiceover** → Example 2 (marketing)
- **Track file provided, "synced to music"** → Example 3 (music video)
- **"Explain how X works", math/algorithm/concept walkthrough** → Example 4 (manim explainer)
- **Terminal aesthetic, ASCII, retro pixel** → Example 5 (ASCII)
- **"Audio-reactive", "real-time", "installation"** → Example 6 (TouchDesigner)
- **Comic-style narrative** → use `renderer-comic` (`baoyu-comic` skill)
- **Retro game / pixel-art aesthetic** → use `renderer-pixel` (`pixel-art` skill)
- **3D scene, photoreal environment** → use `renderer-3d` (`blender-mcp`)
- **Generative art, particle system, shader** → use `renderer-p5js` (`p5js`)
- **AI-generated photoreal stills + animation** → use `renderer-comfyui`
(`comfyui`) for both stills and image-to-video
- **"video about how the system works", recursive demo** → composable from
any of the above; the recursion is a rendering technique, not a style
The actual team should be derived from the specific brief — these examples are
starting points, not endpoints.

View file

@ -0,0 +1,166 @@
# Intake — Discovery Question Banks
The discovery process is **adaptive**. Always start with three baseline
questions to identify the broad style category, then drill into a per-style
question bank. Ask 2-4 questions at a time, listen, then proceed. Make
reasonable assumptions whenever the user implies an answer.
## Tier 0 — Baseline (always ask)
1. **What is the video?** — One-sentence pitch
2. **How long?** — Approximate duration
3. **Aspect ratio + target platform?** — 16:9 / 9:16 / 1:1 / 4:5; X, IG, YouTube, internal, etc.
From these answers, classify the style category and pick the relevant Tier 1
follow-ups. **Do not** move on to Tier 1 until you have answers to all three.
## Style classification
Map the brief to one of these archetypes (or a hybrid):
| Archetype | Tells |
|-----------|-------|
| **Narrative film** | Plot, characters, scenes-with-events, dialogue, location |
| **Product / marketing** | A specific product or feature being shown / sold; CTA at end |
| **Music video** | A specific track exists; visuals sync to music |
| **Explainer / educational** | A concept being taught; voiceover-driven |
| **Tutorial / changelog** | Software demo, terminal-heavy, technical |
| **ASCII / terminal art** | Retro terminal aesthetic explicit, character-grid |
| **Abstract / loop** | Generative, no plot, often perfect-loop |
| **Documentary / interview cut** | Real footage, transcription-driven |
| **Real-time / installation** | Audio-reactive, gallery installation, VJ output |
If ambiguous, **ask** which category fits — don't guess. Hybrids are common
(e.g., a product video with a narrative arc); decompose into the dominant
mode + secondary modifiers.
**Recursive / meta** ("a video that shows its own production") is a
*rendering technique*, not a separate style — compose it from any of the
above by adding a two-pass render step where pass 2 uses pass 1's output as
texture inside the final scene.
## Tier 1 — Per-style follow-ups
### Narrative film
- **Setting / world?** — When and where the story takes place
- **Characters?** — How many, archetypes, who carries dialogue
- **Beat list or full script?** — Has the user written the story or do we draft it
- **Dialogue language?** — Spoken lines, on-screen subs only, silent
- **Visual generation approach?** — Text-to-image (FAL/Midjourney/Imagen) →
image-to-video (Runway/Kling), 3D animation (Blender), 2D animation,
procedural, or hybrid
- **Voice approach?** — TTS (which voice), recorded VO, no dialogue
- **Music / score?** — Commissioned (via `songwriting-and-ai-music` Suno
prompts, or local `heartmula`), licensed track provided, silent
### Product / marketing
- **Product?** — Name, what it does, key feature being shown
- **Target audience?** — Who's watching, what they care about
- **CTA?** — Visit URL, install, sign up, etc.
- **Tone?** — Serious, playful, technical, premium, edgy
- **Brand assets available?** — Logo files, color palette, fonts, existing footage
- **Animation style?** — Motion graphics (Remotion / AE-style), screen recording,
generative, illustrated
- **Voiceover?** — Yes (which voice / language) or text-only
- **Music?** — Track provided, license-free needed, custom-composed
### Music video
- **Track file?** — Path to the audio (essential — we'll analyze BPM + beats)
- **Track length to use?** — Full song or a section
- **Genre / energy?** — Tells what visual rhythm and density to use
- **Lyric / narrative content?** — Are there lyrics to render on screen,
or is it purely visual?
- **Visual reference style?** — Existing music videos / artists for reference
- **Performer footage?** — None, has clips, will provide
- **Visual generation approach?** — Per-beat generative, edit-driven cuts of stock
footage, illustrated, hybrid
### Explainer / educational
- **What concept is being taught?** — One-sentence concept, key takeaway
- **Audience expertise?** — Beginner / intermediate / expert
- **Diagram density?** — Heavy math / formulas / code / abstract concepts
- **Voiceover?** — TTS / recorded / on-screen text only
- **Tool preference?** — `manim-video` (math), `p5js` (generative),
Remotion (UI motion graphics), `comfyui` (AI-generated visuals),
`ascii-video` (technical/retro), hybrid
- **Pacing?** — Fast and dense (3Blue1Brown) or slow and contemplative
### Tutorial / changelog / software demo
- **Software being demonstrated?** — Name, what it does
- **Demo script?** — Sequence of commands / screens to show
- **Terminal-only or with GUI?**
- **Voiceover for narration?**
- **Diagram support needed?** — Often these benefit from a diagram skill
alongside the screen-capture/render step (`excalidraw`,
`architecture-diagram`, `concept-diagrams`)
### ASCII / terminal art
- **Source material?** — Generative / driven by audio / converting existing
video / static image starting point
- **Color palette?** — Brand-driven (gold/black/blue), Matrix green, full
rainbow, monochrome
- **Audio reactivity?** — None / loose mood / tight beat sync / FFT-driven
- **Character set?** — ASCII only / Unicode block-drawing / mystic glyphs
- **Loop or narrative?** — Perfect loop or one-shot
### Abstract / loop
- **Mood / emotion?** — One word that captures the feel
- **Motion type?** — Zoom-into-itself, particle drift, wave, geometric, organic
- **Loop required?** — Perfect loop (Droste-style) or just satisfying ending
- **Audio?** — Silent, ambient pad, beat-synced
### Documentary / interview cut
- **Source footage?** — Provided clips, length per clip
- **Transcript / subtitles?** — Provided or to be generated
- **Story structure?** — Chronological / thematic / arc
- **B-roll approach?** — Generated, stock library, none
### Real-time / installation
- **Output environment?** — Gallery wall, projector, screen, web embed
- **Audio source?** — Live audio input, pre-recorded track, both
- **Reactivity tightness?** — Mood-level (loose) vs. tight beat-sync vs. live
parameter control
- **Tool preference?** — `touchdesigner-mcp` for full TD operator graphs;
`p5js` for web-canvas; `comfyui` for generative-AI fed by audio features
## Tier 2 — Always ask near the end
- **Brand assets path?** — Where logo / color palette / fonts / music library lives
- **Output format requirements?** — Codec preference, target file size, accepted
alternates (vertical cut, GIF, audio-only)
- **Deadline?** — Affects task `max_runtime_seconds` and acceptable scope
- **Quality bar?** — Rough draft for review / polished final / archival
- **Existing footage / assets to reuse?** — Anything that should appear, not just inform
## Reasonable assumption defaults
When the user under-specifies, fill in these defaults rather than asking:
| Question | Default |
|----------|---------|
| Frame rate | 30 fps for X / IG; 60 fps for tutorials/explainers; 24 fps for narrative film |
| Resolution | 1080×1080 for square, 1920×1080 for 16:9, 1080×1920 for 9:16 |
| Codec | H.264 / yuv420p, CRF 18 |
| Audio codec | AAC 192 kbps |
| Voice | Provider's mid-range neutral voice unless brand calls for distinctive timbre |
| Music | Silent (require user to specify if music is wanted) |
| Captions | On for explainer/tutorial; off for narrative/abstract unless requested |
| Quality bar | Polished final unless user says draft |
State the assumption explicitly: *"Assuming 30fps and AAC audio unless you say otherwise — proceed?"*
## Anti-patterns
- **Asking 10 questions at once.** Maximum 4 per turn.
- **Asking for things the brief already implies.** If the user said "music video for my track," do not ask "is there a track?"
- **Failing to classify before drilling in.** Tier-1 questions depend on classification; mixing them up wastes turns.
- **Treating "make a video" as enough to proceed.** Always confirm the three baseline questions.

View file

@ -0,0 +1,276 @@
# Kanban Setup — Project Bootstrap & Profile Configuration
Once the brief is locked and the team is designed, the next step is producing
the actual `setup.sh` that creates the project workspace, configures Hermes
profiles, and fires the initial kanban task.
This file documents the patterns. The companion script
`scripts/bootstrap_pipeline.py` automates most of it from a structured input
JSON.
> **Credit:** the single-project-workspace layout, profile-config patching
> approach, SOUL.md-per-profile convention, and `--workspace dir:<path>` rule
> are adapted from alt-glitch's original multi-agent video pipeline:
> [NousResearch/kanban-video-pipeline](https://github.com/NousResearch/kanban-video-pipeline).
> This skill generalizes those patterns across video styles and replaces the
> string-replacement config patcher with a PyYAML-based one.
## Project workspace structure
Every video project gets one workspace under `~/projects/video-pipeline/<slug>/`:
```
~/projects/video-pipeline/<slug>/
├── brief.md ← the contract; all tasks reference
├── TEAM.md ← team composition + task graph (director reads this)
├── taste/
│ ├── brand-guide.md ← color, typography, motion rules
│ ├── emotional-dna.md ← what the piece should FEEL like
│ └── style-frames/ ← optional: visual references
├── audio/
│ ├── track.mp3 ← provided music (if any)
│ ├── voiceover/ ← per-line TTS clips
│ └── sfx/ ← sound effects
├── assets/
│ ├── logos/
│ ├── fonts/
│ └── existing-footage/ ← reusable provided clips
├── scenes/
│ ├── scene-01/
│ │ ├── VISUAL_SPEC.md ← cinematographer's per-scene spec
│ │ ├── render.py ← renderer's code (or sketch.html, etc.)
│ │ ├── checkpoints/ ← preview frames for QA
│ │ └── clip.mp4 ← the deliverable for this scene
│ ├── scene-02/...
│ └── ...
├── checkpoints/ ← global review frames
├── tools/ ← optional project-local helpers
└── output/
├── final.mp4 ← stitched + audio
├── final-noaudio.mp4
├── final-9x16.mp4 ← optional: vertical alternate
└── captions.srt ← optional: subtitle file
```
**The slug** is derived from the brief title: lowercase, hyphen-separated.
Example: `q3-product-teaser`, `ascii-mood-loop`, `interview-cut-2026-q1`.
## The setup.sh script
The setup script first verifies API-key prerequisites (see below), then does six things in order:
1. **Create workspace tree** — all directories above
2. **Create profiles** — `hermes profile create <name> --clone`
3. **Configure profiles** — patch each profile's
`~/.hermes/profiles/<name>/config.yaml` to set `toolsets` and
`skills.always_load` (and nothing else — `terminal.cwd` is left untouched)
4. **Write SOUL.md per profile** — the personality + role definition
5. **Copy any provided assets + write `brief.md`, `TEAM.md`, and `taste/`**
6. **Fire the initial kanban task** — `hermes kanban create` assigned to the director
See `assets/setup.sh.tmpl` for the skeleton.
### Profile creation pattern
```bash
hermes profile create director --clone 2>/dev/null || true
```
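The `--clone` flag clones from the active profile (preserving model, base
config). The `|| true` makes the script idempotent — re-running won't error if
the profile already exists. Note that `2>/dev/null || true` also hides genuine
creation failures; those surface in the next step, when the profile's
`config.yaml` cannot be found.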
### Profile config patching
Each profile has a YAML config at `~/.hermes/profiles/<name>/config.yaml`. The
setup script edits exactly two keys:
1. `toolsets:` — replace the default with the role's required toolsets
2. `skills.always_load:` — list the role's must-load skills (may be empty)
**Do NOT** modify `approvals.mode` (controls user-confirmation of tool calls
— a security setting that must stay as the user configured it). **Do NOT**
modify `terminal.cwd` — the kanban dispatcher overrides cwd per-task via
`--workspace dir:<path>`, so the profile's cwd is irrelevant to the kanban
work and changing it could break the user's interactive use of the profile.
Use **PyYAML**, not string replacement, so the patch is robust against
default-config schema drift:
```bash
configure_profile() {
local profile="$1"
local toolsets_json="$2" # JSON array, e.g. '["kanban","terminal","file"]'
local skills_json="$3" # JSON array, e.g. '["kanban-worker","ascii-video"]'
python3 - "$profile" "$toolsets_json" "$skills_json" <<'PY'
import json, os, sys, yaml
profile, ts_json, sk_json = sys.argv[1:4]
p = os.path.expanduser(f"~/.hermes/profiles/{profile}/config.yaml")
with open(p) as f:
cfg = yaml.safe_load(f) or {}
cfg["toolsets"] = json.loads(ts_json)
cfg.setdefault("skills", {})["always_load"] = json.loads(sk_json)
with open(p, "w") as f:
yaml.safe_dump(cfg, f, sort_keys=False)
PY
}
```
PyYAML must be installed in the user's Python (it ships with most Hermes
installs). If absent: `pip install pyyaml`.
The setup script should also **validate** the patch by re-reading the file
and comparing — see `assets/setup.sh.tmpl` for the validation pattern.
### SOUL.md per profile
Each profile gets a `SOUL.md` at `~/.hermes/profiles/<name>/SOUL.md` that
defines its role, voice, and rules. See `assets/soul.md.tmpl` for the
template. Customize per role and per project.
The director's SOUL.md should be the most opinionated — its voice flavors
the entire production. **Critical content for the director's SOUL.md:**
- **Anti-temptation rules:** "Do not execute the work yourself. For every
concrete task, create a kanban task and assign it. Decompose, route, comment,
approve — that's the whole job." (The `kanban-orchestrator` skill provides
the deeper playbook; load it.)
- **Decomposition steps:** Read `brief.md`, `TEAM.md`, `taste/`. Use the team
graph in `TEAM.md` to fan out tasks.
- **The workspace_path rule** (see below).
Other profiles' SOUL.md is briefer; mostly mechanical: who you are, what you
read, what you produce, what skills/tools to use, where to write outputs.
Most non-director profiles should `always_load: kanban-worker` for the
deeper-than-baseline kanban guidance.
### Initial kanban task
The final action of setup.sh is firing the kanban:
```bash
hermes kanban create "Direct production of <video title>" \
--assignee director \
--workspace dir:"$HOME/projects/video-pipeline/${PROJECT_SLUG}" \
--tenant ${PROJECT_SLUG} \
--priority 2 \
--max-runtime 4h \
--body "$(cat <<EOF
Read brief.md, TEAM.md, and taste/.
Decompose into the team graph defined in TEAM.md.
All child tasks MUST use:
workspace_kind="dir"
workspace_path="$HOME/projects/video-pipeline/${PROJECT_SLUG}"
tenant="${PROJECT_SLUG}"
EOF
)"
```
The `--workspace dir:<path>` flag is **critical** — it tells the kanban that
all child tasks share this workspace. Skipping or using `worktree` will
isolate profiles and break artifact sharing.
## The TEAM.md file
Alongside `brief.md`, write a `TEAM.md` that the director reads. It documents
the team composition + task graph the orchestrator should follow. This
removes ambiguity and prevents the director from inventing extra steps.
Example structure (for an ASCII video with a music supervisor and editor):
```markdown
# Team & Task Graph — <video title>
## Team
- `director` (this profile) — vision, decomposition, approval
- `cinematographer` — visual spec, quality review (loads `ascii-video`)
- `renderer-ascii` — ASCII scenes (loads `ascii-video`)
- `music-supervisor` — track analysis (loads `songsee`)
- `voice-talent` — narration (uses ElevenLabs API)
- `audio-mixer` — final mix (ffmpeg)
- `editor` — assembly (ffmpeg)
- `reviewer` — final QA gate
## Task Graph
T0: this task — decompose
├── T1: cinematographer "Design visual language" (parent: T0)
│ │
│ ├── T2a: renderer-ascii "Scene 1 — title card" (parent: T1)
│ ├── T2b: renderer-ascii "Scene 2 — main beat" (parent: T1)
│ ├── T2c: renderer-ascii "Scene 3 — outro" (parent: T1)
├── T3: music-supervisor "Analyze track + emit beats.json" (parent: T0)
├── T4: voice-talent "Generate narration" (parent: T0)
├── T5: audio-mixer "Mix VO + bg music" (parents: T3, T4)
├── T6: editor "Assemble cut + mux audio" (parents: T2*, T5)
└── T7: reviewer "Final QA" (parent: T6)
```
The director turns this into actual `kanban_create` calls.
## API-key prerequisites check
Before firing the kanban, verify required keys are available. Check both
`~/.hermes/.env` and macOS Keychain (if on macOS):
```bash
check_key() {
local var="$1"
local kc_account="$2"
local kc_service="$3"
if grep -q "^${var}=" ~/.hermes/.env 2>/dev/null && \
[ -n "$(grep "^${var}=" ~/.hermes/.env | cut -d= -f2-)" ]; then
return 0
fi
if command -v security >/dev/null 2>&1 && \
security find-generic-password -a "${kc_account}" -s "${kc_service}" -w >/dev/null 2>&1; then
return 0
fi
echo "ERROR: ${var} not set in ~/.hermes/.env or Keychain (${kc_account}/${kc_service})"
return 1
}
check_key ELEVENLABS_API_KEY hermes ELEVENLABS_API_KEY || exit 1
check_key OPENROUTER_API_KEY hermes OPENROUTER_API_KEY || exit 1
# ...
```
If a key is missing, the script aborts with a clear message rather than
firing a kanban that will hit credential errors mid-execution.
## Critical rules
1. **`workspace_kind="dir"` + `workspace_path="<absolute>"` on every kanban_create.** Otherwise profiles can't share artifacts.
2. **Tenant every task.** `--tenant <project-slug>` keeps the dashboard scoped
and prevents cross-pollination with other ongoing kanbans.
3. **Idempotency keys.** For tasks that should not duplicate on re-run (e.g.,
setup creating profiles), use the `idempotency_key` argument or check
existence first.
4. **`max_runtime_seconds` per task.** Renderers that get stuck eat compute.
Standard defaults:
- Renderer task: 1800s (30min)
- Editor task: 600s (10min)
- Voice-talent task: 300s (5min)
- Image-generator task: 600s (10min)
- Image-to-video-generator task: 900s (15min)
5. **Heartbeats for long renders.** Tasks expected to run >5min should emit
`kanban_heartbeat` periodically with progress. Renderers should report
frame counts; the editor should report assembly progress.
6. **The `audio/` and `taste/` dirs are populated BEFORE firing the kanban.**
Don't ask the director's pipeline to source these — copy at setup time.
7. **`brief.md` is read-only after setup.** If the brief changes during
execution, that's a significant pivot — re-fire the kanban rather than edit
live.

View file

@ -0,0 +1,180 @@
# Monitoring — Watch the Pipeline + Intervene
After `setup.sh` fires the kanban, the work runs autonomously. The role of
this skill in the execution phase is to help the user (and the AI overseeing
the session) detect problems early and intervene effectively.
## Live monitoring commands
```bash
# Live event stream — task spawns, status changes, heartbeats, completions
hermes kanban watch --tenant <project-slug>
# Snapshot of the board
hermes kanban list --tenant <project-slug>
hermes kanban list --tenant <project-slug> --json # machine-readable
# Per-status counts + oldest-ready age
hermes kanban stats --tenant <project-slug>
# Visual dashboard (browser)
hermes dashboard
# Inspect a specific task (includes comments + events)
hermes kanban show <task-id>
# Follow a single task's event stream
hermes kanban tail <task-id>
```
Verify available subcommands with `hermes kanban --help` — the kanban CLI
ships with `init / create / list / show / assign / link / unlink / claim /
comment / complete / block / unblock / archive / tail / dispatch / watch /
stats / heartbeat / log / runs / context / gc`.
The companion `scripts/monitor.py` polls the kanban via the CLI and surfaces
common issues (stuck tasks, missing heartbeats, repeated retries, dependency
deadlocks).
## What to watch for
### Healthy pipeline indicators
- Tasks transition `READY → RUNNING → DONE` in roughly the expected order
- Renderers emit periodic `kanban_heartbeat` events with progress (e.g. "frame
240/720")
- Each task's runtime is well under its `max_runtime_seconds` cap
- No task accumulates more than 1 retry
- Dependency arrows resolve (children unblock as parents complete)
### Warning signs
| Symptom | Likely cause | Action |
|---------|--------------|--------|
| Task RUNNING but no heartbeat in 2+ min | Worker stuck, infinite loop, blocked on input | `hermes kanban show <id>` — read the worker's last events. The dispatcher SIGTERMs tasks that exceed their `max-runtime`; if you need to stop one earlier, `hermes kanban block <id>` then `hermes kanban archive <id>`, and create a re-run task. |
| Same task retried 2+ times | Reproducible failure (missing key, bad spec, broken tool) | `hermes kanban show <id>` to read failure events. Fix root cause before re-running. |
| RUNNING longer than max_runtime | Task is slow but progressing OR genuinely stuck | Check heartbeats with `hermes kanban tail <id>`. If progressing, the dispatcher will SIGTERM eventually anyway — raise `max-runtime` on a re-created task. |
| Child task READY but parents still RUNNING for >2× expected | Cascade slow, dependency miswired | Check the dependency graph. Inspect the parent: sometimes it completed but its handoff fields (summary, metadata) were empty so the child has nothing to consume. |
| New tasks not appearing | Director is hung in decomposition | Inspect director task with `kanban show`. Often a malformed `kanban_create` call. |
| Specialist tasks completing instantly | Decomposition created tasks without bodies | Director didn't pass enough context. Re-create with explicit body content. |
| Tasks created but never picked up | Profile not running, or tenant mismatch, or dispatcher not running | Check `hermes profile list` (profile exists?), `hermes status` (gateway/dispatcher up?), and verify tenant. |
| Specific renderer task fails → review note → renderer redoes → fails again | Brief is asking for the impossible | Pivot the brief, not the renderer. |
## Intervention recipes
### Rejecting bad output
When a renderer ships a clip that doesn't pass review:
```bash
# 1. Comment on the renderer's task with specific feedback
hermes kanban comment <renderer-task-id> "Scene 3 looks too sparse \
— increase visual density. Tighten color palette to brand spec."
# 2. Create a re-render task with the original as parent
hermes kanban create "Scene 3 — re-render with feedback" \
--assignee renderer-ascii \
--parent <renderer-task-id> \
--workspace dir:"$HOME/projects/video-pipeline/<slug>" \
--tenant <slug> \
--skill ascii-video \
--max-runtime 30m
```
### Adding a new dependency mid-flight
When the editor needs an asset that wasn't originally planned (e.g., a captions
file):
```bash
# 1. Create the new task and capture its id
NEW_TASK_ID=$(hermes kanban create "Generate SRT captions from voiceover" \
--assignee captioner \
--workspace dir:"$HOME/projects/video-pipeline/<slug>" \
--tenant <slug> \
--json | python3 -c "import json,sys;print(json.load(sys.stdin)['id'])")
# 2. Wire it as a parent of the editor's task with `kanban link`
hermes kanban link "$NEW_TASK_ID" <editor-task-id>
```
`kanban link` takes `parent_id child_id` (parent first). Use `kanban unlink`
to remove a dependency.
### Stopping a worker that's stuck
The kanban dispatcher will SIGTERM (then SIGKILL) any task that exceeds its
`--max-runtime` automatically. To stop one sooner:
```bash
# Mark blocked so the dispatcher leaves it alone, then archive
hermes kanban block <task-id>
hermes kanban archive <task-id>
# Diagnose what happened
hermes kanban show <task-id> # task body, comments, recent events
hermes kanban tail <task-id> # follow the live event stream
hermes kanban log <task-id> # worker process log
```
After stopping, decide: fix root cause + re-create the task, or skip and
adjust dependent tasks.
### Pivoting the brief
If during execution the user wants something fundamentally different:
1. Stop the active director task and all RUNNING children (`hermes kanban block <id>` then `hermes kanban archive <id>` for each)
2. Edit `brief.md` and `TEAM.md`
3. Re-fire the initial `hermes kanban create` for the director
Don't try to "edit while running" — the kanban's audit trail makes a clean
pivot more legible than mid-stream changes.
## Periodic check-in script
A simple polling pattern for hands-off monitoring:
```bash
while true; do
clear
hermes kanban list --tenant <slug>
echo "---"
hermes kanban stats --tenant <slug>
sleep 30
done
```
For a live event feed, run `hermes kanban watch --tenant <slug>` in a
separate terminal — it streams task lifecycle events as they happen.
For automated intervention (auto-restart stuck tasks, auto-create re-render on
review failure), see the `scripts/monitor.py` patterns.
## When to call it done
The pipeline is finished when:
1. All RENDER tasks complete and pass review
2. The editor's `output/final.mp4` exists and `ffprobe` confirms expected
duration + streams
3. The reviewer (if present) has approved
4. Optional masterer variants exist
At this point, present the final.mp4 path to the user along with any review
notes. Do NOT delete the workspace — the user may want to iterate on a single
scene without re-running the whole pipeline.
## Common gotchas
- **Tenant mismatches.** A task created with the wrong tenant won't appear in
monitoring. Always pass `--tenant <slug>` consistently.
- **Profile process not running.** Tasks queue indefinitely in READY if no
worker for that profile is online. Check `hermes profile list` and start
any missing profiles.
- **Workspace permissions.** All profiles need read+write to the workspace
directory. Run `chmod -R u+rw <workspace>` if any worker reports permission
errors.
- **Audio/visual sync.** The editor's clip stitching must match the
renderer's actual output durations. Don't hardcode scene durations in
the editor — read from the renderer's handoff metadata.

View file

@ -0,0 +1,298 @@
# Role Archetypes
The library of role archetypes for video production. **Compose a team from this
list, don't clone a fixed roster.** Most videos need 4-7 profiles. The director
is always present; everything else is conditional on the brief.
Each role's profile name is by convention `kebab-case` (e.g. `creative-director`,
`image-generator`). Multiple instances of the same role get descriptive suffixes
when they need different focus (e.g., `renderer-ascii`, `renderer-3d`).
For toolset + skill mapping per role, see [tool-matrix.md](tool-matrix.md).
## Always present
### director
The vision-holder. Reads the brief and brand guide, decomposes into a task
graph, comments to steer creative direction, approves the final cut.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-orchestrator`. The kanban plugin auto-injects baseline
orchestration guidance for free; `kanban-orchestrator` is the deeper
decomposition playbook. Add `creative-ideation` if the brief is wide-open
and needs framing help.
- **Personality:** Tied to the brand voice — see `assets/soul.md.tmpl`
The director has the same toolsets as most workers (kanban, terminal, file),
but its `SOUL.md` rules **forbid** execution. The "decompose, don't execute"
discipline is enforced by personality + the kanban-orchestrator skill, not by
missing tools.
## Pre-production roles
Pick based on what the brief needs.
### writer / screenwriter
Writes scripts, dialogue, voiceover copy, narration. Use for any video with
spoken or written words beyond a tagline.
- **Toolsets:** kanban, file
- **Skills:** `kanban-worker`, `humanizer` (post-process to strip AI-tells)
- **Outputs:** `script.md`, `narration.md`, `dialogue/scene-NN.md`
### copywriter
Like `writer` but specifically for marketing copy: taglines, CTAs, voiceover
scripts for product videos.
- **Toolsets:** kanban, file
- **Skills:** `kanban-worker`, `humanizer`
- **Outputs:** `copy.md`
### concept-artist / visual-designer
Develops the visual identity: mood board, style frames, color palette
rationale, typography choices. Produces a `visual-spec.md` that all generators
follow. Often produces still reference frames using image-generation APIs or
local skills.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker` plus any project-specific design skill —
`claude-design` (UI/web), `sketch` (quick mockup variants),
`popular-web-designs` (matching known web aesthetic), `pixel-art` (retro),
`ascii-art` (terminal/retro), `excalidraw` (hand-drawn frames),
`design-md` (text-based design docs)
- **Outputs:** `visual-spec.md`, `taste/style-frames/*.png`
### storyboarder
Maps the brief to a beat-by-beat shot list with timing. Critical for narrative
film and music video. Often pairs with a diagramming tool.
- **Toolsets:** kanban, file
- **Skills:** `kanban-worker` plus a diagram skill — `excalidraw` (sketch),
`architecture-diagram` (technical/system), `concept-diagrams` (educational/
scientific)
- **Outputs:** `storyboard.md` with one row per scene/shot, optional
storyboard sketches
### cinematographer / dp
Designs the visual language: framing, color, motion, transitions. Reviews
generator output for visual consistency. Hands off per-scene `VISUAL_SPEC.md`.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker` plus the visual skill that matches the project
(e.g., `ascii-video` for ASCII work, `manim-video` for explainers,
`touchdesigner-mcp` for real-time visuals, etc.)
- **Outputs:** `scenes/scene-NN/VISUAL_SPEC.md`, review comments on renderer
tasks
- **Reviews via:** any media-analysis approach (Gemini multimodal, manual
inspection of clip thumbnails, ffprobe summaries)
## Production roles
### renderer (generic)
A worker that produces visual content for one or more scenes. Loaded with
whichever creative skill fits the scene's style. Multiple renderers can run in
parallel, each pinned to a different skill via `always_load` in their profile
or `--skill` on the task.
- **Toolsets:** kanban, terminal, file
- **Skills:** one creative skill (see specialized variants below)
- **Outputs:** `scenes/scene-NN/clip.mp4`
### Specialized renderer variants
When scenes need very different tools, create specialized renderer profiles
instead of overloading one. Each loads a different creative skill.
| Variant | Skill | Best for |
|---------|-------|----------|
| `renderer-ascii` | `ascii-video` | Terminal aesthetic, retro pixel, audio-reactive grid, video-to-ASCII conversion |
| `renderer-manim` | `manim-video` | Math, algorithms, 3Blue1Brown-style explainers, equation derivations |
| `renderer-p5js` | `p5js` | Generative art, particles, shaders, organic motion, web-canvas content |
| `renderer-comfyui` | `comfyui` | AI-generated stills + video using local ComfyUI workflows (img-to-img, img-to-video, etc.) |
| `renderer-touchdesigner` | `touchdesigner-mcp` | Real-time, audio-reactive, installation art, VJ-style content |
| `renderer-3d` | `blender-mcp` *(optional)* | 3D modeling, animation, photoreal environments, character animation |
| `renderer-pixel` | `pixel-art` | Retro game aesthetic with era-correct palettes |
| `renderer-comic` | `baoyu-comic` | Knowledge-comic style narrative scenes |
| `renderer-meme` | `meme-generation` *(optional)* | Meme-style stills for satirical/social content |
| `renderer-procedural` | (none — Python with PIL + ffmpeg directly) | Custom procedural content where no skill fits |
| `renderer-video` | (external image-to-video API: Runway / Kling / Luma) | Animating still images in narrative film |
| `renderer-motion-graphics` | (external — Remotion CLI) | Motion graphics, kinetic typography, UI animations |
For external-API renderers, the profile holds the API client logic; only
`kanban-worker` is loaded, plus the terminal toolset and the API key.
### image-generator
Specifically for text-to-image generation. Often produces stills that go to
`renderer-video` for animation.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`, optionally `comfyui` (drives a local
ComfyUI install for image generation)
- **External APIs (alternative to local ComfyUI):** FAL, Replicate, OpenAI
Images, Midjourney
- **Outputs:** `scenes/scene-NN/stills/*.png`
### image-to-video-generator
Takes still images and animates them via Runway/Kling/Luma APIs, or via
ComfyUI's image-to-video workflows locally. Almost always follows
`image-generator` in narrative film pipelines.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`, optionally `comfyui` (for local image-to-video
workflows like AnimateDiff or WAN)
- **External APIs:** Runway, Kling, Luma, Pika
- **Outputs:** `scenes/scene-NN/clip.mp4`
### music-supervisor
Sources, analyzes, and prepares the music track. For music videos, also
produces a beat/BPM map and key-moment timestamps. Uses `songsee` for
spectrograms when the editor or renderer needs a visual reference of the
audio's energy.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`, `songsee` (audio visualization), plus one of:
- `songwriting-and-ai-music` — when commissioning lyrics + Suno prompts
- `heartmula` — when generating music with the open-source local model
- `spotify` — when sourcing existing tracks
- **Outputs:** `audio/track.mp3`, `audio/beats.json`, optional
`audio/track-spectrogram.png`
### voice-talent / narrator
Generates voiceover audio. Calls a TTS API directly; no Hermes skill required
beyond `kanban-worker`. The user can also supply pre-recorded VO instead of
generation.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **External APIs:** ElevenLabs, OpenAI TTS, etc.
- **Outputs:** `audio/voiceover/line-NN.mp3`, `audio/voiceover/timeline.mp3`
### foley / sfx-designer
Sound effects and ambient design. Often optional unless the brief calls for
sound design specifically.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`, `songsee` for audio-feature visualization when
designing to a track
- **Outputs:** `audio/sfx/*.mp3`
## Post-production roles
### editor
Assembles the final cut from clips. Uses ffmpeg for stitching, fades,
transitions. Reviews each clip for pacing and quality before assembly.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **External tools:** ffmpeg, ffprobe
- **Outputs:** `output/final.mp4`, `output/final-noaudio.mp4`
### colorist
Color grading. Usually optional — if the renderers already produce
brand-consistent output and the editor just stitches, the colorist is overkill.
Worth including for narrative film with hero shots.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **Outputs:** `output/final-graded.mp4`
### audio-mixer
Mixes voiceover + music + SFX into a final audio track. Sets levels, ducks
music under VO, normalizes loudness (LUFS).
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **External tools:** ffmpeg with `loudnorm` filter, optional `sox`
- **Outputs:** `audio/final-mix.mp3`
### captioner
Burns subtitles into the video, generates SRT, handles accessibility. Can also
generate captions from audio via Whisper.
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **External tools:** Whisper (CLI or API), ffmpeg subtitle filters
- **Outputs:** `output/captions.srt`, `output/final-captioned.mp4`
### masterer
Final encode + format variants. Produces deliverables for each platform target
(square for IG, vertical for TikTok, full HD for YouTube, etc.).
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **Outputs:** `output/final-1080.mp4`, `output/final-9x16.mp4`, etc.
## QA roles
### reviewer
A neutral quality gate. Reads the brief, watches the cut, comments
specifically on what's off (pacing, sync, brand alignment, technical
quality). Distinct from the cinematographer (who reviews visuals during
production) and the editor (who reviews for assembly).
- **Toolsets:** kanban, terminal, file
- **Skills:** `kanban-worker`
- **External tools:** any media-analysis approach (Gemini multimodal,
ffprobe, manual frame extraction)
- **Outputs:** `review-notes.md`, comments on tasks
### brand-cop
Reviews specifically for brand compliance — colors, typography, voice. Use
when the brand guidelines are detailed and a generic reviewer might miss
violations.
- **Toolsets:** kanban, file
- **Skills:** `kanban-worker`
- **Outputs:** comments + `brand-review.md`
## Composing teams — heuristics
- **Always:** director + at least one renderer + editor.
- **Add writer** if scripted dialogue / narration / on-screen text exceeds a
tagline.
- **Add storyboarder** if the brief has more than 5 distinct beats and the
director hasn't already laid out a beat list.
- **Add cinematographer** if multiple renderer instances need consistent
visual language. (For a single-tool video, the renderer's own skill spec
is enough.)
- **Add image-generator + image-to-video-generator pair** for narrative film
with photorealistic visuals.
- **Add music-supervisor** when music is provided and rhythm matters
(music videos always; explainers sometimes).
- **Add voice-talent** for any voiceover / narrative dialogue.
- **Add audio-mixer** when there are 2+ audio sources (VO + music, music + SFX).
- **Add captioner** for accessibility-priority projects (explainer, tutorial,
any platform that defaults to muted playback).
- **Add reviewer** for high-stakes projects. Skip for quick experimental loops.
- **Add masterer** when multiple platform deliverables are needed.
## Anti-patterns
- **One renderer doing everything.** If scenes use very different tools
(ASCII + 3D + motion graphics), use specialized renderer variants. The
renderer loads ONE creative skill at a time; mixing styles in a single
renderer causes thrashing.
- **A separate profile per scene.** No. Profiles are per-role, not per-scene.
Eight scenes use one or two renderer profiles, not eight.
- **A "general" profile that does everything.** Worse than no specialization.
The kanban routing breaks down if every task fits every profile.
- **No reviewer for important deliverables.** Saves an hour of pipeline time
but ships flaws.

View file

@ -0,0 +1,305 @@
# Tool Matrix — Skills + Toolsets per Role
Maps each role archetype to the Hermes skills it should `always_load` and the
toolsets it needs. Only references skills that ship in the public hermes-agent
repository (under `skills/` or `optional-skills/`). External APIs and CLIs are
called from the terminal toolset; they don't appear in `always_load`.
## Hermes skills relevant to video production
### Visual / rendering skills (`hermes-agent/skills/creative/`)
| Skill | What it does | Best fit for |
|-------|--------------|--------------|
| `ascii-video` | Production pipeline for ASCII art video — generative, audio-reactive, video-to-ASCII | Renderer for ASCII / terminal / retro pixel content; cinematographer for ASCII projects |
| `ascii-art` | Static ASCII art generation | Concept artist for ASCII style frames; secondary tool for ASCII renderer |
| `manim-video` | Manim CE animations — math, algorithms, 3Blue1Brown-style explainers | Renderer for math, algorithm walkthroughs, technical concept explainers |
| `p5js` | p5.js sketches — generative art, shaders, interactive, 3D | Renderer for generative art, particle systems, organic motion, web-canvas content |
| `comfyui` | Generate images, video, audio with ComfyUI workflows (image-to-image, image-to-video, etc.) | image-generator, image-to-video-generator, or general renderer for AI-generated content |
| `touchdesigner-mcp` | Control a running TouchDesigner instance — real-time visuals, audio-reactive installation art, VJ | Renderer for real-time/audio-reactive content; installation art; live performance |
| `blender-mcp` *(optional)* | Control Blender 4.3+ via MCP — 3D modeling, animation, rendering | Renderer for 3D scenes, photoreal environments, character animation |
| `pixel-art` | Pixel art with era palettes (NES, Game Boy, PICO-8) | Renderer for retro game aesthetic; concept artist for pixel-style frames |
| `baoyu-comic` | Knowledge-comic generation (educational, biography, tutorial) | Renderer for comic-style narrative; explainer in panel form |
| `baoyu-infographic` | Infographic generation | Renderer for data-driven explainer scenes |
| `meme-generation` *(optional)* | Generate meme images by overlaying text on templates | Generator for satirical/social content; meme-style stills |
### Design / pre-production skills (`hermes-agent/skills/creative/`)
| Skill | What it does | Best fit for |
|-------|--------------|--------------|
| `claude-design` | Design one-off HTML artifacts (landing, deck, prototype) | Concept artist for product video style frames; storyboarder for UI-heavy content |
| `design-md` | Design markdown docs | Concept artist documenting visual specs |
| `popular-web-designs` | Reference patterns for popular web designs | Concept artist; cinematographer when matching a known UI aesthetic |
| `sketch` | Throwaway HTML mockups (2-3 design variants to compare) | Concept artist exploring directions; storyboarder for UI flows |
| `excalidraw` | Excalidraw-style hand-drawn diagrams | Storyboarder; concept artist for sketch-style frames |
| `architecture-diagram` | Software architecture diagrams | Storyboarder for technical content; explainer scenes about systems |
| `concept-diagrams` *(optional)* | Flat, minimal SVG diagrams (educational visual language; physics, chemistry, math, anatomy, etc.) | Renderer / storyboarder for explainer scenes with clean educational diagrams |
| `pretext` | Mathematical/scientific content authoring | Writer / cinematographer for technical-explainer pretexts |
| `creative-ideation` | Constraint-driven project ideation | Director / cinematographer when the brief is wide-open and needs framing |
| `humanizer` | Strip AI-isms from text, add real voice | Writer / copywriter post-process to avoid AI-tells in scripts and VO copy |
### Audio / media skills (`hermes-agent/skills/creative/` + `skills/media/`)
| Skill | What it does | Best fit for |
|-------|--------------|--------------|
| `songwriting-and-ai-music` | Songwriting craft + Suno prompt patterns | Music supervisor when commissioning a track via Suno |
| `heartmula` | Open-source music generation (Apache-2.0, Suno-like) | Music supervisor generating bespoke tracks without external APIs |
| `songsee` | Spectrograms, mel/chroma/MFCC of audio files | Music supervisor analyzing tracks; foley-designer designing to a beat; editor visualizing a mix |
| `spotify` | Spotify control — play, search, queue, manage playlists | Music supervisor sourcing existing tracks; reference research |
| `youtube-content` | Fetch transcripts + transform to chapters/summaries/posts | Documentary cut, content adaptation, research for explainers |
| `gif-search` | Find existing GIFs | Editor / concept artist sourcing references |
| `gifs` | GIF tooling | Masterer producing GIF deliverables |
### Kanban infrastructure (`hermes-agent/skills/devops/`)
| Skill | What it does | When to load |
|-------|--------------|--------------|
| `kanban-orchestrator` | Decomposition playbook + anti-temptation rules for orchestrator profiles | Director only |
| `kanban-worker` | Pitfalls, examples, edge cases for kanban workers (deeper than auto-injected guidance) | Any profile — load when handling tricky multi-step workflows |
The kanban plugin auto-injects baseline orchestration guidance into every
worker's system prompt — the `kanban_create` fan-out pattern, claim/handoff
lifecycle, and the "decompose, don't execute" rule for orchestrators.
`kanban-orchestrator` and `kanban-worker` are deeper playbooks loaded when a
profile needs them.
## External tools (called from terminal toolset)
These are **not** Hermes skills but external CLIs / APIs that profiles invoke.
They don't appear in `always_load`; instead the role's terminal commands hit
them directly.
| Tool | What it does | Profile that uses it |
|------|--------------|----------------------|
| `ffmpeg` | Video / audio encode, splice, mux | renderer, editor, audio-mixer, masterer |
| `ffprobe` | Inspect media | All media-touching profiles |
| Whisper (CLI or API) | Speech-to-text for captions | captioner |
| Text-to-image API (FAL / Replicate / OpenAI / Midjourney) | Stills generation | image-generator (alternative to local `comfyui`) |
| Image-to-video API (Runway / Kling / Luma / Pika) | Animate stills | image-to-video-generator |
| Text-to-speech API (ElevenLabs / OpenAI TTS / etc.) | Voiceover generation | voice-talent |
| Suno API or web | Track composition (paired with `songwriting-and-ai-music`) | music-supervisor |
| Remotion CLI (`npx remotion render`) | React-based motion graphics | renderer-motion-graphics |
| Manim CE (`manim`) | Math animation render (driven by `manim-video` skill's recipes) | renderer-manim |
| Blender (`blender -b`) | 3D rendering (alternative to `blender-mcp`) | renderer-3d |
| Gemini multimodal / Claude vision | AI review of clips | reviewer, cinematographer, editor |
## Standard toolset configurations per role
### director
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-orchestrator
```
The director's terminal access is conventional but the SOUL.md rules forbid
execution. Audit logs catch violations.
### writer / copywriter
```yaml
toolsets:
- kanban
- file
skills:
always_load:
- kanban-worker
- humanizer # post-process scripts to strip AI-tells
```
No terminal — writers don't need it.
### concept-artist
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-worker
# plus one or more (style-dependent):
# - claude-design (UI / web product video)
# - sketch (quick mockup variants)
# - excalidraw (hand-drawn frames)
# - ascii-art (ASCII style frames)
# - pixel-art (retro/game aesthetic)
# - popular-web-designs (matching known web aesthetic)
# - design-md (text-based design docs)
```
### storyboarder
```yaml
toolsets:
- kanban
- file
skills:
always_load:
- kanban-worker
# one of:
# - excalidraw (sketch storyboards)
# - architecture-diagram (technical/system content)
# - concept-diagrams (educational / scientific content)
```
### cinematographer
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-worker
# the visual skill that matches the project, e.g.:
# - ascii-video (ASCII projects)
# - manim-video (math/explainer)
# - p5js (generative)
# - comfyui (AI-generated visuals)
# - blender-mcp (3D)
# - touchdesigner-mcp (real-time/installation)
```
### renderer (specialized variants)
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-worker
# ONE skill per renderer variant (or empty for external-API renderers):
# - ascii-video (renderer-ascii)
# - manim-video (renderer-manim)
# - p5js (renderer-p5js)
# - comfyui (renderer-comfyui — img/video AI gen)
# - touchdesigner-mcp (renderer-touchdesigner)
# - blender-mcp (renderer-3d)
# - pixel-art (renderer-pixel)
# - baoyu-comic (renderer-comic)
# - meme-generation (renderer-meme)
```
For external-API renderers (image-to-video-generator using Runway, voice-talent
using ElevenLabs, renderer-motion-graphics using Remotion), `always_load` only
contains `kanban-worker` — the role's work is API-driven and the API key +
terminal commands suffice.
For multi-skill renderer setups (rare — usually one variant per skill is
cleaner) use `--skill <name>` on individual `kanban_create` calls to override
which skill loads for that specific task.
### image-generator / image-to-video-generator / voice-talent
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-worker
# for image-generator that drives ComfyUI locally:
# - comfyui
env_required:
# populate based on the chosen API:
- FAL_KEY # or REPLICATE_API_TOKEN, OPENAI_API_KEY for image-gen
- RUNWAY_API_KEY # or KLING_API_KEY, LUMA_API_KEY for image-to-video
- ELEVENLABS_API_KEY # or OPENAI_API_KEY for TTS
```
If the user's setup has ComfyUI installed locally, the `comfyui` skill can
replace the external image-gen API entirely (cheaper, more control, supports
custom workflows for image-to-video too).
### music-supervisor
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-worker
- songsee # spectrograms / audio analysis
# plus (depending on what the project needs):
# - songwriting-and-ai-music (commissioning Suno tracks)
# - heartmula (commissioning open-source local generation)
# - spotify (sourcing existing tracks)
```
### editor / audio-mixer / captioner / masterer
```yaml
toolsets:
- kanban
- terminal
- file
skills:
always_load:
- kanban-worker
```
These are mostly ffmpeg-driven; no special skill needed beyond `kanban-worker`.
For captioner add Whisper invocation patterns to the SOUL.md.
### reviewer / brand-cop
```yaml
toolsets:
- kanban
- terminal # for media inspection
- file
skills:
always_load:
- kanban-worker
env_required:
- OPENROUTER_API_KEY # if using Gemini multimodal review
# or ANTHROPIC_API_KEY if using Claude vision (already required globally)
```
## API key requirements
Track these in the project setup. The setup script should verify each required
key is present in `~/.hermes/.env` (or macOS Keychain) before firing the kanban.
| Service | Env var | Used by |
|---------|---------|---------|
| ElevenLabs | `ELEVENLABS_API_KEY` | voice-talent |
| OpenAI | `OPENAI_API_KEY` | image-generator (DALL-E), voice-talent (TTS) |
| OpenRouter | `OPENROUTER_API_KEY` | reviewer, cinematographer, editor (Gemini multimodal review) |
| FAL | `FAL_KEY` | image-generator (FAL flux models) |
| Replicate | `REPLICATE_API_TOKEN` | image-generator (alternate provider) |
| Runway | `RUNWAY_API_KEY` | image-to-video-generator |
| Kling | `KLING_API_KEY` | image-to-video-generator (alternate) |
| Luma | `LUMA_API_KEY` | image-to-video-generator (alternate) |
| Suno | `SUNO_API_KEY` | music-supervisor (paired with `songwriting-and-ai-music`) |
| Spotify | `SPOTIFY_CLIENT_ID` + `SPOTIFY_CLIENT_SECRET` | music-supervisor (paired with `spotify` skill) |
| Anthropic | `ANTHROPIC_API_KEY` | every Hermes profile (Claude) |
If a key is missing, prompt the user to add it. Storage methods, in order of
preference: macOS Keychain → `~/.hermes/.env` → environment variable.
## Skill version pinning
If a specific skill version is desired, pass it via the per-task
`--skill <name>=<version>` flag. The default is whatever's installed.
## Adding a new skill to the matrix
When a new Hermes-public video skill ships:
1. Add a row to the relevant table at the top of this file
2. If it warrants a specialized renderer variant, add to `role-archetypes.md`
3. Update relevant per-style examples in `examples.md`

View file

@ -0,0 +1,501 @@
#!/usr/bin/env python3
"""
Bootstrap a video production kanban from a structured plan JSON.
Reads a plan.json describing the team + brief, expands templates from
../assets/, and writes a setup.sh that creates Hermes profiles and fires the
initial kanban task.
Profile-config patching, SOUL.md-per-profile, TEAM.md task-graph convention,
and the `hermes kanban create --workspace dir:` initial-task pattern are
adapted from alt-glitch's NousResearch/kanban-video-pipeline.
Usage:
bootstrap_pipeline.py plan.json [--out setup.sh]
The plan.json schema is documented inline below — see the `validate_plan`
function. A minimal example:
{
"title": "Q3 Product Teaser",
"slug": "q3-product-teaser",
"tenant": "q3-product-teaser",
"duration_s": 30,
"aspect": "1:1",
"resolution": "1080x1080",
"fps": 30,
"team": [
{
"profile": "director",
"role": "director",
"toolsets": ["kanban", "terminal", "file"],
"skills": [],
"responsibilities": "...",
"inputs": "brief.md, TEAM.md, taste/",
"outputs": "kanban tasks for the team"
},
...
],
"scenes": [
{"n": 1, "time": "0:00-0:08", "content": "...", "tool": "renderer-ascii"},
...
],
"audio": {"approach": "voiceover + music bed", "vo": "ElevenLabs Lily",
"music": "license-free", "sfx": "n/a"},
"deliverables": [
{"format": "mp4", "resolution": "1080x1080", "notes": "primary"}
],
"api_keys_required": ["ELEVENLABS_API_KEY", "OPENROUTER_API_KEY"],
"brief_extra": {
"concept_one_liner": "...",
"emotional_north_star": "...",
"visual_refs": "...",
"tone": "...",
"brand_constraints": "..."
}
}
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
from pathlib import Path
ASSETS_DIR = Path(__file__).resolve().parent.parent / "assets"
def load_template(name: str) -> str:
    """Return the text of the template file ``name`` under ``ASSETS_DIR``.

    The file is decoded as UTF-8 explicitly instead of with the platform's
    locale encoding, so templates containing non-ASCII characters load the
    same way on every machine.

    Raises:
        OSError: if the template cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: if the template is not valid UTF-8.
    """
    return (ASSETS_DIR / name).read_text(encoding="utf-8")
# Profile names: lowercase alphanumerics, hyphens and underscores, 1-64 chars.
PROFILE_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
# Project slugs: lowercase alphanumerics and hyphens, at least two characters.
SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9-]+$")
# API key names are written into the generated shell script, so they must be
# plain environment-variable identifiers.
_ENV_VAR_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


def _is_str_list(value) -> bool:
    """Return True when ``value`` is a list whose items are all strings."""
    return isinstance(value, list) and all(isinstance(x, str) for x in value)


def _validate_team_member(i: int, member, seen_profiles: set) -> list[str]:
    """Validate one ``team`` entry; records its profile in ``seen_profiles``."""
    if not isinstance(member, dict):
        return [f"team[{i}] must be an object"]

    errors = []
    for k in ("profile", "role", "toolsets", "skills", "responsibilities"):
        if k not in member:
            errors.append(f"team[{i}] missing {k}")

    if "profile" in member:
        profile = member["profile"]
        # fullmatch (not match): "$" alone would accept a trailing newline,
        # and the profile name ends up inside shell commands.
        if not isinstance(profile, str) or not PROFILE_NAME_RE.fullmatch(profile):
            errors.append(
                f"team[{i}].profile {profile!r} must match "
                f"[a-z0-9][a-z0-9_-]{{0,63}} per Hermes profile rules"
            )
        if isinstance(profile, str):
            if profile in seen_profiles:
                errors.append(f"team[{i}].profile {profile!r} is duplicated")
            seen_profiles.add(profile)

    # Toolsets / skills must be lists of strings, not bare strings.
    if "toolsets" in member and not _is_str_list(member["toolsets"]):
        errors.append(f"team[{i}].toolsets must be a list of strings")
    if "skills" in member and not _is_str_list(member["skills"]):
        errors.append(f"team[{i}].skills must be a list of strings")
    return errors


def validate_plan(plan: dict) -> list[str]:
    """Return a list of validation error strings; empty list = valid.

    Checks that the required top-level keys exist, that ``team`` is a
    non-empty list of well-formed members including a ``director`` role, that
    ``slug`` is well-formed, and that the containers the renderers index into
    (``scenes``, ``audio``, ``deliverables``, ``brief_extra``,
    ``api_keys_required``) have the expected shape. Malformed input is
    reported as an error string; this function does not raise on bad shapes.
    """
    if not isinstance(plan, dict):
        return ["plan must be a JSON object"]

    errors: list[str] = []
    required_top = ["title", "slug", "tenant", "duration_s", "aspect",
                    "resolution", "fps", "team", "scenes", "audio",
                    "deliverables"]
    for k in required_top:
        if k not in plan:
            errors.append(f"missing required key: {k}")

    if "team" in plan:
        team = plan["team"]
        if not isinstance(team, list) or not team:
            errors.append("team must be a non-empty list")
        else:
            roles = [t.get("role") for t in team if isinstance(t, dict)]
            if "director" not in roles:
                errors.append("team must include a director role")
            seen_profiles: set = set()
            for i, t in enumerate(team):
                errors.extend(_validate_team_member(i, t, seen_profiles))

    if "slug" in plan:
        slug = plan["slug"]
        if not isinstance(slug, str) or not SLUG_RE.fullmatch(slug):
            errors.append("slug must be lowercase, hyphenated, "
                          "starting with [a-z0-9]")

    if "scenes" in plan:
        scenes = plan["scenes"]
        if not isinstance(scenes, list) or not all(
                isinstance(s, dict) for s in scenes):
            errors.append("scenes must be a list of objects")

    if "audio" in plan and not isinstance(plan["audio"], dict):
        errors.append("audio must be an object")

    if "deliverables" in plan:
        deliverables = plan["deliverables"]
        if (not isinstance(deliverables, list) or not deliverables
                or not all(isinstance(d, dict) for d in deliverables)):
            errors.append("deliverables must be a non-empty list of objects")
        else:
            # The brief renderer reads format/resolution of the first two
            # deliverables directly, so they must be present there.
            for i, d in enumerate(deliverables[:2]):
                for k in ("format", "resolution"):
                    if k not in d:
                        errors.append(f"deliverables[{i}] missing {k}")

    if "brief_extra" in plan and not isinstance(plan["brief_extra"], dict):
        errors.append("brief_extra must be an object")

    if "api_keys_required" in plan:
        keys = plan["api_keys_required"]
        if not isinstance(keys, list):
            errors.append("api_keys_required must be a list of strings")
        else:
            for i, key in enumerate(keys):
                if not isinstance(key, str) or not _ENV_VAR_RE.fullmatch(key):
                    errors.append(
                        f"api_keys_required[{i}] {key!r} is not a valid "
                        f"environment variable name"
                    )
    return errors
def render_brief(plan: dict) -> str:
    """Render brief.md from the plan.

    Loads ``brief.md.tmpl``, substitutes every ``{{NAME}}`` placeholder it
    knows about in one pass, then swaps the template's two placeholder scene
    rows for one table row per entry in ``plan["scenes"]``.

    Args:
        plan: A plan dict (see ``validate_plan``). ``brief_extra`` is optional.

    Returns:
        The rendered markdown text.
    """
    tmpl = load_template("brief.md.tmpl")
    extra = plan.get("brief_extra") or {}
    audio = plan["audio"]

    # Scene table rows
    scene_rows = [
        f"| {s.get('n', '?')} | {s.get('time', '?')} | "
        f"{s.get('content', '')} | {s.get('tool', '')} | "
        f"{s.get('audio', '')} | {s.get('notes', '')} |"
        for s in plan["scenes"]
    ]
    scene_table = "\n".join(scene_rows) if scene_rows else "_(none yet)_"

    # NOTE(review): only the first two deliverables reach the output, via the
    # PRIMARY_* / ALT_*_1 placeholders below. A full deliverables table used
    # to be built here but was never substituted anywhere — confirm whether
    # brief.md.tmpl should gain a placeholder for it.
    deliverables = plan["deliverables"]
    primary = deliverables[0] if deliverables else {}
    alt = deliverables[1] if len(deliverables) > 1 else None

    replacements = {
        "TITLE": plan["title"],
        "SLUG": plan["slug"],
        "TENANT": plan["tenant"],
        "WORKSPACE": f"~/projects/video-pipeline/{plan['slug']}",
        "ONE_LINE_PITCH": extra.get("concept_one_liner", "_(TBD)_"),
        "EMOTIONAL_NORTH_STAR": extra.get("emotional_north_star", "_(TBD)_"),
        "DURATION_S": str(plan["duration_s"]),
        "ASPECT": plan["aspect"],
        "RESOLUTION": plan["resolution"],
        "FPS": str(plan["fps"]),
        "PLATFORMS": extra.get("platforms", "_(TBD)_"),
        "DEADLINE": extra.get("deadline", "_(none)_"),
        "QUALITY_BAR": extra.get("quality_bar", "polished"),
        "VISUAL_REFS": extra.get("visual_refs", "_(none)_"),
        "TONE": extra.get("tone", "_(TBD)_"),
        "BRAND_CONSTRAINTS": extra.get("brand_constraints", "_(none)_"),
        "AESTHETIC_RULES": extra.get("aesthetic_rules", "_(TBD)_"),
        "AUDIO_APPROACH": audio.get("approach", "_(TBD)_"),
        "VO_DETAILS": audio.get("vo", "_(n/a)_"),
        "MUSIC_DETAILS": audio.get("music", "_(n/a)_"),
        "SFX_DETAILS": audio.get("sfx", "_(n/a)_"),
        "PRIMARY_FORMAT": primary.get("format", "?"),
        "PRIMARY_RES": primary.get("resolution", "?"),
        "ALT_FORMAT_1": (alt.get("format", "?")
                         if alt is not None else "_(none)_"),
        "ALT_RES_1": alt.get("resolution", "?") if alt is not None else "",
        "ALT_NOTES_1": alt.get("notes", "") if alt is not None else "",
        "API_KEYS_REQUIRED": ", ".join(plan.get("api_keys_required") or []) or "none",
        "EXT_DEPS": extra.get("ext_deps", "ffmpeg, Python 3.11+"),
        "SOURCE_ASSETS": extra.get("source_assets", "_(none)_"),
    }

    # True single-pass substitution: a value that itself contains "{{...}}"
    # is inserted verbatim and never re-expanded. Unknown placeholders are
    # left untouched.
    out = re.sub(
        r"\{\{([A-Z0-9_]+)\}\}",
        lambda m: str(replacements.get(m.group(1), m.group(0))),
        tmpl,
    )

    # Scene table: replace the two placeholder rows in the template. The
    # separator between the two timestamps is matched loosely (\W*) so the
    # row is found whether the template writes it with a hyphen, an en dash,
    # or nothing. The replacement is a callable so backslashes in scene text
    # are inserted literally instead of being parsed as regex escapes.
    out = re.sub(
        r"\|\s*1\s*\|\s*0:00\W*0:0X.+?\n\|\s*2\s*\|.+?\n",
        lambda _m: scene_table + "\n",
        out, flags=re.DOTALL,
    )
    return out
def render_team_md(plan: dict) -> str:
    """Render TEAM.md from the team list + scene → tool mapping.

    The output lists every team member, then a conventional task graph
    (director → optional cinematographer / music-supervisor → one task per
    scene → optional voice-talent, audio-mixer, editor, captioner, reviewer),
    and finally the workspace arguments every ``kanban_create`` call must use.

    A task whose usual upstream roles are all absent depends on ``T0`` rather
    than being emitted with an empty parent list.

    Args:
        plan: A plan dict with ``title``, ``slug``, ``tenant``, ``team`` and
            ``scenes``.

    Returns:
        The TEAM.md text (no trailing newline).
    """
    lines = [f"# Team & Task Graph — {plan['title']}", "", "## Team", ""]
    for t in plan["team"]:
        skills = (
            f"loads `{', '.join(t['skills'])}`"
            if t["skills"] else "no skills required"
        )
        lines.append(
            f"- `{t['profile']}` — {t['responsibilities']} ({skills})"
        )
    lines.extend(["", "## Task Graph", "", "```"])

    # Build a simple task graph based on conventions. If several members
    # share a role, the last one listed owns that role's task.
    profiles_by_role = {t["role"]: t["profile"] for t in plan["team"]}
    director = profiles_by_role.get("director", "director")
    lines.append(f"T0 {director} — decompose")
    next_id = 1

    parents_for_renderer: list[str] = ["T0"]
    if "cinematographer" in profiles_by_role:
        cid = f"T{next_id}"
        lines.append(
            f"{cid:5} {profiles_by_role['cinematographer']} — visual spec for all scenes (parent: T0)"
        )
        parents_for_renderer = [cid]
        next_id += 1

    ms_id = None
    if "music-supervisor" in profiles_by_role:
        ms_id = f"T{next_id}"
        lines.append(
            f"{ms_id:5} {profiles_by_role['music-supervisor']} — track analysis + beats.json (parent: T0)"
        )
        next_id += 1

    # Scenes: each scene's "tool" names either a role or a profile.
    scene_ids = []
    for s in plan["scenes"]:
        cid = f"T{next_id}"
        renderer_profile = s.get("tool") or "renderer"
        for t in plan["team"]:
            if renderer_profile in (t["role"], t["profile"]):
                renderer_profile = t["profile"]
                break
        parents = parents_for_renderer + ([ms_id] if ms_id else [])
        lines.append(
            f"{cid:5} {renderer_profile} — scene {s.get('n', '?')}: "
            f"{s.get('content', '')[:50]} (parents: {', '.join(parents)})"
        )
        scene_ids.append(cid)
        next_id += 1

    # VO + audio mix
    vo_id = None
    if "voice-talent" in profiles_by_role:
        vo_id = f"T{next_id}"
        lines.append(f"{vo_id:5} {profiles_by_role['voice-talent']} — narration (parent: T0)")
        next_id += 1

    am_id = None
    if "audio-mixer" in profiles_by_role:
        am_id = f"T{next_id}"
        # With no music or VO task upstream, hang the mix off the root task.
        am_parents = [p for p in [ms_id, vo_id] if p] or ["T0"]
        lines.append(
            f"{am_id:5} {profiles_by_role['audio-mixer']} — mix audio (parents: {', '.join(am_parents)})"
        )
        next_id += 1

    # Editor
    ed_id = None
    if "editor" in profiles_by_role:
        ed_id = f"T{next_id}"
        ed_parents = scene_ids + [
            p for p in [am_id, vo_id, ms_id] if p and p not in scene_ids
        ]
        if not ed_parents:
            # No scenes and no audio tasks: depend on the root task.
            ed_parents = ["T0"]
        lines.append(
            f"{ed_id:5} {profiles_by_role['editor']} — assemble + mux (parents: {', '.join(ed_parents)})"
        )
        next_id += 1

    # Captioner (only meaningful downstream of an editor task)
    last = ed_id
    if "captioner" in profiles_by_role and ed_id:
        cap_id = f"T{next_id}"
        lines.append(
            f"{cap_id:5} {profiles_by_role['captioner']} — SRT + burn (parent: {ed_id})"
        )
        next_id += 1
        last = cap_id

    # Reviewer: gates on the last assembly step, if there is one.
    if "reviewer" in profiles_by_role and last:
        rv_id = f"T{next_id}"
        lines.append(
            f"{rv_id:5} {profiles_by_role['reviewer']} — final QA (parent: {last})"
        )
    lines.append("```")

    lines.extend([
        "",
        "## Per-task workspace requirement",
        "",
        "All `kanban_create` calls MUST pass:",
        "```",
        'workspace_kind="dir"',
        f'workspace_path="$HOME/projects/video-pipeline/{plan["slug"]}"',
        f'tenant="{plan["tenant"]}"',
        "```",
    ])
    return "\n".join(lines)
def _scene_dir_label(n) -> str:
    """Return a filesystem-safe label for scene number ``n``."""
    if isinstance(n, int):
        return f"{n:02d}"
    text = str(n).strip()
    if text.isascii() and text.isdigit():
        return f"{int(text):02d}"
    # Anything else (e.g. "2b") is kept but restricted to safe characters.
    return re.sub(r"[^A-Za-z0-9_-]+", "-", text) or "unknown"


def render_setup_sh(plan: dict, brief_md: str, team_md: str) -> str:
    """Render setup.sh from the plan.

    Expands ``setup.sh.tmpl`` with generated shell fragments: API key checks,
    per-scene directories, profile creation/configuration commands, one
    SOUL.md heredoc per profile, and the already-rendered brief and team
    documents.

    Args:
        plan: A plan dict (see ``validate_plan``).
        brief_md: Rendered brief.md contents.
        team_md: Rendered TEAM.md contents.

    Returns:
        The text of the setup script.
    """
    import shlex  # local import: only this function emits shell arguments

    tmpl = load_template("setup.sh.tmpl")

    # API key checks
    key_checks = []
    for key in plan.get("api_keys_required") or []:
        quoted = shlex.quote(str(key))
        key_checks.append(f"check_key {quoted} hermes {quoted} || exit 1")
    key_checks_str = "\n".join(key_checks) if key_checks else "# (no API keys required)"

    # Scene dirs. A scene without "n" uses its 1-based position; a string
    # "n" no longer breaks the integer format spec.
    scene_dir_lines = [
        f'mkdir -p "$WORKSPACE/scenes/scene-{_scene_dir_label(s.get("n", pos))}"/checkpoints'
        for pos, s in enumerate(plan["scenes"], start=1)
    ]
    scene_dirs = "\n".join(scene_dir_lines)

    # Profile create
    profile_creates = [
        f"hermes profile create {shlex.quote(t['profile'])} --clone 2>/dev/null || true"
        for t in plan["team"]
    ]

    # Profile config — JSON arrays are passed as single shell words. They are
    # quoted with shlex.quote rather than Python repr(), which is not shell
    # quoting and breaks as soon as a value contains a single quote.
    profile_configs = [
        "configure_profile "
        f"{shlex.quote(t['profile'])} "
        f"{shlex.quote(json.dumps(t['toolsets']))} "
        f"{shlex.quote(json.dumps(t['skills']))}"
        for t in plan["team"]
    ]

    # SOUL writes — one quoted heredoc per profile
    soul_writes = []
    for t in plan["team"]:
        soul_writes.append(
            f'cat > "$HOME/.hermes/profiles/{t["profile"]}/SOUL.md" <<\'SOUL_EOF\'\n'
            f"{render_soul_md(t, plan)}\n"
            f"SOUL_EOF\n"
            f'echo " ✓ SOUL.md for {t["profile"]}"'
        )

    # Taste writes (placeholder; real content optional)
    taste_writes = (
        'cat > "$WORKSPACE/taste/brand-guide.md" <<\'TASTE_EOF\'\n'
        '# Brand Guide\n\n'
        '_(Populate with project-specific colors, typography, motion rules)_\n'
        'TASTE_EOF\n'
        'cat > "$WORKSPACE/taste/emotional-dna.md" <<\'DNA_EOF\'\n'
        '# Emotional DNA\n\n'
        '_(What this piece should FEEL like — populate from the brief.)_\n'
        'DNA_EOF'
    )

    # Asset copies — leave empty by default; user fills in
    asset_copies = "# Add cp/rsync commands here for any provided assets"

    # NOTE(review): TITLE and TENANT are inserted verbatim; whether that is
    # safe depends on where setup.sh.tmpl uses them — confirm in the template.
    replacements = {
        "TITLE": plan["title"],
        "SLUG": plan["slug"],
        "TENANT": plan["tenant"],
        "WORKSPACE": f"~/projects/video-pipeline/{plan['slug']}",
        "KEY_CHECKS": key_checks_str,
        "SCENE_DIRS": scene_dirs,
        "PROFILE_CREATE_COMMANDS": "\n".join(profile_creates),
        "PROFILE_CONFIG_COMMANDS": "\n".join(profile_configs),
        "SOUL_WRITES": "\n".join(soul_writes),
        "BRIEF_CONTENTS": brief_md,
        "TEAM_CONTENTS": team_md,
        "TASTE_WRITES": taste_writes,
        "ASSET_COPIES": asset_copies,
    }
    # Single pass: placeholders that happen to appear inside inserted content
    # (e.g. "{{SLUG}}" written in a brief) are not expanded a second time.
    return re.sub(
        r"\{\{([A-Z0-9_]+)\}\}",
        lambda m: str(replacements.get(m.group(1), m.group(0))),
        tmpl,
    )
def render_soul_md(team_member: dict, plan: dict) -> str:
    """Fill ``soul.md.tmpl`` for one team member.

    Every profile receives the shared rules; the ``director`` role gets four
    additional orchestration rules. Placeholders are substituted one after
    another in a fixed order.
    """
    text = load_template("soul.md.tmpl")
    role = team_member["role"]

    rule_lines = [
        "- **Read the brief and team graph** before doing anything else.",
        "- **Pass `workspace_kind=\"dir\"` and `workspace_path` on every "
        "`kanban_create` call.** This keeps the team in one shared workspace.",
        f"- **Use tenant `{plan['tenant']}`** on every kanban call.",
        "- **Write outputs to predictable paths.** Other profiles depend on "
        "your filename conventions.",
        "- **Emit heartbeats** during long-running work. Renderers should "
        "report frame counts; editors should report assembly progress.",
    ]
    if role == "director":
        rule_lines += [
            "- **Do not execute the work yourself.** For every concrete task, "
            "create a kanban task and assign it to the appropriate profile.",
            "- **Decompose, route, comment, approve — that's the whole job.**",
            "- **Read TEAM.md** for the canonical task graph. Do not invent "
            "new roles unless the brief truly demands it.",
            "- **Load the `kanban-orchestrator` skill** for the deeper "
            "decomposition playbook beyond the auto-injected baseline.",
        ]
    # Each rule ends with a newline, including the last one.
    common_rules = "".join(rule + "\n" for rule in rule_lines)

    common_commands = "\n".join([
        "```bash",
        "# Inspect a clip",
        "ffprobe -v quiet -show_entries format=duration -show_entries "
        "stream=codec_name,width,height,r_frame_rate <file.mp4>",
        "",
        "# Extract a frame for QA",
        "ffmpeg -y -i <input.mp4> -vf \"select='eq(n,30)'\" -vsync vfr <out.png>",
        "```",
    ])

    member_skills = team_member["skills"]
    skills_text = ", ".join(member_skills) if member_skills else "(none)"

    # Order matters: substitutions run sequentially, exactly in this order.
    substitutions = [
        ("{{ROLE_NAME}}", role),
        ("{{ROLE_RESPONSIBILITIES}}", team_member["responsibilities"]),
        ("{{INPUTS_READ}}", team_member.get("inputs", "_(see brief)_")),
        ("{{OUTPUTS_PRODUCED}}", team_member.get("outputs", "_(see brief)_")),
        ("{{TOOLSETS}}", ", ".join(team_member["toolsets"])),
        ("{{SKILLS}}", skills_text),
        ("{{EXTERNAL_TOOLS}}",
         team_member.get("external_tools", "ffmpeg, ffprobe (via terminal)")),
        ("{{ROLE_RULES}}",
         team_member.get("role_rules", "_(see TEAM.md and brief.md)_")),
        ("{{COMMON_RULES}}", common_rules),
        ("{{COMMON_COMMANDS}}", common_commands),
    ]
    for placeholder, value in substitutions:
        text = text.replace(placeholder, value)
    return text
def main():
    """CLI entry point: validate plan.json and write setup.sh.

    Exits with status 2 when the plan cannot be read, is not valid JSON, or
    fails ``validate_plan``. Optionally also writes brief.md and TEAM.md.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("plan_json", help="Path to plan.json")
    ap.add_argument("--out", default="setup.sh",
                    help="Output path for setup.sh (default: ./setup.sh)")
    ap.add_argument("--brief-out", default=None,
                    help="Write brief.md alongside (default: skipped)")
    ap.add_argument("--team-out", default=None,
                    help="Write TEAM.md alongside (default: skipped)")
    args = ap.parse_args()

    # Report unreadable / malformed input as a message, not a traceback.
    # ValueError covers both JSONDecodeError and UnicodeDecodeError.
    try:
        plan = json.loads(Path(args.plan_json).read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        print(f"Could not load plan {args.plan_json}: {exc}", file=sys.stderr)
        sys.exit(2)
    if not isinstance(plan, dict):
        print("Plan validation failed:", file=sys.stderr)
        print(" - plan must be a JSON object", file=sys.stderr)
        sys.exit(2)

    errors = validate_plan(plan)
    if errors:
        print("Plan validation failed:", file=sys.stderr)
        for e in errors:
            print(f" - {e}", file=sys.stderr)
        sys.exit(2)

    brief = render_brief(plan)
    team = render_team_md(plan)
    setup = render_setup_sh(plan, brief, team)

    # The generated files contain non-ASCII characters, so write UTF-8
    # explicitly rather than depending on the locale encoding.
    Path(args.out).write_text(setup, encoding="utf-8")
    os.chmod(args.out, 0o755)
    print(f"Wrote {args.out}")
    if args.brief_out:
        Path(args.brief_out).write_text(brief, encoding="utf-8")
        print(f"Wrote {args.brief_out}")
    if args.team_out:
        Path(args.team_out).write_text(team, encoding="utf-8")
        print(f"Wrote {args.team_out}")


if __name__ == "__main__":
    main()

View file

@ -0,0 +1,195 @@
#!/usr/bin/env python3
"""
Monitor a running video-production kanban. Polls `hermes kanban list` and
`events` for a tenant and surfaces issues (stuck tasks, missing heartbeats,
repeated retries, dependency deadlocks).
Usage:
monitor.py --tenant <project-slug> [--interval 30]
Outputs a periodic snapshot to stdout. Sends alerts via stderr when issues
are detected. Designed to run alongside the kanban — kill with Ctrl-C when
you're satisfied (or scripted to stop on completion).
This is best-effort observability. It does not auto-restart tasks; intervention
decisions should remain human/AI-overseen.
"""
from __future__ import annotations
import argparse
import json
import shutil
import subprocess
import sys
import time
from collections import defaultdict
from datetime import datetime, timedelta
def hermes_available() -> bool:
    """Report whether a ``hermes`` executable can be found on PATH."""
    located = shutil.which("hermes")
    return located is not None
def kanban_list(tenant: str) -> list[dict]:
    """Return the tenant's tasks as a list of dicts.

    Tries ``hermes kanban list --json`` first. If that fails or does not
    print a JSON array, falls back to parsing the plain-text listing, where
    each task row is expected to look like ``t_<id> <status> <assignee>
    <title...>``; the timing fields are then ``None``.

    Returns an empty list when the ``hermes`` executable cannot be run.
    """
    base_cmd = ["hermes", "kanban", "list", "--tenant", tenant]
    try:
        out = subprocess.run(
            base_cmd + ["--json"],
            capture_output=True, text=True, check=False,
        )
        if out.returncode == 0 and out.stdout.strip().startswith("["):
            try:
                return json.loads(out.stdout)
            except json.JSONDecodeError:
                pass  # not actually JSON; use the textual listing instead

        # Fallback: textual parse of `hermes kanban list`
        out = subprocess.run(
            base_cmd,
            capture_output=True, text=True, check=False,
        )
    except OSError:
        # The CLI is missing or not executable; retrying the same binary in
        # the fallback would only raise again.
        return []

    rows = []
    for line in out.stdout.splitlines():
        parts = line.split()
        # Task rows are recognised by their "t_" id. Headers, comments and
        # blank lines never match, so no keyword filter is needed (filtering
        # on "STATUS" would drop tasks whose title contains that word).
        if len(parts) < 4 or not parts[0].startswith("t_"):
            continue
        rows.append({
            "id": parts[0],
            "status": parts[1],
            "assignee": parts[2],
            "title": " ".join(parts[3:]),
            "started_at": None,
            "heartbeat_at": None,
            "max_runtime_s": None,
        })
    return rows
def kanban_show(task_id: str) -> dict | None:
out = subprocess.run(
["hermes", "kanban", "show", task_id, "--json"],
capture_output=True, text=True, check=False,
)
if out.returncode != 0:
return None
try:
return json.loads(out.stdout)
except json.JSONDecodeError:
return None
def _parse_timestamp(value):
    """Parse an ISO-8601 timestamp; return ``None`` if absent or unparseable.

    A trailing ``Z`` is read as UTC (an aware datetime). Timestamps without
    any offset stay naive.
    """
    if not value:
        return None
    text = str(value).strip()
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(text)
    except ValueError:
        return None


def _seconds_since(moment) -> float:
    """Return seconds elapsed since ``moment``.

    ``datetime.now(tz)`` yields an aware "now" for aware timestamps and a
    naive local "now" when ``tz`` is ``None``, so the subtraction is always
    between like kinds.
    """
    return (datetime.now(moment.tzinfo) - moment).total_seconds()


def detect_issues(tasks: list[dict], *, heartbeat_timeout_s: float = 120.0,
                  retry_threshold: int = 2) -> list[str]:
    """Return a list of issue strings, one per concern.

    Three checks are made, reported in this order:

    * ``STUCK`` — a running task whose last heartbeat is older than
      ``heartbeat_timeout_s``.
    * ``OVERTIME`` — a running task that has run longer than its
      ``max_runtime_s``.
    * ``FLAPPING`` — any task retried at least ``retry_threshold`` times.

    Status is compared case-insensitively. Tasks with missing or unparseable
    fields are skipped for the check that needs them.
    """
    issues: list[str] = []
    running = [t for t in tasks
               if str(t.get("status", "?")).lower() == "running"]

    # Stuck tasks: running with no recent heartbeat
    for t in running:
        hb_dt = _parse_timestamp(t.get("heartbeat_at"))
        if hb_dt is None:
            continue
        silent_for = _seconds_since(hb_dt)
        if silent_for > heartbeat_timeout_s:
            issues.append(
                f"STUCK: {t.get('id', '?')} ({t.get('assignee', '?')}) — "
                f"no heartbeat in {silent_for:.0f}s"
            )

    # Tasks exceeding max_runtime
    for t in running:
        started_dt = _parse_timestamp(t.get("started_at"))
        max_rt = t.get("max_runtime_s")
        if started_dt is None or not max_rt:
            continue
        try:
            cap = float(max_rt)
        except (TypeError, ValueError):
            continue
        elapsed = _seconds_since(started_dt)
        if elapsed > cap:
            issues.append(
                f"OVERTIME: {t.get('id', '?')} ({t.get('assignee', '?')}) — "
                f"running {elapsed:.0f}s, cap was {max_rt}s"
            )

    # Repeated retries
    for t in tasks:
        try:
            retries = int(t.get("retries") or 0)
        except (TypeError, ValueError):
            continue
        if retries >= retry_threshold:
            issues.append(
                f"FLAPPING: {t.get('id', '?')} ({t.get('assignee', '?')}) — "
                f"retried {retries}× — fix root cause before next run"
            )
    return issues
def snapshot(tenant: str) -> tuple[list[dict], list[str]]:
    """Fetch the tenant's current tasks and the issues detected among them."""
    current_tasks = kanban_list(tenant)
    return current_tasks, detect_issues(current_tasks)
def print_snapshot(tasks: list[dict], issues: list[str]) -> None:
    """Print a timestamped task summary to stdout and issues to stderr.

    The summary line shows the total and a per-status count (statuses are
    lower-cased). Each task is then listed with a status glyph, id, assignee
    and the first 60 characters of its title. Missing or ``None`` fields are
    shown as ``?`` (empty for the title).
    """
    # Every known status gets a distinct, non-empty marker so done, running
    # and failed rows can be told apart at a glance.
    glyphs = {"done": "✓", "running": "▶", "ready": "·", "failed": "✗"}

    counts = defaultdict(int)
    for t in tasks:
        counts[str(t.get("status", "?")).lower()] += 1
    print(f"\n[{datetime.now().strftime('%H:%M:%S')}] "
          f"Total: {len(tasks)} | "
          + " | ".join(f"{k}: {v}" for k, v in sorted(counts.items())))

    for t in tasks:
        bar = glyphs.get(str(t.get("status", "")).lower(), "?")
        # JSON output may carry null values; str() keeps the width specs
        # from failing on None.
        task_id = str(t.get("id") or "?")
        assignee = str(t.get("assignee") or "?")
        title = str(t.get("title") or "")
        print(f" {bar} {task_id:14} {assignee:20} {title[:60]}")

    if issues:
        print("\n ⚠ ISSUES:", file=sys.stderr)
        for i in issues:
            print(f" {i}", file=sys.stderr)
def main():
    """CLI entry point: print one snapshot, or poll until interrupted.

    Exits 1 when the ``hermes`` CLI is not on PATH. With ``--once`` the exit
    status is 0 when no issues were found and 2 otherwise.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--tenant", required=True,
                    help="Project tenant slug to monitor")
    ap.add_argument("--interval", type=int, default=30,
                    help="Poll interval in seconds (default: 30)")
    ap.add_argument("--once", action="store_true",
                    help="Print one snapshot and exit (no polling loop)")
    args = ap.parse_args()

    # A zero interval would poll in a tight loop and a negative one makes
    # time.sleep raise; reject both up front. Irrelevant with --once.
    if not args.once and args.interval < 1:
        ap.error("--interval must be at least 1 second")

    if not hermes_available():
        print("ERROR: 'hermes' CLI not found in PATH", file=sys.stderr)
        sys.exit(1)

    if args.once:
        tasks, issues = snapshot(args.tenant)
        print_snapshot(tasks, issues)
        sys.exit(0 if not issues else 2)

    print(f"Monitoring tenant '{args.tenant}' every {args.interval}s. "
          "Ctrl-C to exit.")
    try:
        while True:
            tasks, issues = snapshot(args.tenant)
            print_snapshot(tasks, issues)
            time.sleep(args.interval)
    except KeyboardInterrupt:
        print("\nStopped.")


if __name__ == "__main__":
    main()

View file

@ -1258,6 +1258,10 @@ class AIAgent:
# after each API call. Accessed by /usage slash command.
self._rate_limit_state: Optional["RateLimitState"] = None
# OpenRouter response cache hit counter — incremented when
# X-OpenRouter-Cache-Status: HIT is seen in streaming response headers.
self._or_cache_hits: int = 0
# Centralized logging — agent.log (INFO+) and errors.log (WARNING+)
# both live under ~/.hermes/logs/. Idempotent, so gateway mode
# (which creates a new AIAgent per message) won't duplicate handlers.
@ -1421,11 +1425,8 @@ class AIAgent:
client_kwargs["args"] = self.acp_args
effective_base = base_url
if base_url_host_matches(effective_base, "openrouter.ai"):
client_kwargs["default_headers"] = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
from agent.auxiliary_client import build_or_headers
client_kwargs["default_headers"] = build_or_headers()
elif base_url_host_matches(effective_base, "api.routermint.com"):
client_kwargs["default_headers"] = _routermint_headers()
elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
@ -1473,17 +1474,49 @@ class AIAgent:
_env_hint = _pcfg.api_key_env_vars[0]
except Exception:
pass
# --- Init-time fallback (#17929) ---
_fb_entries = []
if isinstance(fallback_model, list):
_fb_entries = [
f for f in fallback_model
if isinstance(f, dict) and f.get("provider") and f.get("model")
]
elif isinstance(fallback_model, dict) and fallback_model.get("provider") and fallback_model.get("model"):
_fb_entries = [fallback_model]
_fb_resolved = False
for _fb in _fb_entries:
_fb_client, _fb_model = resolve_provider_client(
_fb["provider"], model=_fb["model"], raw_codex=True,
explicit_base_url=_fb.get("base_url"),
explicit_api_key=_fb.get("api_key"),
)
if _fb_client is not None:
self.provider = _fb["provider"]
self.model = _fb_model or _fb["model"]
self._fallback_activated = True
client_kwargs = {
"api_key": _fb_client.api_key,
"base_url": str(_fb_client.base_url),
}
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers:
client_kwargs["default_headers"] = dict(_fb_client._default_headers)
_fb_resolved = True
break
if not _fb_resolved:
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`."
)
if not getattr(self, "_fallback_activated", False):
# No provider configured — reject with a clear message.
raise RuntimeError(
f"Provider '{_explicit}' is set in config.yaml but no API key "
f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`."
"No LLM provider configured. Run `hermes model` to "
"select a provider, or run `hermes setup` for first-time "
"configuration."
)
# No provider configured — reject with a clear message.
raise RuntimeError(
"No LLM provider configured. Run `hermes model` to "
"select a provider, or run `hermes setup` for first-time "
"configuration."
)
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
@ -1536,7 +1569,7 @@ class AIAgent:
else:
self._fallback_chain = []
self._fallback_index = 0
self._fallback_activated = False
self._fallback_activated = getattr(self, "_fallback_activated", False)
# Legacy attribute kept for backward compat (tests, external callers)
self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None
if self._fallback_chain and not self.quiet_mode:
@ -4548,6 +4581,28 @@ class AIAgent:
"""Return the last captured RateLimitState, or None."""
return self._rate_limit_state
def _check_openrouter_cache_status(self, http_response: Any) -> None:
"""Read X-OpenRouter-Cache-Status from response headers and log it.
Increments ``_or_cache_hits`` on HIT so callers can report savings.
"""
if http_response is None:
return
headers = getattr(http_response, "headers", None)
if not headers:
return
try:
status = headers.get("x-openrouter-cache-status")
if not status:
return
if status.upper() == "HIT":
self._or_cache_hits += 1
logger.info("OpenRouter response cache HIT (total: %d)", self._or_cache_hits)
else:
logger.debug("OpenRouter response cache %s", status.upper())
except Exception:
pass # Never let header parsing break the agent loop
def get_activity_summary(self) -> dict:
"""Return a snapshot of the agent's current activity for diagnostics.
@ -6125,10 +6180,10 @@ class AIAgent:
return True
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers
if base_url_host_matches(base_url, "openrouter.ai"):
self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
self._client_kwargs["default_headers"] = build_or_headers()
elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
elif base_url_host_matches(base_url, "api.routermint.com"):
@ -6748,6 +6803,9 @@ class AIAgent:
# response via .response before any chunks are consumed.
self._capture_rate_limits(getattr(stream, "response", None))
# Log OpenRouter response cache status when present.
self._check_openrouter_cache_status(getattr(stream, "response", None))
content_parts: list = []
tool_calls_acc: dict = {}
tool_gen_notified: set = set()

View file

@ -46,6 +46,7 @@ AUTHOR_MAP = {
"leone.parise@gmail.com": "leoneparise",
"teknium@nousresearch.com": "teknium1",
"127238744+teknium1@users.noreply.github.com": "teknium1",
"159539633+MottledShadow@users.noreply.github.com": "MottledShadow",
"aludwin+gh@gmail.com": "adamludwin",
"2093036+exiao@users.noreply.github.com": "exiao",
"rylen.anil@gmail.com": "rylena",
@ -67,6 +68,7 @@ AUTHOR_MAP = {
"274096618+hermes-agent-dhabibi@users.noreply.github.com": "dhabibi",
"dejie.guo@gmail.com": "JayGwod",
"maxence@groine.fr": "MaxyMoos",
"61830395+leprincep35700@users.noreply.github.com": "leprincep35700",
# OpenViking viking_read salvage (April 2026)
"hitesh@gmail.com": "htsh",
"pty819@outlook.com": "pty819",
@ -370,6 +372,10 @@ AUTHOR_MAP = {
"xowiekk@gmail.com": "Xowiek",
"1243352777@qq.com": "zons-zhaozhy",
"e.silacandmr@gmail.com": "Es1la",
"h3057183414@gmail.com": "CoreyNoDream",
"franksong2702@gmail.com": "franksong2702",
"673088860@qq.com": "ambition0802",
"beibei1988@proton.me": "beibi9966",
# ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
# crossref, and GH contributor list matching (April 2026 audit) ──
"1115117931@qq.com": "aaronagent",
@ -500,6 +506,10 @@ AUTHOR_MAP = {
"michel.belleau@malaiwah.com": "malaiwah",
"gnanasekaran.sekareee@gmail.com": "gnanam1990",
"jz.pentest@gmail.com": "0xyg3n",
"7093928+0xyg3n@users.noreply.github.com": "0xyg3n",
"nftpoetrist@gmail.com": "nftpoetrist", # PR #18982
"millerc79@users.noreply.github.com": "millerc79", # PR #19033
"hermes@example.com": "shellybotmoyer", # PR #18915 (bot-committed)
"hypnosis.mda@gmail.com": "Hypn0sis",
"ywt000818@gmail.com": "OwenYWT",
"dhandhalyabhavik@gmail.com": "v1k22",
@ -668,6 +678,7 @@ AUTHOR_MAP = {
"web3blind@gmail.com": "web3blind",
"ztzheng@163.com": "chengoak", # PR #17467
"24110240104@m.fudan.edu.cn": "YuShu", # co-author only
"charliekerfoot@gmail.com": "CharlieKerfoot", # PR #18951
}

View file

@ -25,15 +25,15 @@
}
},
"node_modules/@cacheable/memory": {
"version": "2.0.7",
"resolved": "https://registry.npmjs.org/@cacheable/memory/-/memory-2.0.7.tgz",
"integrity": "sha512-RbxnxAMf89Tp1dLhXMS7ceft/PGsDl1Ip7T20z5nZ+pwIAsQ1p2izPjVG69oCLv/jfQ7HDPHTWK0c9rcAWXN3A==",
"version": "2.0.8",
"resolved": "https://registry.npmjs.org/@cacheable/memory/-/memory-2.0.8.tgz",
"integrity": "sha512-FvEb29x5wVwu/Kf93IWwsOOEuhHh6dYCJF3vcKLzXc0KXIW181AOzv6ceT4ZpBHDvAfG60eqb+ekmrnLHIy+jw==",
"license": "MIT",
"dependencies": {
"@cacheable/utils": "^2.3.3",
"@keyv/bigmap": "^1.3.0",
"hookified": "^1.14.0",
"keyv": "^5.5.5"
"@cacheable/utils": "^2.4.0",
"@keyv/bigmap": "^1.3.1",
"hookified": "^1.15.1",
"keyv": "^5.6.0"
}
},
"node_modules/@cacheable/node-cache": {
@ -51,19 +51,19 @@
}
},
"node_modules/@cacheable/utils": {
"version": "2.3.4",
"resolved": "https://registry.npmjs.org/@cacheable/utils/-/utils-2.3.4.tgz",
"integrity": "sha512-knwKUJEYgIfwShABS1BX6JyJJTglAFcEU7EXqzTdiGCXur4voqkiJkdgZIQtWNFhynzDWERcTYv/sETMu3uJWA==",
"version": "2.4.1",
"resolved": "https://registry.npmjs.org/@cacheable/utils/-/utils-2.4.1.tgz",
"integrity": "sha512-eiFgzCbIneyMlLOmNG4g9xzF7Hv3Mga4LjxjcSC/ues6VYq2+gUbQI8JqNuw/ZM8tJIeIaBGpswAsqV2V7ApgA==",
"license": "MIT",
"dependencies": {
"hashery": "^1.3.0",
"hashery": "^1.5.1",
"keyv": "^5.6.0"
}
},
"node_modules/@emnapi/runtime": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.8.1.tgz",
"integrity": "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==",
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz",
"integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==",
"license": "MIT",
"optional": true,
"peer": true,
@ -87,9 +87,9 @@
"license": "BSD-3-Clause"
},
"node_modules/@img/colour": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz",
"integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==",
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
"integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
"license": "MIT",
"peer": true,
"engines": {
@ -617,9 +617,9 @@
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/codegen": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz",
"integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/eventemitter": {
@ -645,9 +645,9 @@
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/inquire": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
"integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/path": {
@ -663,9 +663,9 @@
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/utf8": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz",
"integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
"license": "BSD-3-Clause"
},
"node_modules/@tokenizer/inflate": {
@ -714,25 +714,20 @@
"integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==",
"license": "MIT"
},
"node_modules/@types/long": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
"integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==",
"license": "MIT"
},
"node_modules/@types/node": {
"version": "25.3.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.1.tgz",
"integrity": "sha512-hj9YIJimBCipHVfHKRMnvmHg+wfhKc0o4mTtXh9pKBjC8TLJzz0nzGmLi5UJsYAUgSvXFHgb0V2oY10DUFtImw==",
"version": "25.6.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz",
"integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==",
"license": "MIT",
"dependencies": {
"undici-types": "~7.18.0"
"undici-types": "~7.19.0"
}
},
"node_modules/@whiskeysockets/baileys": {
"name": "baileys",
"version": "7.0.0-rc.9",
"resolved": "git+ssh://git@github.com/WhiskeySockets/Baileys.git#01047debd81beb20da7b7779b08edcb06aa03770",
"integrity": "sha512-letWyB96JHD6NdqpAiseOfaUBi13u8AhiRcKSRqcVjc5Vw5xoPTZGvVnw8K/NvGBFAvyLJkwim9Mjvwzhx/SlA==",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
@ -807,9 +802,9 @@
}
},
"node_modules/body-parser": {
"version": "1.20.4",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz",
"integrity": "sha512-ZTgYYLMOXY9qKU/57FAo8F+HA2dGX7bqGc71txDRC1rS4frdFI5R7NhluHxH6M0YItAP0sHB4uqAOcYKxO6uGA==",
"version": "1.20.5",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.5.tgz",
"integrity": "sha512-3grm+/2tUOvu2cjJkvsIxrv/wVpfXQW4PsQHYm7yk4vfpu7Ekl6nEsYBoJUL6qDwZUx8wUhQ8tR2qz+ad9c9OA==",
"license": "MIT",
"dependencies": {
"bytes": "~3.1.2",
@ -820,7 +815,7 @@
"http-errors": "~2.0.1",
"iconv-lite": "~0.4.24",
"on-finished": "~2.4.1",
"qs": "~6.14.0",
"qs": "~6.15.1",
"raw-body": "~2.5.3",
"type-is": "~1.6.18",
"unpipe": "~1.0.0"
@ -830,6 +825,21 @@
"npm": "1.2.8000 || >= 1.4.16"
}
},
"node_modules/body-parser/node_modules/qs": {
"version": "6.15.1",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.15.1.tgz",
"integrity": "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==",
"license": "BSD-3-Clause",
"dependencies": {
"side-channel": "^1.1.0"
},
"engines": {
"node": ">=0.6"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/bytes": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
@ -840,16 +850,16 @@
}
},
"node_modules/cacheable": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/cacheable/-/cacheable-2.3.2.tgz",
"integrity": "sha512-w+ZuRNmex9c1TR9RcsxbfTKCjSL0rh1WA5SABbrWprIHeNBdmyQLSYonlDy9gpD+63XT8DgZ/wNh1Smvc9WnJA==",
"version": "2.3.4",
"resolved": "https://registry.npmjs.org/cacheable/-/cacheable-2.3.4.tgz",
"integrity": "sha512-djgxybDbw9fL/ZWMI3+CE8ZilNxcwFkVtDc1gJ+IlOSSWkSMPQabhV/XCHTQ6pwwN6aivXPZ43omTooZiX06Ew==",
"license": "MIT",
"dependencies": {
"@cacheable/memory": "^2.0.7",
"@cacheable/utils": "^2.3.3",
"@cacheable/memory": "^2.0.8",
"@cacheable/utils": "^2.4.0",
"hookified": "^1.15.0",
"keyv": "^5.5.5",
"qified": "^0.6.0"
"keyv": "^5.6.0",
"qified": "^0.9.0"
}
},
"node_modules/call-bind-apply-helpers": {
@ -1212,21 +1222,21 @@
}
},
"node_modules/hashery": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/hashery/-/hashery-1.5.0.tgz",
"integrity": "sha512-nhQ6ExaOIqti2FDWoEMWARUqIKyjr2VcZzXShrI+A3zpeiuPWzx6iPftt44LhP74E5sW36B75N6VHbvRtpvO6Q==",
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/hashery/-/hashery-1.5.1.tgz",
"integrity": "sha512-iZyKG96/JwPz1N55vj2Ie2vXbhu440zfUfJvSwEqEbeLluk7NnapfGqa7LH0mOsnDxTF85Mx8/dyR6HfqcbmbQ==",
"license": "MIT",
"dependencies": {
"hookified": "^1.14.0"
"hookified": "^1.15.0"
},
"engines": {
"node": ">=20"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
"integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
@ -1327,44 +1337,6 @@
"protobufjs": "6.8.8"
}
},
"node_modules/libsignal/node_modules/@types/node": {
"version": "10.17.60",
"resolved": "https://registry.npmjs.org/@types/node/-/node-10.17.60.tgz",
"integrity": "sha512-F0KIgDJfy2nA3zMLmWGKxcH2ZVEtCZXHHdOQs2gSaQ27+lNeEfGxzkIw90aXswATX7AZ33tahPbzy6KAfUreVw==",
"license": "MIT"
},
"node_modules/libsignal/node_modules/long": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
"integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==",
"license": "Apache-2.0"
},
"node_modules/libsignal/node_modules/protobufjs": {
"version": "6.8.8",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.8.8.tgz",
"integrity": "sha512-AAmHtD5pXgZfi7GMpllpO3q1Xw1OYldr+dMUlAnffGTAhqkg72WdmSY71uKBF/JuyiKs8psYbtKrhi0ASCD8qw==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@types/long": "^4.0.0",
"@types/node": "^10.1.0",
"long": "^4.0.0"
},
"bin": {
"pbjs": "bin/pbjs",
"pbts": "bin/pbts"
}
},
"node_modules/long": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
@ -1372,9 +1344,9 @@
"license": "Apache-2.0"
},
"node_modules/lru-cache": {
"version": "11.2.6",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz",
"integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==",
"version": "11.3.5",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.5.tgz",
"integrity": "sha512-NxVFwLAnrd9i7KUBxC4DrUhmgjzOs+1Qm50D3oF1/oL+r1NpZ4gA7xvG0/zJ8evR7zIKn4vLf7qTNduWFtCrRw==",
"license": "BlueOak-1.0.0",
"engines": {
"node": "20 || >=22"
@ -1552,12 +1524,12 @@
}
},
"node_modules/p-queue": {
"version": "9.1.0",
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-9.1.0.tgz",
"integrity": "sha512-O/ZPaXuQV29uSLbxWBGGZO1mCQXV2BLIwUr59JUU9SoH76mnYvtms7aafH/isNSNGwuEfP6W/4xD0/TJXxrizw==",
"version": "9.2.0",
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-9.2.0.tgz",
"integrity": "sha512-dWgLE8AH0HjQ9fe74pUkKkvzzYT18Inp4zra3lKHnnwqGvcfcUBrvF2EAVX+envufDNBOzpPq/IBUONDbI7+3g==",
"license": "MIT",
"dependencies": {
"eventemitter3": "^5.0.1",
"eventemitter3": "^5.0.4",
"p-timeout": "^7.0.0"
},
"engines": {
@ -1648,22 +1620,22 @@
"license": "MIT"
},
"node_modules/protobufjs": {
"version": "7.5.4",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
"integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
"version": "7.5.6",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
"integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.4",
"@protobufjs/codegen": "^2.0.5",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.0",
"@protobufjs/inquire": "^1.1.1",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.0",
"@protobufjs/utf8": "^1.1.1",
"@types/node": ">=13.7.0",
"long": "^5.0.0"
},
@ -1685,17 +1657,23 @@
}
},
"node_modules/qified": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/qified/-/qified-0.6.0.tgz",
"integrity": "sha512-tsSGN1x3h569ZSU1u6diwhltLyfUWDp3YbFHedapTmpBl0B3P6U3+Qptg7xu+v+1io1EwhdPyyRHYbEw0KN2FA==",
"version": "0.9.1",
"resolved": "https://registry.npmjs.org/qified/-/qified-0.9.1.tgz",
"integrity": "sha512-n7mar4T0xQ+39dE2vGTAlbxUEpndwPANH0kDef1/MYsB8Bba9wshkybIRx74qgcvKQPEWErf9AqAdYjhzY2Ilg==",
"license": "MIT",
"dependencies": {
"hookified": "^1.14.0"
"hookified": "^2.1.1"
},
"engines": {
"node": ">=20"
}
},
"node_modules/qified/node_modules/hookified": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/hookified/-/hookified-2.2.0.tgz",
"integrity": "sha512-p/LgFzRN5FeoD3DLS6bkUapeye6E4SI6yJs6KetENd18S+FBthqYq2amJUWpt5z0EQwwHemidjY5OqJGEKm5uA==",
"license": "MIT"
},
"node_modules/qrcode-terminal": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/qrcode-terminal/-/qrcode-terminal-0.12.0.tgz",
@ -1922,13 +1900,13 @@
}
},
"node_modules/side-channel-list": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz",
"integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==",
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz",
"integrity": "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"object-inspect": "^1.13.3"
"object-inspect": "^1.13.4"
},
"engines": {
"node": ">= 0.4"
@ -2094,9 +2072,9 @@
}
},
"node_modules/undici-types": {
"version": "7.18.2",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz",
"integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
"version": "7.19.2",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz",
"integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==",
"license": "MIT"
},
"node_modules/unpipe": {
@ -2139,9 +2117,9 @@
"license": "MIT"
},
"node_modules/ws": {
"version": "8.19.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz",
"integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
"version": "8.20.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"

View file

@ -12,5 +12,8 @@
"express": "^4.21.0",
"qrcode-terminal": "^0.12.0",
"pino": "^9.0.0"
},
"overrides": {
"protobufjs": "^7.5.5"
}
}

View file

@ -178,9 +178,10 @@ class TestMcpRegistrationE2E:
complete_event = completions[0]
assert isinstance(complete_event, ToolCallProgress)
assert complete_event.status == "completed"
# rawOutput should contain the tool result string
assert complete_event.raw_output is not None
assert "hello" in str(complete_event.raw_output)
# Completion should contain human-readable output rather than forcing raw JSON panes.
assert complete_event.content
assert "hello" in complete_event.content[0].content.text
assert complete_event.raw_output is None
def test_patch_mode_tool_start_emits_diff_blocks_for_v4a_patch(self):
update = build_tool_start(

View file

@ -27,7 +27,10 @@ from acp.schema import (
SetSessionModeResponse,
SessionInfo,
TextContentBlock,
ToolCallProgress,
ToolCallStart,
Usage,
UsageUpdate,
UserMessageChunk,
)
from acp_adapter.server import HermesACPAgent, HERMES_VERSION
@ -200,6 +203,8 @@ class TestSessionOps:
"context",
"reset",
"compact",
"steer",
"queue",
"version",
]
model_cmd = next(
@ -208,6 +213,46 @@ class TestSessionOps:
assert model_cmd.input is not None
assert model_cmd.input.root.hint == "model name to switch to"
def test_build_usage_update_for_zed_context_indicator(self, agent, mock_manager):
    """_build_usage_update reports the compressor's window as size and the estimate as used."""
    context_window = 100_000
    estimated_tokens = 25_000

    session = mock_manager.create_session(cwd="/tmp")
    session.history = [{"role": "user", "content": "hello"}]
    session.agent.context_compressor = MagicMock(context_length=context_window)
    session.agent._cached_system_prompt = "system"
    session.agent.tools = [{"type": "function", "function": {"name": "demo"}}]

    # Pin the rough estimator so the expected "used" value is deterministic.
    with patch(
        "agent.model_metadata.estimate_request_tokens_rough",
        return_value=estimated_tokens,
    ):
        usage = agent._build_usage_update(session)

    assert isinstance(usage, UsageUpdate)
    assert usage.session_update == "usage_update"
    assert usage.size == context_window
    assert usage.used == estimated_tokens
@pytest.mark.asyncio
async def test_send_usage_update_to_client(self, agent, mock_manager):
    """_send_usage_update awaits the client's session_update once with a UsageUpdate."""
    session = mock_manager.create_session(cwd="/tmp")
    session.agent.context_compressor = MagicMock(context_length=100_000)

    client = MagicMock(spec=acp.Client)
    client.session_update = AsyncMock()
    agent._conn = client

    # Pin the rough estimator so the expected "used" value is deterministic.
    with patch(
        "agent.model_metadata.estimate_request_tokens_rough",
        return_value=25_000,
    ):
        await agent._send_usage_update(session)

    client.session_update.assert_awaited_once()
    sent = client.session_update.await_args
    assert sent.kwargs["session_id"] == session.session_id
    usage = sent.kwargs["update"]
    assert isinstance(usage, UsageUpdate)
    assert usage.size == 100_000
    assert usage.used == 25_000
@pytest.mark.asyncio
async def test_cancel_sets_event(self, agent):
resp = await agent.new_session(cwd=".")
@ -238,11 +283,31 @@ class TestSessionOps:
{"role": "system", "content": "hidden system"},
{"role": "user", "content": "what controls the / slash commands?"},
{"role": "assistant", "content": "HermesACPAgent._ADVERTISED_COMMANDS controls them."},
{"role": "tool", "content": "tool output should not replay"},
{
"role": "assistant",
"content": "",
"tool_calls": [
{
"id": "call_search_1",
"type": "function",
"function": {
"name": "search_files",
"arguments": '{"pattern":"slash commands","path":"."}',
},
}
],
},
{
"role": "tool",
"tool_call_id": "call_search_1",
"content": '{"total_count":1,"matches":[{"path":"cli.py","line":42,"content":"slash commands"}]}',
},
]
mock_conn.session_update.reset_mock()
resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
await asyncio.sleep(0)
await asyncio.sleep(0)
assert isinstance(resp, LoadSessionResponse)
calls = mock_conn.session_update.await_args_list
@ -257,6 +322,21 @@ class TestSessionOps:
assert isinstance(replay_calls[1].kwargs["update"], AgentMessageChunk)
assert replay_calls[1].kwargs["update"].content.text.startswith("HermesACPAgent")
tool_updates = [
call.kwargs["update"]
for call in calls
if getattr(call.kwargs.get("update"), "session_update", None)
in {"tool_call", "tool_call_update"}
]
assert len(tool_updates) == 2
assert isinstance(tool_updates[0], ToolCallStart)
assert tool_updates[0].tool_call_id == "call_search_1"
assert tool_updates[0].title == "search: slash commands"
assert isinstance(tool_updates[1], ToolCallProgress)
assert tool_updates[1].tool_call_id == "call_search_1"
assert "Search results" in tool_updates[1].content[0].content.text
assert "cli.py:42" in tool_updates[1].content[0].content.text
@pytest.mark.asyncio
async def test_resume_session_replays_persisted_history_to_client(self, agent):
mock_conn = MagicMock(spec=acp.Client)
@ -269,6 +349,8 @@ class TestSessionOps:
mock_conn.session_update.reset_mock()
resp = await agent.resume_session(cwd="/tmp", session_id=new_resp.session_id)
await asyncio.sleep(0)
await asyncio.sleep(0)
assert isinstance(resp, ResumeSessionResponse)
updates = [call.kwargs["update"] for call in mock_conn.session_update.await_args_list]
@ -278,6 +360,27 @@ class TestSessionOps:
for update in updates
)
@pytest.mark.asyncio
async def test_load_session_schedules_history_replay_after_response(self, agent):
    """Zed only attaches replayed updates after session/load has completed."""
    created = await agent.new_session(cwd="/tmp")
    session = agent.session_manager.get_session(created.session_id)
    session.history = [{"role": "user", "content": "hello from history"}]
    timeline = []

    async def record_replay(_state):
        timeline.append("replay")

    with patch.object(agent, "_replay_session_history", side_effect=record_replay):
        loaded = await agent.load_session(cwd="/tmp", session_id=created.session_id)
        timeline.append("returned")

        # load_session must have returned before any replay ran.
        assert isinstance(loaded, LoadSessionResponse)
        assert timeline == ["returned"]

        # Yield to the event loop twice so the scheduled replay can run.
        for _ in range(2):
            await asyncio.sleep(0)
        assert timeline == ["returned", "replay"]
@pytest.mark.asyncio
async def test_resume_session_creates_new_if_missing(self, agent):
resume_resp = await agent.resume_session(cwd="/tmp", session_id="nonexistent")
@ -522,6 +625,11 @@ class TestPrompt:
assert isinstance(resp, PromptResponse)
assert resp.stop_reason == "end_turn"
state.agent.run_conversation.assert_called_once()
assert state.agent.tool_progress_callback is not None
assert state.agent.step_callback is not None
assert state.agent.stream_delta_callback is not None
assert state.agent.reasoning_callback is not None
assert state.agent.thinking_callback is None
@pytest.mark.asyncio
async def test_prompt_updates_history(self, agent):
@ -565,12 +673,40 @@ class TestPrompt:
prompt = [TextContentBlock(type="text", text="help me")]
await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
# session_update should have been called with the final message
# session_update should include the final message (usage_update may follow it)
mock_conn.session_update.assert_called()
# Get the last call's update argument
last_call = mock_conn.session_update.call_args_list[-1]
update = last_call[1].get("update") or last_call[0][1]
assert update.session_update == "agent_message_chunk"
updates = [
call.kwargs.get("update") or call.args[1]
for call in mock_conn.session_update.call_args_list
]
assert any(update.session_update == "agent_message_chunk" for update in updates)
@pytest.mark.asyncio
async def test_prompt_does_not_duplicate_streamed_final_message(self, agent):
    """If ACP already streamed response chunks, final_response should not be sent again."""
    created = await agent.new_session(cwd=".")
    session = agent.session_manager.get_session(created.session_id)

    def fake_run(*args, **kwargs):
        # Stream the answer first, then return the same text as the final response.
        session.agent.stream_delta_callback("streamed answer")
        return {"final_response": "streamed answer", "messages": []}

    session.agent.run_conversation = fake_run

    client = MagicMock(spec=acp.Client)
    client.session_update = AsyncMock()
    agent._conn = client

    await agent.prompt(
        prompt=[TextContentBlock(type="text", text="hello")],
        session_id=created.session_id,
    )

    sent_updates = []
    for call in client.session_update.call_args_list:
        sent_updates.append(call.kwargs.get("update") or call.args[1])
    message_chunks = [
        sent for sent in sent_updates if sent.session_update == "agent_message_chunk"
    ]
    assert len(message_chunks) == 1
    assert message_chunks[0].content.text == "streamed answer"
@pytest.mark.asyncio
async def test_prompt_auto_titles_session(self, agent):
@ -708,6 +844,43 @@ class TestSlashCommands:
assert "2 messages" in result
assert "user: 1" in result
def test_context_shows_usage_and_compression_threshold(self, agent, mock_manager):
    """/context output includes usage, tokens left before the threshold, and a /compact tip."""
    session = self._make_state(mock_manager)
    session.history = [{"role": "user", "content": "hello"}]
    compressor = MagicMock(context_length=100_000, threshold_tokens=80_000)
    session.agent.context_compressor = compressor
    session.agent._cached_system_prompt = "system"
    session.agent.tools = [{"type": "function", "function": {"name": "demo"}}]

    # 25,000 estimated tokens is below the 80,000 threshold.
    with patch(
        "agent.model_metadata.estimate_request_tokens_rough",
        return_value=25_000,
    ):
        report = agent._handle_slash_command("/context", session)

    expected_fragments = (
        "Context usage: ~25,000 / 100,000 tokens (25.0%)",
        "Compression: ~55,000 tokens until threshold (~80,000, 80%)",
        "Tip: run /compact",
    )
    for fragment in expected_fragments:
        assert fragment in report
def test_context_says_compression_due_when_past_threshold(self, agent, mock_manager):
    """/context output says compression is due once the estimate exceeds the threshold."""
    session = self._make_state(mock_manager)
    session.history = [{"role": "user", "content": "hello"}]
    compressor = MagicMock(context_length=100_000, threshold_tokens=80_000)
    session.agent.context_compressor = compressor

    # 82,000 estimated tokens is above the 80,000 threshold.
    with patch(
        "agent.model_metadata.estimate_request_tokens_rough",
        return_value=82_000,
    ):
        report = agent._handle_slash_command("/context", session)

    expected_fragments = (
        "Context usage: ~82,000 / 100,000 tokens (82.0%)",
        "Compression: due now (threshold ~80,000, 80%). Run /compact.",
    )
    for fragment in expected_fragments:
        assert fragment in report
def test_reset_clears_history(self, agent, mock_manager):
state = self._make_state(mock_manager)
state.history = [{"role": "user", "content": "hello"}]
@ -787,7 +960,12 @@ class TestSlashCommands:
resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
assert resp.stop_reason == "end_turn"
mock_conn.session_update.assert_called_once()
updates = [
call.kwargs.get("update") or call.args[1]
for call in mock_conn.session_update.call_args_list
]
assert any(update.session_update == "agent_message_chunk" for update in updates)
assert any(update.session_update == "usage_update" for update in updates)
@pytest.mark.asyncio
async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager):

View file

@ -52,6 +52,12 @@ class TestToolKindMap:
def test_tool_kind_execute_code(self):
    """execute_code is reported with the "execute" kind."""
    kind = get_tool_kind("execute_code")
    assert kind == "execute"
def test_tool_kind_todo(self):
    """todo is reported with the "other" kind."""
    kind = get_tool_kind("todo")
    assert kind == "other"
def test_tool_kind_skill_view(self):
    """skill_view is reported with the "read" kind."""
    kind = get_tool_kind("skill_view")
    assert kind == "read"
def test_tool_kind_browser_navigate(self):
assert get_tool_kind("browser_navigate") == "fetch"
@ -110,6 +116,25 @@ class TestBuildToolTitle:
title = build_tool_title("web_search", {"query": "python asyncio"})
assert "python asyncio" in title
def test_skill_view_title_includes_skill_name(self):
    """The skill_view title carries the skill name in parentheses."""
    arguments = {"name": "github-pitfalls"}
    assert build_tool_title("skill_view", arguments) == "skill view (github-pitfalls)"
def test_skill_view_title_includes_linked_file(self):
    """When file_path is given, the skill_view title appends it after the skill name."""
    arguments = {"name": "github-pitfalls", "file_path": "references/api.md"}
    expected = "skill view (github-pitfalls/references/api.md)"
    assert build_tool_title("skill_view", arguments) == expected
def test_execute_code_title_includes_first_code_line(self):
    """The execute_code title shows the first non-empty line of the code."""
    # The snippet starts with a blank line, which the title is expected to skip.
    snippet = "\nfrom hermes_tools import terminal\nprint('done')"
    heading = build_tool_title("execute_code", {"code": snippet})
    assert heading == "python: from hermes_tools import terminal"
def test_skill_manage_title_includes_action_and_target(self):
    """The skill_manage title names the action and the skill/file being changed."""
    arguments = {
        "action": "patch",
        "name": "hermes-agent-operations",
        "file_path": "references/acp.md",
    }
    heading = build_tool_title("skill_manage", arguments)
    assert heading == "skill patch: hermes-agent-operations/references/acp.md"
def test_unknown_tool_uses_name(self):
title = build_tool_title("some_new_tool", {"foo": "bar"})
assert title == "some_new_tool"
@ -164,15 +189,23 @@ class TestBuildToolStart:
assert "ls -la /tmp" in text
def test_build_tool_start_for_read_file(self):
"""read_file should include the path in content."""
"""read_file start should stay compact; completion carries file contents."""
args = {"path": "/etc/hosts", "offset": 1, "limit": 50}
result = build_tool_start("tc-3", "read_file", args)
assert isinstance(result, ToolCallStart)
assert result.kind == "read"
assert len(result.content) >= 1
content_item = result.content[0]
assert isinstance(content_item, ContentToolCallContent)
assert "/etc/hosts" in content_item.content.text
assert result.content is None
assert result.raw_input is None
def test_build_tool_start_for_web_extract_is_compact(self):
    """web_extract start should stay compact; title identifies URLs."""
    start = build_tool_start(
        "tc-web-start",
        "web_extract",
        {"urls": ["https://example.com/docs"]},
    )
    assert isinstance(start, ToolCallStart)
    assert start.title == "extract: https://example.com/docs"
    assert start.kind == "fetch"
    # Neither a content pane nor the raw arguments are attached.
    assert start.content is None
    assert start.raw_input is None
def test_build_tool_start_for_search(self):
"""search_files should include pattern in content."""
@ -181,6 +214,48 @@ class TestBuildToolStart:
assert isinstance(result, ToolCallStart)
assert result.kind == "search"
assert "TODO" in result.content[0].content.text
assert result.raw_input is None
def test_build_tool_start_for_todo_is_human_readable(self):
    """todo start shows an item-count title and the todo text, without raw input."""
    todo_item = {"id": "one", "content": "Fix ACP rendering", "status": "in_progress"}
    start = build_tool_start("tc-todo", "todo", {"todos": [todo_item]})
    assert start.title == "todo (1 item)"
    rendered = start.content[0].content.text
    assert "Fix ACP rendering" in rendered
    assert start.raw_input is None
def test_build_tool_start_for_skill_view_is_human_readable(self):
    """skill_view start names the skill in both title and content, without raw input."""
    start = build_tool_start("tc-skill", "skill_view", {"name": "github-pitfalls"})
    assert start.title == "skill view (github-pitfalls)"
    rendered = start.content[0].content.text
    assert "github-pitfalls" in rendered
    assert start.raw_input is None
def test_build_tool_start_for_execute_code_shows_code_preview(self):
    """execute_code start previews the code in a python fence, without raw input."""
    start = build_tool_start("tc-code", "execute_code", {"code": "print('hello')"})
    assert start.kind == "execute"
    assert start.title == "python: print('hello')"
    rendered = start.content[0].content.text
    for fragment in ("```python", "print('hello')"):
        assert fragment in rendered
    assert start.raw_input is None
def test_build_tool_start_for_skill_manage_patch_shows_diff(self):
    """A skill_manage patch start carries a file-edit block with the old and new text."""
    arguments = {
        "action": "patch",
        "name": "hermes-agent-operations",
        "file_path": "references/acp.md",
        "old_string": "old advice",
        "new_string": "new advice",
    }
    start = build_tool_start("tc-skill-manage", "skill_manage", arguments)

    assert start.kind == "edit"
    assert start.title == "skill patch: hermes-agent-operations/references/acp.md"

    edit_block = start.content[0]
    assert isinstance(edit_block, FileEditToolCallContent)
    assert edit_block.path == "skills/hermes-agent-operations/references/acp.md"
    assert edit_block.old_text == "old advice"
    assert edit_block.new_text == "new advice"
    assert start.raw_input is None
def test_build_tool_start_generic_fallback(self):
"""Unknown tools should get a generic text representation."""
@ -205,6 +280,158 @@ class TestBuildToolComplete:
content_item = result.content[0]
assert isinstance(content_item, ContentToolCallContent)
assert "total 42" in content_item.content.text
assert result.raw_output is None
def test_build_tool_complete_for_todo_is_checklist(self):
    """todo completion shows status-marked items and a progress line, without raw output."""
    tool_result = '{"todos":[{"id":"a","content":"Inspect ACP","status":"completed"},{"id":"b","content":"Patch renderers","status":"in_progress"}],"summary":{"total":2,"pending":0,"in_progress":1,"completed":1,"cancelled":0}}'
    completion = build_tool_complete("tc-todo", "todo", tool_result)
    rendered = completion.content[0].content.text
    expected_fragments = (
        "✅ Inspect ACP",
        "- 🔄 Patch renderers",
        "**Progress:** 1 completed, 1 in progress, 0 pending",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_skill_view_summarizes_content_without_raw_json(self):
    """skill_view completion shows name, description, and heading but not the body text."""
    tool_result = '{"success":true,"name":"github-pitfalls","description":"GitHub gotchas","content":"# GitHub Pitfalls\\nUse gh carefully.","path":"github/github-pitfalls/SKILL.md"}'
    completion = build_tool_complete("tc-skill", "skill_view", tool_result)
    rendered = completion.content[0].content.text

    expected_fragments = (
        "**Skill loaded**",
        "`github-pitfalls`",
        "GitHub gotchas",
        "GitHub Pitfalls",
        "Full skill content is available to the agent",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    # The skill's body line must not appear in the rendered summary.
    assert "Use gh carefully" not in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_execute_code_formats_output(self):
    """execute_code completion shows the exit code and output, without raw output."""
    tool_result = '{"output":"hello\\n","exit_code":0}'
    completion = build_tool_complete("tc-code", "execute_code", tool_result)
    rendered = completion.content[0].content.text
    for fragment in ("Exit code: 0", "hello"):
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_skill_manage_summarizes_without_raw_json(self):
    """skill_manage completion names the action, skill, and file instead of echoing JSON."""
    tool_result = '{"success":true,"message":"Patched references/hermes-acp-zed-rendering.md in skill \'hermes-agent-operations\' (1 replacement)."}'
    call_arguments = {
        "action": "patch",
        "name": "hermes-agent-operations",
        "file_path": "references/hermes-acp-zed-rendering.md",
    }
    completion = build_tool_complete(
        "tc-skill-manage",
        "skill_manage",
        tool_result,
        function_args=call_arguments,
    )
    rendered = completion.content[0].content.text

    expected_fragments = (
        "**✅ Skill updated**",
        "`patch`",
        "`hermes-agent-operations`",
        "references/hermes-acp-zed-rendering.md",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    # The raw JSON payload must not appear in the rendered text.
    assert "{\"success\"" not in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_read_file_formats_content(self):
    """read_file completion names the file and fences its contents, without raw output."""
    tool_result = '{"content":"1|hello\\n2|world","total_lines":2}'
    call_arguments = {"path": "README.md", "offset": 1, "limit": 20}
    completion = build_tool_complete(
        "tc-read",
        "read_file",
        tool_result,
        function_args=call_arguments,
    )
    rendered = completion.content[0].content.text
    assert "Read README.md" in rendered
    assert "```\n1|hello\n2|world\n```" in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_search_files_formats_matches(self):
    """search_files completion lists the match count, locations, and truncation hint."""
    # The result is JSON followed by a trailing plain-text hint.
    tool_result = '{"total_count":2,"matches":[{"path":"README.md","line":3,"content":"TODO: fix this"},{"path":"src/app.py","line":9,"content":"needle"}],"truncated":true}\n\n[Hint: Results truncated. Use offset=12 to see more.]'
    completion = build_tool_complete("tc-search", "search_files", tool_result)
    rendered = completion.content[0].content.text

    expected_fragments = (
        "Search results",
        "Found 2 matches",
        "README.md:3",
        "TODO: fix this",
        "Results truncated",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_process_list_formats_table(self):
    """process list completion shows the process count, session id, and command."""
    tool_result = '{"processes":[{"session_id":"p1","status":"running","pid":123,"command":"npm run dev"}]}'
    completion = build_tool_complete(
        "tc-process",
        "process",
        tool_result,
        function_args={"action": "list"},
    )
    rendered = completion.content[0].content.text
    for fragment in ("Processes: 1", "`p1`", "npm run dev"):
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_delegate_task_summarizes_children(self):
    """delegate_task completion shows the task count, summary, model, and tools used."""
    tool_result = '{"results":[{"task_index":0,"status":"completed","summary":"Reviewed ACP rendering.","model":"gpt-5.5","duration_seconds":3.2,"tool_trace":[{"tool":"read_file"}]}],"total_duration_seconds":3.4}'
    completion = build_tool_complete("tc-delegate", "delegate_task", tool_result)
    rendered = completion.content[0].content.text

    expected_fragments = (
        "Delegation results: 1 task",
        "Reviewed ACP rendering",
        "gpt-5.5",
        "Tools: read_file",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_session_search_recent(self):
    """session_search in recent mode shows a heading plus each session's title and preview."""
    tool_result = '{"success":true,"mode":"recent","results":[{"session_id":"s1","title":"ACP work","last_active":"2026-05-02","message_count":12,"preview":"Polished tool rendering."}],"count":1}'
    completion = build_tool_complete("tc-session", "session_search", tool_result)
    rendered = completion.content[0].content.text
    for fragment in ("Recent sessions", "ACP work", "Polished tool rendering"):
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_memory_avoids_dumping_entries(self):
    """memory completion shows the added content but not the stored entries list."""
    tool_result = '{"success":true,"target":"user","entries":["private long memory"],"usage":"1% — 19/2000 chars","entry_count":1,"message":"Entry added."}'
    call_arguments = {
        "action": "add",
        "target": "user",
        "content": "User likes concise ACP rendering.",
    }
    completion = build_tool_complete(
        "tc-memory",
        "memory",
        tool_result,
        function_args=call_arguments,
    )
    rendered = completion.content[0].content.text
    assert "Memory add saved" in rendered
    assert "User likes concise ACP rendering" in rendered
    # Existing entries from the tool result must not appear in the rendered text.
    assert "private long memory" not in rendered
    assert completion.raw_output is None
def test_build_tool_complete_for_web_extract_success_stays_compact(self):
    """A successful web_extract completion carries neither content nor raw output."""
    tool_result = '{"results":[{"url":"https://example.com","title":"Example","content":"# Intro\\nThis is extracted content."}]}'
    completion = build_tool_complete("tc-web-extract", "web_extract", tool_result)
    assert completion.content is None
    assert completion.raw_output is None
def test_build_tool_complete_for_web_extract_error_shows_error(self):
    """A web_extract result containing an error shows the failing URL and the error."""
    tool_result = '{"results":[{"url":"https://example.com","title":"Example","error":"timeout"}]}'
    completion = build_tool_complete("tc-web-extract-error", "web_extract", tool_result)
    rendered = completion.content[0].content.text
    for fragment in ("Web extract failed", "https://example.com", "timeout"):
        assert fragment in rendered
    assert completion.raw_output is None
def test_build_tool_complete_truncates_large_output(self):
"""Very large outputs should be truncated."""

View file

@ -1836,3 +1836,55 @@ class TestResolveMessagesMaxTokens:
result = _resolve_anthropic_messages_max_tokens(0.5, "claude-opus-4-6")
assert result > 0
assert result != 0
# ---------------------------------------------------------------------------
# convert_tools_to_anthropic — tool dedup at API boundary
# ---------------------------------------------------------------------------
class TestConvertToolsToAnthropicDedup:
    """Checks that ``convert_tools_to_anthropic`` emits each tool name once.

    Anthropic rejects requests with duplicate tool names. This guard converts
    a hard failure into a warning log. See:
    https://github.com/NousResearch/hermes-agent/issues/18478
    """

    def _make_openai_tool(self, name: str) -> dict:
        """Return a minimal OpenAI-style function tool definition for *name*."""
        function_spec = {
            "name": name,
            "description": f"Tool {name}",
            "parameters": {"type": "object", "properties": {}},
        }
        return {"type": "function", "function": function_spec}

    def test_unique_tools_pass_through(self):
        """Distinct tool names are all kept, in their original order."""
        converted = convert_tools_to_anthropic(
            [self._make_openai_tool(n) for n in ("alpha", "beta")]
        )
        assert len(converted) == 2
        assert [tool["name"] for tool in converted] == ["alpha", "beta"]

    def test_duplicate_tool_names_are_deduplicated(self):
        """Repeated tool names collapse to a single entry per name."""
        requested = (
            "lcm_grep",
            "lcm_describe",
            "lcm_grep",  # duplicate
            "lcm_expand",
            "lcm_describe",  # duplicate
        )
        converted = convert_tools_to_anthropic(
            [self._make_openai_tool(n) for n in requested]
        )
        names = [tool["name"] for tool in converted]
        assert len(names) == len(set(names)), (
            f"Duplicate tool names found: {names}"
        )
        # Three distinct names were supplied: lcm_grep, lcm_describe, lcm_expand.
        assert len(converted) == 3

    def test_empty_tools_returns_empty(self):
        """An empty tool list converts to an empty list."""
        assert convert_tools_to_anthropic([]) == []

    def test_none_tools_returns_empty(self):
        """``None`` converts to an empty list."""
        assert convert_tools_to_anthropic(None) == []

View file

@ -16,6 +16,7 @@ from agent.auxiliary_client import (
auxiliary_max_tokens_param,
call_llm,
async_call_llm,
_build_call_kwargs,
_read_codex_access_token,
_get_provider_chain,
_is_payment_error,
@ -1752,3 +1753,143 @@ class TestVisionAutoSkipsKimiCoding:
"kimi-coding",
"kimi-coding-cn",
})
# ---------------------------------------------------------------------------
# _build_call_kwargs — tool dedup at API boundary
# ---------------------------------------------------------------------------
class TestBuildCallKwargsToolDedup:
    """Checks that ``_build_call_kwargs`` passes each tool name to the API once.

    Providers like Google Vertex, Azure, and Bedrock reject requests with
    duplicate tool names (HTTP 400). This guard converts a hard failure into
    a warning log so agent turns succeed even if an upstream injection path
    regresses. See: https://github.com/NousResearch/hermes-agent/issues/18478
    """

    def _make_tool(self, name: str) -> dict:
        """Return a minimal function-tool definition named *name*."""
        function_spec = {
            "name": name,
            "description": f"Tool {name}",
            "parameters": {"type": "object", "properties": {}},
        }
        return {"type": "function", "function": function_spec}

    def test_unique_tools_pass_through_unchanged(self):
        """Distinct tool names are all kept, in their original order."""
        call_kwargs = _build_call_kwargs(
            provider="openai",
            model="gpt-4o",
            messages=[],
            tools=[self._make_tool(n) for n in ("alpha", "beta")],
        )
        assert len(call_kwargs["tools"]) == 2
        assert [t["function"]["name"] for t in call_kwargs["tools"]] == [
            "alpha",
            "beta",
        ]

    def test_duplicate_tool_names_are_deduplicated(self):
        """Repeated tool names collapse to a single entry per name."""
        requested = (
            "lcm_grep",
            "lcm_describe",
            "lcm_grep",  # duplicate
            "lcm_expand",
            "lcm_describe",  # duplicate
        )
        call_kwargs = _build_call_kwargs(
            provider="google",
            model="gemini-2.5-pro",
            messages=[],
            tools=[self._make_tool(n) for n in requested],
        )
        emitted = call_kwargs["tools"]
        names = [t["function"]["name"] for t in emitted]
        # Must be deduplicated — no repeated names
        assert len(names) == len(set(names)), (
            f"Duplicate tool names found: {names}"
        )
        # Three distinct names were supplied: lcm_grep, lcm_describe, lcm_expand.
        assert len(emitted) == 3

    def test_empty_tools_unchanged(self):
        """An empty tool list is either omitted or passed through as ``[]``."""
        call_kwargs = _build_call_kwargs(
            provider="openai", model="gpt-4o", messages=[], tools=[],
        )
        assert "tools" not in call_kwargs or call_kwargs["tools"] == []

    def test_none_tools_unchanged(self):
        """``tools=None`` produces no ``tools`` key at all."""
        call_kwargs = _build_call_kwargs(
            provider="openai", model="gpt-4o", messages=[], tools=None,
        )
        assert "tools" not in call_kwargs
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider-related environment variables before every test."""
    provider_vars = ("OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY")
    for var_name in provider_vars:
        # raising=False: a variable that is already unset is not an error.
        monkeypatch.delenv(var_name, raising=False)
class TestOpenRouterExplicitApiKey:
    """Checks which API key ``resolve_provider_client`` hands to the OpenAI client."""

    def test_resolve_provider_client_passes_explicit_api_key_to_openrouter(
        self, monkeypatch
    ):
        """An explicit key wins over ``OPENROUTER_API_KEY`` from the environment.

        The env var is set as a decoy; the patched ``OpenAI`` constructor must
        be called exactly once with the explicit key.
        """
        # Decoy fallback — must NOT be used when explicit_api_key is provided.
        monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key")

        # Stand-in for the OpenAI constructor so the api_key can be inspected.
        openai_ctor = MagicMock()
        openai_ctor.return_value = MagicMock(name="openrouter-client")

        with patch("agent.auxiliary_client.OpenAI", openai_ctor):
            client, _model = resolve_provider_client(
                provider="openrouter",
                explicit_api_key="explicit-pool-key",
            )

        assert client is not None

        openai_ctor.assert_called_once()
        call_kwargs = openai_ctor.call_args.kwargs
        assert call_kwargs["api_key"] == "explicit-pool-key", (
            f"Expected explicit_api_key to be passed, got: {call_kwargs['api_key']}"
        )
        assert call_kwargs["api_key"] != "env-fallback-key", (
            "Should NOT fall back to OPENROUTER_API_KEY when explicit_api_key is provided"
        )

    def test_resolve_provider_client_without_explicit_api_key_falls_back_to_env(
        self, monkeypatch
    ):
        """With ``explicit_api_key=None`` the ``OPENROUTER_API_KEY`` env var is used."""
        # This time the env var is the expected source of the key.
        monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key")

        # Stand-in for the OpenAI constructor so the api_key can be inspected.
        openai_ctor = MagicMock()
        openai_ctor.return_value = MagicMock(name="openrouter-client")

        with patch("agent.auxiliary_client.OpenAI", openai_ctor):
            client, _model = resolve_provider_client(
                provider="openrouter",
                explicit_api_key=None,
            )

        assert client is not None

        openai_ctor.assert_called_once()
        call_kwargs = openai_ctor.call_args.kwargs
        assert call_kwargs["api_key"] == "env-fallback-key", (
            f"Expected env fallback key to be used when explicit_api_key is None, got: {call_kwargs['api_key']}"
        )

View file

@ -348,6 +348,64 @@ def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
assert entry.access_token == "sk-or-seeded"
def test_load_pool_prefers_dotenv_over_stale_os_environ(tmp_path, monkeypatch):
    """Regression for #18254: the ``.env`` key must beat a stale shell export.

    A stale OPENROUTER_API_KEY in os.environ (inherited from a parent shell)
    must NOT shadow the fresh key in ~/.hermes/.env when seeding the
    credential pool. Before the fix, `get_env_value()` preferred os.environ
    and silently wrote the stale value into auth.json, causing persistent
    401 errors after key rotation.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # The stale value exported by the parent shell ...
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-STALE-from-shell")
    # ... versus the fresh value the user put in ~/.hermes/.env.
    dotenv_file = hermes_home / ".env"
    dotenv_file.write_text("OPENROUTER_API_KEY=sk-or-FRESH-from-dotenv\n")

    _write_auth_store(tmp_path, {"version": 1, "providers": {}})

    from agent.credential_pool import load_pool

    selected = load_pool("openrouter").select()

    assert selected is not None
    assert selected.source == "env:OPENROUTER_API_KEY"
    # The fresh key from .env must win over the stale shell export
    assert selected.access_token == "sk-or-FRESH-from-dotenv", (
        f"Expected .env to win, got {selected.access_token!r}"
    )
def test_load_pool_falls_back_to_os_environ_when_dotenv_empty(tmp_path, monkeypatch):
    """The pool is seeded from os.environ when ``.env`` lacks the key.

    When ~/.hermes/.env does not define OPENROUTER_API_KEY (typical Docker /
    K8s / systemd deployment), seeding must still pick up the key from
    os.environ. Guards against regressions that would break production
    deployments relying on runtime-injected env vars.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-from-runtime-env")

    # The .env file exists but says nothing about OPENROUTER_API_KEY.
    dotenv_file = hermes_home / ".env"
    dotenv_file.write_text("SOME_OTHER_VAR=unrelated\n")

    _write_auth_store(tmp_path, {"version": 1, "providers": {}})

    from agent.credential_pool import load_pool

    selected = load_pool("openrouter").select()

    assert selected is not None
    assert selected.access_token == "sk-or-from-runtime-env"
def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

View file

@ -314,3 +314,281 @@ def test_dry_run_skips_snapshot(backup_env, monkeypatch):
assert not any(r.get("reason") == "pre-curator-run" for r in rows), (
"dry-run must not create a pre-run snapshot"
)
# ---------------------------------------------------------------------------
# cron-jobs backup + rollback (the part issue #18671's follow-up adds)
# ---------------------------------------------------------------------------
def _write_cron_jobs(home: Path, jobs: list) -> Path:
"""Write a synthetic cron/jobs.json under HERMES_HOME. Returns the path.
Mirrors cron.jobs.save_jobs() wrapper shape: `{"jobs": [...], "updated_at": ...}`.
"""
cron_dir = home / "cron"
cron_dir.mkdir(parents=True, exist_ok=True)
path = cron_dir / "jobs.json"
path.write_text(
json.dumps({"jobs": jobs, "updated_at": "2026-05-01T00:00:00Z"}, indent=2),
encoding="utf-8",
)
return path
def _reload_cron_jobs(home: Path):
    """Return a freshly (re)loaded ``cron.jobs`` module.

    ``hermes_constants`` is reloaded first so that cron.jobs' module-level
    HERMES_DIR picks up the tmp HOME. *home* itself is not read here.
    """
    import hermes_constants

    importlib.reload(hermes_constants)

    # Only reload when the module was imported before this call; a first
    # import already executes the module body.
    previously_loaded = "cron.jobs" in sys.modules
    import cron.jobs as cj

    if previously_loaded:
        importlib.reload(cj)
    return cj
def test_snapshot_includes_cron_jobs(backup_env):
    """A snapshot taken with cron/jobs.json present copies it and records it in the manifest."""
    cb = backup_env["cb"]
    _write_skill(backup_env["skills"], "alpha")
    cron_jobs = [
        {"id": "job-a", "name": "a", "schedule": "every 1h", "skills": ["alpha"]},
        {"id": "job-b", "name": "b", "schedule": "every 2h", "skill": "alpha"},
    ]
    _write_cron_jobs(backup_env["home"], cron_jobs)

    snapshot_dir = cb.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    assert (snapshot_dir / cb.CRON_JOBS_FILENAME).exists()

    manifest_text = (snapshot_dir / "manifest.json").read_text(encoding="utf-8")
    cron_entry = json.loads(manifest_text)["cron_jobs"]
    assert cron_entry["backed_up"] is True
    assert cron_entry["jobs_count"] == 2
def test_snapshot_without_cron_jobs_file_still_succeeds(backup_env):
    """Without cron/jobs.json the snapshot still succeeds and the manifest records the absence."""
    cb = backup_env["cb"]
    _write_skill(backup_env["skills"], "alpha")
    # No cron/jobs.json is created on purpose.

    snapshot_dir = cb.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    assert not (snapshot_dir / cb.CRON_JOBS_FILENAME).exists()

    manifest_text = (snapshot_dir / "manifest.json").read_text(encoding="utf-8")
    cron_entry = json.loads(manifest_text)["cron_jobs"]
    assert cron_entry["backed_up"] is False
    assert "cron/jobs.json" in cron_entry["reason"]
def test_snapshot_cron_jobs_malformed_json_still_captured(backup_env):
    """An unparseable jobs.json is copied verbatim (fidelity over validation).

    The manifest must still mark it as backed up, report zero jobs, and
    include a ``parse_warning`` entry.
    """
    cb = backup_env["cb"]
    _write_skill(backup_env["skills"], "alpha")

    cron_dir = backup_env["home"] / "cron"
    cron_dir.mkdir()
    (cron_dir / "jobs.json").write_text("{oh no", encoding="utf-8")

    snapshot_dir = cb.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    # The raw bytes are preserved even though they are not valid JSON.
    assert (snapshot_dir / cb.CRON_JOBS_FILENAME).read_text() == "{oh no"

    manifest_text = (snapshot_dir / "manifest.json").read_text(encoding="utf-8")
    cron_entry = json.loads(manifest_text)["cron_jobs"]
    assert cron_entry["backed_up"] is True
    assert cron_entry["jobs_count"] == 0
    assert "parse_warning" in cron_entry
def test_rollback_restores_cron_skill_links(backup_env):
    """End-to-end: rollback undoes a curator-style rewrite of a job's skills.

    A job is snapshotted with skills [alpha, beta], rewritten in place to
    [umbrella], and must read [alpha, beta] again after rollback.
    """
    cb = backup_env["cb"]
    home = backup_env["home"]
    for skill_name in ("alpha", "beta", "umbrella"):
        _write_skill(backup_env["skills"], skill_name)

    cj = _reload_cron_jobs(home)
    cj.create_job(
        name="weekly",
        prompt="p",
        schedule="every 7d",
        skills=["alpha", "beta"],
    )

    snapshot_dir = cb.snapshot_skills(reason="pre-curator-run")
    assert snapshot_dir is not None

    # Simulate the curator's in-place cron rewrite after consolidation.
    cj.rewrite_skill_refs(
        consolidated={"alpha": "umbrella", "beta": "umbrella"},
        pruned=[],
    )
    jobs_after_curator = cj.load_jobs()
    assert jobs_after_curator[0]["skills"] == ["umbrella"]

    # Roll back to the snapshot.
    ok, msg, _ = cb.rollback(backup_id=snapshot_dir.name)
    assert ok, msg
    assert "cron links" in msg

    jobs_after_rollback = cj.load_jobs()
    assert jobs_after_rollback[0]["skills"] == ["alpha", "beta"]
def test_rollback_only_touches_skill_fields(backup_env):
    """Rollback restores ``skills`` and leaves every other job field alone.

    Name, schedule, timestamps and prompt are live state: values changed
    after the snapshot must survive the rollback unchanged.
    """
    cb = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")

    # Written by hand rather than via create_job for exact field control.
    snapshot_time_job = {
        "id": "stable-id",
        "name": "original-name",
        "prompt": "original prompt",
        "schedule": "every 1h",
        "skills": ["alpha"],
        "enabled": True,
        "last_run_at": "2026-04-01T00:00:00Z",
    }
    _write_cron_jobs(home, [snapshot_time_job])

    snapshot_dir = cb.snapshot_skills(reason="pre-curator-run")
    assert snapshot_dir is not None

    # Activity AFTER the snapshot: rename, reschedule, new timestamp, and a
    # curator-style rewrite of the skills list.
    cj = _reload_cron_jobs(home)
    live_jobs = cj.load_jobs()
    live_jobs[0].update(
        {
            "name": "renamed-since-snapshot",
            "schedule": "every 30m",
            "last_run_at": "2026-05-01T12:00:00Z",
            "skills": ["umbrella"],  # pretend curator did this
        }
    )
    cj.save_jobs(live_jobs)

    ok, _, _ = cb.rollback(backup_id=snapshot_dir.name)
    assert ok

    restored_job = cj.load_jobs()[0]
    # skills: restored
    assert restored_job["skills"] == ["alpha"]
    # everything else: untouched (live state preserved)
    assert restored_job["name"] == "renamed-since-snapshot"
    assert restored_job["schedule"] == "every 30m"
    assert restored_job["last_run_at"] == "2026-05-01T12:00:00Z"
    assert restored_job["prompt"] == "original prompt"
def test_rollback_skips_jobs_the_user_deleted(backup_env):
    """A job deleted after the snapshot is not resurrected by rollback.

    The user's delete is a later, explicit choice, so only the surviving
    job may be present afterwards.
    """
    cb = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")
    _write_cron_jobs(
        home,
        [
            {"id": "keep-me", "name": "keep", "schedule": "every 1h", "skills": ["alpha"]},
            {"id": "delete-me", "name": "gone", "schedule": "every 1h", "skills": ["alpha"]},
        ],
    )
    snapshot_dir = cb.snapshot_skills(reason="pre-curator-run")

    # The user removes one job after the snapshot was taken.
    cj = _reload_cron_jobs(home)
    surviving = [job for job in cj.load_jobs() if job["id"] != "delete-me"]
    cj.save_jobs(surviving)

    ok, _, _ = cb.rollback(backup_id=snapshot_dir.name)
    assert ok

    live_ids = {job["id"] for job in cj.load_jobs()}
    assert "keep-me" in live_ids
    assert "delete-me" not in live_ids  # not resurrected
def test_rollback_leaves_new_jobs_untouched(backup_env):
    """A job created after the snapshot passes through rollback unchanged."""
    cb = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")
    _write_cron_jobs(
        home,
        [{"id": "original", "name": "o", "schedule": "every 1h", "skills": ["alpha"]}],
    )
    snapshot_dir = cb.snapshot_skills(reason="pre-curator-run")

    cj = _reload_cron_jobs(home)
    live_jobs = cj.load_jobs()
    late_job = {
        "id": "new-after-snapshot",
        "name": "new",
        "schedule": "every 15m",
        "skills": ["brand-new-skill"],
    }
    live_jobs.append(late_job)
    cj.save_jobs(live_jobs)

    ok, _, _ = cb.rollback(backup_id=snapshot_dir.name)
    assert ok

    jobs_by_id = {job["id"]: job for job in cj.load_jobs()}
    assert "new-after-snapshot" in jobs_by_id
    # The late job keeps the fields it was created with.
    survivor = jobs_by_id["new-after-snapshot"]
    assert survivor["skills"] == ["brand-new-skill"]
    assert survivor["schedule"] == "every 15m"
def test_rollback_with_snapshot_missing_cron_succeeds(backup_env):
    """Rollback from a snapshot without cron-jobs.json succeeds and leaves jobs alone.

    Older snapshots (created before this feature shipped) have no
    cron-jobs.json; rollback must not error out, and a cron job created
    later must be left exactly as it is.
    """
    cb = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")

    # No cron/jobs.json exists at snapshot time — simulates a pre-feature snapshot.
    snapshot_dir = cb.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    assert not (snapshot_dir / cb.CRON_JOBS_FILENAME).exists()

    # A cron job appears only after the snapshot.
    _write_cron_jobs(
        home,
        [{"id": "later-job", "name": "l", "schedule": "every 1h", "skills": ["x"]}],
    )

    ok, msg, _ = cb.rollback(backup_id=snapshot_dir.name)
    # The main rollback still succeeds.
    assert ok, msg

    # jobs.json is untouched: there was nothing to restore from.
    cj = _reload_cron_jobs(home)
    first_job = cj.load_jobs()[0]
    assert first_job["id"] == "later-job"
    assert first_job["skills"] == ["x"]
def test_restore_cron_skill_links_standalone(backup_env):
    """Unit-level check of ``_restore_cron_skill_links`` and its report structure.

    Uses a hand-built snapshot directory instead of a full rollback, covering
    one rewritten job, one unchanged job, one deleted job and one new job.
    """
    cb = backup_env["cb"]
    home = backup_env["home"]

    # Hand-built snapshot directory containing only cron-jobs.json.
    backups_dir = home / "skills" / ".curator_backups" / "fake-id"
    backups_dir.mkdir(parents=True)
    snapshot_jobs = [
        {"id": "job-1", "name": "one", "skills": ["narrow-a", "narrow-b"]},
        {"id": "job-2", "name": "two", "skill": "legacy-single"},
        {"id": "job-gone", "name": "deleted", "skills": ["whatever"]},
    ]
    (backups_dir / cb.CRON_JOBS_FILENAME).write_text(
        json.dumps(snapshot_jobs), encoding="utf-8"
    )

    # Live state: job-1 was rewritten, job-2 is unchanged, job-gone was
    # deleted, and job-new did not exist at snapshot time.
    live_jobs = [
        {"id": "job-1", "name": "one", "skills": ["umbrella"], "schedule": "every 1h"},
        {"id": "job-2", "name": "two", "skill": "legacy-single", "schedule": "every 1h"},
        {"id": "job-new", "name": "new", "skills": ["x"], "schedule": "every 1h"},
    ]
    _write_cron_jobs(home, live_jobs)
    _reload_cron_jobs(home)

    report = cb._restore_cron_skill_links(backups_dir)

    assert report["attempted"] is True
    assert report["error"] is None
    assert report["unchanged"] == 1  # job-2 matched

    restored = report["restored"]
    assert len(restored) == 1  # job-1 got restored
    assert restored[0]["job_id"] == "job-1"
    assert restored[0]["to"]["skills"] == ["narrow-a", "narrow-b"]

    skipped = report["skipped_missing"]
    assert len(skipped) == 1
    assert skipped[0]["job_id"] == "job-gone"

View file

@ -548,3 +548,266 @@ def test_reconcile_model_block_visible_in_full_report(curator_env):
md = (run_dir / "REPORT.md").read_text()
assert "duplicate content, now a subsection" in md
assert "pre-curator junk" in md
# ---------------------------------------------------------------------------
# _extract_absorbed_into_declarations — authoritative signal from delete calls
# ---------------------------------------------------------------------------
def test_extract_absorbed_into_picks_up_consolidation(curator_env):
    """A delete call carrying ``absorbed_into=<umbrella>`` yields one declaration."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "narrow-skill",
            "absorbed_into": "umbrella",
        }),
    }
    found = curator_env._extract_absorbed_into_declarations([delete_call])
    assert found == {
        "narrow-skill": {"into": "umbrella", "declared": True},
    }
def test_extract_absorbed_into_empty_string_is_explicit_prune(curator_env):
    """``absorbed_into=''`` is recorded as a declaration with an empty target."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "stale",
            "absorbed_into": "",
        }),
    }
    found = curator_env._extract_absorbed_into_declarations([delete_call])
    assert found == {"stale": {"into": "", "declared": True}}
def test_extract_absorbed_into_missing_arg_ignored(curator_env):
    """A delete call without ``absorbed_into`` produces no declaration."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "legacy-skill",
        }),
    }
    found = curator_env._extract_absorbed_into_declarations([delete_call])
    assert found == {}
def test_extract_absorbed_into_ignores_non_delete_actions(curator_env):
    """A non-delete action never yields a declaration, even with ``absorbed_into`` set."""
    patch_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "patch",
            "name": "umbrella",
            "old_string": "...",
            "new_string": "...",
            "absorbed_into": "something",  # bogus on non-delete, must be ignored
        }),
    }
    found = curator_env._extract_absorbed_into_declarations([patch_call])
    assert found == {}
def test_extract_absorbed_into_accepts_dict_arguments(curator_env):
    """``arguments`` supplied as a dict instead of a JSON string is still understood."""
    delete_call = {
        "name": "skill_manage",
        "arguments": {
            "action": "delete",
            "name": "narrow",
            "absorbed_into": "umbrella",
        },
    }
    found = curator_env._extract_absorbed_into_declarations([delete_call])
    assert found == {"narrow": {"into": "umbrella", "declared": True}}
def test_extract_absorbed_into_strips_whitespace(curator_env):
    """Surrounding whitespace is stripped from both the skill name and the target."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": " narrow ",
            "absorbed_into": " umbrella ",
        }),
    }
    found = curator_env._extract_absorbed_into_declarations([delete_call])
    assert found == {"narrow": {"into": "umbrella", "declared": True}}
def test_extract_absorbed_into_ignores_non_skill_manage_calls(curator_env):
    """Calls to tools other than ``skill_manage`` contribute no declarations."""
    unrelated_calls = [
        {"name": "terminal", "arguments": json.dumps({"command": "ls"})},
        {"name": "read_file", "arguments": json.dumps({"path": "/tmp/x"})},
    ]
    found = curator_env._extract_absorbed_into_declarations(unrelated_calls)
    assert found == {}
def test_extract_absorbed_into_handles_malformed_arguments(curator_env):
    """Unparseable, ``None`` or missing ``arguments`` are tolerated and yield nothing."""
    broken_calls = [
        {"name": "skill_manage", "arguments": "{not json"},
        {"name": "skill_manage", "arguments": None},
        {"name": "skill_manage"},  # no arguments key at all
    ]
    found = curator_env._extract_absorbed_into_declarations(broken_calls)
    assert found == {}
# ---------------------------------------------------------------------------
# _reconcile_classification with absorbed_into declarations (authoritative)
# ---------------------------------------------------------------------------
def test_reconcile_absorbed_into_beats_everything_else(curator_env):
    """A declared ``absorbed_into`` wins when the YAML block and heuristic disagree.

    This is the exact #18671 regression: the model forgets to emit the YAML
    summary block, the heuristic's substring match misses because the
    umbrella's patch content doesn't literally contain the old skill's
    slug. Previously this fell through to 'no-evidence fallback' prune,
    which dropped the cron ref instead of rewriting. With absorbed_into
    declared, the model tells us directly.
    """
    declarations = {
        "pr-review-format": {"into": "hermes-agent-dev", "declared": True},
    }
    result = curator_env._reconcile_classification(
        removed=["pr-review-format"],
        heuristic={"consolidated": [], "pruned": [{"name": "pr-review-format"}]},
        model_block={"consolidations": [], "prunings": []},  # model forgot YAML block
        destinations={"hermes-agent-dev"},
        absorbed_declarations=declarations,
    )

    assert len(result["consolidated"]) == 1
    assert result["pruned"] == []

    entry = result["consolidated"][0]
    assert entry["name"] == "pr-review-format"
    assert entry["into"] == "hermes-agent-dev"
    assert "absorbed_into" in entry["source"]
def test_reconcile_absorbed_into_empty_is_explicit_prune(curator_env):
    """A declaration with an empty target is classified as a model-declared prune."""
    declarations = {"stale": {"into": "", "declared": True}}
    result = curator_env._reconcile_classification(
        removed=["stale"],
        heuristic={"consolidated": [], "pruned": [{"name": "stale"}]},
        model_block={"consolidations": [], "prunings": []},
        destinations=set(),
        absorbed_declarations=declarations,
    )

    assert result["consolidated"] == []
    assert len(result["pruned"]) == 1
    assert "model-declared prune" in result["pruned"][0]["source"]
def test_reconcile_absorbed_into_nonexistent_target_falls_through(curator_env):
    """A declared umbrella missing from ``destinations`` defers to heuristic/YAML logic.

    Shouldn't happen in practice (the tool validates at delete time) but
    the reconciler is defensive.
    """
    heuristic_result = {
        "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}],
        "pruned": [],
    }
    declarations = {"thing": {"into": "ghost-umbrella", "declared": True}}
    result = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic_result,
        model_block={"consolidations": [], "prunings": []},
        destinations={"real-umbrella"},
        absorbed_declarations=declarations,
    )

    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["into"] == "real-umbrella"
    assert "tool-call audit" in entry["source"]
def test_reconcile_declaration_preserves_yaml_reason(curator_env):
    """A YAML ``reason`` is kept when ``absorbed_into`` was also declared.

    The declaration decides the classification; the reason from the YAML
    block carries through so REPORT.md still has it.
    """
    yaml_block = {
        "consolidations": [{
            "from": "narrow",
            "into": "umbrella",
            "reason": "duplicate of umbrella's main content",
        }],
        "prunings": [],
    }
    declarations = {"narrow": {"into": "umbrella", "declared": True}}
    result = curator_env._reconcile_classification(
        removed=["narrow"],
        heuristic={"consolidated": [], "pruned": []},
        model_block=yaml_block,
        destinations={"umbrella"},
        absorbed_declarations=declarations,
    )

    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["into"] == "umbrella"
    assert "absorbed_into" in entry["source"]
    assert entry["reason"] == "duplicate of umbrella's main content"
def test_reconcile_without_declarations_preserves_legacy_behavior(curator_env):
    """Backward compat: omitting ``absorbed_declarations`` still yields the heuristic result."""
    heuristic_result = {
        "consolidated": [{"name": "thing", "into": "umbrella", "evidence": "..."}],
        "pruned": [],
    }
    # absorbed_declarations is left out on purpose to exercise its default.
    result = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic_result,
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella"},
    )

    assert len(result["consolidated"]) == 1
    assert result["consolidated"][0]["into"] == "umbrella"
def test_reconcile_mixed_declarations_and_legacy_calls(curator_env):
    """Declared and undeclared deletes in one run are each routed correctly.

    Declared ones use the authoritative path; the others fall through to
    the YAML/heuristic logic.
    """
    heuristic_result = {
        "consolidated": [
            {"name": "legacy-cons", "into": "umbrella-a", "evidence": "..."},
        ],
        "pruned": [{"name": "legacy-prune"}],
    }
    declarations = {
        "declared-cons": {"into": "umbrella-b", "declared": True},
        "declared-prune": {"into": "", "declared": True},
    }
    result = curator_env._reconcile_classification(
        removed=["declared-cons", "declared-prune", "legacy-cons", "legacy-prune"],
        heuristic=heuristic_result,
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella-a", "umbrella-b"},
        absorbed_declarations=declarations,
    )

    consolidated = {entry["name"]: entry for entry in result["consolidated"]}
    pruned = {entry["name"]: entry for entry in result["pruned"]}

    # Declared consolidation: authoritative path.
    assert "declared-cons" in consolidated
    assert consolidated["declared-cons"]["into"] == "umbrella-b"
    assert "absorbed_into" in consolidated["declared-cons"]["source"]

    # Undeclared consolidation: heuristic path.
    assert "legacy-cons" in consolidated
    assert consolidated["legacy-cons"]["into"] == "umbrella-a"
    assert "tool-call audit" in consolidated["legacy-cons"]["source"]

    # Declared prune versus fallback prune.
    assert "declared-prune" in pruned
    assert "model-declared prune" in pruned["declared-prune"]["source"]
    assert "legacy-prune" in pruned
    assert "no-evidence fallback" in pruned["legacy-prune"]["source"]

View file

@ -0,0 +1,284 @@
"""Tests for OpenRouter response caching header injection."""
from types import SimpleNamespace
from unittest.mock import patch
import pytest
# ---------------------------------------------------------------------------
# build_or_headers
# ---------------------------------------------------------------------------
class TestBuildOrHeaders:
"""Test the build_or_headers() helper in agent/auxiliary_client.py."""
def test_base_attribution_always_present(self):
"""Attribution headers must always be included regardless of cache setting."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": False})
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
assert headers["X-OpenRouter-Title"] == "Hermes Agent"
assert headers["X-OpenRouter-Categories"] == "productivity,cli-agent"
def test_cache_enabled(self):
"""When response_cache is True, X-OpenRouter-Cache header is set."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True})
assert headers["X-OpenRouter-Cache"] == "true"
def test_cache_disabled(self):
"""When response_cache is False, no cache header is sent."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": False})
assert "X-OpenRouter-Cache" not in headers
assert "X-OpenRouter-Cache-TTL" not in headers
def test_cache_disabled_by_default_empty_config(self):
"""Empty config dict means no cache headers (response_cache defaults to False)."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={})
assert "X-OpenRouter-Cache" not in headers
def test_ttl_default(self):
"""Default TTL (300) is included when cache is enabled."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 300})
assert headers["X-OpenRouter-Cache-TTL"] == "300"
def test_ttl_custom(self):
"""Custom TTL values within range are sent."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 3600})
assert headers["X-OpenRouter-Cache-TTL"] == "3600"
def test_ttl_max(self):
"""Maximum TTL (86400) is accepted."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 86400})
assert headers["X-OpenRouter-Cache-TTL"] == "86400"
def test_ttl_out_of_range_too_high(self):
"""TTL above 86400 is silently ignored (no TTL header sent)."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 100000})
assert "X-OpenRouter-Cache-TTL" not in headers
# But cache is still enabled
assert headers["X-OpenRouter-Cache"] == "true"
def test_ttl_out_of_range_zero(self):
"""TTL of 0 is below minimum — no TTL header sent."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 0})
assert "X-OpenRouter-Cache-TTL" not in headers
def test_ttl_negative(self):
"""Negative TTL is ignored."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": -5})
assert "X-OpenRouter-Cache-TTL" not in headers
def test_ttl_not_a_number(self):
"""Non-numeric TTL is ignored."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": "five"})
assert "X-OpenRouter-Cache-TTL" not in headers
def test_ttl_float_truncated(self):
"""Float TTL values are truncated to int."""
from agent.auxiliary_client import build_or_headers
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600.7})
assert headers["X-OpenRouter-Cache-TTL"] == "600"
def test_returns_fresh_dict(self):
    """Two calls with the same config give equal but distinct dicts."""
    from agent.auxiliary_client import build_or_headers

    shared_cfg = {"response_cache": True}
    first = build_or_headers(or_config=shared_cfg)
    second = build_or_headers(or_config=shared_cfg)
    # Distinct objects, so mutating one result cannot leak into the other.
    assert first is not second
    assert first == second
def test_none_config_falls_back_to_load_config(self):
    """With ``or_config=None`` the settings are taken from ``load_config()``."""
    from agent.auxiliary_client import build_or_headers

    loaded = {
        "openrouter": {"response_cache": True, "response_cache_ttl": 900},
    }
    with patch("hermes_cli.config.load_config", return_value=loaded):
        result = build_or_headers(or_config=None)
    assert result["X-OpenRouter-Cache"] == "true"
    assert result["X-OpenRouter-Cache-TTL"] == "900"
def test_none_config_load_config_fails_gracefully(self):
    """A raising ``load_config()`` still leaves the base headers in place."""
    from agent.auxiliary_client import build_or_headers

    failing_loader = patch(
        "hermes_cli.config.load_config", side_effect=RuntimeError("boom")
    )
    with failing_loader:
        result = build_or_headers(or_config=None)
    # Attribution header present, cache header absent.
    assert "HTTP-Referer" in result
    assert "X-OpenRouter-Cache" not in result
# ---------------------------------------------------------------------------
# Environment variable overrides
# ---------------------------------------------------------------------------
class TestEnvVarOverrides:
    """Environment variables take precedence over config for response caching."""

    def test_env_enables_cache(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE=true turns caching on despite a disabling config."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
        result = build_or_headers(or_config={"response_cache": False})
        assert result["X-OpenRouter-Cache"] == "true"

    def test_env_disables_cache(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE=false turns caching off despite an enabling config."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "false")
        result = build_or_headers(or_config={"response_cache": True})
        assert "X-OpenRouter-Cache" not in result

    @pytest.mark.parametrize("value", ["1", "true", "TRUE", "yes", "Yes", "on"])
    def test_truthy_values(self, monkeypatch, value):
        """Each accepted truthy spelling enables caching."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
        result = build_or_headers(or_config={})
        assert result["X-OpenRouter-Cache"] == "true"

    @pytest.mark.parametrize("value", ["0", "false", "no", "off", "maybe", ""])
    def test_non_truthy_values(self, monkeypatch, value):
        """Non-truthy spellings never enable caching."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
        # An empty env var falls through to the config, so the config must
        # itself say "off"; every other value is expected to override a
        # config that says "on".
        config_says_on = value != ""
        result = build_or_headers(or_config={"response_cache": config_says_on})
        assert "X-OpenRouter-Cache" not in result

    def test_env_ttl_overrides_config(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE_TTL wins over the TTL given in the config."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", "1800")
        result = build_or_headers(or_config={"response_cache_ttl": 300})
        assert result["X-OpenRouter-Cache-TTL"] == "1800"

    @pytest.mark.parametrize("ttl", ["0", "86401", "abc", "-1", "12.5"])
    def test_invalid_env_ttl_dropped(self, monkeypatch, ttl):
        """An invalid env TTL is dropped; the cache header is still sent."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "1")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
        result = build_or_headers(or_config={})
        assert result["X-OpenRouter-Cache"] == "true"
        assert "X-OpenRouter-Cache-TTL" not in result

    @pytest.mark.parametrize("ttl", ["1", "300", "86400"])
    def test_valid_env_ttl_boundaries(self, monkeypatch, ttl):
        """The boundary TTLs 1, 300 and 86400 are passed through unchanged."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "yes")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
        result = build_or_headers(or_config={})
        assert result["X-OpenRouter-Cache-TTL"] == ttl

    def test_no_env_vars_falls_through_to_config(self, monkeypatch):
        """With both env vars unset, the config alone decides the headers."""
        from agent.auxiliary_client import build_or_headers

        for name in ("HERMES_OPENROUTER_CACHE", "HERMES_OPENROUTER_CACHE_TTL"):
            monkeypatch.delenv(name, raising=False)
        or_config = {"response_cache": True, "response_cache_ttl": 600}
        result = build_or_headers(or_config=or_config)
        assert result["X-OpenRouter-Cache"] == "true"
        assert result["X-OpenRouter-Cache-TTL"] == "600"
class TestDefaultConfig:
    """Check the ``openrouter`` section shipped in DEFAULT_CONFIG."""

    def test_openrouter_section_exists(self):
        """The section exists with caching on and a 300 second TTL."""
        from hermes_cli.config import DEFAULT_CONFIG

        assert "openrouter" in DEFAULT_CONFIG
        section = DEFAULT_CONFIG["openrouter"]
        assert section["response_cache"] is True
        assert section["response_cache_ttl"] == 300
# ---------------------------------------------------------------------------
# _check_openrouter_cache_status
# ---------------------------------------------------------------------------
class TestCheckOpenrouterCacheStatus:
    """Exercise ``AIAgent._check_openrouter_cache_status`` with stub responses."""

    def _make_agent(self):
        """Return a bare AIAgent that only has the hit counter initialised."""
        from run_agent import AIAgent

        # object.__new__ bypasses AIAgent.__init__; the counter is the only
        # attribute these tests set up by hand.
        bare = object.__new__(AIAgent)
        bare._or_cache_hits = 0
        return bare

    def test_hit_increments_counter(self):
        agent = self._make_agent()
        response = SimpleNamespace(headers={"x-openrouter-cache-status": "HIT"})
        # Every HIT response is expected to bump the counter by one.
        for expected_hits in (1, 2):
            agent._check_openrouter_cache_status(response)
            assert agent._or_cache_hits == expected_hits

    def test_miss_does_not_increment(self):
        agent = self._make_agent()
        response = SimpleNamespace(headers={"x-openrouter-cache-status": "MISS"})
        agent._check_openrouter_cache_status(response)
        hits = getattr(agent, "_or_cache_hits", 0)
        assert hits == 0

    def test_no_header_is_noop(self):
        agent = self._make_agent()
        response = SimpleNamespace(headers={})
        agent._check_openrouter_cache_status(response)
        hits = getattr(agent, "_or_cache_hits", 0)
        assert hits == 0

    def test_none_response_is_safe(self):
        # Passing None must simply not raise.
        self._make_agent()._check_openrouter_cache_status(None)

    def test_no_headers_attr_is_safe(self):
        # A response object without a ``headers`` attribute must not raise.
        self._make_agent()._check_openrouter_cache_status(object())

    def test_case_insensitive(self):
        agent = self._make_agent()
        response = SimpleNamespace(headers={"x-openrouter-cache-status": "hit"})
        agent._check_openrouter_cache_status(response)
        assert agent._or_cache_hits == 1

View file

@ -125,6 +125,58 @@ class TestScanSkillCommands:
assert "/knowledge-brain" in result
assert result["/knowledge-brain"]["name"] == "knowledge-brain"
def test_get_skill_commands_rescans_when_platform_scope_changes(self, tmp_path):
    """The cached command map must be rebuilt when HERMES_PLATFORM changes.

    Regression test for #14536: a gateway process serving Telegram
    and Discord concurrently would seed the process-global cache
    with whichever platform scanned first, and later
    ``get_skill_commands()`` calls from the other platform silently
    inherited that filter.
    """
    import agent.skill_commands as sc_mod
    from agent.skill_commands import get_skill_commands

    def _disabled_skills():
        # Each platform disables exactly its own "<platform>-only" skill.
        per_platform = {
            "telegram": {"telegram-only"},
            "discord": {"discord-only"},
        }
        return per_platform.get(os.getenv("HERMES_PLATFORM"), set())

    def _scan_as(platform):
        # Snapshot the command map while HERMES_PLATFORM is set to *platform*.
        with patch.dict(os.environ, {"HERMES_PLATFORM": platform}):
            return dict(get_skill_commands())

    with (
        patch("tools.skills_tool.SKILLS_DIR", tmp_path),
        patch("tools.skills_tool._get_disabled_skill_names", side_effect=_disabled_skills),
        patch.object(sc_mod, "_skill_commands", {}),
        patch.object(sc_mod, "_skill_commands_platform", None),
    ):
        for skill_name in ("shared", "telegram-only", "discord-only"):
            _make_skill(tmp_path, skill_name)

        telegram_commands = _scan_as("telegram")
        assert "/shared" in telegram_commands
        assert "/discord-only" in telegram_commands
        assert "/telegram-only" not in telegram_commands

        discord_commands = _scan_as("discord")
        assert "/shared" in discord_commands
        assert "/telegram-only" in discord_commands
        assert "/discord-only" not in discord_commands

        # Going back to telegram has to trigger another rescan rather than
        # re-serving the discord view that was cached a moment ago.
        telegram_again = _scan_as("telegram")
        assert "/telegram-only" not in telegram_again
        assert "/discord-only" in telegram_again
def test_special_chars_stripped_from_cmd_key(self, tmp_path):
"""Skill names with +, /, or other special chars produce clean cmd keys."""

View file

@ -46,6 +46,29 @@ class TestResolveOrigin:
job = {"origin": {}}
assert _resolve_origin(job) is None
@pytest.mark.parametrize(
    "non_dict_origin",
    [
        "combined-digest-replaces-x-and-y-20260503",
        123,
        ["telegram", "12345"],
        ("platform", "chat_id"),
        42.0,
    ],
)
def test_non_dict_origin_returns_none_instead_of_crashing(self, non_dict_origin):
    """An ``origin`` that is not a dict resolves to ``None``.

    Such values (provenance strings from hand-edited or migrated
    jobs.json) used to crash the scheduler tick on
    ``origin.get('platform')`` with
    ``'str' object has no attribute 'get'`` (#18722).

    Before the guard, a job in this state failed on every fire attempt:
    ``mark_job_run`` recorded the error, but the next tick re-loaded the
    same poisoned origin and crashed identically.
    """
    resolved = _resolve_origin({"origin": non_dict_origin})
    assert resolved is None
class TestResolveDeliveryTarget:
def test_origin_delivery_preserves_thread_id(self):
@ -118,6 +141,16 @@ class TestResolveDeliveryTarget:
"thread_id": None,
}
def test_bare_platform_delivery_preserves_home_thread_id(self, monkeypatch):
    """A bare ``deliver: discord`` picks up the home channel and its thread id."""
    monkeypatch.setenv("DISCORD_HOME_CHANNEL", "parent-42")
    monkeypatch.setenv("DISCORD_HOME_CHANNEL_THREAD_ID", "topic-7")

    expected = {
        "platform": "discord",
        "chat_id": "parent-42",
        "thread_id": "topic-7",
    }
    target = _resolve_delivery_target({"deliver": "discord"})
    assert target == expected
def test_explicit_telegram_topic_target_with_thread_id(self):
"""deliver: 'telegram:chat_id:thread_id' parses correctly."""
job = {

View file

@ -12,6 +12,7 @@ class RestartTestAdapter(BasePlatformAdapter):
def __init__(self):
    """Set up an enabled Telegram-typed adapter with empty send logs."""
    config = PlatformConfig(enabled=True, token="***")
    super().__init__(config, Platform.TELEGRAM)
    # ``sent`` keeps message bodies only; ``sent_calls`` keeps
    # (chat_id, content, metadata) for every send() call.
    self.sent: list[str] = []
    self.sent_calls: list[tuple[str, str, object]] = []
async def connect(self):
    """Report a successful connection without performing any work."""
    connected = True
    return connected
@ -21,6 +22,7 @@ class RestartTestAdapter(BasePlatformAdapter):
async def send(self, chat_id, content, reply_to=None, metadata=None):
    """Record the outgoing message and return a successful SendResult.

    ``reply_to`` is accepted but not recorded.
    """
    self.sent.append(content)
    call_record = (chat_id, content, metadata)
    self.sent_calls.append(call_record)
    return SendResult(success=True, message_id="1")
async def send_typing(self, chat_id, metadata=None):
@ -30,12 +32,17 @@ class RestartTestAdapter(BasePlatformAdapter):
return {"id": chat_id}
def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> SessionSource:
def make_restart_source(
chat_id: str = "123456",
chat_type: str = "dm",
thread_id: str | None = None,
) -> SessionSource:
return SessionSource(
platform=Platform.TELEGRAM,
chat_id=chat_id,
chat_type=chat_type,
user_id="u1",
thread_id=thread_id,
)
@ -81,6 +88,15 @@ def make_restart_runner(
runner._handle_restart_command = GatewayRunner._handle_restart_command.__get__(
runner, GatewayRunner
)
runner._handle_set_home_command = GatewayRunner._handle_set_home_command.__get__(
runner, GatewayRunner
)
runner._send_restart_notification = GatewayRunner._send_restart_notification.__get__(
runner, GatewayRunner
)
runner._send_home_channel_startup_notifications = (
GatewayRunner._send_home_channel_startup_notifications.__get__(runner, GatewayRunner)
)
runner._status_action_label = GatewayRunner._status_action_label.__get__(
runner, GatewayRunner
)

View file

@ -49,9 +49,10 @@ class TestSuspendRecentlyActive:
count = store.suspend_recently_active()
assert count == 1
# Re-fetch — should be suspended now
# Re-fetch — should be resume_pending (preserved, not wiped)
refreshed = store.get_or_create_session(source)
assert refreshed.was_auto_reset
assert refreshed.resume_pending
assert refreshed.session_id == entry.session_id # same session preserved
def test_does_not_suspend_old_sessions(self, tmp_path):
store = _make_store(tmp_path)
@ -66,21 +67,22 @@ class TestSuspendRecentlyActive:
count = store.suspend_recently_active(max_age_seconds=120)
assert count == 0
def test_already_suspended_not_double_counted(self, tmp_path):
def test_already_resume_pending_not_double_counted(self, tmp_path):
store = _make_store(tmp_path)
source = _make_source()
entry = store.get_or_create_session(source)
# Suspend once
# Mark resume_pending once
count1 = store.suspend_recently_active()
assert count1 == 1
# Create a new session (the old one got reset on next access)
# Re-fetch returns the SAME session (preserved, not reset)
entry2 = store.get_or_create_session(source)
assert entry2.session_id == entry.session_id
# Suspend again — the new session is recent but not yet suspended
# Second call skips already-resume_pending entries
count2 = store.suspend_recently_active()
assert count2 == 1
assert count2 == 0
# ---------------------------------------------------------------------------
@ -180,11 +182,11 @@ class TestCleanShutdownMarker:
else:
store.suspend_recently_active()
# Session SHOULD be suspended (crash recovery)
# Session SHOULD be resume_pending (crash recovery preserves history)
with store._lock:
store._ensure_loaded_locked()
suspended_count = sum(1 for e in store._entries.values() if e.suspended)
assert suspended_count == 1, "Session should be suspended after crash (no marker)"
resume_count = sum(1 for e in store._entries.values() if e.resume_pending)
assert resume_count == 1, "Session should be resume_pending after crash (no marker)"
def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch):
"""stop(restart=True) should also write the marker."""

View file

@ -0,0 +1,166 @@
"""Regression tests for the config.yaml → env var bridge in gateway/run.py.
Guards against the 60-vs-500 bug where a stale `.env HERMES_MAX_ITERATIONS=60`
entry silently shadowed `agent.max_turns: 500` in config.yaml because the
bridge used `if X not in os.environ` guards. After PR#18413 the bridge
treats config.yaml as authoritative and unconditionally overwrites .env
values for `agent.*`, `display.*`, `timezone`, and `security.*` keys.
"""
from __future__ import annotations
import os
import subprocess
import sys
import textwrap
from pathlib import Path
import pytest
PROJECT_ROOT = Path(__file__).resolve().parents[2]
def _run_gateway_import(hermes_home: Path, initial_env: dict[str, str]) -> dict[str, str]:
"""Import gateway.run in a clean subprocess and return the post-import env.
The bridge runs at module-import time, so simply importing is enough
to exercise it. Running in a subprocess isolates the test from other
import side effects and makes the "what ends up in os.environ" check
deterministic.

Args:
hermes_home: Directory exported to the child process as ``HERMES_HOME``.
initial_env: Variables the child environment starts from, before
``HERMES_HOME`` and the pass-through variables are added.

Returns:
A mapping with one entry per probed ``HERMES_*`` variable that was set
in the child after the import; variables that were unset are omitted.

Calls ``pytest.fail`` (including the child's stderr and stdout) when the
child exits with a non-zero status.
"""
# Child script: import the module, then print ``KEY=VALUE`` for every
# probed variable that is set. An import failure exits with status 2.
script = textwrap.dedent(
f"""
import os, sys
sys.path.insert(0, {str(PROJECT_ROOT)!r})
try:
from gateway import run # noqa: F401 — module import triggers bridge
except Exception as exc:
print(f"IMPORT_ERROR:{{type(exc).__name__}}:{{exc}}", file=sys.stderr)
sys.exit(2)
for k in (
"HERMES_MAX_ITERATIONS",
"HERMES_AGENT_TIMEOUT",
"HERMES_AGENT_TIMEOUT_WARNING",
"HERMES_GATEWAY_BUSY_INPUT_MODE",
"HERMES_TIMEZONE",
):
v = os.environ.get(k)
if v is not None:
print(f"{{k}}={{v}}")
"""
)
# Work on a copy so the caller's mapping is never mutated.
env = dict(initial_env)
env["HERMES_HOME"] = str(hermes_home)
# Keep PATH / PYTHONPATH so venv imports resolve.
# Values already present in ``initial_env`` are not overwritten.
for k in ("PATH", "PYTHONPATH", "VIRTUAL_ENV", "HOME"):
if k in os.environ and k not in env:
env[k] = os.environ[k]
result = subprocess.run(
[sys.executable, "-c", script],
env=env,
capture_output=True,
text=True,
timeout=60,
)
if result.returncode != 0:
pytest.fail(
f"gateway.run import failed (rc={result.returncode})\n"
f"stderr:\n{result.stderr}\nstdout:\n{result.stdout}"
)
# Parse the ``KEY=VALUE`` lines; split on the first "=" only so values
# that themselves contain "=" stay intact.
out: dict[str, str] = {}
for line in result.stdout.splitlines():
if "=" in line:
k, v = line.split("=", 1)
out[k] = v
return out
def _write_config(home: Path, agent_cfg: dict | None = None, display_cfg: dict | None = None,
                  timezone: str | None = None) -> None:
    """Dump a ``config.yaml`` into *home* containing only the truthy sections.

    ``agent_cfg`` is stored under ``agent``, ``display_cfg`` under
    ``display`` and ``timezone`` under ``timezone``. Falsy arguments
    (``None``, ``{}``, ``""``) are left out of the file entirely.
    """
    import yaml

    candidates = {
        "agent": agent_cfg,
        "display": display_cfg,
        "timezone": timezone,
    }
    cfg: dict = {section: value for section, value in candidates.items() if value}
    config_path = home / "config.yaml"
    config_path.write_text(yaml.safe_dump(cfg))
def _write_env(home: Path, entries: dict[str, str]) -> None:
    """Write *entries* to ``home/.env`` as one ``KEY=VALUE`` line each.

    Args:
        home: Directory that receives the ``.env`` file.
        entries: Variable names mapped to their values, written in
            iteration order. An empty mapping produces an empty file.
    """
    lines = [f"{k}={v}\n" for k, v in entries.items()]
    # Pin the encoding: without it write_text() uses the locale default, so
    # non-ASCII values could be written in an encoding the reader of the
    # file does not expect.
    (home / ".env").write_text("".join(lines), encoding="utf-8")
@pytest.fixture
def hermes_home(tmp_path: Path) -> Path:
    """Create and return a fresh ``.hermes`` directory under ``tmp_path``."""
    root = tmp_path / ".hermes"
    root.mkdir()
    return root
def test_config_max_turns_wins_over_stale_env(hermes_home: Path) -> None:
    """Regression: ``agent.max_turns: 500`` in config.yaml must beat ``.env``'s 60."""
    _write_config(hermes_home, agent_cfg={"max_turns": 500})
    _write_env(hermes_home, {"HERMES_MAX_ITERATIONS": "60"})

    env = _run_gateway_import(hermes_home, initial_env={})
    actual = env.get("HERMES_MAX_ITERATIONS")

    assert actual == "500", (
        f"expected config.yaml max_turns=500 to win; got {actual!r}. "
        "Stale .env value is shadowing config — the bridge lost its override."
    )
def test_config_gateway_timeout_wins_over_stale_env(hermes_home: Path) -> None:
    """Both gateway timeout keys from config.yaml must override stale ``.env`` values."""
    config_timeouts = {
        "gateway_timeout": 1800,
        "gateway_timeout_warning": 900,
    }
    stale_env = {
        "HERMES_AGENT_TIMEOUT": "60",
        "HERMES_AGENT_TIMEOUT_WARNING": "30",
    }
    _write_config(hermes_home, agent_cfg=config_timeouts)
    _write_env(hermes_home, stale_env)

    env = _run_gateway_import(hermes_home, initial_env={})

    assert env.get("HERMES_AGENT_TIMEOUT") == "1800"
    assert env.get("HERMES_AGENT_TIMEOUT_WARNING") == "900"
def test_config_display_busy_input_mode_wins_over_stale_env(hermes_home: Path) -> None:
    """``display.busy_input_mode`` from config.yaml must override the ``.env`` value."""
    _write_config(hermes_home, display_cfg={"busy_input_mode": "interrupt"})
    _write_env(hermes_home, {"HERMES_GATEWAY_BUSY_INPUT_MODE": "queue"})

    env = _run_gateway_import(hermes_home, initial_env={})
    busy_mode = env.get("HERMES_GATEWAY_BUSY_INPUT_MODE")

    assert busy_mode == "interrupt"
def test_config_timezone_wins_over_stale_env(hermes_home: Path) -> None:
    """``timezone`` from config.yaml must override ``HERMES_TIMEZONE`` from ``.env``."""
    _write_config(hermes_home, timezone="America/Los_Angeles")
    _write_env(hermes_home, {"HERMES_TIMEZONE": "UTC"})

    env = _run_gateway_import(hermes_home, initial_env={})
    resolved_tz = env.get("HERMES_TIMEZONE")

    assert resolved_tz == "America/Los_Angeles"
def test_env_value_survives_when_config_omits_key(hermes_home: Path) -> None:
    """If config.yaml doesn't set max_turns, the .env value must pass through.

    The bridge only overwrites when the config key is present; an absent
    config key should NOT clobber the .env value.
    """
    # An empty agent section means max_turns is absent from the config.
    _write_config(hermes_home, agent_cfg={})
    _write_env(hermes_home, {"HERMES_MAX_ITERATIONS": "123"})

    env = _run_gateway_import(hermes_home, initial_env={})
    surviving = env.get("HERMES_MAX_ITERATIONS")

    assert surviving == "123"

View file

@ -0,0 +1,230 @@
"""Security regression tests: Discord component views honor role allowlists.
The four interactive component views (ExecApprovalView, SlashConfirmView,
UpdatePromptView, ModelPickerView) historically accepted only
``allowed_user_ids``. Deployments that configure DISCORD_ALLOWED_ROLES
without DISCORD_ALLOWED_USERS therefore had a wide-open component
surface: any guild member who could see the prompt could approve exec
commands, cancel slash confirmations, or switch the model -- even when
the same user would be rejected at the slash and on_message gates.
These tests pin the user-or-role OR semantics and the fail-closed
behavior on missing role data so the parity cannot regress.
"""
from types import SimpleNamespace
import pytest
# Trigger the shared discord mock from tests/gateway/conftest.py before
# importing the production module.
from gateway.platforms.discord import ( # noqa: E402
ExecApprovalView,
ModelPickerView,
SlashConfirmView,
UpdatePromptView,
_component_check_auth,
)
# ---------------------------------------------------------------------------
# Direct helper coverage -- the four views all delegate to this helper, so
# pinning the helper's contract pins all four call sites.
# ---------------------------------------------------------------------------
def _interaction(user_id, role_ids=None, *, drop_user=False, drop_roles=False):
    """Create a stand-in interaction carrying the given user id and roles.

    With ``drop_user`` the returned object's ``.user`` is ``None``.
    With ``drop_roles`` the user object is built without any ``.roles``
    attribute at all (DM-context Member, raw User payload).
    """
    if drop_user:
        return SimpleNamespace(user=None)

    if drop_roles:
        member = SimpleNamespace(id=user_id)
    else:
        roles = [SimpleNamespace(id=role_id) for role_id in (role_ids or [])]
        member = SimpleNamespace(id=user_id, roles=roles)
    return SimpleNamespace(user=member)
# ── back-compat: empty allowlists -> allow everyone ────────────────────────
def test_component_check_empty_allowlists_allows_everyone():
    """SECURITY-CRITICAL backwards-compat: with no allowlist configured
    (empty sets or ``None``), component interactions from anyone must
    still be allowed, so unconfigured setups do not regress."""
    interaction = _interaction(11111)
    for user_ids, role_ids in ((set(), set()), (None, None)):
        assert _component_check_auth(interaction, user_ids, role_ids) is True
# ── user allowlist ─────────────────────────────────────────────────────────
def test_component_check_user_in_user_allowlist_passes():
    """A user whose id is on the user allowlist is authorized."""
    allowed = _component_check_auth(_interaction(11111), {"11111"}, set())
    assert allowed is True
def test_component_check_user_not_in_user_allowlist_rejected():
    """A user whose id is missing from the user allowlist is rejected."""
    allowed = _component_check_auth(_interaction(99999), {"11111"}, set())
    assert allowed is False
# ── role allowlist OR semantics ────────────────────────────────────────────
def test_component_check_role_only_user_with_matching_role_passes():
    """Role-only deployment (DISCORD_ALLOWED_ROLES set, DISCORD_ALLOWED_USERS
    empty): a user carrying a matching role must pass.

    This is the regression that prompted the fix -- previously _check_auth
    allowed everyone when the user set was empty, ignoring the role
    allowlist."""
    member_with_role = _interaction(99999, role_ids=[42])
    allowed = _component_check_auth(member_with_role, set(), {42})
    assert allowed is True
def test_component_check_role_only_user_without_matching_role_rejected():
    """Role-only deployment: a user without any matching role is rejected.

    Previously this allowed everyone because allowed_user_ids was empty."""
    member_without_role = _interaction(99999, role_ids=[7, 8])
    allowed = _component_check_auth(member_without_role, set(), {42})
    assert allowed is False
def test_component_check_user_or_role_user_match():
    """With both allowlists set, a match on the user allowlist alone passes."""
    listed_user = _interaction(11111, role_ids=[7])
    allowed = _component_check_auth(listed_user, {"11111"}, {42})
    assert allowed is True
def test_component_check_user_or_role_role_match():
    """With both allowlists set, a match on the role allowlist alone passes."""
    role_holder = _interaction(99999, role_ids=[42])
    allowed = _component_check_auth(role_holder, {"11111"}, {42})
    assert allowed is True
def test_component_check_user_or_role_neither_match():
    """With both allowlists set, a user matching neither one is rejected."""
    outsider = _interaction(99999, role_ids=[7])
    allowed = _component_check_auth(outsider, {"11111"}, {42})
    assert allowed is False
# ── fail-closed on missing role data ───────────────────────────────────────
def test_component_check_role_policy_with_no_roles_attr_rejects():
    """A role allowlist is configured but interaction.user carries no
    ``.roles`` attribute (DM-context Member, raw User payload): reject,
    since a user without resolvable roles cannot satisfy a role allowlist."""
    roleless = _interaction(11111, drop_roles=True)
    allowed = _component_check_auth(roleless, set(), {42})
    assert allowed is False
def test_component_check_missing_user_with_allowlist_rejects():
    """When interaction.user is None and any allowlist is configured, the
    check fails closed instead of raising AttributeError."""
    userless = _interaction(0, drop_user=True)
    for user_ids, role_ids in (({"11111"}, set()), (set(), {42})):
        assert _component_check_auth(userless, user_ids, role_ids) is False
# ---------------------------------------------------------------------------
# View construction: every view must accept allowed_role_ids and route
# through the shared helper. Default value preserves prior call-sites.
# ---------------------------------------------------------------------------
def test_exec_approval_view_accepts_role_allowlist():
    """ExecApprovalView takes ``allowed_role_ids`` and applies it in ``_check_auth``."""
    view = ExecApprovalView(
        session_key="sess-1",
        allowed_user_ids={"11111"},
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])
    # Not on the user list, but holds an allowed role: accepted.
    assert view._check_auth(role_holder) is True
    # Matches neither the user list nor the role list: rejected.
    assert view._check_auth(outsider) is False
def test_exec_approval_view_role_default_is_empty_set():
    """Call sites passing only ``allowed_user_ids`` keep the legacy
    semantics: the role allowlist defaults to an empty set and only the
    user allowlist gates access."""
    view = ExecApprovalView(session_key="sess-1", allowed_user_ids={"11111"})
    assert view.allowed_role_ids == set()
    listed, unlisted = _interaction(11111), _interaction(99999)
    assert view._check_auth(listed) is True
    assert view._check_auth(unlisted) is False
def test_slash_confirm_view_accepts_role_allowlist():
    """SlashConfirmView takes ``allowed_role_ids`` and applies it in ``_check_auth``."""
    view = SlashConfirmView(
        session_key="sess-1",
        confirm_id="c1",
        allowed_user_ids=set(),
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])
    assert view._check_auth(role_holder) is True
    assert view._check_auth(outsider) is False
def test_update_prompt_view_accepts_role_allowlist():
    """UpdatePromptView takes ``allowed_role_ids`` and applies it in ``_check_auth``."""
    view = UpdatePromptView(
        session_key="sess-1",
        allowed_user_ids=set(),
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])
    assert view._check_auth(role_holder) is True
    assert view._check_auth(outsider) is False
def test_model_picker_view_accepts_role_allowlist():
    """ModelPickerView takes ``allowed_role_ids`` and applies it in ``_check_auth``."""

    async def _noop(*_a, **_k):
        # Placeholder selection callback.
        return ""

    picker_kwargs = dict(
        providers=[],
        current_model="m",
        current_provider="p",
        session_key="sess-1",
        on_model_selected=_noop,
        allowed_user_ids=set(),
        allowed_role_ids={42},
    )
    view = ModelPickerView(**picker_kwargs)
    assert view._check_auth(_interaction(99999, role_ids=[42])) is True
    assert view._check_auth(_interaction(99999, role_ids=[7])) is False
# ---------------------------------------------------------------------------
# Empty allowlists across views: legacy "allow everyone" must hold.
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    "view_factory",
    [
        lambda: ExecApprovalView(session_key="s", allowed_user_ids=set()),
        lambda: SlashConfirmView(session_key="s", confirm_id="c", allowed_user_ids=set()),
        lambda: UpdatePromptView(session_key="s", allowed_user_ids=set()),
    ],
)
def test_views_empty_allowlists_allow_everyone(view_factory):
    """A view built with an empty user allowlist and no role allowlist lets anyone through."""
    anyone = _interaction(99999)
    assert view_factory()._check_auth(anyone) is True
def test_model_picker_view_empty_allowlists_allow_everyone():
    """ModelPickerView without a role allowlist defaults it to an empty set and lets anyone through."""

    async def _noop(*_a, **_k):
        # Placeholder selection callback.
        return ""

    picker_kwargs = dict(
        providers=[],
        current_model="m",
        current_provider="p",
        session_key="s",
        on_model_selected=_noop,
        allowed_user_ids=set(),
    )
    view = ModelPickerView(**picker_kwargs)
    assert view.allowed_role_ids == set()
    assert view._check_auth(_interaction(99999)) is True

View file

@ -172,6 +172,69 @@ async def test_connect_only_requests_members_intent_when_needed(monkeypatch, all
await adapter.disconnect()
@pytest.mark.asyncio
async def test_reconnect_closes_previous_client_to_prevent_zombie_websocket(monkeypatch):
    """Regression for #18187: a second connect() without an intervening
    disconnect() (e.g. an in-process reconnect attempt) must close the
    previous commands.Bot before a new one is created. Otherwise two
    websockets stay alive, both fire on_message, and the user gets double
    responses with different wording.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    # Scoped locking always succeeds and releasing is a no-op.
    monkeypatch.setattr(
        "gateway.status.acquire_scoped_lock",
        lambda scope, identity, metadata=None: (True, None),
    )
    monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)

    intents = SimpleNamespace(
        message_content=False,
        dm_messages=False,
        guild_messages=False,
        members=False,
        voice_states=False,
    )
    monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents)

    class TrackedBot(FakeBot):
        """FakeBot that remembers whether close() has been awaited."""

        _closed = False

        def is_closed(self):
            return self._closed

        async def close(self):
            self._closed = True

    created: list[TrackedBot] = []

    def fake_bot_factory(*, command_prefix, intents, proxy=None, allowed_mentions=None, **_):
        # Record every bot the adapter builds, in creation order.
        new_bot = TrackedBot(intents=intents, allowed_mentions=allowed_mentions)
        created.append(new_bot)
        return new_bot

    monkeypatch.setattr(discord_platform.commands, "Bot", fake_bot_factory)
    monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())

    # Initial connect on a fresh adapter: exactly one bot, still open.
    first_ok = await adapter.connect()
    assert first_ok is True
    assert len(created) == 1
    first_bot = created[0]
    assert first_bot._closed is False, "first bot should still be open after connect()"

    # Connect again WITHOUT disconnecting, as an in-process reconnect
    # would. Left unfixed, first_bot stays open as a zombie and both bots
    # receive every Discord event.
    second_ok = await adapter.connect()
    assert second_ok is True
    assert len(created) == 2
    second_bot = created[1]

    # The earlier client has to be closed; the new one becomes the active client.
    assert first_bot._closed is True, (
        "First Discord client must be closed on re-entry of connect() to prevent "
        "zombie websocket (#18187)"
    )
    assert second_bot._closed is False, "second bot should still be open"
    assert adapter._client is second_bot

    await adapter.disconnect()
@pytest.mark.asyncio
async def test_connect_releases_token_lock_on_timeout(monkeypatch):
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

View file

@ -0,0 +1,737 @@
"""Security regression tests: slash commands honor on_message authorization gates.
Slash invocations (``_run_simple_slash``, ``_handle_thread_create_slash``)
historically bypassed every gate ``on_message`` enforces: DISCORD_ALLOWED_USERS,
DISCORD_ALLOWED_ROLES, DISCORD_ALLOWED_CHANNELS, DISCORD_IGNORED_CHANNELS.
Any guild member could invoke ``/background``, ``/restart``, etc. as the
operator. ``_check_slash_authorization`` mirrors all four gates one-for-one.
These tests pin the security-correct behavior so the bypass cannot regress.
"""
import asyncio
import logging
import sys
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import PlatformConfig
# ---------------------------------------------------------------------------
# Discord module mock — borrowed from test_discord_slash_commands.py so this
# file runs on machines without discord.py installed.
# ---------------------------------------------------------------------------
def _ensure_discord_mock():
"""Register stand-in ``discord`` modules in ``sys.modules`` when none is loaded.

Returns early when a ``discord`` module that has a ``__file__`` attribute
is already imported. Otherwise, if nothing is registered under
``"discord"``, a MagicMock-based module is built (with plain stand-ins
for ``Permissions``, ``app_commands.Group`` and ``app_commands.Command``)
and registered together with ``discord.ext`` and ``discord.ext.commands``.

NOTE(review): indentation is not visible in this view, so exactly which
statements sit under the second ``if`` is an assumption -- confirm.
"""
if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
return # real discord installed
# Only build a stub when no module is registered under "discord" at all.
if sys.modules.get("discord") is None:
discord_mod = MagicMock()
discord_mod.Intents.default.return_value = MagicMock()
# Fresh empty classes stand in for the channel types.
discord_mod.DMChannel = type("DMChannel", (), {})
discord_mod.Thread = type("Thread", (), {})
discord_mod.ForumChannel = type("ForumChannel", (), {})
discord_mod.Interaction = object
# Minimal Permissions stand-in that just stores the ``value`` it is given.
class _FakePermissions:
def __init__(self, value=0, **_):
self.value = value
discord_mod.Permissions = _FakePermissions
# Command group stand-in: stores its fields and registers itself on its
# parent, if one is given.
class _FakeGroup:
def __init__(self, *, name, description, parent=None):
self.name = name
self.description = description
self.parent = parent
self._children: dict[str, object] = {}
if parent is not None:
parent.add_command(self)
def add_command(self, cmd):
self._children[cmd.name] = cmd
# Command stand-in: stores its fields; default_permissions starts as None.
class _FakeCommand:
def __init__(self, *, name, description, callback, parent=None):
self.name = name
self.description = description
self.callback = callback
self.parent = parent
self.default_permissions = None
# describe/choices/autocomplete become pass-through decorators that return
# the decorated function unchanged.
discord_mod.app_commands = SimpleNamespace(
describe=lambda **kwargs: (lambda fn: fn),
choices=lambda **kwargs: (lambda fn: fn),
autocomplete=lambda **kwargs: (lambda fn: fn),
Choice=lambda **kwargs: SimpleNamespace(**kwargs),
Group=_FakeGroup,
Command=_FakeCommand,
)
ext_mod = MagicMock()
commands_mod = MagicMock()
commands_mod.Bot = MagicMock
ext_mod.commands = commands_mod
# "discord" is assigned outright; the ext modules use setdefault so entries
# that are already registered are kept.
sys.modules["discord"] = discord_mod
sys.modules.setdefault("discord.ext", ext_mod)
sys.modules.setdefault("discord.ext.commands", commands_mod)
_ensure_discord_mock()
from gateway.platforms.discord import DiscordAdapter # noqa: E402
@pytest.fixture(autouse=True)
def _isolate_discord_env(monkeypatch):
    """Remove every Discord policy env var so each test starts unconfigured."""
    policy_vars = (
        "DISCORD_ALLOWED_USERS",
        "DISCORD_ALLOWED_ROLES",
        "DISCORD_ALLOWED_CHANNELS",
        "DISCORD_IGNORED_CHANNELS",
        "DISCORD_HIDE_SLASH_COMMANDS",
        "DISCORD_ALLOW_BOTS",
    )
    for name in policy_vars:
        # raising=False: a variable that is already absent is fine.
        monkeypatch.delenv(name, raising=False)
@pytest.fixture(autouse=True)
def _stub_discord_permissions(monkeypatch):
    """Replace ``discord.Permissions`` with a plain value holder for each test.

    Tests assert on the ``.value`` bitfield, so the class is pinned to a
    simple stand-in whether the loaded ``discord`` is the real package or a
    mock installed by another test module.
    """
    import discord

    class _Perm:
        def __init__(self, value=0, **_ignored):
            self.value = value

    monkeypatch.setattr(discord, "Permissions", _Perm)
@pytest.fixture
def adapter():
    """Return a ``DiscordAdapter`` wired to a stub client with no guilds."""
    bot_user = SimpleNamespace(id=99999, name="HermesBot")
    instance = DiscordAdapter(PlatformConfig(enabled=True, token="***"))
    instance._client = SimpleNamespace(user=bot_user, guilds=[])
    return instance
_SENTINEL = object()
def _make_interaction(
    user_id, *, channel_id=12345, guild_id=42, in_dm=False, in_thread=False,
    parent_channel_id=None, user=_SENTINEL,
):
    """Create a stand-in Discord Interaction whose response is still unsent.

    ``in_dm`` yields a ``discord.DMChannel`` instance and ``in_thread`` a
    ``discord.Thread`` carrying ``channel_id`` / ``parent_channel_id``.
    ``channel_id=None`` (outside those modes) produces ``channel=None`` to
    model a payload with no resolvable channel. ``user`` overrides the
    generated user object; passing ``None`` models a payload with no user.
    """
    import discord

    if in_dm:
        channel = discord.DMChannel()
    elif in_thread:
        channel = discord.Thread()
        channel.id = channel_id
        channel.parent_id = parent_channel_id
    elif channel_id is not None:
        channel = SimpleNamespace(id=channel_id)
    else:
        channel = None

    # The sentinel separates "not supplied" from an explicit ``user=None``.
    user_obj = (
        SimpleNamespace(id=int(user_id), name=f"user_{user_id}")
        if user is _SENTINEL
        else user
    )

    return SimpleNamespace(
        user=user_obj,
        guild=SimpleNamespace(owner_id=999),
        guild_id=guild_id,
        channel_id=channel_id,
        channel=channel,
        response=SimpleNamespace(send_message=AsyncMock(), defer=AsyncMock()),
    )
# ---------------------------------------------------------------------------
# Backwards-compat: empty allowlist → everything passes (matches on_message)
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_no_allowlist_allows_everyone(adapter):
    """With no allowlist configured, any guild user passes the slash gate.

    Backwards-compat guard: deployments that never set an allowlist must see
    no behavior change, so the check returns True and sends no rejection.
    """
    anyone = _make_interaction("999999999")
    allowed = await adapter._check_slash_authorization(anyone, "/help")
    assert allowed is True
    anyone.response.send_message.assert_not_awaited()
@pytest.mark.asyncio
async def test_no_allowlist_dm_also_allowed(adapter):
    """A DM interaction is likewise unrestricted when no allowlist is set."""
    dm_interaction = _make_interaction("999999999", in_dm=True)
    allowed = await adapter._check_slash_authorization(dm_interaction, "/help")
    assert allowed is True
# ---------------------------------------------------------------------------
# User allowlist (DISCORD_ALLOWED_USERS) parity
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_allowed_user_passes(adapter):
    """A user on the adapter's user allowlist passes and gets no rejection."""
    member_id = "100200300"
    adapter._allowed_user_ids = {member_id}
    interaction = _make_interaction(member_id)
    granted = await adapter._check_slash_authorization(interaction, "/background hi")
    assert granted is True
    interaction.response.send_message.assert_not_awaited()
@pytest.mark.asyncio
async def test_disallowed_user_rejected_with_ephemeral(adapter, caplog):
    """A user outside the allowlist is refused ephemerally and the attempt is logged."""
    adapter._allowed_user_ids = {"100200300"}
    outsider = _make_interaction("999999999")

    with caplog.at_level(logging.WARNING):
        verdict = await adapter._check_slash_authorization(outsider, "/background hi")
    assert verdict is False

    send = outsider.response.send_message
    send.assert_awaited_once()
    args, kwargs = send.call_args
    assert kwargs.get("ephemeral") is True
    # The rejection text may arrive positionally or as ``content=``.
    rejection = args[0] if args else kwargs.get("content", "")
    assert "not authorized" in rejection.lower()

    logged = [record.message for record in caplog.records]
    assert any("Unauthorized slash attempt" in line for line in logged)
    assert any("DISCORD_ALLOWED_USERS" in line for line in logged)
# ---------------------------------------------------------------------------
# Role allowlist (DISCORD_ALLOWED_ROLES) parity
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_role_member_passes(adapter):
    """Holding one of the allowed roles is enough to pass the gate."""
    adapter._allowed_role_ids = {1234}
    member = _make_interaction("999999999")
    member.user.roles = [SimpleNamespace(id=1234)]
    granted = await adapter._check_slash_authorization(member, "/help")
    assert granted is True
@pytest.mark.asyncio
async def test_role_non_member_rejected(adapter):
    """With only a role allowlist set, a user lacking every allowed role is refused."""
    adapter._allowed_role_ids = {1234}
    stranger = _make_interaction("999999999")
    stranger.user.roles = [SimpleNamespace(id=9999)]  # holds an unrelated role
    granted = await adapter._check_slash_authorization(stranger, "/help")
    assert granted is False
# ---------------------------------------------------------------------------
# Channel allowlist (DISCORD_ALLOWED_CHANNELS) parity — the gate prajer used
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_channel_not_in_allowlist_rejected(adapter, monkeypatch, caplog):
    """A slash command from a channel outside DISCORD_ALLOWED_CHANNELS is refused.

    Pins the channel-scope bypass this file was written to close; the log
    record must name the env var that caused the rejection.
    """
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111,2222")
    off_list = _make_interaction("100200300", channel_id=9999)
    with caplog.at_level(logging.WARNING):
        verdict = await adapter._check_slash_authorization(off_list, "/background hi")
    assert verdict is False
    assert any("DISCORD_ALLOWED_CHANNELS" in rec.message for rec in caplog.records)
@pytest.mark.asyncio
async def test_channel_in_allowlist_passes(adapter, monkeypatch):
    """A channel listed in DISCORD_ALLOWED_CHANNELS passes the gate."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111,2222")
    listed = _make_interaction("100200300", channel_id=1111)
    granted = await adapter._check_slash_authorization(listed, "/help")
    assert granted is True
@pytest.mark.asyncio
async def test_channel_allowlist_wildcard_passes(adapter, monkeypatch):
    """A ``*`` entry in DISCORD_ALLOWED_CHANNELS admits any channel."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "*")
    arbitrary = _make_interaction("100200300", channel_id=9999)
    granted = await adapter._check_slash_authorization(arbitrary, "/help")
    assert granted is True
@pytest.mark.asyncio
async def test_channel_allowlist_does_not_apply_to_dms(adapter, monkeypatch):
    """The channel allowlist is not applied to DM interactions."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111")
    dm_interaction = _make_interaction("100200300", in_dm=True)
    granted = await adapter._check_slash_authorization(dm_interaction, "/help")
    assert granted is True
# ---------------------------------------------------------------------------
# Channel blocklist (DISCORD_IGNORED_CHANNELS) parity
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_ignored_channel_rejected(adapter, monkeypatch, caplog):
    """A channel on DISCORD_IGNORED_CHANNELS is refused and the log names the var."""
    monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "9999")
    muted = _make_interaction("100200300", channel_id=9999)
    with caplog.at_level(logging.WARNING):
        verdict = await adapter._check_slash_authorization(muted, "/help")
    assert verdict is False
    assert any("DISCORD_IGNORED_CHANNELS" in rec.message for rec in caplog.records)
@pytest.mark.asyncio
async def test_ignored_channel_wildcard_blocks_all(adapter, monkeypatch):
    """A ``*`` entry in DISCORD_IGNORED_CHANNELS refuses any channel."""
    monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "*")
    arbitrary = _make_interaction("100200300", channel_id=9999)
    verdict = await adapter._check_slash_authorization(arbitrary, "/help")
    assert verdict is False
# ---------------------------------------------------------------------------
# Cross-platform admin notification
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_unauthorized_attempt_notifies_telegram(adapter):
    """A rejected slash attempt is reported to the Telegram home channel.

    The alert must carry the offending user id, the command text and the
    name of the gate that rejected it.
    """
    from gateway.session import Platform

    tg_adapter = SimpleNamespace(send=AsyncMock())
    tg_home = SimpleNamespace(chat_id="987654321")

    def home_for(platform):
        return tg_home if platform is Platform.TELEGRAM else None

    adapter.gateway_runner = SimpleNamespace(
        adapters={Platform.TELEGRAM: tg_adapter},
        config=SimpleNamespace(get_home_channel=home_for),
    )
    adapter._allowed_user_ids = {"100200300"}

    intruder = _make_interaction("999999999")
    await adapter._check_slash_authorization(intruder, "/background hi")
    # The notification is scheduled fire-and-forget; yield twice so it runs.
    for _ in range(2):
        await asyncio.sleep(0)

    tg_adapter.send.assert_awaited_once()
    chat_id, msg = tg_adapter.send.call_args.args
    assert chat_id == "987654321"
    for expected in ("Unauthorized", "999999999", "/background hi", "DISCORD_ALLOWED_USERS"):
        assert expected in msg
@pytest.mark.asyncio
async def test_notify_silently_no_ops_without_runner(adapter):
    """With no gateway runner attached, the notifier completes without raising."""
    adapter.gateway_runner = None
    notify_args = ("u", "1", 2, 3, "/x", "reason")
    await adapter._notify_unauthorized_slash(*notify_args)  # must not raise
@pytest.mark.asyncio
async def test_notify_falls_back_to_slack_if_no_telegram(adapter):
    """When only Slack has an adapter and home channel, the alert goes to Slack."""
    from gateway.session import Platform

    slack = SimpleNamespace(send=AsyncMock())
    slack_home = SimpleNamespace(chat_id="C12345")

    def home_for(platform):
        return slack_home if platform is Platform.SLACK else None

    adapter.gateway_runner = SimpleNamespace(
        adapters={Platform.SLACK: slack},
        config=SimpleNamespace(get_home_channel=home_for),
    )

    await adapter._notify_unauthorized_slash("u", "1", 2, 3, "/x", "reason")
    slack.send.assert_awaited_once()
# ---------------------------------------------------------------------------
# Opt-in visibility hide
# ---------------------------------------------------------------------------
def test_visibility_hide_off_by_default_is_noop(adapter, monkeypatch):
    """DISCORD_HIDE_SLASH_COMMANDS is unset by default; the helper stays callable.

    The env-gated branch lives at the call site inside
    ``_register_slash_commands`` and cannot be driven cleanly from here, so
    this test checks only two things: the variable is absent by default (the
    precondition for the gate being off), and calling the helper directly
    does not raise. A direct call bypasses the env gate, so nothing is
    asserted about the command's permissions afterwards.
    """
    # Imported locally so the test does not rely on the module's late
    # ``import os`` statement, which appears well below this function.
    import os

    cmd = SimpleNamespace(name="x", default_permissions="UNCHANGED")
    tree = SimpleNamespace(get_commands=lambda: [cmd])

    assert os.environ.get("DISCORD_HIDE_SLASH_COMMANDS") is None

    # Smoke check: the helper must tolerate a plain command object.
    adapter._apply_owner_only_visibility(tree)
def test_visibility_hide_helper_zeroes_perms(adapter):
    """The helper gives every command a permissions object whose value is 0."""
    commands = [
        SimpleNamespace(name=label, default_permissions=None) for label in ("a", "b")
    ]
    # Return a fresh list on each call, as a real command tree would.
    tree = SimpleNamespace(get_commands=lambda: list(commands))

    adapter._apply_owner_only_visibility(tree)

    for cmd in commands:
        assert cmd.default_permissions is not None
    for cmd in commands:
        assert cmd.default_permissions.value == 0
def test_visibility_hide_tolerates_unsetable_command(adapter, caplog):
    """A command that rejects attribute assignment must not stop the others.

    ``_Frozen`` declares ``__slots__`` without ``default_permissions``, so
    setting that attribute on it raises. The writable command listed after
    it must still end up with zeroed permissions.
    """

    class _Frozen:
        __slots__ = ("name",)

        def __init__(self, name):
            self.name = name

    writable = SimpleNamespace(name="ok", default_permissions=None)
    frozen = _Frozen("bad")
    tree = SimpleNamespace(get_commands=lambda: [frozen, writable])

    with caplog.at_level(logging.DEBUG):
        adapter._apply_owner_only_visibility(tree)

    assert writable.default_permissions.value == 0
# os import for test_visibility_hide_off_by_default_is_noop
import os # noqa: E402
# ---------------------------------------------------------------------------
# Fail-closed parity on malformed slash auth context
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_missing_channel_id_rejected_when_channel_policy_configured(
    adapter, monkeypatch,
):
    """With a channel allowlist set, a payload lacking a channel id fails closed.

    Guards against the channel-policy checks being skipped when the
    interaction carries no resolvable channel id.
    """
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111,2222")
    channelless = _make_interaction("100200300", channel_id=None)
    verdict = await adapter._check_slash_authorization(channelless, "/help")
    assert verdict is False
    channelless.response.send_message.assert_awaited_once()
@pytest.mark.asyncio
async def test_missing_channel_id_allowed_when_no_channel_policy(adapter):
    """Without any channel policy, a missing channel id does not block the call."""
    channelless = _make_interaction("100200300", channel_id=None)
    granted = await adapter._check_slash_authorization(channelless, "/help")
    assert granted is True
@pytest.mark.asyncio
async def test_missing_user_rejected_when_allowlist_configured(adapter):
    """With a user allowlist active, ``interaction.user is None`` fails closed.

    The check must return False and send one rejection rather than raising
    on the missing user object.
    """
    adapter._allowed_user_ids = {"100200300"}
    userless = _make_interaction("100200300", user=None)
    verdict = await adapter._check_slash_authorization(userless, "/help")
    assert verdict is False
    userless.response.send_message.assert_awaited_once()
@pytest.mark.asyncio
async def test_missing_user_allowed_when_no_allowlist_configured(adapter):
    """With no allowlist in effect, a payload missing its user is still allowed."""
    userless = _make_interaction("100200300", user=None)
    granted = await adapter._check_slash_authorization(userless, "/help")
    assert granted is True
# ---------------------------------------------------------------------------
# Thread parent channel allowlist parity
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_thread_parent_in_allowlist_passes(adapter, monkeypatch):
    """A thread passes when its parent channel is allowlisted, even if its own id is not."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "5555")
    thread_kwargs = dict(channel_id=9999, in_thread=True, parent_channel_id=5555)
    in_thread = _make_interaction("100200300", **thread_kwargs)
    granted = await adapter._check_slash_authorization(in_thread, "/help")
    assert granted is True
@pytest.mark.asyncio
async def test_thread_parent_in_ignorelist_rejects(adapter, monkeypatch):
    """A thread is refused when its parent channel is ignored, even if its own id is not."""
    monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "5555")
    thread_kwargs = dict(channel_id=9999, in_thread=True, parent_channel_id=5555)
    in_thread = _make_interaction("100200300", **thread_kwargs)
    verdict = await adapter._check_slash_authorization(in_thread, "/help")
    assert verdict is False
@pytest.mark.asyncio
async def test_ignored_beats_allowed(adapter, monkeypatch):
    """A channel present on both lists is refused: the ignore entry takes priority.

    Otherwise adding a channel to the ignore list would silently do nothing
    whenever that channel was also explicitly allowed.
    """
    for var in ("DISCORD_ALLOWED_CHANNELS", "DISCORD_IGNORED_CHANNELS"):
        monkeypatch.setenv(var, "1111")
    both_lists = _make_interaction("100200300", channel_id=1111)
    verdict = await adapter._check_slash_authorization(both_lists, "/help")
    assert verdict is False
# ---------------------------------------------------------------------------
# Admin notify soft-fail fallback
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_notify_falls_back_to_slack_on_telegram_soft_fail(adapter):
    """A Telegram send that reports ``success=False`` must still fall through to Slack.

    Counting a soft failure as delivered would let a Telegram outage swallow
    the alert without anyone noticing.
    """
    from gateway.session import Platform

    failed_result = SimpleNamespace(success=False, error="rate limited")
    telegram = SimpleNamespace(send=AsyncMock(return_value=failed_result))
    slack = SimpleNamespace(send=AsyncMock())
    home_channels = {
        Platform.TELEGRAM: SimpleNamespace(chat_id="987654321"),
        Platform.SLACK: SimpleNamespace(chat_id="C12345"),
    }

    adapter.gateway_runner = SimpleNamespace(
        adapters={Platform.TELEGRAM: telegram, Platform.SLACK: slack},
        config=SimpleNamespace(get_home_channel=lambda p: home_channels.get(p)),
    )

    await adapter._notify_unauthorized_slash("u", "1", 2, 3, "/x", "reason")

    telegram.send.assert_awaited_once()
    slack.send.assert_awaited_once()
@pytest.mark.asyncio
async def test_notify_returns_on_telegram_truthy_success(adapter):
    """A Telegram send that reports ``success=True`` ends the chain; Slack is not tried.

    Counterpart to the soft-fail test, guarding against the fallback logic
    over-correcting and alerting on every platform.
    """
    from gateway.session import Platform

    ok_result = SimpleNamespace(success=True, message_id="m1")
    telegram = SimpleNamespace(send=AsyncMock(return_value=ok_result))
    slack = SimpleNamespace(send=AsyncMock())
    home_channels = {
        Platform.TELEGRAM: SimpleNamespace(chat_id="987654321"),
        Platform.SLACK: SimpleNamespace(chat_id="C12345"),
    }

    adapter.gateway_runner = SimpleNamespace(
        adapters={Platform.TELEGRAM: telegram, Platform.SLACK: slack},
        config=SimpleNamespace(get_home_channel=lambda p: home_channels.get(p)),
    )

    await adapter._notify_unauthorized_slash("u", "1", 2, 3, "/x", "reason")

    telegram.send.assert_awaited_once()
    slack.send.assert_not_awaited()
# ---------------------------------------------------------------------------
# /skill autocomplete + callback gating
# ---------------------------------------------------------------------------
def _capture_skill_registration(adapter, monkeypatch, entries):
    """Register ``/skill`` against a stubbed catalog and hand back its callbacks.

    Returns ``(handler_callback, autocomplete_callback)``. The catalog lookup
    ``hermes_cli.commands.discord_skill_commands_by_category`` is replaced so
    that ``entries`` come back as the uncategorized list.
    ``discord.app_commands.autocomplete`` is replaced with a recorder that
    keeps the callable passed as ``name=`` and leaves the decorated function
    untouched.
    """
    import discord
    import hermes_cli.commands as _hc

    recorded: dict = {}
    added_commands: list = []

    def stub_catalog(reserved_names):
        # Shape: (categories_dict, uncategorized_list, hidden_count)
        return ({}, list(entries), 0)

    def recording_autocomplete(**kwargs):
        # /skill registers one autocomplete, keyed ``name=``.
        recorded["autocomplete"] = kwargs.get("name")
        return lambda fn: fn

    class _Tree:
        def get_commands(self):
            return []

        def add_command(self, cmd):
            added_commands.append(cmd)

    monkeypatch.setattr(_hc, "discord_skill_commands_by_category", stub_catalog)
    monkeypatch.setattr(
        discord.app_commands, "autocomplete", recording_autocomplete,
        raising=False,
    )

    adapter._register_skill_group(_Tree())
    assert added_commands, "_register_skill_group did not register a command"
    return added_commands[0].callback, recorded["autocomplete"]
@pytest.mark.asyncio
async def test_skill_autocomplete_returns_empty_for_unauthorized(
    adapter, monkeypatch,
):
    """Autocomplete returns ``[]`` for a user outside the allowlist.

    Anyone who cannot run /skill must not be able to list the installed
    skill catalog through suggestions.
    """
    adapter._allowed_user_ids = {"100200300"}
    catalog = [
        ("alpha", "First skill", "/alpha"),
        ("beta", "Second skill", "/beta"),
    ]
    _handler, autocomplete = _capture_skill_registration(adapter, monkeypatch, catalog)

    outsider = _make_interaction("999999999")
    suggestions = await autocomplete(outsider, "")
    assert suggestions == []
@pytest.mark.asyncio
async def test_skill_autocomplete_returns_choices_for_authorized(
    adapter, monkeypatch,
):
    """An allowlisted user gets one suggestion per catalog entry."""
    adapter._allowed_user_ids = {"100200300"}
    catalog = [
        ("alpha", "First skill", "/alpha"),
        ("beta", "Second skill", "/beta"),
    ]
    _handler, autocomplete = _capture_skill_registration(adapter, monkeypatch, catalog)

    member = _make_interaction("100200300")
    suggestions = await autocomplete(member, "")
    assert len(suggestions) == 2
    offered = {choice.value for choice in suggestions}
    assert offered == {"alpha", "beta"}
@pytest.mark.asyncio
async def test_skill_handler_rejects_before_dispatch_for_unauthorized(
    adapter, monkeypatch,
):
    """The /skill handler must authorize BEFORE it looks the skill up.

    If lookup ran first, known and unknown names would yield different
    replies ("Unknown skill: foo" vs. the auth rejection), which gives an
    unauthorized user a way to probe the catalog. This test checks three
    things for a rejected user: exactly one ephemeral "not authorized"
    reply, nothing dispatched, and no mention of the requested skill name
    in the reply.
    """
    adapter._allowed_user_ids = {"100200300"}
    entries = [("alpha", "First skill", "/alpha")]
    handler, _autocomplete = _capture_skill_registration(
        adapter, monkeypatch, entries,
    )

    # Replace the dispatcher so any call that slips past the gate is recorded.
    dispatched: list = []

    async def fake_dispatch(_interaction, text):
        dispatched.append(text)

    adapter._run_simple_slash = fake_dispatch  # type: ignore[assignment]

    interaction = _make_interaction("999999999")
    await handler(interaction, "alpha", "")

    interaction.response.send_message.assert_awaited_once()
    args, kwargs = interaction.response.send_message.call_args
    assert kwargs.get("ephemeral") is True
    # The rejection text may arrive positionally or as ``content=``.
    rejection = args[0] if args else kwargs.get("content", "")
    assert "not authorized" in rejection.lower()

    # Nothing may reach the dispatcher for a rejected user.
    assert dispatched == []
    # The rejection must not echo the requested skill name (no catalog leak).
    assert "alpha" not in rejection.lower()
@pytest.mark.asyncio
async def test_skill_handler_known_and_unknown_produce_same_rejection(
    adapter, monkeypatch,
):
    """An unauthorized user gets the same rejection for real and made-up skill names.

    Identical replies mean the response cannot be used to tell which names
    exist in the registered catalog.
    """
    adapter._allowed_user_ids = {"100200300"}
    catalog = [("alpha", "First skill", "/alpha")]
    handler, _ = _capture_skill_registration(adapter, monkeypatch, catalog)
    adapter._run_simple_slash = AsyncMock()  # type: ignore[assignment]

    real_probe = _make_interaction("999999999")
    fake_probe = _make_interaction("999999999")
    await handler(real_probe, "alpha", "")
    await handler(fake_probe, "definitely-not-a-skill", "")

    real_send = real_probe.response.send_message
    fake_send = fake_probe.response.send_message
    real_send.assert_awaited_once()
    fake_send.assert_awaited_once()

    real_args, real_kwargs = real_send.call_args
    fake_args, fake_kwargs = fake_send.call_args
    assert real_args == fake_args
    assert real_kwargs == fake_kwargs
@pytest.mark.asyncio
async def test_skill_handler_dispatches_for_authorized(
    adapter, monkeypatch,
):
    """An allowlisted user's /skill call reaches the dispatcher as ``<cmd_key> <args>``."""
    adapter._allowed_user_ids = {"100200300"}
    catalog = [("alpha", "First skill", "/alpha")]
    handler, _ = _capture_skill_registration(adapter, monkeypatch, catalog)

    seen_texts: list = []

    async def recording_dispatch(_interaction, text):
        seen_texts.append(text)

    adapter._run_simple_slash = recording_dispatch  # type: ignore[assignment]

    member = _make_interaction("100200300")
    await handler(member, "alpha", "extra args")
    assert seen_texts == ["/alpha extra args"]

View file

@ -107,6 +107,10 @@ def adapter():
user=SimpleNamespace(id=99999, name="HermesBot"),
)
adapter._text_batch_delay_seconds = 0 # disable batching for tests
# Slash auth is exercised in test_discord_slash_auth.py — bypass it here
# so registration / dispatch / thread behavior tests don't have to
# construct a full auth context (allowlist / channel scope).
adapter._check_slash_authorization = AsyncMock(return_value=True)
return adapter
@ -117,6 +121,10 @@ def adapter():
@pytest.mark.asyncio
async def test_registers_native_thread_slash_command(adapter):
# The /thread slash closure now delegates ALL the work — including
# defer() — to _handle_thread_create_slash so the auth gate can send
# an ephemeral rejection on the still-unresponded interaction. The
# closure should just forward.
adapter._handle_thread_create_slash = AsyncMock()
adapter._register_slash_commands()
@ -127,7 +135,9 @@ async def test_registers_native_thread_slash_command(adapter):
await command(interaction, name="Planning", message="", auto_archive_duration=1440)
interaction.response.defer.assert_awaited_once_with(ephemeral=True)
# defer is now performed inside _handle_thread_create_slash, AFTER the
# auth check passes — not by the closure.
interaction.response.defer.assert_not_awaited()
adapter._handle_thread_create_slash.assert_awaited_once_with(interaction, "Planning", "", 1440)
@ -298,6 +308,7 @@ async def test_handle_thread_create_slash_reports_success(adapter):
user=SimpleNamespace(display_name="Jezza", id=42),
guild=SimpleNamespace(name="TestGuild"),
followup=SimpleNamespace(send=AsyncMock()),
response=SimpleNamespace(defer=AsyncMock()),
)
await adapter._handle_thread_create_slash(interaction, "Planning", "Kickoff", 1440)
@ -326,6 +337,7 @@ async def test_handle_thread_create_slash_dispatches_session_when_message_provid
user=SimpleNamespace(display_name="Jezza", id=42),
guild=SimpleNamespace(name="TestGuild"),
followup=SimpleNamespace(send=AsyncMock()),
response=SimpleNamespace(defer=AsyncMock()),
)
adapter._dispatch_thread_session = AsyncMock()
@ -348,6 +360,7 @@ async def test_handle_thread_create_slash_no_dispatch_without_message(adapter):
user=SimpleNamespace(display_name="Jezza", id=42),
guild=SimpleNamespace(name="TestGuild"),
followup=SimpleNamespace(send=AsyncMock()),
response=SimpleNamespace(defer=AsyncMock()),
)
adapter._dispatch_thread_session = AsyncMock()
@ -371,6 +384,7 @@ async def test_handle_thread_create_slash_falls_back_to_seed_message(adapter):
user=SimpleNamespace(display_name="Jezza", id=42),
guild=SimpleNamespace(name="TestGuild"),
followup=SimpleNamespace(send=AsyncMock()),
response=SimpleNamespace(defer=AsyncMock()),
)
await adapter._handle_thread_create_slash(interaction, "Planning", "Kickoff", 1440)
@ -395,6 +409,7 @@ async def test_handle_thread_create_slash_reports_failure(adapter):
channel_id=123,
user=SimpleNamespace(display_name="Jezza", id=42),
followup=SimpleNamespace(send=AsyncMock()),
response=SimpleNamespace(defer=AsyncMock()),
)
await adapter._handle_thread_create_slash(interaction, "Planning", "", 1440)

View file

@ -1771,6 +1771,69 @@ class TestAdapterBehavior(unittest.TestCase):
self.assertIn("GIF downgraded to file", caption)
self.assertIn("look", caption)
def test_download_remote_document_reads_response_before_httpx_client_closes(self):
    """#18451 — snapshot Content-Type + body while the httpx.AsyncClient
    context is still active so pooled connections fully release on
    exit. Otherwise the response is only readable because httpx
    eagerly buffers it; a future refactor to .stream() would silently
    read-after-close."""
    from gateway.config import PlatformConfig
    from gateway.platforms.feishu import FeishuAdapter

    # Ordered log of what the fakes observe; the final assertion compares
    # the positions of "content_read" and "client_exit" in this list.
    events: list[str] = []

    class _FakeResponse:
        # Stand-in for the HTTP response: logs body access into ``events``.
        headers = {"Content-Type": "application/octet-stream"}

        def raise_for_status(self) -> None:
            events.append("raise_for_status")

        @property
        def content(self) -> bytes:
            # A property, so the read is logged at the moment of access.
            events.append("content_read")
            return b"doc-bytes"

    class _FakeAsyncClient:
        # Stand-in for httpx.AsyncClient: logs context enter/exit and get().
        def __init__(self, *_a: object, **_k: object) -> None:
            pass

        async def __aenter__(self) -> "_FakeAsyncClient":
            events.append("client_enter")
            return self

        async def __aexit__(self, *exc: object) -> None:
            events.append("client_exit")

        async def get(self, *_a: object, **_k: object) -> _FakeResponse:
            events.append("get")
            return _FakeResponse()

    # NOTE(review): the nesting below is reconstructed from a flattened
    # listing — confirm against the original file.
    with tempfile.TemporaryDirectory() as tmp:
        with patch.dict(os.environ, {"HERMES_HOME": tmp}, clear=False):
            adapter = FeishuAdapter(PlatformConfig())

            async def _run() -> tuple[str, str]:
                # Patch out URL vetting, the HTTP client and the cache
                # writer so only the read ordering is exercised.
                with patch("tools.url_safety.is_safe_url", return_value=True):
                    with patch("httpx.AsyncClient", _FakeAsyncClient):
                        with patch(
                            "gateway.platforms.feishu.cache_document_from_bytes",
                            return_value="/tmp/cached-doc.bin",
                        ):
                            return await adapter._download_remote_document(
                                "https://example.com/doc.bin",
                                default_ext=".bin",
                                preferred_name="doc",
                            )

            path, filename = asyncio.run(_run())

    self.assertEqual(path, "/tmp/cached-doc.bin")
    self.assertTrue(filename)
    # content_read MUST happen before client_exit — otherwise we're
    # reading response body after the connection pool has been torn
    # down, which only works by accident (httpx's eager buffering).
    self.assertLess(events.index("content_read"), events.index("client_exit"))
def test_dedup_state_persists_across_adapter_restart(self):
from gateway.config import PlatformConfig
from gateway.platforms.feishu import FeishuAdapter

View file

@ -0,0 +1,217 @@
"""Tests for gateway /goal verdict-message delivery.
The judge verdict message ("✓ Goal achieved", "⏸ budget exhausted", etc.)
must reach the user after each turn. Before this fix the code checked
``hasattr(adapter, "send_message")`` but adapters expose ``send()``,
never ``send_message``, so the check always evaluated False and users
never saw verdicts. This test locks in the fix.
"""
from __future__ import annotations
import asyncio
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.session import SessionEntry, SessionSource, build_session_key
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Provide an isolated ``.hermes`` directory and a clean goals DB cache.

    ``Path.home`` and ``HERMES_HOME`` are pointed into ``tmp_path``. The
    ``hermes_cli.goals`` DB cache is cleared both before the test runs and
    again after it.
    """
    from hermes_cli import goals

    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    goals._DB_CACHE.clear()
    yield hermes_dir
    goals._DB_CACHE.clear()
def _make_source() -> SessionSource:
    """Return the Telegram DM session source shared by these tests."""
    fields = {
        "platform": Platform.TELEGRAM,
        "user_id": "u1",
        "chat_id": "c1",
        "user_name": "tester",
        "chat_type": "dm",
    }
    return SessionSource(**fields)
class _RecordingAdapter:
    """Bare-bones adapter double that logs each ``send()`` call it receives."""

    def __init__(self) -> None:
        self._pending_messages: dict = {}
        self.sends: list[dict] = []

    async def send(self, chat_id: str, content: str, reply_to=None, metadata=None):
        """Log the call (``reply_to`` is not stored) and return a success result."""
        entry = {"chat_id": chat_id, "content": content, "metadata": metadata}
        self.sends.append(entry)
        # Throwaway result object exposing ``success`` and ``message_id``.
        result_type = type("_R", (), {"success": True, "message_id": "mock-msg"})
        return result_type()
def _make_runner_with_adapter():
    """Assemble a ``GatewayRunner`` around a recording Telegram adapter.

    The runner is allocated with ``object.__new__`` so ``__init__`` never
    runs; only the attributes assigned below are set. Returns
    ``(runner, adapter, session_entry, source)``.
    """
    from gateway.run import GatewayRunner

    source = _make_source()
    recorder = _RecordingAdapter()
    entry = SessionEntry(
        session_key=build_session_key(source),
        session_id="goal-sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )

    store = MagicMock()
    store.get_or_create_session.return_value = entry
    store._generate_session_key.return_value = build_session_key(source)

    runner = object.__new__(GatewayRunner)
    runner.config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")},
    )
    runner.adapters = {Platform.TELEGRAM: recorder}
    runner._running_agents = {}
    runner._running_agents_ts = {}
    runner._queued_events = {}
    runner.session_store = store

    return runner, recorder, entry, source
@pytest.mark.asyncio
async def test_goal_verdict_done_sent_via_adapter_send(hermes_home):
    """A "done" verdict is delivered to the chat through the adapter's ``send()``."""
    from hermes_cli.goals import GoalManager

    runner, adapter, session_entry, src = _make_runner_with_adapter()
    GoalManager(session_entry.session_id).set("ship the feature")

    verdict = ("done", "the feature shipped")
    with patch("hermes_cli.goals.judge_goal", return_value=verdict):
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="I shipped the feature.",
        )
        # Delivery is scheduled as a background task; let the loop run it.
        await asyncio.sleep(0.05)

    assert len(adapter.sends) == 1, f"expected 1 send, got {len(adapter.sends)}: {adapter.sends}"
    delivered = adapter.sends[0]
    assert delivered["chat_id"] == "c1"
    assert "Goal achieved" in delivered["content"]
    assert "the feature shipped" in delivered["content"]
@pytest.mark.asyncio
async def test_goal_verdict_continue_enqueues_continuation(hermes_home):
    """A "continue" verdict must produce both a 'continuing' status message
    and a continuation prompt.

    The status goes out via ``send()``; the continuation prompt is expected in
    the adapter's pending-messages FIFO so the goal loop proceeds next turn.
    """
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager

    GoalManager(session_entry.session_id).set("polish the docs")

    judge_verdict = ("continue", "still needs work")
    with patch("hermes_cli.goals.judge_goal", return_value=judge_verdict):
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="here's a partial edit",
        )
        await asyncio.sleep(0.05)

    # Exactly one status line goes back to the chat.
    assert len(adapter.sends) == 1
    status = adapter.sends[0]
    assert "Continuing toward goal" in status["content"]

    # The follow-up prompt is queued for the next turn.
    assert adapter._pending_messages, "continuation prompt must be enqueued in pending_messages"
@pytest.mark.asyncio
async def test_goal_verdict_budget_exhausted_sends_pause(hermes_home):
    """With the turn budget already spent, a '⏸ Goal paused' message must be
    sent and no continuation may be enqueued."""
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager, save_goal

    session_id = session_entry.session_id
    manager = GoalManager(session_id, default_max_turns=2)
    goal_state = manager.set("tiny goal", max_turns=2)
    # Persist a goal whose budget is already fully consumed.
    goal_state.turns_used = 2
    save_goal(session_id, goal_state)

    judge_verdict = ("continue", "keep going")
    with patch("hermes_cli.goals.judge_goal", return_value=judge_verdict):
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="still partial",
        )
        await asyncio.sleep(0.05)

    assert len(adapter.sends) == 1
    lowered = adapter.sends[0]["content"].lower()
    assert "paused" in lowered
    assert "turns used" in lowered
    # Budget exhausted → nothing queued for a further turn.
    assert not adapter._pending_messages
@pytest.mark.asyncio
async def test_goal_verdict_skipped_when_no_active_goal(hermes_home):
    """Without a goal the hook does nothing: no send, no queued message."""
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    hook_kwargs = {
        "session_entry": session_entry,
        "source": src,
        "final_response": "anything",
    }
    runner._post_turn_goal_continuation(**hook_kwargs)
    await asyncio.sleep(0.05)

    assert adapter.sends == []
    assert adapter._pending_messages == {}
@pytest.mark.asyncio
async def test_goal_verdict_survives_adapter_without_send(hermes_home):
    """An adapter lacking a ``send`` attribute must not crash the judge hook."""
    runner, _adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager

    GoalManager(session_entry.session_id).set("survive missing send")

    class _NoSendAdapter:
        """Adapter stub that only carries the pending-message store."""

        def __init__(self):
            self._pending_messages: dict = {}

    runner.adapters[Platform.TELEGRAM] = _NoSendAdapter()

    judge_verdict = ("done", "ok")
    with patch("hermes_cli.goals.judge_goal", return_value=judge_verdict):
        # The test passes simply by this call (and the tick after it) not raising.
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="whatever",
        )
        await asyncio.sleep(0.05)

View file

@ -8,7 +8,7 @@ to env vars nothing read on startup — the home channel appeared to set
successfully but was lost on every new gateway session.
"""
from gateway.run import _home_target_env_var
from gateway.run import _home_target_env_var, _home_thread_env_var
def test_matrix_home_target_env_var_uses_home_room():
@ -34,3 +34,9 @@ def test_unknown_platform_home_target_env_var_falls_back_to_home_channel():
def test_case_insensitive_platform_name():
    """Platform names resolve to the same env var regardless of letter case."""
    expectations = (
        ("MATRIX", "MATRIX_HOME_ROOM"),
        ("Email", "EMAIL_HOME_ADDRESS"),
    )
    for platform_name, env_var in expectations:
        assert _home_target_env_var(platform_name) == env_var
def test_home_thread_env_var_uses_home_target_name_plus_thread_id():
    """The thread env var is the home-target env var suffixed with ``_THREAD_ID``."""
    expectations = (
        ("discord", "DISCORD_HOME_CHANNEL_THREAD_ID"),
        ("matrix", "MATRIX_HOME_ROOM_THREAD_ID"),
        ("email", "EMAIL_HOME_ADDRESS_THREAD_ID"),
    )
    for platform_name, env_var in expectations:
        assert _home_thread_env_var(platform_name) == env_var

View file

@ -0,0 +1,114 @@
"""Tests for the shared httpx.Limits helper that all long-lived platform
adapters use to tighten their keep-alive pool.
Context: #18451 — on macOS behind Cloudflare Warp, httpx's default
keepalive_expiry=5s let idle CLOSE_WAIT sockets accumulate across
multiple long-lived gateway adapters (QQ Bot, Feishu, WeCom, DingTalk,
Signal, BlueBubbles, WeCom-callback) until the process hit the default
256 fd limit. These tests just verify the helper returns sensibly
tuned limits and respects env-var overrides; the actual fd-pressure
behaviour is only observable at runtime under load.
"""
from __future__ import annotations
import os
import pytest
@pytest.fixture(autouse=True)
def _clear_env(monkeypatch):
    """Remove both override env vars before every test in this module."""
    for override_name in (
        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY",
        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE",
    ):
        monkeypatch.delenv(override_name, raising=False)
def test_returns_none_when_httpx_unavailable(monkeypatch):
    """With the module's ``httpx`` reference set to ``None`` the helper must
    return ``None`` instead of raising, so callers can use httpx's default."""
    import gateway.platforms._http_client_limits as limits_module

    monkeypatch.setattr(limits_module, "httpx", None)

    result = limits_module.platform_httpx_limits()
    assert result is None
def test_default_limits_tighten_keepalive_below_httpx_default():
    """Default limits must be an ``httpx.Limits`` with a sub-5s keep-alive
    expiry and a small, positive keep-alive pool."""
    import httpx
    from gateway.platforms._http_client_limits import platform_httpx_limits

    limits = platform_httpx_limits()
    assert isinstance(limits, httpx.Limits)

    # httpx's own default keepalive_expiry is 5.0; ours has to be shorter so
    # CLOSE_WAIT sockets drain promptly behind proxies like Warp.
    expiry = limits.keepalive_expiry
    assert expiry is not None
    assert expiry < 5.0

    # The pool must be positive and sized for a single adapter (platform
    # APIs rarely parallelise beyond ~10).
    pool_size = limits.max_keepalive_connections
    assert pool_size is not None
    assert 1 <= pool_size <= 50
def test_env_override_keepalive_expiry(monkeypatch):
    """``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` overrides the expiry value."""
    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", "7.5")

    from gateway.platforms._http_client_limits import platform_httpx_limits

    overridden = platform_httpx_limits()
    assert overridden.keepalive_expiry == 7.5
def test_env_override_max_keepalive(monkeypatch):
    """``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` overrides the pool size."""
    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", "25")

    from gateway.platforms._http_client_limits import platform_httpx_limits

    overridden = platform_httpx_limits()
    assert overridden.max_keepalive_connections == 25
def test_env_override_rejects_garbage(monkeypatch):
    """Malformed env values fall back to defaults rather than raising."""
    garbage_overrides = {
        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY": "not-a-number",
        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE": "-3",
    }
    for env_name, env_value in garbage_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    from gateway.platforms._http_client_limits import platform_httpx_limits

    limits = platform_httpx_limits()
    # Non-numeric / non-positive inputs must not leak through as the result.
    assert limits.keepalive_expiry is not None and limits.keepalive_expiry > 0
    assert limits.max_keepalive_connections is not None
    assert limits.max_keepalive_connections > 0
def test_helper_is_importable_from_every_platform_that_uses_it():
    """Import each persistent-httpx-client adapter module listed below.

    The test has no assertions: a failing import in any of these modules
    fails the test here, before it can surface as an adapter-startup crash
    at runtime.
    """
    # The imports themselves are the check; the names are intentionally unused.
    import gateway.platforms.qqbot.adapter  # noqa: F401
    import gateway.platforms.wecom  # noqa: F401
    import gateway.platforms.dingtalk  # noqa: F401
    import gateway.platforms.signal  # noqa: F401
    import gateway.platforms.bluebubbles  # noqa: F401
    import gateway.platforms.wecom_callback  # noqa: F401
class TestWhatsappTypingLeakFix:
    """#18451 — whatsapp.send_typing previously used a bare
    `await self._http_session.post(...)`, which leaked the aiohttp
    response object until GC and held its TCP socket in CLOSE_WAIT.
    The call must now be wrapped in `async with` so the response is
    released as soon as the call returns.

    The check inspects the method's source text instead of running the
    coroutine — exercising it would need a live aiohttp server, and the
    contract that matters here is structural.
    """

    def test_bare_await_removed(self):
        """``send_typing`` must use ``async with`` and no bare ``await`` for post()."""
        import inspect
        import gateway.platforms.whatsapp as whatsapp_module

        method_source = inspect.getsource(whatsapp_module.WhatsAppAdapter.send_typing)
        wrapped_call = "async with self._http_session.post("
        bare_call = "await self._http_session.post("

        # Structural requirement: post() sits inside an `async with`.
        assert wrapped_call in method_source, (
            "send_typing must wrap self._http_session.post(...) in "
            "`async with` to release the aiohttp response socket "
            "(#18451). Otherwise the response sits in CLOSE_WAIT "
            "until GC."
        )
        # ...and the previous bare-await form is no longer present.
        assert bare_call not in method_source

View file

@ -0,0 +1,244 @@
"""Tests for `/reload-skills` resyncing the Discord ``/skill`` autocomplete.
Before this change, ``_register_skill_group`` captured the skill catalog
in closure variables (``entries`` and ``skill_lookup``) so that the one
``tree.add_command`` call at startup owned the only live copy of the
skill list. The closure is never re-entered after startup, so
``/reload-skills`` (which rescans the on-disk skill dir and refreshes
the in-process registry) had no way to propagate its results into the
autocomplete — new skills stayed invisible in the dropdown and deleted
skills returned an "Unknown skill" error when the stale autocomplete
entry was clicked.
The fix promotes those two variables to instance attributes
(``_skill_entries`` / ``_skill_lookup``) and exposes a
``refresh_skill_group()`` method that rescans and mutates them in
place. The gateway ``_handle_reload_skills_command`` iterates its
connected adapters and calls the method on any that expose it.
No ``tree.sync()`` is required because Discord fetches autocomplete
options dynamically on every keystroke — we only need to rebind the
data the live callbacks already read from.
"""
from __future__ import annotations
from unittest.mock import MagicMock
def _make_adapter():
    """Return a ``DiscordAdapter`` created without running ``__init__``.

    Skipping ``__init__`` avoids the token checks; the attributes the tests
    rely on are assigned by hand instead.
    """
    from gateway.platforms.discord import DiscordAdapter
    from gateway.platforms.base import Platform

    discord_adapter = object.__new__(DiscordAdapter)

    mock_config = MagicMock()
    mock_config.extra = {}
    discord_adapter.config = mock_config

    # BasePlatformAdapter.__init__ normally sets ``platform``; since it is
    # skipped, set it here because the inherited ``.name`` property
    # dereferences it for log formatting.
    discord_adapter.platform = Platform.DISCORD
    return discord_adapter
class TestRefreshSkillGroup:
    """Behaviour of ``DiscordAdapter.refresh_skill_group()``."""

    def test_refresh_repopulates_entries_after_catalog_change(
        self, monkeypatch
    ) -> None:
        """Refreshing replaces the previous catalog wholesale.

        This is the user-visible /reload-skills scenario: a skill is added
        under ~/.hermes/skills/, /reload-skills runs, and the autocomplete
        is expected to offer it on the next keystroke.
        """
        adapter = _make_adapter()
        # State as it stood after startup registration.
        stale_entry = ("old-skill", "Pre-existing skill", "/old-skill")
        adapter._skill_entries = [stale_entry]
        adapter._skill_lookup = {"old-skill": ("Pre-existing skill", "/old-skill")}
        adapter._skill_group_reserved_names = set()
        adapter._skill_group_hidden_count = 0

        # On disk, new-skill has appeared and old-skill is gone.
        def fake_collector(*, reserved_names):
            categories = {"creative": [("new-skill", "Fresh skill", "/new-skill")]}
            uncategorized = []
            hidden_count = 0
            return (categories, uncategorized, hidden_count)

        monkeypatch.setattr(
            "hermes_cli.commands.discord_skill_commands_by_category",
            fake_collector,
        )

        new_count, hidden = adapter.refresh_skill_group()

        assert new_count == 1
        assert hidden == 0
        # Only the new skill remains visible.
        refreshed_names = [name for name, _desc, _key in adapter._skill_entries]
        assert refreshed_names == ["new-skill"]
        assert "old-skill" not in adapter._skill_lookup
        assert adapter._skill_lookup["new-skill"] == ("Fresh skill", "/new-skill")

    def test_refresh_sorts_entries_alphabetically(self, monkeypatch) -> None:
        """Entries come out alphabetically sorted after a refresh."""
        adapter = _make_adapter()
        adapter._skill_entries = []
        adapter._skill_lookup = {}
        adapter._skill_group_reserved_names = set()
        adapter._skill_group_hidden_count = 0

        def fake_collector(*, reserved_names):
            # Deliberately out of order so the refresh has to sort.
            categories = {"zzz": [("zebra", "", "/zebra")]}
            uncategorized = [("alpha", "", "/alpha")]
            return (categories, uncategorized, 0)

        monkeypatch.setattr(
            "hermes_cli.commands.discord_skill_commands_by_category",
            fake_collector,
        )

        adapter.refresh_skill_group()

        refreshed_names = [name for name, _desc, _key in adapter._skill_entries]
        assert refreshed_names == sorted(refreshed_names) == ["alpha", "zebra"]

    def test_refresh_handles_collector_exception_gracefully(
        self, monkeypatch
    ) -> None:
        """A collector that raises must not break /reload-skills."""
        adapter = _make_adapter()
        cached_entry = ("keep", "kept", "/keep")
        adapter._skill_entries = [cached_entry]
        adapter._skill_lookup = {"keep": ("kept", "/keep")}
        adapter._skill_group_reserved_names = set()
        adapter._skill_group_hidden_count = 0

        def boom(*, reserved_names):
            raise RuntimeError("simulated collector failure")

        monkeypatch.setattr(
            "hermes_cli.commands.discord_skill_commands_by_category",
            boom,
        )

        new_count, hidden = adapter.refresh_skill_group()

        # The cached count is reported and the existing entries survive, so
        # the live autocomplete keeps working.
        assert new_count == 1
        assert hidden == 0
        assert adapter._skill_entries == [("keep", "kept", "/keep")]
class TestRegisterSkillGroupUsesInstanceState:
    """The closure-based ``entries`` / ``skill_lookup`` must be gone.

    If the callbacks in ``_register_skill_group`` still close over local
    variables instead of reading from ``self``, the refresh method is
    useless — autocomplete will keep serving the stale list.

    The full slash-command registration path pulls in
    ``discord.app_commands`` decorators (``@describe`` / ``@autocomplete`` /
    ``Command``), which are unstubbed in the hermetic test env. The test
    therefore checks the data-shaped side effects: ``_skill_entries`` and
    ``_skill_lookup`` must be populated from the collector output by
    ``_refresh_skill_catalog_state``, which runs before any decorator
    evaluation.
    """

    def test_refresh_catalog_state_populates_instance_attrs(
        self, monkeypatch
    ) -> None:
        """``_refresh_skill_catalog_state`` fills the instance-level catalog."""
        adapter = _make_adapter()
        adapter._skill_group_reserved_names = set()

        def fake_collector(*, reserved_names):
            categories = {"creative": [("ascii-art", "Make ASCII", "/ascii-art")]}
            return (categories, [], 0)

        monkeypatch.setattr(
            "hermes_cli.commands.discord_skill_commands_by_category",
            fake_collector,
        )

        adapter._refresh_skill_catalog_state()

        # The autocomplete and handler callbacks read these attributes, so
        # updating them is what makes new skills visible.
        expected_entries = [("ascii-art", "Make ASCII", "/ascii-art")]
        expected_lookup = {"ascii-art": ("Make ASCII", "/ascii-art")}
        assert adapter._skill_entries == expected_entries
        assert adapter._skill_lookup == expected_lookup
        assert adapter._skill_group_hidden_count == 0
class TestHandleReloadSkillsCallsRefreshSkillGroup:
    """Gateway-side integration: /reload-skills must call refresh on adapters."""

    def test_orchestrator_calls_refresh_skill_group_on_every_adapter(self):
        """Sync and async ``refresh_skill_group`` implementations both run.

        The orchestrator iterates ``self.adapters`` and calls
        ``refresh_skill_group`` if it exists. Adapters that don't
        implement it (today: everything except Discord) are silently
        skipped without raising.
        """
        import asyncio
        from unittest.mock import MagicMock, patch

        # Import without constructing a real runner — the method is tested
        # directly against an ``object.__new__`` instance.
        from gateway.run import GatewayRunner

        runner = object.__new__(GatewayRunner)

        sync_refresh = MagicMock(return_value=(5, 0))
        async_called = {"flag": False}

        class AsyncAdapter:
            name = "async-platform"

            async def refresh_skill_group(self):
                async_called["flag"] = True
                return (3, 0)

        class SyncAdapter:
            name = "sync-platform"
            refresh_skill_group = sync_refresh

        class NoOpAdapter:
            name = "other"
            # No refresh_skill_group — must not crash.

        runner.adapters = {
            "discord": AsyncAdapter(),
            "slack": SyncAdapter(),
            "telegram": NoOpAdapter(),
        }

        # Mock reload_skills itself so no disk scan runs.
        fake_result = {"added": [], "removed": [], "total": 7}
        with patch(
            "agent.skill_commands.reload_skills", return_value=fake_result
        ):
            event = MagicMock()
            event.source = MagicMock()
            # _session_key_for_source may be called — make it safe.
            runner._session_key_for_source = lambda src: None
            runner._pending_skills_reload_notes = {}

            # asyncio.run() creates and closes its own loop. The previous
            # asyncio.get_event_loop().run_until_complete(...) relied on the
            # deprecated implicit-loop behaviour and could pick up a loop
            # already closed by an earlier test.
            result = asyncio.run(runner._handle_reload_skills_command(event))

        assert "Skills Reloaded" in result
        assert sync_refresh.called, "sync adapter refresh must be invoked"
        assert async_called["flag"], "async adapter refresh must be awaited"

View file

@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
import gateway.run as gateway_run
from gateway.config import Platform
from gateway.platforms.base import MessageEvent, MessageType
from gateway.config import HomeChannel, Platform
from gateway.platforms.base import MessageEvent, MessageType, SendResult
from gateway.session import build_session_key
from tests.gateway.restart_test_helpers import (
make_restart_runner,
@ -17,6 +17,22 @@ from tests.gateway.restart_test_helpers import (
)
# ── restart marker helpers ───────────────────────────────────────────────
def test_restart_notification_pending_false_without_marker(tmp_path, monkeypatch):
    """No ``.restart_notify.json`` in the Hermes home → nothing is pending."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    pending = gateway_run._restart_notification_pending()
    assert pending is False
def test_restart_notification_pending_true_with_marker(tmp_path, monkeypatch):
    """A ``.restart_notify.json`` file in the Hermes home → a notification is pending."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    marker_path = tmp_path / ".restart_notify.json"
    marker_path.write_text("{}")

    pending = gateway_run._restart_notification_pending()
    assert pending is True
# ── _handle_restart_command writes .restart_notify.json ──────────────────
@ -143,6 +159,184 @@ async def test_restart_command_uses_atomic_json_writes_for_marker_files(tmp_path
assert calls[1][1]["platform"] == "telegram"
@pytest.mark.asyncio
async def test_sethome_updates_running_config_for_same_process_restart(tmp_path, monkeypatch):
    """/sethome persists to env and updates in-memory config before restart."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    persisted_env = {}

    def _fake_save_env_value(key, value):
        # Capture what would have been written to the env file.
        persisted_env[key] = value

    monkeypatch.setattr("hermes_cli.config.save_env_value", _fake_save_env_value)

    runner, _adapter = make_restart_runner()
    home_source = make_restart_source(chat_id="home-42")
    home_source.chat_name = "Ops Home"
    sethome_event = MessageEvent(
        text="/sethome",
        message_type=MessageType.TEXT,
        source=home_source,
        message_id="m-home",
    )

    reply = await runner._handle_set_home_command(sethome_event)
    configured_home = runner.config.get_home_channel(Platform.TELEGRAM)

    assert "Home channel set" in reply
    assert persisted_env["TELEGRAM_HOME_CHANNEL"] == "home-42"
    assert configured_home is not None
    assert configured_home.chat_id == "home-42"
    assert configured_home.name == "Ops Home"
@pytest.mark.asyncio
async def test_sethome_preserves_thread_target_for_same_process_restart(tmp_path, monkeypatch):
    """/sethome from a topic/thread stores the thread-aware home target."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    persisted_env = {}

    def _fake_save_env_value(key, value):
        # Capture what would have been written to the env file.
        persisted_env[key] = value

    monkeypatch.setattr("hermes_cli.config.save_env_value", _fake_save_env_value)

    runner, _adapter = make_restart_runner()
    thread_source = make_restart_source(chat_id="parent-42", thread_id="topic-7")
    thread_source.chat_name = "Ops Topic"
    sethome_event = MessageEvent(
        text="/sethome",
        message_type=MessageType.TEXT,
        source=thread_source,
        message_id="m-home-thread",
    )

    reply = await runner._handle_set_home_command(sethome_event)
    configured_home = runner.config.get_home_channel(Platform.TELEGRAM)

    assert "Home channel set" in reply
    # Chat id and thread id are persisted under separate env keys.
    assert persisted_env["TELEGRAM_HOME_CHANNEL"] == "parent-42"
    assert persisted_env["TELEGRAM_HOME_CHANNEL_THREAD_ID"] == "topic-7"
    assert configured_home is not None
    assert configured_home.chat_id == "parent-42"
    assert configured_home.thread_id == "topic-7"
# ── home-channel startup notifications ─────────────────────────────────────
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_to_configured_home(tmp_path, monkeypatch):
    """A configured home channel receives the "Gateway online" message."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    adapter.send = AsyncMock()

    delivered = await runner._send_home_channel_startup_notifications()

    expected_targets = {("telegram", "home-42", None)}
    assert delivered == expected_targets
    # No thread configured → send() is called without a metadata argument.
    adapter.send.assert_called_once_with(
        "home-42",
        "♻️ Gateway online — Hermes is back and ready.",
    )
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_preserves_thread_metadata(
    tmp_path, monkeypatch
):
    """A home channel with a ``thread_id`` is notified with thread metadata."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="parent-42",
        name="Ops Topic",
        thread_id="topic-7",
    )
    send_ok = SendResult(success=True, message_id="home")
    adapter.send = AsyncMock(return_value=send_ok)

    delivered = await runner._send_home_channel_startup_notifications()

    expected_targets = {("telegram", "parent-42", "topic-7")}
    assert delivered == expected_targets
    adapter.send.assert_called_once_with(
        "parent-42",
        "♻️ Gateway online — Hermes is back and ready.",
        metadata={"thread_id": "topic-7"},
    )
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_skips_restart_target(
    tmp_path, monkeypatch
):
    """A home channel listed in ``skip_targets`` gets no startup message."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="42",
        name="Ops Home",
    )
    adapter.send = AsyncMock()

    already_notified = {("telegram", "42", None)}
    delivered = await runner._send_home_channel_startup_notifications(
        skip_targets=already_notified
    )

    assert delivered == set()
    adapter.send.assert_not_called()
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_does_not_skip_different_thread(
    tmp_path, monkeypatch
):
    """A skip target in the same chat but another thread does not suppress
    the notification to the thread-less home channel."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="42",
        name="Ops Home",
    )
    send_ok = SendResult(success=True, message_id="home")
    adapter.send = AsyncMock(return_value=send_ok)

    other_thread_target = {("telegram", "42", "topic-7")}
    delivered = await runner._send_home_channel_startup_notifications(
        skip_targets=other_thread_target
    )

    assert delivered == {("telegram", "42", None)}
    adapter.send.assert_called_once()
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_ignores_false_send_result(
    tmp_path, monkeypatch
):
    """A ``SendResult(success=False)`` is not counted as a delivered target."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    runner, adapter = make_restart_runner()
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    send_failed = SendResult(success=False, error="network down")
    adapter.send = AsyncMock(return_value=send_failed)

    delivered = await runner._send_home_channel_startup_notifications()

    # The send was attempted once, but nothing is reported as delivered.
    assert delivered == set()
    adapter.send.assert_called_once()
# ── _send_restart_notification ───────────────────────────────────────────
@ -160,8 +354,9 @@ async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkey
runner, adapter = make_restart_runner()
adapter.send = AsyncMock()
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
assert delivered_target == ("telegram", "42", None)
adapter.send.assert_called_once()
call_args = adapter.send.call_args
assert call_args[0][0] == "42" # chat_id
@ -185,8 +380,9 @@ async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
runner, adapter = make_restart_runner()
adapter.send = AsyncMock()
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
assert delivered_target == ("telegram", "99", "topic_7")
call_args = adapter.send.call_args
assert call_args[1]["metadata"] == {"thread_id": "topic_7"}
assert not notify_path.exists()
@ -240,6 +436,94 @@ async def test_send_restart_notification_cleans_up_on_send_failure(
runner, adapter = make_restart_runner()
adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
await runner._send_restart_notification()
delivered_target = await runner._send_restart_notification()
assert not notify_path.exists() # cleaned up despite error
# File cleaned up even though send raised.
assert delivered_target is None
assert not notify_path.exists()
@pytest.mark.asyncio
async def test_send_restart_notification_logs_warning_on_sendresult_failure(
    tmp_path, monkeypatch, caplog
):
    """Adapter that returns SendResult(success=False) must log a WARNING, not INFO.

    Regression guard: adapter.send() catches provider errors (e.g. Telegram
    "Chat not found") and returns SendResult(success=False) rather than
    raising. The caller previously ignored the return value and always
    logged "Sent restart notification to ..." at INFO — masking real
    delivery failures behind a fake success line.
    """
    from gateway.platforms.base import SendResult

    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    notify_path = tmp_path / ".restart_notify.json"
    marker_payload = {
        "platform": "telegram",
        "chat_id": "42",
    }
    notify_path.write_text(json.dumps(marker_payload))

    runner, adapter = make_restart_runner()
    adapter.send = AsyncMock(
        return_value=SendResult(success=False, error="Chat not found"),
    )

    with caplog.at_level("DEBUG", logger="gateway.run"):
        delivered_target = await runner._send_restart_notification()

    success_lines = []
    warning_lines = []
    for record in caplog.records:
        message = record.getMessage()
        if record.levelname == "INFO" and "Sent restart notification" in message:
            success_lines.append(record)
        if (
            record.levelname == "WARNING"
            and "was not delivered" in message
            and "Chat not found" in message
        ):
            warning_lines.append(record)

    assert delivered_target is None
    assert not success_lines, (
        "Expected no INFO 'Sent restart notification' line when send failed, "
        f"got: {[r.getMessage() for r in success_lines]}"
    )
    assert warning_lines, (
        "Expected a WARNING line mentioning the failure; "
        f"got records: {[(r.levelname, r.getMessage()) for r in caplog.records]}"
    )
    # The marker file is removed even though delivery failed.
    assert not notify_path.exists()
@pytest.mark.asyncio
async def test_send_restart_notification_logs_info_on_sendresult_success(
    tmp_path, monkeypatch, caplog
):
    """Adapter returning SendResult(success=True) keeps the INFO log line."""
    from gateway.platforms.base import SendResult

    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    notify_path = tmp_path / ".restart_notify.json"
    marker_payload = {
        "platform": "telegram",
        "chat_id": "42",
    }
    notify_path.write_text(json.dumps(marker_payload))

    runner, adapter = make_restart_runner()
    adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="m-1"))

    with caplog.at_level("DEBUG", logger="gateway.run"):
        delivered_target = await runner._send_restart_notification()

    success_lines = []
    for record in caplog.records:
        if record.levelname == "INFO" and "Sent restart notification" in record.getMessage():
            success_lines.append(record)

    assert delivered_target == ("telegram", "42", None)
    assert success_lines, (
        "Expected INFO 'Sent restart notification' when send succeeded; "
        f"got records: {[(r.levelname, r.getMessage()) for r in caplog.records]}"
    )
    assert not notify_path.exists()

View file

@ -32,7 +32,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.config import GatewayConfig, HomeChannel, Platform, PlatformConfig
from gateway.platforms.base import SendResult
from gateway.run import (
_auto_continue_freshness_window,
_coerce_gateway_timestamp,
@ -376,8 +377,8 @@ class TestSuspendRecentlyActiveSkipsResumePending:
assert e.suspended is False
assert e.resume_pending is True
def test_non_resume_pending_still_suspended(self, tmp_path):
"""Non-resume sessions still get the old crash-recovery suspension."""
def test_non_resume_pending_gets_resume_pending(self, tmp_path):
"""Non-resume sessions are now marked resume_pending (not suspended)."""
store = _make_store(tmp_path)
source_a = _make_source(chat_id="a")
source_b = _make_source(chat_id="b")
@ -386,9 +387,11 @@ class TestSuspendRecentlyActiveSkipsResumePending:
store.mark_resume_pending(entry_a.session_key)
count = store.suspend_recently_active()
# entry_a is already resume_pending → skipped. entry_b gets marked.
assert count == 1
assert store._entries[entry_a.session_key].suspended is False
assert store._entries[entry_b.session_key].suspended is True
assert store._entries[entry_b.session_key].resume_pending is True
assert store._entries[entry_b.session_key].suspended is False
# ---------------------------------------------------------------------------
@ -929,6 +932,84 @@ async def test_restart_banner_uses_try_to_resume_wording():
assert "try to resume" in msg
@pytest.mark.asyncio
async def test_restart_notifies_home_channel_even_without_active_sessions():
    """With no running agents, the restart banner still goes to the home channel."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )

    await runner._notify_active_sessions_of_shutdown()

    expected_banner = (
        "⚠️ Gateway restarting — Your current task will be interrupted. "
        "Send any message after restart and I'll try to resume where you left off."
    )
    assert adapter.sent == [expected_banner]
@pytest.mark.asyncio
async def test_restart_home_channel_notification_dedupes_active_chat():
    """When the home channel is also an active chat, only one message is sent."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    active_session_key = "agent:main:telegram:dm:999"
    runner._running_agents[active_session_key] = MagicMock()
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="999",
        name="Ops Home",
    )

    await runner._notify_active_sessions_of_shutdown()

    assert len(adapter.sent) == 1
@pytest.mark.asyncio
async def test_restart_home_channel_notification_not_deduped_across_threads():
    """An active session in a thread and a thread-less home channel in the
    same chat are distinct targets, so both receive a message."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True

    session_key = "agent:main:telegram:group:999"
    threaded_origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="999",
        chat_type="group",
        user_id="u1",
        thread_id="topic-7",
    )
    runner.session_store._entries[session_key] = MagicMock(origin=threaded_origin)
    runner._running_agents[session_key] = MagicMock()

    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="999",
        name="Ops Home",
    )

    await runner._notify_active_sessions_of_shutdown()

    assert len(adapter.sent) == 2
    first_call, second_call = adapter.sent_calls[0], adapter.sent_calls[1]
    # Index 2 of each recorded call is compared against the send metadata.
    assert first_call[2] == {"thread_id": "topic-7"}
    assert second_call[2] is None
@pytest.mark.asyncio
async def test_restart_home_channel_notification_ignores_false_send_result():
    """A failed ``SendResult`` from the home channel must not raise; the send
    is still attempted exactly once."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    telegram_config = runner.config.platforms[Platform.TELEGRAM]
    telegram_config.home_channel = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    send_failed = SendResult(success=False, error="network down")
    adapter.send = AsyncMock(return_value=send_failed)

    await runner._notify_active_sessions_of_shutdown()

    adapter.send.assert_called_once()
# ---------------------------------------------------------------------------
# Stuck-loop escalation integration
# ---------------------------------------------------------------------------

View file

@ -231,6 +231,55 @@ class TestSlackConnectCleanup:
mock_release.assert_called_once_with("slack-app-token", "xapp-fake")
assert adapter._platform_lock_identity is None
@pytest.mark.asyncio
async def test_reconnect_closes_previous_handler_to_prevent_zombie_socket(self):
    """Regression for #18980: calling connect() on an adapter that already has
    a live handler (e.g. during a gateway restart) must close the old
    AsyncSocketModeHandler before creating a new one. Without this guard,
    the old Socket Mode websocket stays alive and both connections dispatch
    every Slack event, producing double responses — the same bug that
    affected DiscordAdapter (#18187).
    """
    adapter = SlackAdapter(PlatformConfig(enabled=True, token="xoxb-fake"))

    # Pretend an earlier connect() left a handler behind.
    first_handler = AsyncMock()
    first_handler.close_async = AsyncMock()
    adapter._handler = first_handler

    def _noop_decorator(event_type):
        # Stand-in for the app's registration decorators: returns the function unchanged.
        def decorator(fn):
            return fn

        return decorator

    mock_app = MagicMock()
    mock_app.event = _noop_decorator
    mock_app.command = _noop_decorator
    mock_app.action = _noop_decorator
    mock_app.client = AsyncMock()

    auth_payload = {
        "user_id": "U_BOT",
        "user": "testbot",
        "team_id": "T_FAKE",
        "team": "FakeTeam",
    }
    mock_web_client = AsyncMock()
    mock_web_client.auth_test = AsyncMock(return_value=auth_payload)

    second_handler = MagicMock()

    with patch.object(_slack_mod, "AsyncApp", return_value=mock_app), \
         patch.object(_slack_mod, "AsyncWebClient", return_value=mock_web_client), \
         patch.object(_slack_mod, "AsyncSocketModeHandler", return_value=second_handler), \
         patch.dict(os.environ, {"SLACK_APP_TOKEN": "xapp-fake"}), \
         patch("gateway.status.acquire_scoped_lock", return_value=(True, None)), \
         patch("gateway.status.release_scoped_lock"), \
         patch("asyncio.create_task"):
        result = await adapter.connect()

    assert result is True
    # The stale handler was closed exactly once and replaced by the new one.
    first_handler.close_async.assert_awaited_once_with()
    assert adapter._handler is second_handler
# ---------------------------------------------------------------------------
# TestSlackProxyBehavior

View file

@ -132,6 +132,7 @@ async def test_reconnect_success_resets_error_count():
mock_app = MagicMock()
mock_app.updater = mock_updater
mock_app.bot.get_me = AsyncMock(return_value=MagicMock()) # heartbeat probe path
adapter._app = mock_app
with patch("asyncio.sleep", new_callable=AsyncMock):
@ -139,6 +140,15 @@ async def test_reconnect_success_resets_error_count():
assert adapter._polling_network_error_count == 0
# Clean up the heartbeat-probe task scheduled after a successful reconnect.
pending = [t for t in adapter._background_tasks if not t.done()]
for t in pending:
t.cancel()
try:
await t
except (asyncio.CancelledError, Exception):
pass
@pytest.mark.asyncio
async def test_reconnect_triggers_fatal_after_max_retries():
@ -284,3 +294,182 @@ async def test_drain_helper_noop_without_app():
adapter._app = None
# Should not raise
await adapter._drain_polling_connections()
# ── Heartbeat probe ──────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_heartbeat_probe_no_op_when_polling_healthy():
    """Healthy probe: updater running and get_me() succeeds, so nothing else happens.

    The probe must await ``bot.get_me()`` exactly once and must not call
    ``_handle_polling_network_error``.
    """
    adapter = _make_adapter()

    updater = MagicMock(running=True)
    app = MagicMock(updater=updater)
    app.bot.get_me = AsyncMock(return_value=MagicMock())
    adapter._app = app
    adapter._handle_polling_network_error = AsyncMock()

    # Patch sleep so the probe delay elapses immediately.
    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    app.bot.get_me.assert_awaited_once()
    adapter._handle_polling_network_error.assert_not_awaited()
@pytest.mark.asyncio
async def test_heartbeat_probe_reenters_ladder_when_updater_not_running():
    """Updater.running is False at probe time: treat as wedged.

    The probe must skip ``bot.get_me()`` and hand a ``RuntimeError`` whose
    message mentions "not running" to ``_handle_polling_network_error``.
    """
    adapter = _make_adapter()

    updater = MagicMock(running=False)
    app = MagicMock(updater=updater)
    app.bot.get_me = AsyncMock()
    adapter._app = app
    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    app.bot.get_me.assert_not_called()
    adapter._handle_polling_network_error.assert_awaited_once()

    reported = adapter._handle_polling_network_error.await_args.args[0]
    assert isinstance(reported, RuntimeError)
    assert "not running" in str(reported).lower()
@pytest.mark.asyncio
async def test_heartbeat_probe_reenters_ladder_when_get_me_times_out():
    """A get_me() call that exceeds the probe timeout is treated as wedged.

    ``wait_for`` is replaced with a stub that raises ``TimeoutError``
    immediately, simulating the connection-pool wedge that motivated the
    probe.  The reconnect handler must be awaited exactly once.
    """

    async def hang_forever(*args, **kwargs):
        await asyncio.sleep(3600)

    async def fast_wait_for(coro, timeout):
        # Close the coroutine handed in so it is not left un-awaited,
        # then report a timeout straight away.
        if asyncio.iscoroutine(coro):
            coro.close()
        raise asyncio.TimeoutError()

    adapter = _make_adapter()

    updater = MagicMock(running=True)
    app = MagicMock(updater=updater)
    app.bot.get_me = AsyncMock(side_effect=hang_forever)
    adapter._app = app
    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock), patch(
        "gateway.platforms.telegram.asyncio.wait_for", new=fast_wait_for
    ):
        await adapter._verify_polling_after_reconnect()

    adapter._handle_polling_network_error.assert_awaited_once()
@pytest.mark.asyncio
async def test_heartbeat_probe_reenters_ladder_on_get_me_network_error():
    """An exception from get_me() is forwarded to the reconnect ladder.

    ``bot.get_me()`` raises ``ConnectionError``; the handler must be awaited
    once and receive that same exception type as its first argument.
    """
    adapter = _make_adapter()

    updater = MagicMock(running=True)
    app = MagicMock(updater=updater)
    app.bot.get_me = AsyncMock(side_effect=ConnectionError("pool wedged"))
    adapter._app = app
    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    handler = adapter._handle_polling_network_error
    handler.assert_awaited_once()
    forwarded = handler.await_args.args[0]
    assert isinstance(forwarded, ConnectionError)
@pytest.mark.asyncio
async def test_heartbeat_probe_skips_when_already_fatal():
    """An adapter already in fatal-error state makes the probe bail out.

    Neither ``bot.get_me()`` nor ``_handle_polling_network_error`` may be
    invoked once a fatal error has been recorded.
    """
    adapter = _make_adapter()
    adapter._set_fatal_error(
        "telegram_polling_conflict", "already fatal", retryable=False
    )

    app = MagicMock()
    app.bot.get_me = AsyncMock()
    adapter._app = app
    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    app.bot.get_me.assert_not_called()
    adapter._handle_polling_network_error.assert_not_awaited()
@pytest.mark.asyncio
async def test_reconnect_schedules_heartbeat_probe_on_success():
    """A successful reconnect must add a probe task to ``_background_tasks``.

    Without the probe, a wedged Updater would sit silent indefinitely with
    no further error_callback to advance the reconnect ladder.
    """
    adapter = _make_adapter()
    adapter._polling_network_error_count = 1

    updater = MagicMock(running=True)
    updater.stop = AsyncMock()
    updater.start_polling = AsyncMock()  # succeeds

    app = MagicMock(updater=updater)
    app.bot.get_me = AsyncMock(return_value=MagicMock())
    adapter._app = app

    tasks_before = len(adapter._background_tasks)

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._handle_polling_network_error(Exception("Bad Gateway"))

    assert len(adapter._background_tasks) > tasks_before, (
        "Expected a heartbeat probe task to be scheduled after a successful "
        "reconnect's start_polling()"
    )

    # Clean up: cancel whatever is still pending and wait for it to settle.
    unfinished = [task for task in adapter._background_tasks if not task.done()]
    for task in unfinished:
        task.cancel()
        try:
            await task
        except (asyncio.CancelledError, Exception):
            pass

View file

@ -0,0 +1,185 @@
"""Tests for gateway.run._check_unavailable_skill.
Regression coverage for the dir-name-vs-frontmatter-name drift bug.
The hint function used to compare the skill's parent-directory name
against the typed command and the disabled list. That silently missed
every skill whose directory name differs from its declared frontmatter
name (~19 skills on a standard install), so users typing a real slug
like ``/stable-diffusion-image-generation`` got a generic "unknown
command" response instead of the intended "disabled — enable with …"
or "not installed — install with …" hint.
These tests pin the fixed behavior:
* Slug is derived from the frontmatter ``name:`` (exactly matching
:func:`agent.skill_commands.scan_skill_commands`), so the slug differs
from the directory name when the declared name is multi-word.
* ``disabled`` membership is checked by the declared name, because that
is what :func:`hermes_cli.skills_config.save_disabled_skills` stores.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
@pytest.fixture
def tmp_skills(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Provide an isolated skills directory so the real user config is untouched.

    Creates ``<tmp_path>/.hermes/skills``, points ``HERMES_HOME`` at
    ``<tmp_path>/.hermes`` and makes ``Path.home()`` return ``tmp_path``.

    Returns:
        The freshly created ``skills`` directory.
    """
    hermes_home = tmp_path / ".hermes"
    skills_root = hermes_home / "skills"
    hermes_home.mkdir()
    skills_root.mkdir()

    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    return skills_root
def _write_skill(skills_dir: Path, rel: str, frontmatter_name: str) -> Path:
    """Create ``<skills_dir>/<rel>/SKILL.md`` with minimal frontmatter.

    Args:
        skills_dir: Root directory the skill is created under.
        rel: Path of the skill directory relative to ``skills_dir``.
        frontmatter_name: Value written to the frontmatter ``name:`` field.

    Returns:
        Path to the written ``SKILL.md`` file.
    """
    target_dir = skills_dir / rel
    # Intermediate category dirs may be missing, and the same skill dir may
    # be written more than once.
    target_dir.mkdir(parents=True, exist_ok=True)

    skill_md = target_dir / "SKILL.md"
    content = f"---\nname: {frontmatter_name}\ndescription: test skill\n---\nBody.\n"
    skill_md.write_text(content, encoding="utf-8")
    return skill_md
def test_frontmatter_slug_matched_even_when_dir_name_differs(
    tmp_skills: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Directory ``stable-diffusion`` + frontmatter ``Stable Diffusion Image Generation``.

    The typed command is ``stable-diffusion-image-generation`` (the slug the
    agent actually registers).  A check based on the directory name would
    compare ``stable-diffusion`` to the typed command and miss; the test
    expects a "disabled" hint instead.
    """
    from gateway import run as gateway_run

    declared_name = "Stable Diffusion Image Generation"
    _write_skill(tmp_skills, "mlops/stable-diffusion", declared_name)

    # Config disables by declared name (matches what `hermes skills config` writes).
    monkeypatch.setattr(
        "gateway.run._get_disabled_skill_names",
        lambda: {declared_name},
        raising=False,
    )

    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names",
        return_value={declared_name},
    )
    dirs_patch = patch(
        "agent.skill_utils.get_all_skills_dirs",
        return_value=[tmp_skills],
    )
    with disabled_patch, dirs_patch:
        msg = gateway_run._check_unavailable_skill("stable-diffusion-image-generation")

    assert msg is not None, (
        "expected a 'disabled' hint for the frontmatter-derived slug; "
        "the old code compared the dir name 'stable-diffusion' and returned None"
    )
    assert "disabled" in msg.lower()
    assert "hermes skills config" in msg
def test_unknown_command_still_returns_none(
    tmp_skills: Path,
) -> None:
    """A command that matches no on-disk skill still yields ``None``."""
    from gateway import run as gateway_run

    _write_skill(tmp_skills, "creative/ascii-art", "ascii-art")

    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names", return_value=set()
    )
    dirs_patch = patch(
        "agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills]
    )
    with disabled_patch, dirs_patch:
        assert gateway_run._check_unavailable_skill("no-such-skill") is None
def test_matched_but_not_disabled_returns_none(
    tmp_skills: Path,
) -> None:
    """A skill that exists and is not disabled must not produce a hint."""
    from gateway import run as gateway_run

    _write_skill(tmp_skills, "creative/ascii-art", "ascii-art")

    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names", return_value=set()
    )
    dirs_patch = patch(
        "agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills]
    )
    with disabled_patch, dirs_patch:
        assert gateway_run._check_unavailable_skill("ascii-art") is None
def test_slug_normalization_strips_non_alnum(
    tmp_skills: Path,
) -> None:
    """Frontmatter ``C++ Code Review`` → slug ``c-code-review`` (``+`` stripped)."""
    from gateway import run as gateway_run

    declared_name = "C++ Code Review"
    _write_skill(tmp_skills, "software-development/cpp-review", declared_name)

    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names",
        return_value={declared_name},
    )
    dirs_patch = patch(
        "agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills]
    )
    with disabled_patch, dirs_patch:
        msg = gateway_run._check_unavailable_skill("c-code-review")

    assert msg is not None
    assert "disabled" in msg.lower()
def test_optional_skill_uses_frontmatter_slug(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The optional-skills branch must also match on the frontmatter slug.

    Before the fix, the directory name was matched against the typed
    command, so an optional skill at
    ``optional-skills/mlops/stable-diffusion/SKILL.md`` with frontmatter
    ``Stable Diffusion Image Generation`` returned None when the user typed
    the real slug.
    """
    from gateway import run as gateway_run

    # Build an isolated optional-skills dir.
    optional = tmp_path / "optional-skills"
    skill_md = optional / "mlops" / "stable-diffusion" / "SKILL.md"
    skill_md.parent.mkdir(parents=True)
    skill_md.write_text(
        "---\nname: Stable Diffusion Image Generation\ndescription: test\n---\n",
        encoding="utf-8",
    )

    # Point the optional lookup at our tmp dir.  The source reads from
    # ``get_optional_skills_dir(repo_root / "optional-skills")`` — we
    # can't easily retarget ``repo_root``, so patch the resolver.
    monkeypatch.setattr(
        "hermes_constants.get_optional_skills_dir",
        lambda _default: optional,
        raising=False,
    )

    # Ensure the "disabled" branch doesn't match anything so we fall
    # through to the optional-skills branch.
    empty_skills = tmp_path / "empty-skills"
    empty_skills.mkdir()

    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names", return_value=set()
    )
    dirs_patch = patch(
        "agent.skill_utils.get_all_skills_dirs", return_value=[empty_skills]
    )
    with disabled_patch, dirs_patch:
        msg = gateway_run._check_unavailable_skill("stable-diffusion-image-generation")

    assert msg is not None, (
        "optional-skills branch should recognize the frontmatter-derived slug; "
        "the old dir-name-based check returned None here too"
    )
    assert "not installed" in msg.lower()
    assert "official/mlops/stable-diffusion" in msg

View file

@ -284,6 +284,66 @@ class TestBridgeRuntimeFailure:
mock_fh.close.assert_called_once()
assert adapter._bridge_log_fh is None
@pytest.mark.asyncio
@pytest.mark.parametrize("returncode", [0, -2, -15])
async def test_shutdown_suppresses_fatal_on_planned_bridge_exit(self, returncode):
    """During graceful disconnect(), SIGTERM/SIGINT/clean-exit are NOT fatal.

    Regression guard for the bug where every gateway shutdown/restart
    logged "Fatal whatsapp adapter error (whatsapp_bridge_exited)" and
    dispatched a fatal-error notification just before the normal
    "✓ whatsapp disconnected" — because _check_managed_bridge_exit()
    saw the bridge's returncode of -15 (our own SIGTERM) and classified
    it as an unexpected crash.
    """
    adapter = _make_adapter()
    on_fatal = AsyncMock()
    adapter.set_fatal_error_handler(on_fatal)

    adapter._running = True
    adapter._http_session = MagicMock()
    adapter._bridge_log_fh = MagicMock()
    adapter._shutting_down = True  # disconnect() sets this before SIGTERM

    # Bridge process whose poll() reports the parametrized exit status.
    adapter._bridge_process = MagicMock(**{"poll.return_value": returncode})

    result = await adapter._check_managed_bridge_exit()

    assert result is None, (
        f"returncode={returncode} during shutdown should be suppressed, "
        f"got fatal message: {result!r}"
    )
    assert adapter.fatal_error_code is None
    on_fatal.assert_not_awaited()
@pytest.mark.asyncio
async def test_shutdown_still_surfaces_nonzero_crash(self):
    """Even during shutdown, a truly crashed bridge (e.g. returncode 137) is fatal.

    The suppression list is deliberately narrow (0, -2, -15) so that
    OOM-kill (137), assertion failures, or custom error exits still
    reach the fatal-error handler and user notification path.
    """
    adapter = _make_adapter()
    on_fatal = AsyncMock()
    adapter.set_fatal_error_handler(on_fatal)

    adapter._running = True
    adapter._http_session = MagicMock()
    adapter._bridge_log_fh = MagicMock()
    adapter._shutting_down = True

    # 137: SIGKILL / OOM-kill
    adapter._bridge_process = MagicMock(**{"poll.return_value": 137})

    result = await adapter._check_managed_bridge_exit()

    assert result is not None
    assert "exited unexpectedly" in result
    assert adapter.fatal_error_code == "whatsapp_bridge_exited"
    on_fatal.assert_awaited_once()
@pytest.mark.asyncio
async def test_closed_when_http_not_ready(self):
"""Health endpoint never returns 200 within 15 attempts."""

View file

@ -203,6 +203,30 @@ class TestListAuthenticatedProvidersBedrock:
bedrock = next((p for p in providers if p["slug"] == "bedrock"), None)
assert bedrock is None, "bedrock should NOT appear when AWS credentials are absent"
def test_non_bedrock_picker_does_not_probe_full_aws_chain(self, monkeypatch):
    """Non-Bedrock provider discovery must not touch boto3's full credential chain."""
    from hermes_cli.model_switch import list_authenticated_providers

    # Clear every AWS credential hint from the environment.
    aws_env_vars = (
        "AWS_PROFILE",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_BEARER_TOKEN_BEDROCK",
        "AWS_WEB_IDENTITY_TOKEN_FILE",
        "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
        "AWS_CONTAINER_CREDENTIALS_FULL_URI",
    )
    for var in aws_env_vars:
        monkeypatch.delenv(var, raising=False)

    # Count every invocation of the credential probe.
    calls = {"has_aws_credentials": 0}

    def _has_aws_credentials():
        calls["has_aws_credentials"] += 1
        return False

    with patch(
        "agent.bedrock_adapter.has_aws_credentials",
        side_effect=_has_aws_credentials,
    ):
        providers = list_authenticated_providers(
            current_provider="openrouter", max_models=0
        )

    assert calls["has_aws_credentials"] == 0
    assert not any(p["slug"] == "bedrock" for p in providers)
def test_bedrock_falls_back_to_curated_when_discovery_fails(self, monkeypatch):
"""When discover_bedrock_models() raises, fall back to curated list without crashing."""
from hermes_cli.model_switch import list_authenticated_providers

View file

@ -822,6 +822,103 @@ class TestClampTelegramNames:
assert result[0] == ("foo", "d1")
class TestClampCommandNamesTriples:
    """Tests for _clamp_command_names with 3-tuples (name, desc, cmd_key).

    Skill entries pass through _clamp_command_names as 3-tuples so the
    original cmd_key survives name truncation.  Before the fix in PR #18951,
    the code stripped cmd_key into a side-dict keyed by the *original*
    (name, desc) pair — after truncation the lookup key no longer matched,
    silently losing the cmd_key.
    """

    def test_short_triple_preserved(self):
        """A name under the limit passes through unchanged."""
        triple = ("skill", "A skill", "/skill")
        assert _clamp_command_names([triple], set()) == [triple]

    def test_long_name_preserves_cmd_key(self):
        """Clamping a 50-char name keeps the original cmd_key."""
        long_name = "a" * 50
        cmd_key = f"/{long_name}"

        result = _clamp_command_names([(long_name, "desc", cmd_key)], set())

        assert len(result) == 1
        clamped, _desc, key = result[0]
        assert len(clamped) == _CMD_NAME_LIMIT
        assert key == cmd_key, "cmd_key must survive name clamping"

    def test_collision_preserves_cmd_key(self):
        """A clamped name that hits a reserved name gets a digit suffix."""
        reserved_prefix = "x" * _CMD_NAME_LIMIT
        long_name = "x" * 50

        result = _clamp_command_names(
            [(long_name, "desc", "/long-skill")], reserved={reserved_prefix},
        )

        assert len(result) == 1
        clamped, _desc, key = result[0]
        assert clamped == "x" * (_CMD_NAME_LIMIT - 1) + "0"
        assert key == "/long-skill"

    def test_multiple_long_names_preserve_respective_keys(self):
        """Each long entry keeps its own cmd_key, in input order."""
        base = "y" * 40
        entries = [
            (base + "_alpha", "d1", "/alpha-skill"),
            (base + "_beta", "d2", "/beta-skill"),
        ]

        result = _clamp_command_names(entries, set())

        assert len(result) == 2
        assert [entry[2] for entry in result] == ["/alpha-skill", "/beta-skill"]

    def test_backward_compat_with_pairs(self):
        """Legacy 2-tuple callers (Telegram) must still work."""
        entries = [("help", "Show help"), ("status", "Show status")]
        assert _clamp_command_names(entries, set()) == entries
class TestDiscordSkillCmdKeyDispatch:
    """Integration: discord_skill_commands preserves cmd_key for long names.

    This tests the full pipeline: skill_commands → _collect_gateway_skill_entries
    → _clamp_command_names → returned triples, verifying that skills with names
    exceeding Discord's 32-char limit still have their original cmd_key for
    dispatch.
    """

    def test_long_skill_name_retains_cmd_key(self, tmp_path, monkeypatch):
        """A skill name longer than the limit is clamped but keeps its cmd_key."""
        from unittest.mock import patch

        long_name = "this-is-a-very-long-skill-name-that-exceeds-limit"
        cmd_key = f"/{long_name}"

        fake_skills_dir = tmp_path / "skills"
        fake_skills_dir.mkdir(exist_ok=True)
        # Use resolved path — macOS /var → /private/var symlink
        # causes SKILLS_DIR.resolve() to differ from tmp_path.
        resolved_dir = str(fake_skills_dir.resolve())

        skill_info = {
            "name": long_name,
            "description": "A skill with a long name",
            "skill_md_path": f"{resolved_dir}/{long_name}/SKILL.md",
            "skill_dir": f"{resolved_dir}/{long_name}",
        }
        fake_cmds = {cmd_key: skill_info}

        with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
                patch("tools.skills_tool.SKILLS_DIR", fake_skills_dir), \
                patch("agent.skill_utils.get_external_skills_dirs", return_value=[]):
            entries, hidden = discord_skill_commands(
                max_slots=100, reserved_names=set(),
            )

        assert len(entries) == 1
        clamped, _desc, key = entries[0]
        assert len(clamped) <= _CMD_NAME_LIMIT, "Name should be clamped to 32 chars"
        assert key == cmd_key, (
            f"cmd_key must be the original /{long_name}, got {key!r}"
        )
class TestTelegramMenuCommands:
"""Integration: telegram_menu_commands enforces the 32-char limit."""
@ -899,6 +996,73 @@ class TestTelegramMenuCommands:
assert "my_enabled_skill" in menu_names
assert "my_disabled_skill" not in menu_names
def test_external_dir_skills_included_in_telegram_menu(self, tmp_path, monkeypatch):
    """External skills (``skills.external_dirs``) must appear in the Telegram menu.

    Regression test for #8110 — external skills were visible to the
    agent and CLI but silently excluded from gateway slash menus
    because ``_collect_gateway_skill_entries`` only accepted skills
    whose path started with ``SKILLS_DIR``.

    Also verifies the trailing-slash boundary: a directory that
    simply shares a prefix with a configured ``external_dirs`` entry
    (``/tmp/my-skills-extra`` vs ``/tmp/my-skills``) must NOT be
    admitted.
    """
    from unittest.mock import patch

    local_dir = tmp_path / "skills"
    external_dir = tmp_path / "my-skills"
    lookalike_dir = tmp_path / "my-skills-extra"
    local_dir.mkdir()
    external_dir.mkdir()
    lookalike_dir.mkdir()

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / "config.yaml").write_text(
        f"skills:\n external_dirs:\n - {external_dir}\n"
    )

    def _entry(root, name, description):
        # One fake registry record for a skill living at <root>/<name>.
        return {
            "name": name,
            "description": description,
            "skill_md_path": f"{root}/{name}/SKILL.md",
            "skill_dir": f"{root}/{name}",
        }

    fake_cmds = {
        "/local-one": _entry(local_dir, "local-one", "Local"),
        "/morning-briefing": _entry(
            external_dir, "morning-briefing", "External skill"
        ),
        "/lookalike-skill": _entry(
            lookalike_dir,
            "lookalike-skill",
            "Lives in a sibling dir that shares a prefix",
        ),
    }

    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", local_dir),
        patch(
            "agent.skill_utils.get_external_skills_dirs",
            return_value=[external_dir],
        ),
    ):
        menu, _ = telegram_menu_commands(max_commands=100)

    menu_names = {n for n, _ in menu}
    assert "local_one" in menu_names, "local skill must appear"
    assert "morning_briefing" in menu_names, (
        "external skill from skills.external_dirs must appear (fixes #8110)"
    )
    assert "lookalike_skill" not in menu_names, (
        "prefix-match sibling directories must not be admitted"
    )
def test_special_chars_in_skill_names_sanitized(self, tmp_path, monkeypatch):
"""Skills with +, /, or other special chars produce valid Telegram names."""
from unittest.mock import patch
@ -1353,6 +1517,119 @@ class TestDiscordSkillCommandsByCategory:
assert "vllm" in names
assert len(uncategorized) == 0
def test_no_legacy_25x25_cap(self, tmp_path, monkeypatch):
    """The old nested-layout caps (25 groups × 25 skills/group) are gone.

    The live caller flattens categories into a single autocomplete list,
    which Discord fetches dynamically — the per-command 8KB payload
    concern from the old nested layout (#11321, #10259) no longer applies.

    Guards against accidentally re-introducing the caps, which would
    silently drop skills in the 26th+ alphabetical category (the exact
    failure mode users were hitting with 29 category dirs on real
    installs).
    """
    from unittest.mock import patch

    skills_root = tmp_path / "skills"
    fake_skills_dir = str(skills_root)

    # Build 30 categories (> old _MAX_GROUPS=25) each with 30 skills
    # (> old _MAX_PER_GROUP=25).
    fake_cmds = {}
    for cat_idx in range(30):
        cat = f"cat{cat_idx:02d}"  # cat00, cat01, ..., cat29 — 30 categories
        for skill_idx in range(30):
            name = f"skill-{cat_idx:02d}-{skill_idx:02d}"
            skill_subdir = tmp_path / "skills" / cat / name
            skill_subdir.mkdir(parents=True, exist_ok=True)
            (skill_subdir / "SKILL.md").write_text("---\nname: x\n---\n")
            fake_cmds[f"/{name}"] = {
                "name": name,
                "description": f"Category {cat} skill {skill_idx}",
                "skill_md_path": f"{fake_skills_dir}/{cat}/{name}/SKILL.md",
            }

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", tmp_path / "skills"),
    ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # Every category should be present — no 25-group cap
    assert len(categories) == 30, (
        f"expected all 30 categories, got {len(categories)} "
        f"(cap from old nested layout must be removed)"
    )

    # Every skill in every category must be present — no 25-per-group cap
    for cat_name, entries in categories.items():
        assert len(entries) == 30, (
            f"category {cat_name}: expected 30 skills, got {len(entries)} "
            f"(cap from old nested layout must be removed)"
        )

    # Nothing should be reported hidden for the cap reason (the only
    # legitimate hidden reason now is name clamp collisions, which
    # don't happen here since all names are unique).
    assert hidden == 0
def test_external_dirs_skills_included(self, tmp_path, monkeypatch):
    """Skills in ``skills.external_dirs`` must appear in /skill autocomplete.

    #18741 fixed this for the flat ``discord_skill_commands`` collector
    but left ``discord_skill_commands_by_category`` (the live caller for
    Discord's ``/skill`` command) still filtering by
    ``SKILLS_DIR`` prefix only.  Regression guard that both collectors
    now accept external-dir skills.
    """
    from unittest.mock import patch

    local_skills_dir = tmp_path / "local-skills"
    external_dir = tmp_path / "external-skills"

    local_md = local_skills_dir / "creative" / "local-skill" / "SKILL.md"
    external_md = external_dir / "mlops" / "external-skill" / "SKILL.md"
    for skill_md in (local_md, external_md):
        skill_md.parent.mkdir(parents=True)
        skill_md.write_text("")

    fake_cmds = {
        "/local-skill": {
            "name": "local-skill",
            "description": "Local",
            "skill_md_path": str(local_md),
        },
        "/external-skill": {
            "name": "external-skill",
            "description": "External",
            "skill_md_path": str(external_md),
        },
    }

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", local_skills_dir),
        patch(
            "agent.skill_utils.get_external_skills_dirs",
            return_value=[external_dir],
        ),
    ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # Local skill → grouped under "creative"
    assert "creative" in categories
    assert any(n == "local-skill" for n, _d, _k in categories["creative"])

    # External skill → grouped under its own top-level dir "mlops"
    assert "mlops" in categories, (
        "external-dir skills must be included — the old SKILLS_DIR-only "
        "prefix check was broken for by_category (completes #18741)"
    )
    assert any(n == "external-skill" for n, _d, _k in categories["mlops"])

    assert uncategorized == []
    assert hidden == 0
# ---------------------------------------------------------------------------
# Plugin slash command integration

View file

@ -0,0 +1,246 @@
"""Tests for Discord /skill 32-char clamp collision warnings.
Discord's per-command name limit is 32 chars, so
``discord_skill_commands_by_category`` clamps skill slugs to that width
before deduping. When two skills share the same 32-char prefix, only
the first (alphabetical) wins; the second is dropped. Previously the
drop was silent — the ``hidden`` count incremented but nothing named
which skills collided, so authors had no way to discover the drop
short of noticing that their skill was missing from the autocomplete.
This module pins the upgraded behavior: a WARNING log with both full
cmd_keys + the clamped name, so whoever named the skills sees the
collision and can rename one.
"""
from __future__ import annotations
import logging
from pathlib import Path
from unittest.mock import patch
def test_clamp_collision_emits_warning_naming_both_skills(
    tmp_path: Path, caplog
) -> None:
    """Two skills with identical first 32 chars — warning names both."""
    from hermes_cli.commands import discord_skill_commands_by_category

    # Craft names that share their first 32 characters: the common prefix
    # is exactly 32 chars long, so both clamp to the prefix itself.
    prefix = "skill-collision-prefix-identical"  # exactly 32 chars
    name_a = prefix + "-alpha"  # /skill-collision-prefix-identical-alpha
    name_b = prefix + "-bravo"  # /skill-collision-prefix-identical-bravo
    assert name_a[:32] == name_b[:32] == prefix

    skills_dir = tmp_path / "skills"
    fake_cmds = {}
    for skill_name, description in ((name_a, "Alpha"), (name_b, "Bravo")):
        skill_dir = skills_dir / "creative" / skill_name
        skill_dir.mkdir(parents=True)
        (skill_dir / "SKILL.md").write_text("---\nname: x\n---\n")
        fake_cmds[f"/{skill_name}"] = {
            "name": skill_name,
            "description": description,
            "skill_md_path": str(skills_dir / "creative" / skill_name / "SKILL.md"),
        }

    with (
        caplog.at_level(logging.WARNING, logger="hermes_cli.commands"),
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", skills_dir),
    ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # One skill made it through, one was dropped (hidden counted).
    assert hidden == 1
    kept_names = [n for n, _d, _k in categories.get("creative", [])]
    assert len(kept_names) == 1
    # Alphabetical iteration means the -alpha variant wins the slot.
    assert kept_names[0] == prefix  # clamped

    # Exactly one warning, naming BOTH full cmd_keys and the clamped name.
    warnings = [
        record
        for record in caplog.records
        if record.levelno == logging.WARNING and "clamp" in record.getMessage()
    ]
    assert len(warnings) == 1, (
        f"expected exactly one clamp-collision warning, got {len(warnings)}: "
        f"{[r.getMessage() for r in warnings]}"
    )
    msg = warnings[0].getMessage()
    assert f"/{name_a}" in msg, f"winner not named in warning: {msg!r}"
    assert f"/{name_b}" in msg, f"loser not named in warning: {msg!r}"
    assert prefix in msg, f"clamped name not in warning: {msg!r}"
def test_clamp_collision_with_reserved_name_emits_distinct_warning(
    tmp_path: Path, caplog
) -> None:
    """A skill clashing with a reserved gateway command gets its own phrasing.

    The reserved-vs-skill case is operationally different — the fix is
    still "rename the skill," but there's no second skill to also
    rename.  The warning should say so explicitly.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    # Reserved name 'help' is 4 chars — use a skill whose slug is
    # exactly 'help', so it clamps to the reserved name.
    reserved = "help"
    skills_dir = tmp_path / "skills"
    skill_dir = skills_dir / "creative" / reserved
    skill_dir.mkdir(parents=True)
    skill_md = skill_dir / "SKILL.md"
    skill_md.write_text("---\nname: x\n---\n")

    fake_cmds = {
        f"/{reserved}": {
            "name": reserved,
            "description": "desc",
            "skill_md_path": str(skill_md),
        },
    }

    with (
        caplog.at_level(logging.WARNING, logger="hermes_cli.commands"),
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", skills_dir),
    ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names={"help"},
        )

    # Skill dropped in favor of the reserved command.
    assert hidden == 1
    assert categories == {}
    assert uncategorized == []

    warnings = [
        record
        for record in caplog.records
        if record.levelno == logging.WARNING and "reserved" in record.getMessage()
    ]
    assert len(warnings) == 1, (
        f"expected one reserved-name collision warning, got "
        f"{[r.getMessage() for r in warnings]}"
    )
    msg = warnings[0].getMessage()
    assert f"/{reserved}" in msg
    assert "reserved" in msg.lower()
def test_no_collision_no_warning(tmp_path: Path, caplog) -> None:
    """Sanity check: two skills with distinct names are both kept, silently."""
    from hermes_cli.commands import discord_skill_commands_by_category

    skills_dir = tmp_path / "skills"
    fake_cmds = {}
    # Create each skill on disk and register it in the fake command table.
    for nm in ("alpha", "bravo"):
        skill_home = skills_dir / "creative" / nm
        skill_home.mkdir(parents=True)
        (skill_home / "SKILL.md").write_text("---\nname: x\n---\n")
        fake_cmds[f"/{nm}"] = {
            "name": nm,
            "description": "",
            "skill_md_path": str(skills_dir / "creative" / nm / "SKILL.md"),
        }

    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"):
        with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds):
            with patch("tools.skills_tool.SKILLS_DIR", skills_dir):
                categories, uncategorized, hidden = discord_skill_commands_by_category(
                    reserved_names=set(),
                )

    assert hidden == 0
    assert {n for n, _d, _k in categories["creative"]} == {"alpha", "bravo"}

    # Neither collision flavour ("clamp" or "reserved") may have been logged.
    clamp_warnings = []
    for record in caplog.records:
        if record.levelno != logging.WARNING:
            continue
        text = record.getMessage()
        if "clamp" in text or "reserved" in text:
            clamp_warnings.append(record)
    assert clamp_warnings == []
def test_long_skill_name_preserves_cmd_key_through_by_category(
    tmp_path: Path,
) -> None:
    """Skills with names > 32 chars must keep their original cmd_key.

    ``discord_skill_commands_by_category`` may shorten the display name to
    32 chars, but the third tuple element (cmd_key) has to remain the full
    ``/full-skill-name``.  The flatten-and-lookup steps below mirror what the
    original author describes the Discord adapter doing in
    ``_refresh_skill_catalog_state``.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    skills_dir = tmp_path / "skills"
    skills_dir.mkdir()
    resolved = str(skills_dir.resolve())

    long_name = "generate-ascii-art-from-text-description-detailed"
    cmd_key = f"/{long_name}"
    long_spec = {
        "name": long_name,
        "description": "Generate ASCII art from a text description",
        "skill_md_path": f"{resolved}/creative/{long_name}/SKILL.md",
        "skill_dir": f"{resolved}/creative/{long_name}",
    }
    short_spec = {
        "name": "short-skill",
        "description": "A short skill",
        "skill_md_path": f"{resolved}/creative/short-skill/SKILL.md",
        "skill_dir": f"{resolved}/creative/short-skill",
    }
    fake_cmds = {cmd_key: long_spec, "/short-skill": short_spec}

    with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds):
        with patch("tools.skills_tool.SKILLS_DIR", skills_dir):
            categories, uncategorized, hidden = discord_skill_commands_by_category(
                reserved_names=set(),
            )

    # Flatten: uncategorized entries first, then every category's entries.
    entries = list(uncategorized) + [
        entry for cat_skills in categories.values() for entry in cat_skills
    ]
    # Display-name -> (description, cmd_key) lookup.
    skill_lookup = {}
    for n, d, k in entries:
        skill_lookup[n] = (d, k)

    # The long skill must appear exactly once, keyed by its full command.
    long_entry = [e for e in entries if e[2] == cmd_key]
    assert len(long_entry) == 1, f"Long skill should appear once, got: {long_entry}"

    display_name, desc, key = long_entry[0]
    assert len(display_name) <= 32, (
        f"Display name should be clamped to 32 chars, got {len(display_name)}"
    )
    assert key == cmd_key, (
        f"cmd_key must be the original /{long_name}, got {key!r}"
    )

    # Clamped display name must resolve back to the untruncated cmd_key.
    assert display_name in skill_lookup
    _desc, looked_up_key = skill_lookup[display_name]
    assert looked_up_key == cmd_key, (
        f"Lookup must map clamped name to original cmd_key, got {looked_up_key!r}"
    )

    # The short skill passes through unchanged.
    short_entry = [e for e in entries if e[2] == "/short-skill"]
    assert len(short_entry) == 1
    assert short_entry[0][0] == "short-skill"

View file

@ -51,6 +51,57 @@ class TestProviderEnvDetection:
assert not _has_provider_env_config(content)
class TestDoctorEnvFileEncoding:
    """Regression for #18637 (bug 3): `hermes doctor` crashed on Windows
    Chinese locale (GBK) because `.env` was read with Path.read_text() which
    defaults to the system locale encoding, not UTF-8."""

    def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
        self, monkeypatch, tmp_path
    ):
        """run_doctor must pin encoding="utf-8" when it reads the .env file."""
        import pathlib

        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()

        # UTF-8 .env with an em dash (U+2014 = e2 80 94); per the original
        # report, the 0x94 byte is what broke locale-default reads under GBK.
        env_path = hermes_home / ".env"
        env_path.write_text(
            "OPENAI_API_KEY=sk-test # em-dash here — should not crash\n",
            encoding="utf-8",
        )
        monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)

        real_read_text = pathlib.Path.read_text

        def gbk_like_read_text(self, encoding=None, errors=None, **kwargs):
            # Any file other than our .env, or a read pinned to UTF-8,
            # goes straight to the real implementation.
            if self != env_path or encoding == "utf-8":
                return real_read_text(self, encoding=encoding, errors=errors, **kwargs)
            # Otherwise behave like a GBK locale choking on the UTF-8 bytes.
            raise UnicodeDecodeError(
                "gbk", b"\x94", 0, 1, "illegal multibyte sequence"
            )

        monkeypatch.setattr(pathlib.Path, "read_text", gbk_like_read_text)

        def _abort_probe(*a, **kw):
            # Stop doctor at the tool-availability probe; the test only needs
            # it to get past the .env read.
            raise SystemExit(0)

        fake_model_tools = types.SimpleNamespace(
            check_tool_availability=_abort_probe,
            TOOLSET_REQUIREMENTS={},
        )
        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)

        # A locale-encoded read would surface as UnicodeDecodeError instead
        # of the SystemExit raised by the stubbed probe.
        with pytest.raises(SystemExit):
            doctor_mod.run_doctor(Namespace(fix=False))
class TestDoctorToolAvailabilityOverrides:
def test_marks_honcho_available_when_configured(self, monkeypatch):
monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)

View file

@ -4,11 +4,16 @@ from hermes_cli.setup import setup_agent_settings
def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
"""The helper text should match the value shown in the prompt."""
"""The helper text should match the value shown in the prompt.
After PR#18413 max_turns is read exclusively from config.yaml — the
.env `HERMES_MAX_ITERATIONS` fallback was removed because it was
shadowing the user's current config (see the 60-vs-500 incident).
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
config = {
"agent": {"max_turns": 90},
"agent": {"max_turns": 60},
"display": {"tool_progress": "all"},
"compression": {"threshold": 0.50},
"session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
@ -16,10 +21,10 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
prompt_answers = iter(["60", "all", "0.5"])
monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
monkeypatch.setattr("hermes_cli.setup.remove_env_value", lambda *args, **kwargs: None)
monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
setup_agent_settings(config)
@ -27,3 +32,47 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
out = capsys.readouterr().out
assert "Press Enter to keep 60." in out
assert "Default is 90" not in out
def test_setup_agent_settings_prefers_config_over_stale_env(tmp_path, monkeypatch, capsys):
    """Config.yaml wins even when a stale .env value disagrees.

    Regression guard: an old `.env HERMES_MAX_ITERATIONS=60` used to shadow
    `agent.max_turns: 500` from config.yaml.  The wizard must show the config
    value and remove the stale .env key.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    config = {
        "agent": {"max_turns": 500},  # value the user set in config.yaml
        "display": {"tool_progress": "all"},
        "compression": {"threshold": 0.50},
        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
    }
    prompt_answers = iter(["500", "all", "0.5"])
    removed_keys: list[str] = []

    def stale_env_value(key):
        # Pretend .env still carries the old iteration cap.
        if key == "HERMES_MAX_ITERATIONS":
            return "60"
        return ""

    def record_removal(key):
        removed_keys.append(key)
        return True

    monkeypatch.setattr("hermes_cli.setup.get_env_value", stale_env_value)
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
    monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
    monkeypatch.setattr("hermes_cli.setup.remove_env_value", record_removal)
    monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)

    setup_agent_settings(config)
    out = capsys.readouterr().out

    # The config value is what gets displayed ...
    assert "Press Enter to keep 500." in out
    assert "Press Enter to keep 60." not in out
    # ... and the stale .env entry is removed.
    assert "HERMES_MAX_ITERATIONS" in removed_keys

View file

@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
_configure_provider,
_get_platform_tools,
_platform_toolset_summary,
_reconfigure_tool,
_save_platform_tools,
_toolset_has_keys,
CONFIGURABLE_TOOLSETS,
@ -468,6 +469,33 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch):
assert config["browser"]["cloud_provider"] == "local"
def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypatch):
    """An enabled ``web`` toolset is offered for reconfiguration even when
    ``_toolset_has_keys`` reports no existing provider configuration."""
    config = {"platform_toolsets": {"cli": ["web"]}}
    captured = {}
    configured = []

    def no_keys(ts_key, config=None):
        return False

    def fake_prompt_choice(question, choices, default=0):
        # Remember what was offered, then pick the first entry.
        captured["choices"] = choices
        return 0

    def record_configured(ts_key, cat, config):
        configured.append(ts_key)

    monkeypatch.setattr("hermes_cli.tools_config._toolset_has_keys", no_keys)
    monkeypatch.setattr("hermes_cli.tools_config._prompt_choice", fake_prompt_choice)
    monkeypatch.setattr(
        "hermes_cli.tools_config._configure_tool_category_for_reconfig",
        record_configured,
    )
    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)

    _reconfigure_tool(config)

    assert any("Web Search" in choice for choice in captured["choices"])
    assert configured == ["web"]
def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)

View file

@ -0,0 +1,69 @@
"""Regression test for #17929: AIAgent.__init__ should try fallback_model
when primary provider credentials are exhausted."""
import pytest
from unittest.mock import patch, MagicMock
from run_agent import AIAgent
def _make_tool_defs():
    """Return a one-element tool list holding a stub ``web_search`` function schema."""
    web_search = {
        "name": "web_search",
        "description": "search",
        "parameters": {"type": "object", "properties": {}},
    }
    return [{"type": "function", "function": web_search}]
def _mock_client(api_key="fb-key-1234567890", base_url="https://fb.example.com/v1"):
    """Build a MagicMock client carrying the given key and URL and no default headers."""
    client = MagicMock()
    client.api_key, client.base_url = api_key, base_url
    client._default_headers = None
    return client
def test_init_tries_fallback_when_primary_returns_none():
    """When resolve_provider_client returns None for primary but succeeds for
    a fallback entry, __init__ should NOT raise RuntimeError and the agent
    should end up on the fallback provider and model."""
    fallback_client = _mock_client()

    def fake_resolve(provider, model=None, raw_codex=False,
                     explicit_base_url=None, explicit_api_key=None):
        # Only the fallback provider resolves; everything else is exhausted.
        if provider != "tencent-token-plan":
            return None, None
        return fallback_client, "kimi2.5"

    with patch("agent.auxiliary_client.resolve_provider_client", side_effect=fake_resolve):
        with patch("run_agent.get_tool_definitions", return_value=_make_tool_defs()):
            with patch("run_agent.check_toolset_requirements", return_value={}):
                with patch("run_agent.OpenAI", return_value=MagicMock()):
                    agent = AIAgent(
                        provider="alibaba-coding-plan",
                        model="qwen3.6-plus",
                        api_key=None,
                        base_url=None,
                        quiet_mode=True,
                        skip_context_files=True,
                        skip_memory=True,
                        fallback_model=[{"provider": "tencent-token-plan", "model": "kimi2.5"}],
                    )

    assert agent.provider == "tencent-token-plan"
    assert agent.model == "kimi2.5"
    assert agent._fallback_activated is True
def test_init_raises_when_no_fallback_configured():
    """When primary returns None and no fallback is set, should raise."""
    agent_kwargs = dict(
        provider="alibaba-coding-plan",
        model="qwen3.6-plus",
        api_key=None,
        base_url=None,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        fallback_model=None,
    )
    with patch("agent.auxiliary_client.resolve_provider_client", return_value=(None, None)):
        with patch("run_agent.get_tool_definitions", return_value=_make_tool_defs()):
            with patch("run_agent.check_toolset_requirements", return_value={}):
                with patch("run_agent.OpenAI", return_value=MagicMock()):
                    with pytest.raises(RuntimeError, match="no API key was found"):
                        AIAgent(**agent_kwargs)

View file

@ -81,3 +81,51 @@ def test_unknown_base_url_clears_default_headers(mock_openai):
agent._apply_client_headers_for_base_url("https://api.example.com/v1")
assert "default_headers" not in agent._client_kwargs
@patch("run_agent.OpenAI")
def test_openrouter_headers_include_response_cache_when_enabled(mock_openai):
    """When openrouter.response_cache is True, the cache header is injected."""
    mock_openai.return_value = MagicMock()
    openrouter_url = "https://openrouter.ai/api/v1"
    agent = AIAgent(
        api_key="test-key",
        base_url=openrouter_url,
        model="test/model",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )

    cache_on = {"openrouter": {"response_cache": True, "response_cache_ttl": 600}}
    with patch("hermes_cli.config.load_config", return_value=cache_on):
        agent._apply_client_headers_for_base_url(openrouter_url)

    headers = agent._client_kwargs["default_headers"]
    assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
    assert headers["X-OpenRouter-Cache"] == "true"
    assert headers["X-OpenRouter-Cache-TTL"] == "600"
@patch("run_agent.OpenAI")
def test_openrouter_headers_no_cache_when_disabled(mock_openai):
    """When openrouter.response_cache is False, no cache headers are sent."""
    mock_openai.return_value = MagicMock()
    openrouter_url = "https://openrouter.ai/api/v1"
    agent = AIAgent(
        api_key="test-key",
        base_url=openrouter_url,
        model="test/model",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )

    cache_off = {"openrouter": {"response_cache": False}}
    with patch("hermes_cli.config.load_config", return_value=cache_off):
        agent._apply_client_headers_for_base_url(openrouter_url)

    headers = agent._client_kwargs["default_headers"]
    assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
    for cache_header in ("X-OpenRouter-Cache", "X-OpenRouter-Cache-TTL"):
        assert cache_header not in headers

View file

@ -0,0 +1,116 @@
"""Tests for get_hermes_home() profile-mode fallback warning.
Regression test for https://github.com/NousResearch/hermes-agent/issues/18594.
When HERMES_HOME is unset but an active_profile file indicates a non-default
profile is active, get_hermes_home() should:
1. STILL return ~/.hermes (raising would brick 30+ module-level callers)
2. Emit a loud one-shot warning to stderr so operators can diagnose
cross-profile data contamination after the fact.
The warning goes to stderr directly (not through logging) because this
function is called at module-import time from 30+ sites, often before the
logging subsystem has been configured.
"""
from pathlib import Path
import pytest
@pytest.fixture
def fresh_constants(monkeypatch, tmp_path):
    """Reload ``hermes_constants`` and point ``Path.home`` at ``tmp_path``.

    The reload gives each test a fresh module state (the original note says
    this resets the one-shot warn flag); ``HERMES_HOME`` is removed from the
    environment so the fallback path is exercised.
    """
    import importlib

    import hermes_constants

    # importlib.reload returns the same, re-initialised module object.
    module = importlib.reload(hermes_constants)
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.delenv("HERMES_HOME", raising=False)
    return module
class TestGetHermesHomeProfileWarning:
    """get_hermes_home() always returns ~/.hermes when HERMES_HOME is unset and
    warns on stderr only when a named (non-default) profile is active."""

    @staticmethod
    def _make_hermes_dir(tmp_path):
        # Create and return the fake ~/.hermes directory.
        hermes_dir = tmp_path / ".hermes"
        hermes_dir.mkdir()
        return hermes_dir

    def test_classic_mode_no_active_profile_no_warning(
        self, fresh_constants, tmp_path, capsys
    ):
        """Classic mode: no active_profile file → silent, returns ~/.hermes."""
        resolved = fresh_constants.get_hermes_home()
        assert resolved == tmp_path / ".hermes"
        assert "HERMES_HOME fallback" not in capsys.readouterr().err

    def test_default_active_profile_no_warning(
        self, fresh_constants, tmp_path, capsys
    ):
        """active_profile=default → still no warning, returns ~/.hermes."""
        marker = self._make_hermes_dir(tmp_path) / "active_profile"
        marker.write_text("default\n")
        resolved = fresh_constants.get_hermes_home()
        assert resolved == tmp_path / ".hermes"
        assert "HERMES_HOME fallback" not in capsys.readouterr().err

    def test_named_profile_unset_home_warns_once(
        self, fresh_constants, tmp_path, capsys
    ):
        """active_profile=coder + HERMES_HOME unset → warn loudly, still return fallback."""
        marker = self._make_hermes_dir(tmp_path) / "active_profile"
        marker.write_text("coder\n")
        resolved = fresh_constants.get_hermes_home()

        # 1. The fallback path is still returned — no import-time crash.
        assert resolved == tmp_path / ".hermes"

        # 2. The warning reached stderr exactly once.
        first_err = capsys.readouterr().err
        assert first_err.count("HERMES_HOME fallback") == 1
        assert "'coder'" in first_err
        assert "#18594" in first_err

        # 3. One-shot: two further calls stay quiet.
        for _ in range(2):
            fresh_constants.get_hermes_home()
        later_err = capsys.readouterr().err
        assert "HERMES_HOME fallback" not in later_err

    def test_hermes_home_set_suppresses_warning(
        self, fresh_constants, tmp_path, capsys, monkeypatch
    ):
        """Even if active_profile is 'coder', setting HERMES_HOME suppresses warning."""
        profile_dir = tmp_path / ".hermes" / "profiles" / "coder"
        profile_dir.mkdir(parents=True)
        (tmp_path / ".hermes" / "active_profile").write_text("coder\n")
        monkeypatch.setenv("HERMES_HOME", str(profile_dir))

        resolved = fresh_constants.get_hermes_home()
        assert resolved == profile_dir
        assert "HERMES_HOME fallback" not in capsys.readouterr().err

    def test_unreadable_active_profile_no_crash(
        self, fresh_constants, tmp_path, capsys
    ):
        """active_profile that can't be decoded → fall through silently."""
        marker = self._make_hermes_dir(tmp_path) / "active_profile"
        # These bytes are not valid UTF-8.
        marker.write_bytes(b"\xff\xfe\x00\x00")
        resolved = fresh_constants.get_hermes_home()
        assert resolved == tmp_path / ".hermes"
        # No crash and no warning: the intended profile is unknowable.
        assert "HERMES_HOME fallback" not in capsys.readouterr().err

    def test_empty_active_profile_no_warning(
        self, fresh_constants, tmp_path, capsys
    ):
        """Empty active_profile file → treated as default, no warning."""
        marker = self._make_hermes_dir(tmp_path) / "active_profile"
        marker.write_text("")
        resolved = fresh_constants.get_hermes_home()
        assert resolved == tmp_path / ".hermes"
        assert "HERMES_HOME fallback" not in capsys.readouterr().err

View file

@ -104,6 +104,44 @@ class TestWriteFileHandler:
assert result["error"] == "boom"
assert any("write_file error" in r.getMessage() for r in caplog.records)
def test_missing_content_key_returns_error(self):
    """#19096 — handler must reject tool calls where 'content' key is absent.

    The error must name the missing ``content`` field.  The previous final
    assertion ended in ``or True`` and could never fail; it is replaced by a
    real check that the message reports the field as missing.
    """
    from tools.file_tools import _handle_write_file

    result = json.loads(_handle_write_file({"path": "/tmp/oops.md"}))
    assert "error" in result
    error_text = result["error"]
    assert "content" in error_text
    # The handler's message reads "missing required field 'content'".
    assert "missing" in error_text.lower()
def test_missing_path_key_returns_error(self):
    """#19096 — handler must reject tool calls where 'path' key is absent.

    Besides requiring an error, this checks the message names ``path`` so an
    unrelated failure cannot satisfy the test.
    """
    from tools.file_tools import _handle_write_file

    result = json.loads(_handle_write_file({"content": "hello"}))
    assert "error" in result
    # The handler's message reads "missing required field 'path'".
    assert "path" in result["error"]
def test_explicit_empty_content_is_allowed(self):
    """#19096 — explicit empty string content (file truncation) must still work."""
    from tools.file_tools import _handle_write_file

    # Stub file-ops object whose write_file() reports a successful write.
    write_outcome = MagicMock()
    write_outcome.to_dict.return_value = {"status": "ok", "path": "/tmp/empty.txt", "bytes": 0}
    file_ops = MagicMock()
    file_ops.write_file.return_value = write_outcome

    with patch("tools.file_tools._get_file_ops", return_value=file_ops):
        raw = _handle_write_file({"path": "/tmp/empty.txt", "content": ""})

    result = json.loads(raw)
    assert result["status"] == "ok"
def test_non_string_content_returns_error(self):
    """#19096 — content must be a string, not a dict or list."""
    from tools.file_tools import _handle_write_file

    bad_call = {"path": "/tmp/x.txt", "content": {"nested": "dict"}}
    result = json.loads(_handle_write_file(bad_call))
    assert "error" in result
    lowered = result["error"].lower()
    assert "string" in lowered or "content" in lowered
class TestPatchHandler:
@patch("tools.file_tools._get_file_ops")

View file

@ -371,6 +371,57 @@ class TestDeleteSkill:
_delete_skill("my-skill")
assert not (tmp_path / "devops").exists()
def test_delete_with_absorbed_into_valid_target(self, tmp_path):
    """Deleting with an existing umbrella target succeeds and reports it."""
    with _skill_dir(tmp_path):
        for skill_name in ("umbrella", "narrow"):
            _create_skill(skill_name, VALID_SKILL_CONTENT)
        result = _delete_skill("narrow", absorbed_into="umbrella")

    assert result["success"] is True
    assert "absorbed into 'umbrella'" in result["message"]
    # Only the narrow skill is removed; the umbrella stays on disk.
    assert not (tmp_path / "narrow").exists()
    assert (tmp_path / "umbrella").exists()
def test_delete_with_absorbed_into_empty_string_means_pruned(self, tmp_path):
    """An empty ``absorbed_into`` is an explicit prune with no forwarding target."""
    with _skill_dir(tmp_path):
        _create_skill("stale-skill", VALID_SKILL_CONTENT)
        result = _delete_skill("stale-skill", absorbed_into="")

    assert result["success"] is True
    # A prune must not carry the "absorbed into" suffix in its message.
    message = result["message"]
    assert "absorbed into" not in message
def test_delete_with_absorbed_into_nonexistent_target_rejected(self, tmp_path):
    """Naming an umbrella that is not on disk is rejected without deleting anything."""
    with _skill_dir(tmp_path):
        _create_skill("narrow", VALID_SKILL_CONTENT)
        result = _delete_skill("narrow", absorbed_into="ghost-umbrella")

    assert result["success"] is False
    assert "does not exist" in result["error"]
    # A validation failure must leave the skill in place.
    narrow_dir = tmp_path / "narrow"
    assert narrow_dir.exists()
def test_delete_with_absorbed_into_equals_self_rejected(self, tmp_path):
    """A skill cannot be declared as absorbed into itself."""
    with _skill_dir(tmp_path):
        _create_skill("narrow", VALID_SKILL_CONTENT)
        result = _delete_skill("narrow", absorbed_into="narrow")

    assert result["success"] is False
    assert "cannot equal" in result["error"]
    # The rejected delete leaves the skill on disk.
    narrow_dir = tmp_path / "narrow"
    assert narrow_dir.exists()
def test_delete_with_absorbed_into_whitespace_only_treated_as_prune(self, tmp_path):
    """Whitespace-only ``absorbed_into`` is expected to behave like a prune."""
    # Per the original note: .strip() turns it into "" → pruned path.
    with _skill_dir(tmp_path):
        _create_skill("narrow", VALID_SKILL_CONTENT)
        result = _delete_skill("narrow", absorbed_into=" ")

    assert result["success"] is True
    message = result["message"]
    assert "absorbed into" not in message
def test_delete_without_absorbed_into_backward_compat(self, tmp_path):
    """Callers that omit ``absorbed_into`` entirely can still delete a skill."""
    # Per the original note, the curator reconciler falls back to its
    # heuristic+YAML logic for such undeclared deletes.
    with _skill_dir(tmp_path):
        _create_skill("my-skill", VALID_SKILL_CONTENT)
        outcome = _delete_skill("my-skill")

    assert outcome["success"] is True
# ---------------------------------------------------------------------------
# write_file / remove_file
@ -485,6 +536,25 @@ class TestSkillManageDispatcher:
result = json.loads(raw)
assert result["success"] is True
def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path):
    """``skill_manage`` must forward ``absorbed_into`` to the delete path."""
    # End-to-end check of the validation and message-suffix behaviour
    # through the dispatcher.
    with _skill_dir(tmp_path):
        for skill_name in ("umbrella", "narrow"):
            skill_manage(action="create", name=skill_name, content=VALID_SKILL_CONTENT)
        raw = skill_manage(action="delete", name="narrow", absorbed_into="umbrella")

    result = json.loads(raw)
    assert result["success"] is True
    assert "absorbed into 'umbrella'" in result["message"]
def test_delete_via_dispatcher_rejects_missing_absorbed_target(self, tmp_path):
    """A delete through the dispatcher fails when the umbrella does not exist."""
    with _skill_dir(tmp_path):
        skill_manage(action="create", name="narrow", content=VALID_SKILL_CONTENT)
        raw = skill_manage(action="delete", name="narrow", absorbed_into="ghost")

    outcome = json.loads(raw)
    assert outcome["success"] is False
    assert "does not exist" in outcome["error"]
class TestSecurityScanGate:
"""_security_scan_skill is gated by skills.guard_agent_created config flag."""

View file

@ -0,0 +1,196 @@
"""Tests for /goal handling in tui_gateway.
The TUI routes ``/goal`` through ``command.dispatch`` (not ``slash.exec``)
because the CLI's ``_handle_goal_command`` queues the kickoff message onto
``_pending_input``, which the slash-worker subprocess has no reader for.
Instead we handle ``/goal`` directly in the server and return a
``{"type": "send", "notice": ..., "message": ...}`` payload the TUI client
uses to render a system line and fire the kickoff prompt.
"""
from __future__ import annotations
import importlib
import threading
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Provide an isolated ``.hermes`` directory under ``tmp_path``.

    ``Path.home`` and ``HERMES_HOME`` are redirected to it, and the goal
    module's DB cache is cleared before and after the test.
    """
    from hermes_cli import goals

    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Clear the cache so the goal module re-resolves HERMES_HOME.
    goals._DB_CACHE.clear()
    yield hermes_dir
    goals._DB_CACHE.clear()
@pytest.fixture()
def server(hermes_home):
    """Import ``tui_gateway.server`` with env-loader and banner stubbed out.

    On teardown the module's registries are emptied and the module is
    reloaded, all while the ``sys.modules`` patch is still active.
    """
    stubbed_modules = {
        "hermes_cli.env_loader": MagicMock(),
        "hermes_cli.banner": MagicMock(),
    }
    with patch.dict("sys.modules", stubbed_modules):
        mod = importlib.import_module("tui_gateway.server")
        yield mod
        # Teardown stays inside the patch: same clear order, then reload.
        for registry in (mod._sessions, mod._pending, mod._answers, mod._methods):
            registry.clear()
        importlib.reload(mod)
@pytest.fixture()
def session(server):
    """Register a minimal idle session on the server.

    Returns ``(sid, session_key, session_dict)``.
    """
    sid, session_key = "sid-test", "tui-goal-session-1"
    state = dict(
        session_key=session_key,
        history=[],
        history_lock=threading.Lock(),
        history_version=0,
        running=False,
        attached_images=[],
        cols=120,
    )
    server._sessions[sid] = state
    return sid, session_key, state
def _call(server, method, **params):
    """Invoke the handler registered under ``method`` with request id 1 and ``params``."""
    return server._methods[method](1, params)
# ── command.dispatch /goal ────────────────────────────────────────────
def test_goal_bare_shows_status_when_none_set(server, session):
    """A bare ``/goal`` with no goal set reports that none is active."""
    sid = session[0]
    response = _call(server, "command.dispatch", name="goal", arg="", session_id=sid)
    payload = response["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_goal_whitespace_only_shows_status(server, session):
    """A whitespace-only argument behaves like a bare ``/goal``."""
    sid = session[0]
    response = _call(server, "command.dispatch", name="goal", arg=" ", session_id=sid)
    payload = response["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_goal_status_alias_shows_status(server, session):
    """``/goal status`` reports status instead of setting a goal named "status"."""
    sid = session[0]
    response = _call(server, "command.dispatch", name="goal", arg="status", session_id=sid)
    payload = response["result"]
    assert payload["type"] == "exec"
    assert "No active goal" in payload["output"]
def test_goal_set_returns_send_with_notice(server, session):
    """Setting a goal returns a ``send`` payload with a notice and persists the goal."""
    from hermes_cli.goals import GoalManager

    sid, session_key, _ = session
    response = _call(
        server, "command.dispatch", name="goal", arg="build a rocket", session_id=sid
    )
    result = response["result"]
    assert result["type"] == "send"
    assert result["message"] == "build a rocket"
    assert "notice" in result
    notice = result["notice"]
    assert "Goal set" in notice
    assert "20-turn budget" in notice

    # A fresh manager for the same session key must see the stored goal.
    state = GoalManager(session_key).state
    assert state is not None
    assert state.goal == "build a rocket"
    assert state.status == "active"
def test_goal_pause_after_set(server, session):
    """``/goal pause`` after a goal is set marks it paused."""
    from hermes_cli.goals import GoalManager

    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    response = _call(server, "command.dispatch", name="goal", arg="pause", session_id=sid)

    payload = response["result"]
    assert payload["type"] == "exec"
    assert "paused" in payload["output"].lower()
    assert GoalManager(session_key).state.status == "paused"
def test_goal_resume_reactivates(server, session):
    """``/goal resume`` turns a paused goal back to active."""
    from hermes_cli.goals import GoalManager

    sid, session_key, _ = session
    # Set, then pause, so there is something to resume.
    for arg in ("write a story", "pause"):
        _call(server, "command.dispatch", name="goal", arg=arg, session_id=sid)
    response = _call(server, "command.dispatch", name="goal", arg="resume", session_id=sid)

    payload = response["result"]
    assert payload["type"] == "exec"
    assert "resumed" in payload["output"].lower()
    assert GoalManager(session_key).state.status == "active"
def test_goal_clear_removes_active_goal(server, session):
    """``/goal clear`` leaves the session with no active goal."""
    from hermes_cli.goals import GoalManager

    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    response = _call(server, "command.dispatch", name="goal", arg="clear", session_id=sid)

    payload = response["result"]
    assert payload["type"] == "exec"
    assert "cleared" in payload["output"].lower()

    # Per the original note the row is kept with status=cleared for audit;
    # from the manager's view there is simply no goal any more.
    mgr = GoalManager(session_key)
    assert not mgr.has_goal()
    assert not mgr.is_active()
    assert "No active goal" in mgr.status_line()
def test_goal_stop_and_done_are_clear_aliases(server, session):
    """``stop`` and ``done`` each clear the current goal, like ``clear``."""
    sid = session[0]
    for goal_text, alias in (("first goal", "stop"), ("second goal", "done")):
        _call(server, "command.dispatch", name="goal", arg=goal_text, session_id=sid)
        response = _call(server, "command.dispatch", name="goal", arg=alias, session_id=sid)
        assert "cleared" in response["result"]["output"].lower()
def test_goal_requires_session(server):
    """Dispatching ``/goal`` for an unknown session id yields error code 4001."""
    response = _call(
        server, "command.dispatch", name="goal", arg="nope", session_id="unknown"
    )
    assert "error" in response
    assert response["error"]["code"] == 4001
# ── slash.exec /goal routing ──────────────────────────────────────────
def test_slash_exec_rejects_goal_routes_to_command_dispatch(server, session):
    """slash.exec must reject /goal with 4018 so the TUI client falls through
    to command.dispatch. Without this, the HermesCLI slash-worker subprocess
    would set the goal but silently drop the kickoff — the queue is in-proc."""
    sid = session[0]
    response = _call(server, "slash.exec", command="goal status", session_id=sid)
    assert "error" in response
    error = response["error"]
    assert error["code"] == 4018
    assert "command.dispatch" in error["message"]
def test_pending_input_commands_includes_goal(server):
    """Guard: 'goal' must stay listed in ``_PENDING_INPUT_COMMANDS``; the
    original note warns that removing it would silently re-break the TUI."""
    pending_commands = server._PENDING_INPUT_COMMANDS
    assert "goal" in pending_commands

View file

@ -94,10 +94,20 @@ _HERMES_ENV_PATH = (
)
# Project-local dotenv path: an optional leading "/", "./" or "../", any number
# of directory components, then ".env" with optional dotted suffixes
# (for example ".env.local").
_PROJECT_ENV_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*\.env(?:\.[^/\s"\'`]+)*)'
# Same path shape as above, but ending in "config.yaml".
_PROJECT_CONFIG_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*config\.yaml)'
# Shell startup files directly under the home directory, written as "~",
# "$home" or "${home}".
# NOTE(review): the variable spelling is lowercase — presumably the pattern is
# applied case-insensitively or to lowercased input; confirm at the call site.
_SHELL_RC_FILES = r'(?:~|\$home|\$\{home\})/\.(?:bashrc|zshrc|profile|bash_profile|zprofile)\b'
# Credential dotfiles directly under the home directory (netrc, pgpass, npmrc,
# pypirc), with the home directory written as "~", "$home" or "${home}".
_CREDENTIAL_FILES = r'(?:~|\$home|\$\{home\})/\.(?:netrc|pgpass|npmrc|pypirc)\b'
# Alternation of write targets treated as sensitive: /etc/, /dev/sd*, SSH
# paths, the Hermes .env file, shell rc files and credential dotfiles.
# The stale pre-change fragment ``rf'{_HERMES_ENV_PATH})'`` is removed: it
# closed the group early and repeated the Hermes path, leaving the pattern
# with an unbalanced parenthesis.
_SENSITIVE_WRITE_TARGET = (
    r'(?:/etc/|/dev/sd|'
    rf'{_SSH_SENSITIVE_PATH}|'
    rf'{_HERMES_ENV_PATH}|'
    rf'{_SHELL_RC_FILES}|'
    rf'{_CREDENTIAL_FILES})'
)
# Either project-local sensitive file: a ".env" variant or "config.yaml".
_PROJECT_SENSITIVE_WRITE_TARGET = rf'(?:{_PROJECT_ENV_PATH}|{_PROJECT_CONFIG_PATH})'
# Optional remainder of the command line: whitespace, a "&&", "||" or ";"
# separator and anything after it, anchored at the end of the string.
_COMMAND_TAIL = r'(?:\s*(?:&&|\|\||;).*)?$'

View file

@ -1097,7 +1097,25 @@ def _handle_read_file(args, **kw):
def _handle_write_file(args, **kw):
    """Validate a ``write_file`` tool call and forward it to ``write_file_tool``.

    Args:
        args: Tool-call arguments. ``path`` must be a non-empty string and
            ``content`` must be present and be a string.
        **kw: Dispatcher keyword arguments; only ``task_id`` is read, falling
            back to ``"default"`` when it is missing or falsy.

    Returns:
        The result of ``write_file_tool`` when the arguments are valid,
        otherwise the ``tool_error`` result naming the missing or mistyped
        field.
    """
    tid = kw.get("task_id") or "default"

    # Reject malformed calls up front instead of substituting empty-string
    # defaults for missing fields; the write must only happen after every
    # check below has passed.
    path = args.get("path")
    if not path or not isinstance(path, str):
        return tool_error(
            "write_file: missing required field 'path'. Re-emit the tool call with "
            "both 'path' and 'content' set."
        )
    if "content" not in args:
        return tool_error(
            "write_file: missing required field 'content'. The tool call included a "
            "path but no content argument — this is almost always a dropped-arg bug "
            "under context pressure. Re-emit the tool call with the full content "
            "payload, or use execute_code with hermes_tools.write_file() for very "
            "large files."
        )
    content = args["content"]
    if not isinstance(content, str):
        return tool_error(
            f"write_file: 'content' must be a string, got "
            f"{type(content).__name__}."
        )
    return write_file_tool(path=path, content=content, task_id=tid)
def _handle_patch(args, **kw):

View file

@ -560,8 +560,18 @@ def _patch_skill(
}
def _delete_skill(name: str) -> Dict[str, Any]:
"""Delete a skill."""
def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, Any]:
"""Delete a skill.
``absorbed_into`` declares intent:
- ``None`` / missing: caller didn't declare (legacy / non-curator path);
accepted for backward compat but logs a warning because the curator
classification pipeline can't tell consolidation from pruning without it.
- ``""`` (empty): explicit "truly pruned, no forwarding target".
- ``"<skill-name>"``: content was absorbed into that umbrella; the
target must exist on disk. Validated here so the model can't claim an
umbrella that doesn't exist.
"""
existing = _find_skill(name)
if not existing:
return {"success": False, "error": f"Skill '{name}' not found."}
@ -570,6 +580,24 @@ def _delete_skill(name: str) -> Dict[str, Any]:
if pinned_err:
return {"success": False, "error": pinned_err}
# Validate absorbed_into target when declared non-empty
if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
target_name = absorbed_into.strip()
if target_name == name:
return {
"success": False,
"error": f"absorbed_into='{target_name}' cannot equal the skill being deleted.",
}
target = _find_skill(target_name)
if not target:
return {
"success": False,
"error": (
f"absorbed_into='{target_name}' does not exist. "
f"Create or patch the umbrella skill first, then retry the delete."
),
}
skill_dir = existing["path"]
skills_root = _containing_skills_root(skill_dir)
shutil.rmtree(skill_dir)
@ -579,9 +607,13 @@ def _delete_skill(name: str) -> Dict[str, Any]:
if parent != skills_root and parent.exists() and not any(parent.iterdir()):
parent.rmdir()
message = f"Skill '{name}' deleted."
if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
message += f" Content absorbed into '{absorbed_into.strip()}'."
return {
"success": True,
"message": f"Skill '{name}' deleted.",
"message": message,
}
@ -702,6 +734,7 @@ def skill_manage(
old_string: str = None,
new_string: str = None,
replace_all: bool = False,
absorbed_into: str = None,
) -> str:
"""
Manage user-created skills. Dispatches to the appropriate action handler.
@ -726,7 +759,7 @@ def skill_manage(
result = _patch_skill(name, old_string, new_string, file_path, replace_all)
elif action == "delete":
result = _delete_skill(name)
result = _delete_skill(name, absorbed_into=absorbed_into)
elif action == "write_file":
if not file_path:
@ -778,6 +811,13 @@ SKILL_MANAGE_SCHEMA = {
"patch (old_string/new_string — preferred for fixes), "
"edit (full SKILL.md rewrite — major overhauls only), "
"delete, write_file, remove_file.\n\n"
"On delete, pass `absorbed_into=<umbrella>` when you're merging this "
"skill's content into another one, or `absorbed_into=\"\"` when you're "
"pruning it with no forwarding target. This lets the curator tell "
"consolidation from pruning without guessing, so downstream consumers "
"(cron jobs that reference the old skill name, etc.) get updated "
"correctly. The target you name in `absorbed_into` must already "
"exist — create/patch the umbrella first, then delete.\n\n"
"Create when: complex task succeeded (5+ calls), errors overcome, "
"user-corrected approach worked, non-trivial workflow discovered, "
"or user asks you to remember a procedure.\n"
@ -855,6 +895,20 @@ SKILL_MANAGE_SCHEMA = {
"type": "string",
"description": "Content for the file. Required for 'write_file'."
},
"absorbed_into": {
"type": "string",
"description": (
"For 'delete' only — declares intent so the curator can "
"tell consolidation from pruning without guessing. "
"Pass the umbrella skill name when this skill's content "
"was merged into another (the target must already exist). "
"Pass an empty string when the skill is truly stale and "
"being pruned with no forwarding target. Omitting the arg "
"on delete is supported for backward compatibility but "
"downstream tooling (e.g. cron-job skill reference "
"rewriting) will have to guess at intent."
)
},
},
"required": ["action", "name"],
},
@ -877,6 +931,7 @@ registry.register(
file_content=args.get("file_content"),
old_string=args.get("old_string"),
new_string=args.get("new_string"),
replace_all=args.get("replace_all", False)),
replace_all=args.get("replace_all", False),
absorbed_into=args.get("absorbed_into")),
emoji="📝",
)

View file

@ -3128,6 +3128,7 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
def run():
approval_token = None
session_tokens = []
goal_followup = None # set by the post-turn goal hook below
try:
from tools.approval import (
reset_current_session_key,
@ -3294,6 +3295,55 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
payload["rendered"] = rendered
_emit("message.complete", sid, payload)
# ── /goal continuation (Ralph-style loop) ─────────────────
# After every TUI turn, if a /goal is active, ask the judge
# whether the goal is done and — if not and we're still under
# budget — queue a continuation prompt to run after this
# thread releases session["running"]. The verdict message
# ("✓ Goal achieved" / "⏸ budget exhausted") is surfaced as
# a system line so the user sees progress regardless of
# outcome. Mirrors gateway/run._post_turn_goal_continuation.
if (
status == "complete"
and isinstance(raw, str)
and raw.strip()
):
try:
from hermes_cli.goals import GoalManager
sid_key = session.get("session_key") or ""
if sid_key:
try:
goals_cfg = (_load_cfg().get("goals") or {})
goal_max_turns = int(goals_cfg.get("max_turns", 20) or 20)
except Exception:
goal_max_turns = 20
goal_mgr = GoalManager(
session_id=sid_key,
default_max_turns=goal_max_turns,
)
if goal_mgr.is_active():
decision = goal_mgr.evaluate_after_turn(
raw, user_initiated=True,
)
verdict_msg = decision.get("message") or ""
if verdict_msg:
_emit(
"status.update",
sid,
{"kind": "goal", "text": verdict_msg},
)
if decision.get("should_continue"):
cont_prompt = decision.get("continuation_prompt") or ""
if cont_prompt:
goal_followup = cont_prompt
except Exception as _goal_exc:
print(
f"[tui_gateway] goal continuation hook failed: "
f"{type(_goal_exc).__name__}: {_goal_exc}",
file=sys.stderr,
)
# Apply pending_title now that the DB row exists.
_pending = session.get("pending_title")
if _pending and status == "complete":
@ -3375,6 +3425,31 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
session["running"] = False
_emit("session.info", sid, _session_info(agent, session))
# Chain a goal-continuation turn if the judge said so. We do
# this AFTER the finally releases session["running"], so the
# nested _run_prompt_submit doesn't deadlock on the busy
# guard. A real user prompt that races us wins because
# prompt.submit sets running=True under the history_lock and
# we check that guard before re-firing.
if goal_followup:
with session["history_lock"]:
if session.get("running"):
# User already sent something — their turn wins,
# the judge will re-run on the next turn anyway.
return
session["running"] = True
try:
_emit("message.start", sid)
_run_prompt_submit(rid, sid, session, goal_followup)
except Exception as _cont_exc:
print(
f"[tui_gateway] goal continuation dispatch failed: "
f"{type(_cont_exc).__name__}: {_cont_exc}",
file=sys.stderr,
)
with session["history_lock"]:
session["running"] = False
threading.Thread(target=run, daemon=True).start()
@ -4366,6 +4441,7 @@ _PENDING_INPUT_COMMANDS: frozenset[str] = frozenset(
"q",
"steer",
"plan",
"goal",
}
)
@ -4678,6 +4754,77 @@ def _(rid, params: dict) -> dict:
# Fallback: no active run, treat as next-turn message
return _ok(rid, {"type": "send", "message": arg})
if name == "goal":
if not session:
return _err(rid, 4001, "no active session")
try:
from hermes_cli.goals import GoalManager
except Exception as exc:
return _err(rid, 5030, f"goals unavailable: {exc}")
sid_key = session.get("session_key") or ""
if not sid_key:
return _err(rid, 4001, "no session key")
try:
goals_cfg = (_load_cfg().get("goals") or {})
max_turns = int(goals_cfg.get("max_turns", 20) or 20)
except Exception:
max_turns = 20
mgr = GoalManager(session_id=sid_key, default_max_turns=max_turns)
lower = arg.strip().lower()
if not arg.strip() or lower == "status":
return _ok(rid, {"type": "exec", "output": mgr.status_line()})
if lower == "pause":
state = mgr.pause(reason="user-paused")
out = "No goal set." if state is None else f"⏸ Goal paused: {state.goal}"
return _ok(rid, {"type": "exec", "output": out})
if lower == "resume":
state = mgr.resume()
if state is None:
return _ok(rid, {"type": "exec", "output": "No goal to resume."})
return _ok(
rid,
{
"type": "exec",
"output": (
f"▶ Goal resumed: {state.goal}\n"
"Send any message to continue, or wait — I'll take the next step on the next turn."
),
},
)
if lower in ("clear", "stop", "done"):
had = mgr.has_goal()
mgr.clear()
return _ok(
rid,
{
"type": "exec",
"output": "✓ Goal cleared." if had else "No active goal.",
},
)
# Otherwise — treat the remaining text as the new goal.
try:
state = mgr.set(arg)
except ValueError as exc:
return _err(rid, 4004, f"invalid goal: {exc}")
notice = (
f"⊙ Goal set ({state.max_turns}-turn budget): {state.goal}\n"
"I'll keep working until the goal is done, you pause/clear it, or the budget is exhausted.\n"
"Controls: /goal status · /goal pause · /goal resume · /goal clear"
)
# Send the goal text as the kickoff prompt. The TUI client sees
# {type: send, notice, message} → renders `notice` as a sys line,
# then submits `message` as a user turn. The post-turn judge
# wired in _run_prompt_submit takes over from there.
return _ok(
rid,
{"type": "send", "notice": notice, "message": state.goal},
)
return _err(rid, 4018, f"not a quick/plugin/skill command: {name}")

Some files were not shown because too many files have changed in this diff Show more