mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-14 09:11:54 +00:00
Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui
This commit is contained in:
commit
9ca5ea1375
109 changed files with 11761 additions and 479 deletions
|
|
@ -4,6 +4,7 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
|
|
@ -47,6 +48,7 @@ from acp.schema import (
|
|||
TextContentBlock,
|
||||
UnstructuredCommandInput,
|
||||
Usage,
|
||||
UsageUpdate,
|
||||
UserMessageChunk,
|
||||
)
|
||||
|
||||
|
|
@ -65,6 +67,7 @@ from acp_adapter.events import (
|
|||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
from acp_adapter.tools import build_tool_complete, build_tool_start
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -315,6 +318,66 @@ class HermesACPAgent(acp.Agent):
|
|||
|
||||
return target_provider, new_model
|
||||
|
||||
@staticmethod
def _build_usage_update(state: SessionState) -> UsageUpdate | None:
    """Assemble the ACP ``usage_update`` payload for a session.

    ``size`` is taken from the agent's context compressor
    (``context_length``) and ``used`` is a rough token estimate over the
    session history, the cached system prompt and the tool schemas.  Clients
    such as Zed use this update to draw their context indicator.

    Returns ``None`` when no positive context length is available.  If the
    estimator cannot be imported or fails, ``used`` falls back to the
    compressor's ``last_prompt_tokens``.
    """
    hermes_agent = state.agent
    ctx_compressor = getattr(hermes_agent, "context_compressor", None)
    window = int(getattr(ctx_compressor, "context_length", 0) or 0)
    if window <= 0:
        # Without a known window there is nothing meaningful to report.
        return None

    try:
        from agent.model_metadata import estimate_request_tokens_rough

        pressure = estimate_request_tokens_rough(
            state.history,
            system_prompt=getattr(hermes_agent, "_cached_system_prompt", "") or "",
            tools=getattr(hermes_agent, "tools", None) or None,
        )
    except Exception:
        logger.debug("Could not estimate ACP native context usage", exc_info=True)
        pressure = int(getattr(ctx_compressor, "last_prompt_tokens", 0) or 0)

    # Clamp both figures so a negative value is never sent to the client.
    return UsageUpdate(
        session_update="usage_update",
        size=max(window, 0),
        used=max(pressure, 0),
    )
|
||||
|
||||
async def _send_usage_update(self, state: SessionState) -> None:
    """Push the session's context-usage snapshot to the connected client.

    Does nothing when there is no client connection or when
    ``_build_usage_update`` produces no payload.  Delivery failures are
    logged as warnings and are not propagated to the caller.
    """
    connection = self._conn
    if not connection:
        return

    payload = self._build_usage_update(state)
    if payload is None:
        return

    try:
        await connection.session_update(session_id=state.session_id, update=payload)
    except Exception:
        logger.warning(
            "Failed to send ACP usage update for session %s",
            state.session_id,
            exc_info=True,
        )
|
||||
|
||||
def _schedule_usage_update(self, state: SessionState) -> None:
    """Queue a context-usage refresh on the running event loop.

    The update is sent from a task created via ``call_soon`` rather than
    awaited inline, so the caller can return first.  Nothing is scheduled
    when no client connection exists.
    """
    if self._conn:
        running_loop = asyncio.get_running_loop()
        running_loop.call_soon(asyncio.create_task, self._send_usage_update(state))
|
||||
|
||||
async def _register_session_mcp_servers(
|
||||
self,
|
||||
state: SessionState,
|
||||
|
|
@ -485,37 +548,99 @@ class HermesACPAgent(acp.Agent):
|
|||
)
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
||||
"""Extract function name/arguments from an OpenAI-style tool_call."""
|
||||
function = tool_call.get("function") if isinstance(tool_call.get("function"), dict) else {}
|
||||
name = str(function.get("name") or tool_call.get("name") or "unknown_tool")
|
||||
raw_args = function.get("arguments") or tool_call.get("arguments") or tool_call.get("args") or {}
|
||||
if isinstance(raw_args, str):
|
||||
try:
|
||||
parsed = json.loads(raw_args)
|
||||
except Exception:
|
||||
parsed = {"raw": raw_args}
|
||||
raw_args = parsed
|
||||
if not isinstance(raw_args, dict):
|
||||
raw_args = {}
|
||||
return name, raw_args
|
||||
|
||||
@staticmethod
def _history_tool_call_id(tool_call: dict[str, Any]) -> str:
    """Return the tool call identifier used when replaying ACP history.

    The first truthy value among ``id``, ``call_id`` and ``tool_call_id`` is
    stringified and stripped; an empty string is returned when none is set.
    """
    for key in ("id", "call_id", "tool_call_id"):
        candidate = tool_call.get(key)
        if candidate:
            return str(candidate).strip()
    return ""
|
||||
|
||||
async def _replay_session_history(self, state: SessionState) -> None:
|
||||
"""Send persisted user/assistant history to clients during session/load.
|
||||
|
||||
Zed's ACP history UI calls ``session/load`` after the user picks an item
|
||||
from the Agents sidebar. The agent must then replay the full conversation
|
||||
as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
|
||||
restoring server-side state makes Hermes remember context, but leaves the
|
||||
editor looking like a clean thread.
|
||||
as user/assistant chunks plus reconstructed tool-call start/completion
|
||||
notifications; merely restoring server-side state makes Hermes remember
|
||||
context, but leaves the editor looking like a clean thread.
|
||||
"""
|
||||
if not self._conn or not state.history:
|
||||
return
|
||||
|
||||
for message in state.history:
|
||||
role = str(message.get("role") or "")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
text = self._history_message_text(message)
|
||||
if not text:
|
||||
continue
|
||||
update = self._history_message_update(role=role, text=text)
|
||||
if update is None:
|
||||
continue
|
||||
active_tool_calls: dict[str, tuple[str, dict[str, Any]]] = {}
|
||||
|
||||
async def _send(update: Any) -> bool:
|
||||
try:
|
||||
await self._conn.session_update(session_id=state.session_id, update=update)
|
||||
return True
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to replay ACP history for session %s",
|
||||
state.session_id,
|
||||
exc_info=True,
|
||||
)
|
||||
return
|
||||
return False
|
||||
|
||||
for message in state.history:
|
||||
role = str(message.get("role") or "")
|
||||
|
||||
if role in {"user", "assistant"}:
|
||||
text = self._history_message_text(message)
|
||||
if text:
|
||||
update = self._history_message_update(role=role, text=text)
|
||||
if update is not None and not await _send(update):
|
||||
return
|
||||
|
||||
if role == "assistant" and isinstance(message.get("tool_calls"), list):
|
||||
for tool_call in message["tool_calls"]:
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
tool_call_id = self._history_tool_call_id(tool_call)
|
||||
if not tool_call_id:
|
||||
continue
|
||||
tool_name, args = self._history_tool_call_name_args(tool_call)
|
||||
active_tool_calls[tool_call_id] = (tool_name, args)
|
||||
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
|
||||
return
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
tool_call_id = str(message.get("tool_call_id") or "").strip()
|
||||
tool_name = str(message.get("tool_name") or "").strip()
|
||||
function_args: dict[str, Any] | None = None
|
||||
if tool_call_id in active_tool_calls:
|
||||
tool_name, function_args = active_tool_calls.pop(tool_call_id)
|
||||
if not tool_call_id or not tool_name:
|
||||
continue
|
||||
result = message.get("content")
|
||||
if not await _send(
|
||||
build_tool_complete(
|
||||
tool_call_id,
|
||||
tool_name,
|
||||
result=result if isinstance(result, str) else None,
|
||||
function_args=function_args,
|
||||
)
|
||||
):
|
||||
return
|
||||
|
||||
async def new_session(
|
||||
self,
|
||||
|
|
@ -527,11 +652,24 @@ class HermesACPAgent(acp.Agent):
|
|||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("New session %s (cwd=%s)", state.session_id, cwd)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return NewSessionResponse(
|
||||
session_id=state.session_id,
|
||||
models=self._build_model_state(state),
|
||||
)
|
||||
|
||||
def _schedule_history_replay(self, state: SessionState) -> None:
    """Queue the history replay so it runs after session/load or session/resume returns.

    Zed only attaches streamed transcript/tool updates once the load/resume
    response has completed; replay notifications sent while that request is
    still in flight can be dropped or left unattached by the editor even
    though the server logs look correct.  The replay is therefore handed to
    the running loop via ``call_soon`` instead of being awaited inline.
    """
    asyncio.get_running_loop().call_soon(
        asyncio.create_task,
        self._replay_session_history(state),
    )
|
||||
|
||||
async def load_session(
|
||||
self,
|
||||
cwd: str,
|
||||
|
|
@ -545,8 +683,9 @@ class HermesACPAgent(acp.Agent):
|
|||
return None
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Loaded session %s", session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_history_replay(state)
|
||||
self._schedule_available_commands_update(session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return LoadSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
async def resume_session(
|
||||
|
|
@ -562,8 +701,9 @@ class HermesACPAgent(acp.Agent):
|
|||
state = self.session_manager.create_session(cwd=cwd)
|
||||
await self._register_session_mcp_servers(state, mcp_servers)
|
||||
logger.info("Resumed session %s", state.session_id)
|
||||
await self._replay_session_history(state)
|
||||
self._schedule_history_replay(state)
|
||||
self._schedule_available_commands_update(state.session_id)
|
||||
self._schedule_usage_update(state)
|
||||
return ResumeSessionResponse(models=self._build_model_state(state))
|
||||
|
||||
async def cancel(self, session_id: str, **kwargs: Any) -> None:
|
||||
|
|
@ -712,6 +852,7 @@ class HermesACPAgent(acp.Agent):
|
|||
if self._conn:
|
||||
update = acp.update_agent_message_text(response_text)
|
||||
await self._conn.session_update(session_id, update)
|
||||
await self._send_usage_update(state)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
# If Zed sends another regular prompt while the same ACP session is
|
||||
|
|
@ -744,24 +885,37 @@ class HermesACPAgent(acp.Agent):
|
|||
tool_call_meta: dict[str, dict[str, Any]] = {}
|
||||
previous_approval_cb = None
|
||||
|
||||
streamed_message = False
|
||||
|
||||
if conn:
|
||||
tool_progress_cb = make_tool_progress_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
thinking_cb = make_thinking_cb(conn, session_id, loop)
|
||||
reasoning_cb = make_thinking_cb(conn, session_id, loop)
|
||||
step_cb = make_step_cb(conn, session_id, loop, tool_call_ids, tool_call_meta)
|
||||
message_cb = make_message_cb(conn, session_id, loop)
|
||||
|
||||
def stream_delta_cb(text: str) -> None:
|
||||
nonlocal streamed_message
|
||||
if text:
|
||||
streamed_message = True
|
||||
message_cb(text)
|
||||
|
||||
approval_cb = make_approval_callback(conn.request_permission, loop, session_id)
|
||||
else:
|
||||
tool_progress_cb = None
|
||||
thinking_cb = None
|
||||
reasoning_cb = None
|
||||
step_cb = None
|
||||
message_cb = None
|
||||
stream_delta_cb = None
|
||||
approval_cb = None
|
||||
|
||||
agent = state.agent
|
||||
agent.tool_progress_callback = tool_progress_cb
|
||||
agent.thinking_callback = thinking_cb
|
||||
# ACP thought panes should not receive Hermes' local kawaii waiting/status
|
||||
# updates. Route provider/model reasoning deltas instead; if the provider
|
||||
# emits no reasoning, Zed should not get a fake "thinking" accordion.
|
||||
agent.thinking_callback = None
|
||||
agent.reasoning_callback = reasoning_cb
|
||||
agent.step_callback = step_cb
|
||||
agent.message_callback = message_cb
|
||||
agent.stream_delta_callback = stream_delta_cb
|
||||
|
||||
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
|
||||
# Set it INSIDE _run_agent so the TLS write happens in the executor
|
||||
|
|
@ -867,7 +1021,7 @@ class HermesACPAgent(acp.Agent):
|
|||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to auto-title ACP session %s", session_id, exc_info=True)
|
||||
if final_response and conn:
|
||||
if final_response and conn and not streamed_message:
|
||||
update = acp.update_agent_message_text(final_response)
|
||||
await conn.session_update(session_id, update)
|
||||
|
||||
|
|
@ -903,6 +1057,8 @@ class HermesACPAgent(acp.Agent):
|
|||
cached_read_tokens=result.get("cache_read_tokens"),
|
||||
)
|
||||
|
||||
await self._send_usage_update(state)
|
||||
|
||||
stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
|
||||
return PromptResponse(stop_reason=stop_reason, usage=usage)
|
||||
|
||||
|
|
@ -1035,22 +1191,84 @@ class HermesACPAgent(acp.Agent):
|
|||
return f"Could not list tools: {e}"
|
||||
|
||||
def _cmd_context(self, args: str, state: SessionState) -> str:
|
||||
"""Show ACP session context pressure and compression guidance."""
|
||||
n_messages = len(state.history)
|
||||
if n_messages == 0:
|
||||
return "Conversation is empty (no messages yet)."
|
||||
# Count by role
|
||||
|
||||
# Count by role.
|
||||
roles: dict[str, int] = {}
|
||||
for msg in state.history:
|
||||
role = msg.get("role", "unknown")
|
||||
roles[role] = roles.get(role, 0) + 1
|
||||
|
||||
agent = state.agent
|
||||
model = state.model or getattr(agent, "model", "")
|
||||
provider = getattr(agent, "provider", None) or "auto"
|
||||
compressor = getattr(agent, "context_compressor", None)
|
||||
context_length = int(getattr(compressor, "context_length", 0) or 0)
|
||||
threshold_tokens = int(getattr(compressor, "threshold_tokens", 0) or 0)
|
||||
|
||||
try:
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
|
||||
system_prompt = getattr(agent, "_cached_system_prompt", "") or ""
|
||||
tools = getattr(agent, "tools", None) or None
|
||||
approx_tokens = estimate_request_tokens_rough(
|
||||
state.history,
|
||||
system_prompt=system_prompt,
|
||||
tools=tools,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Could not estimate ACP context usage", exc_info=True)
|
||||
approx_tokens = 0
|
||||
|
||||
if threshold_tokens <= 0 and context_length > 0:
|
||||
threshold_tokens = int(context_length * 0.80)
|
||||
|
||||
lines = [
|
||||
f"Conversation: {n_messages} messages",
|
||||
f"Conversation: {n_messages} messages"
|
||||
if n_messages
|
||||
else "Conversation is empty (no messages yet).",
|
||||
f" user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
|
||||
f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
|
||||
]
|
||||
model = state.model or getattr(state.agent, "model", "")
|
||||
if model:
|
||||
lines.append(f"Model: {model}")
|
||||
lines.append(f"Provider: {provider}")
|
||||
|
||||
if approx_tokens > 0:
|
||||
if context_length > 0:
|
||||
usage_pct = (approx_tokens / context_length) * 100
|
||||
lines.append(
|
||||
f"Context usage: ~{approx_tokens:,} / {context_length:,} tokens ({usage_pct:.1f}%)"
|
||||
)
|
||||
else:
|
||||
lines.append(f"Context usage: ~{approx_tokens:,} tokens")
|
||||
|
||||
if threshold_tokens > 0:
|
||||
if approx_tokens > 0:
|
||||
threshold_pct = (threshold_tokens / context_length) * 100 if context_length > 0 else 0
|
||||
remaining = max(threshold_tokens - approx_tokens, 0)
|
||||
if approx_tokens >= threshold_tokens:
|
||||
lines.append(
|
||||
f"Compression: due now (threshold ~{threshold_tokens:,}"
|
||||
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
|
||||
+ "). Run /compact."
|
||||
)
|
||||
else:
|
||||
lines.append(
|
||||
f"Compression: ~{remaining:,} tokens until threshold "
|
||||
f"(~{threshold_tokens:,}"
|
||||
+ (f", {threshold_pct:.0f}%" if threshold_pct else "")
|
||||
+ ")."
|
||||
)
|
||||
else:
|
||||
lines.append(f"Compression threshold: ~{threshold_tokens:,} tokens")
|
||||
|
||||
if getattr(agent, "compression_enabled", True) is False:
|
||||
lines.append("Compression is disabled for this agent.")
|
||||
else:
|
||||
lines.append("Tip: run /compact to compress manually before the threshold.")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def _cmd_reset(self, args: str, state: SessionState) -> str:
|
||||
|
|
|
|||
|
|
@ -28,6 +28,11 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
|||
"terminal": "execute",
|
||||
"process": "execute",
|
||||
"execute_code": "execute",
|
||||
# Session/meta tools
|
||||
"todo": "other",
|
||||
"skill_view": "read",
|
||||
"skills_list": "read",
|
||||
"skill_manage": "edit",
|
||||
# Web / fetch
|
||||
"web_search": "fetch",
|
||||
"web_extract": "fetch",
|
||||
|
|
@ -51,6 +56,28 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = {
|
|||
}
|
||||
|
||||
|
||||
# Names of Hermes tools listed as "polished", grouped by area.
_POLISHED_TOOLS = {
    # Core operator loop
    "todo",
    "memory",
    "session_search",
    "delegate_task",
    # Files / execution
    "read_file",
    "write_file",
    "patch",
    "search_files",
    "terminal",
    "process",
    "execute_code",
    # Skills / web
    "skill_view",
    "skills_list",
    "skill_manage",
    "web_search",
    "web_extract",
    # Browser
    "browser_navigate",
    "browser_click",
    "browser_type",
    "browser_press",
    "browser_scroll",
    "browser_back",
    "browser_snapshot",
    "browser_console",
    "browser_get_images",
    "browser_vision",
    # Media
    "vision_analyze",
    "image_generate",
    "text_to_speech",
    # Schedulers / platform integrations
    "cronjob",
    "send_message",
    "clarify",
    "discord",
    "discord_admin",
    "ha_list_entities",
    "ha_get_state",
    "ha_list_services",
    "ha_call_service",
    "feishu_doc_read",
    "feishu_drive_list_comments",
    "feishu_drive_list_comment_replies",
    "feishu_drive_reply_comment",
    "feishu_drive_add_comment",
    "kanban_create",
    "kanban_show",
    "kanban_comment",
    "kanban_complete",
    "kanban_block",
    "kanban_link",
    "kanban_heartbeat",
    "yb_query_group_info",
    "yb_query_group_members",
    "yb_search_sticker",
    "yb_send_dm",
    "yb_send_sticker",
    "mixture_of_agents",
}
|
||||
|
||||
|
||||
def get_tool_kind(tool_name: str) -> ToolKind:
    """Look up the ACP ToolKind for a hermes tool; unmapped tools are 'other'."""
    try:
        return TOOL_KIND_MAP[tool_name]
    except KeyError:
        return "other"
|
||||
|
|
@ -85,18 +112,645 @@ def build_tool_title(tool_name: str, args: Dict[str, Any]) -> str:
|
|||
if urls:
|
||||
return f"extract: {urls[0]}" + (f" (+{len(urls)-1})" if len(urls) > 1 else "")
|
||||
return "web extract"
|
||||
if tool_name == "process":
|
||||
action = str(args.get("action") or "").strip() or "manage"
|
||||
sid = str(args.get("session_id") or "").strip()
|
||||
return f"process {action}: {sid}" if sid else f"process {action}"
|
||||
if tool_name == "delegate_task":
|
||||
tasks = args.get("tasks")
|
||||
if isinstance(tasks, list) and tasks:
|
||||
return f"delegate batch ({len(tasks)} tasks)"
|
||||
goal = args.get("goal", "")
|
||||
if goal and len(goal) > 60:
|
||||
goal = goal[:57] + "..."
|
||||
return f"delegate: {goal}" if goal else "delegate task"
|
||||
if tool_name == "session_search":
|
||||
query = str(args.get("query") or "").strip()
|
||||
return f"session search: {query}" if query else "recent sessions"
|
||||
if tool_name == "memory":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
target = str(args.get("target") or "memory").strip() or "memory"
|
||||
return f"memory {action}: {target}"
|
||||
if tool_name == "execute_code":
|
||||
return "execute code"
|
||||
code = str(args.get("code") or "").strip()
|
||||
first_line = next((line.strip() for line in code.splitlines() if line.strip()), "")
|
||||
if first_line:
|
||||
if len(first_line) > 70:
|
||||
first_line = first_line[:67] + "..."
|
||||
return f"python: {first_line}"
|
||||
return "python code"
|
||||
if tool_name == "todo":
|
||||
items = args.get("todos")
|
||||
if isinstance(items, list):
|
||||
return f"todo ({len(items)} item{'s' if len(items) != 1 else ''})"
|
||||
return "todo"
|
||||
if tool_name == "skill_view":
|
||||
name = str(args.get("name") or "?").strip() or "?"
|
||||
file_path = str(args.get("file_path") or "").strip()
|
||||
suffix = f"/{file_path}" if file_path else ""
|
||||
return f"skill view ({name}{suffix})"
|
||||
if tool_name == "skills_list":
|
||||
category = str(args.get("category") or "").strip()
|
||||
return f"skills list ({category})" if category else "skills list"
|
||||
if tool_name == "skill_manage":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
name = str(args.get("name") or "?").strip() or "?"
|
||||
file_path = str(args.get("file_path") or "").strip()
|
||||
target = f"{name}/{file_path}" if file_path else name
|
||||
if len(target) > 64:
|
||||
target = target[:61] + "..."
|
||||
return f"skill {action}: {target}"
|
||||
if tool_name == "browser_navigate":
|
||||
return f"navigate: {args.get('url', '?')}"
|
||||
if tool_name == "browser_snapshot":
|
||||
return "browser snapshot"
|
||||
if tool_name == "browser_vision":
|
||||
return f"browser vision: {str(args.get('question', '?'))[:50]}"
|
||||
if tool_name == "browser_get_images":
|
||||
return "browser images"
|
||||
if tool_name == "vision_analyze":
|
||||
return f"analyze image: {args.get('question', '?')[:50]}"
|
||||
return f"analyze image: {str(args.get('question', '?'))[:50]}"
|
||||
if tool_name == "image_generate":
|
||||
prompt = str(args.get("prompt") or args.get("description") or "").strip()
|
||||
return f"generate image: {prompt[:50]}" if prompt else "generate image"
|
||||
if tool_name == "cronjob":
|
||||
action = str(args.get("action") or "manage").strip() or "manage"
|
||||
job_id = str(args.get("job_id") or args.get("id") or "").strip()
|
||||
return f"cron {action}: {job_id}" if job_id else f"cron {action}"
|
||||
return tool_name
|
||||
|
||||
|
||||
def _text(content: str) -> Any:
    """Wrap *content* in an ACP text block and return it as tool content."""
    block = acp.text_block(content)
    return acp.tool_content(block)
|
||||
|
||||
|
||||
def _json_loads_maybe(value: Optional[str]) -> Any:
    """Best-effort JSON decode of a tool result.

    Non-string input is returned unchanged.  A string is first parsed as a
    complete JSON document; if that fails, only the leading JSON value is
    decoded, because some Hermes tools append a human hint after the payload
    (e.g. ``{...}\\n\\n[Hint: Results truncated...]``).  Returns ``None``
    when neither attempt succeeds.
    """
    if not isinstance(value, str):
        return value

    decoders = (
        # Strict: the whole string must be one JSON document.
        lambda raw: json.loads(raw),
        # Lenient: take the first JSON value and ignore any trailing text.
        lambda raw: json.JSONDecoder().raw_decode(raw.lstrip())[0],
    )
    for decode in decoders:
        try:
            return decode(value)
        except Exception:
            continue
    return None
|
||||
|
||||
|
||||
def _truncate_text(text: str, limit: int = 5000) -> str:
    """Cap *text* at roughly *limit* characters.

    Text within the limit is returned unchanged.  Longer text is cut to
    ``limit - 100`` characters (never below zero) and a note with the
    original length is appended.
    """
    total = len(text)
    if total > limit:
        keep = max(0, limit - 100)
        return f"{text[:keep]}\n... ({total} chars total, truncated)"
    return text
|
||||
|
||||
|
||||
def _fenced_text(text: str, language: str = "") -> str:
    """Return a Markdown fence that cannot be broken by backticks in text.

    The fence is one backtick longer than the longest consecutive run of
    backticks found in *text*, with a minimum of three.  *language* is
    appended to the opening fence as the info string.
    """
    # Measure backtick runs themselves, not the text between backticks.
    # Splitting on "`" measured the latter, which under-sized the fence for
    # embedded "```" and over-sized it for ordinary inline code.
    longest = run = 0
    for char in text:
        if char == "`":
            run += 1
            if run > longest:
                longest = run
        else:
            run = 0
    fence = "`" * max(3, longest + 1)
    return f"{fence}{language}\n{text}\n{fence}"
|
||||
|
||||
|
||||
def _format_todo_result(result: Optional[str]) -> Optional[str]:
    """Render a todo tool result as a Markdown checklist.

    Returns ``None`` unless the decoded result is a dict with a ``todos``
    list.  Each dict item with non-empty ``content`` (or ``id``) becomes one
    bullet prefixed by a status icon; a progress line is appended when a
    non-empty ``summary`` dict is present.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None
    todos = payload.get("todos")
    if not isinstance(todos, list):
        return None

    status_icons = {
        "completed": "✅",
        "in_progress": "🔄",
        "pending": "⏳",
        "cancelled": "✗",
    }

    rendered = ["**Todo list**", ""]
    for entry in todos:
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("content") or entry.get("id") or "").strip()
        if not label:
            continue
        marker = status_icons.get(str(entry.get("status") or "pending"), "•")
        rendered.append(f"- {marker} {label}")

    counts = payload.get("summary")
    if isinstance(counts, dict) and counts:
        n_cancelled = counts.get("cancelled", 0)
        progress = (
            "**Progress:** "
            f"{counts.get('completed', 0)} completed, "
            f"{counts.get('in_progress', 0)} in progress, "
            f"{counts.get('pending', 0)} pending"
        )
        # The cancelled count is only mentioned when it is non-zero.
        if n_cancelled:
            progress += f", {n_cancelled} cancelled"
        rendered += ["", progress]

    return "\n".join(rendered)
|
||||
|
||||
|
||||
def _format_read_file_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a read_file tool result as a header plus a fenced code block.

    Args:
        result: Raw tool result; expected to decode to a dict with a string
            ``content`` field.
        args: Arguments of the tool call (``path``, ``offset``, ``limit``),
            used for the header when present.

    Returns:
        ``None`` when the result is not a dict or has no string content, a
        ``Read failed: ...`` message when the result carries an error and no
        content, and otherwise the header followed by the fenced content.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("error") and not data.get("content"):
        return f"Read failed: {data.get('error')}"
    content = data.get("content")
    if not isinstance(content, str):
        return None

    call_args = args or {}
    path = str(call_args.get("path") or data.get("path") or "file").strip()
    offset = call_args.get("offset")
    limit = call_args.get("limit")
    range_bits = []
    if offset:
        range_bits.append(f"from line {offset}")
    if limit:
        range_bits.append(f"limit {limit}")
    suffix = f" ({', '.join(range_bits)})" if range_bits else ""
    header = f"Read {path}{suffix}"
    if data.get("total_lines") is not None:
        header += f" — {data.get('total_lines')} total lines"

    # Hermes read_file output is line-numbered with `|`. If we send it as raw
    # Markdown, Zed can interpret pipes as tables and collapse the layout.
    # Fence the payload so file lines stay readable and literal.
    #
    # Truncate the payload *before* fencing: truncating the fenced text cut
    # off the closing fence for long files and left the block unterminated.
    # The truncation note now sits inside the fence; the header and fence
    # markers are added on top of the truncation limit.
    return f"{header}\n\n{_fenced_text(_truncate_text(content))}"
|
||||
|
||||
|
||||
def _format_search_files_result(result: Optional[str]) -> Optional[str]:
    """Render a search_files tool result as a short bullet list.

    Returns ``None`` unless the decoded result is a dict with a ``matches``
    list.  At most 12 matches are listed, each as ``path[:line]`` followed
    by a whitespace-collapsed snippet of up to 300 characters.  A hint is
    appended when the result is flagged as truncated or has more matches
    than shown, and the whole text is capped at 7000 characters.
    """
    payload = _json_loads_maybe(result)
    hits = payload.get("matches") if isinstance(payload, dict) else None
    if not isinstance(hits, list):
        return None

    total = payload.get("total_count", len(hits))
    shown = min(len(hits), 12)
    out = [
        "Search results",
        f"Found {total} match{'es' if total != 1 else ''}; showing {shown}.",
        "",
    ]

    for hit in hits[:shown]:
        if not isinstance(hit, dict):
            # Non-dict matches are printed as-is.
            out.append(f"- {hit}")
            continue

        where = str(hit.get("path") or hit.get("file") or hit.get("filename") or "?")
        line_no = hit.get("line") or hit.get("line_number")
        if line_no:
            where = f"{where}:{line_no}"
        out.append(f"- {where}")

        body = str(hit.get("content") or hit.get("text") or "").strip()
        if body:
            snippet = _truncate_text(" ".join(body.split()), 300)
            out.append(f" {snippet}")

    if bool(payload.get("truncated")) or len(hits) > shown:
        out += [
            "",
            "Results truncated. Narrow the search, add file_glob, or use offset to page.",
        ]
    return _truncate_text("\n".join(out), limit=7000)
|
||||
|
||||
|
||||
def _format_execute_code_result(result: Optional[str]) -> Optional[str]:
    """Render an execute_code tool result as exit code, output and error.

    When the result does not decode to a dict, the raw string is returned if
    it has non-whitespace content, otherwise ``None``.  The rendered text is
    passed through ``_truncate_text``.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None

    stdout = str(payload.get("output") or "")
    failure = str(payload.get("error") or "")
    code = payload.get("exit_code")

    sections = ["Execution complete" if code is None else f"Exit code: {code}"]
    if stdout:
        sections += ["", "Output:", stdout]
    if failure:
        sections += ["", "Error:", failure]
    return _truncate_text("\n".join(sections))
|
||||
|
||||
|
||||
def _extract_markdown_headings(content: str, limit: int = 8) -> list[str]:
    """Collect heading texts from lines that start with ``#``.

    Leading ``#`` characters and surrounding whitespace are removed; lines
    that are empty afterwards are ignored.  Scanning stops once *limit*
    headings have been collected.
    """
    found: list[str] = []
    for raw_line in content.splitlines():
        candidate = raw_line.strip()
        title = candidate.lstrip("#").strip() if candidate.startswith("#") else ""
        if title:
            found.append(title)
        if len(found) >= limit:
            break
    return found
|
||||
|
||||
|
||||
def _format_skill_view_result(result: Optional[str]) -> Optional[str]:
    """Summarise a skill_view tool result without echoing the skill body.

    Returns ``None`` when the result does not decode to a dict and a failure
    message when ``success`` is ``False``.  Otherwise lists the skill name,
    file, optional description, content size, linked-file count and the
    section headings found in the content.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None
    if payload.get("success") is False:
        return f"Skill view failed: {payload.get('error', 'unknown error')}"

    skill = str(payload.get("name") or "skill")
    source = str(payload.get("file") or payload.get("path") or "SKILL.md")
    blurb = str(payload.get("description") or "").strip()
    body = str(payload.get("content") or "")
    attachments = payload.get("linked_files")
    if not isinstance(attachments, dict):
        attachments = None

    out = ["**Skill loaded**", "", f"- **Name:** `{skill}`", f"- **File:** `{source}`"]
    if blurb:
        out.append(f"- **Description:** {blurb}")
    if body:
        out.append(f"- **Content:** {len(body):,} chars loaded into agent context")
    if attachments:
        # Only list-valued groups contribute to the linked-file count.
        n_linked = sum(len(group) for group in attachments.values() if isinstance(group, list))
        out.append(f"- **Linked files:** {n_linked}")

    sections = _extract_markdown_headings(body)
    if sections:
        out += ["", "**Sections**"]
        out += [f"- {title}" for title in sections]

    out += [
        "",
        "_Full skill content is available to the agent but hidden here to keep ACP readable._",
    ]
    return "\n".join(out)
|
||||
|
||||
|
||||
def _format_skill_manage_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Summarise a skill_manage tool result as a Markdown status card.

    Returns ``None`` when the result does not decode to a dict.  Action,
    skill name and file come from the call arguments first and the result
    second.  The card reports failure only when ``success`` is explicitly
    ``False``.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None

    call_args = args or {}
    action = str(call_args.get("action") or "manage").strip() or "manage"
    skill = str(call_args.get("name") or payload.get("name") or "skill").strip() or "skill"
    target_file = (
        str(call_args.get("file_path") or payload.get("file_path") or "SKILL.md").strip()
        or "SKILL.md"
    )
    headline = "✗ Skill update failed" if payload.get("success") is False else "✅ Skill updated"

    out = [f"**{headline}**", "", f"- **Action:** `{action}`", f"- **Skill:** `{skill}`"]
    if action != "delete":
        out.append(f"- **File:** `{target_file}`")

    note = str(payload.get("message") or payload.get("error") or "").strip()
    if note:
        out.append(f"- **Result:** {note}")

    count = payload.get("replacements") or payload.get("replacement_count")
    if count is not None:
        out.append(f"- **Replacements:** {count}")

    location = str(payload.get("path") or "").strip()
    if location:
        out.append(f"- **Path:** `{location}`")

    return "\n".join(out)
|
||||
|
||||
|
||||
def _format_web_search_result(result: Optional[str]) -> Optional[str]:
    """Render a web_search tool result as a bullet list of hits.

    The hit list is read from ``data.web`` when ``data`` is a dict and from
    the top-level ``web`` key otherwise.  Returns ``None`` when the result is
    not a dict or no hit list is found.  At most ten hits are listed and the
    text is passed through ``_truncate_text``.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None

    nested = payload.get("data")
    hits = nested.get("web") if isinstance(nested, dict) else payload.get("web")
    if not isinstance(hits, list):
        return None

    out = [f"Web results: {len(hits)}"]
    for hit in hits[:10]:
        if not isinstance(hit, dict):
            continue
        heading = str(hit.get("title") or hit.get("url") or "result").strip()
        link = str(hit.get("url") or "").strip()
        summary = str(hit.get("description") or "").strip()
        out.append(f"• {heading} — {link}" if link else f"• {heading}")
        if summary:
            out.append(f" {summary}")
    return _truncate_text("\n".join(out))
|
||||
|
||||
|
||||
def _format_web_extract_result(result: Optional[str]) -> Optional[str]:
    """Report web_extract failures only; successful extractions yield ``None``.

    A top-level ``success: false`` with an ``error`` short-circuits to a
    single-line message.  Otherwise the first ten entries of ``results`` are
    scanned and every entry carrying a real error string is listed.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None
    if payload.get("success") is False and payload.get("error"):
        return f"Web extract failed: {payload.get('error')}"

    entries = payload.get("results")
    if not isinstance(entries, list):
        return None

    problems: list[str] = []
    for entry in entries[:10]:
        if not isinstance(entry, dict):
            continue
        reason = str(entry.get("error") or "").strip()
        # Stringified nulls are treated the same as "no error".
        if reason in {"", "None", "null"}:
            continue
        link = str(entry.get("url") or "").strip()
        heading = str(entry.get("title") or link or "Untitled").strip()
        bullet = f"- {heading}"
        if link and link != heading:
            bullet += f" — {link}"
        bullet += f"\n Error: {_truncate_text(reason, limit=500)}"
        problems.append(bullet)

    if not problems:
        return None

    plural = "s" if len(problems) != 1 else ""
    header = f"Web extract failed for {len(problems)} URL{plural}"
    return "\n".join([header, *problems])
|
||||
|
||||
|
||||
def _format_process_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a ``process`` tool result for display.

    Non-dict payloads are passed through verbatim when *result* is a
    non-blank string.  A payload with a ``processes`` list becomes a process
    table (first 20 rows); any other payload becomes a status summary with
    optional output and error sections.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        if isinstance(result, str) and result.strip():
            return result
        return None
    if payload.get("success") is False and payload.get("error"):
        return f"Process error: {payload.get('error')}"

    call_args = args or {}
    action = str(call_args.get("action") or "process").strip() or "process"

    listing = payload.get("processes")
    if isinstance(listing, list):
        out = [f"Processes: {len(listing)}"]
        for entry in listing[:20]:
            if not isinstance(entry, dict):
                out.append(f"- {entry}")
                continue
            ident = str(entry.get("session_id") or entry.get("id") or "?")
            fallback_state = "exited" if entry.get("exited") else "running"
            details = [str(entry.get("status") or fallback_state)]
            command = str(entry.get("command") or "").strip()
            pid = entry.get("pid")
            exit_code = entry.get("exit_code")
            if pid is not None:
                details.append(f"pid {pid}")
            if exit_code is not None:
                details.append(f"exit {exit_code}")
            row = f"- `{ident}` — {', '.join(details)}"
            if command:
                row += f" — {command[:120]}"
            out.append(row)
        overflow = len(listing) - 20
        if overflow > 0:
            out.append(f"... {overflow} more process(es)")
        return "\n".join(out)

    state = str(payload.get("status") or payload.get("state") or action).strip()
    ident = str(payload.get("session_id") or call_args.get("session_id") or "").strip()
    headline = f"Process {action}: {state}"
    if ident:
        headline += f" (`{ident}`)"
    out = [headline]

    labelled_fields = (
        ("command", "Command"),
        ("pid", "PID"),
        ("exit_code", "Exit code"),
        ("returncode", "Exit code"),
        ("lines", "Lines"),
    )
    out.extend(
        f"- **{label}:** {payload.get(key)}"
        for key, label in labelled_fields
        if payload.get(key) is not None
    )

    stdout_text = (
        payload.get("output")
        or payload.get("new_output")
        or payload.get("log")
        or payload.get("stdout")
    )
    stderr_text = payload.get("error") or payload.get("stderr")
    if stdout_text:
        out += ["", "Output:", _truncate_text(str(stdout_text), limit=5000)]
    if stderr_text:
        out += ["", "Error:", _truncate_text(str(stderr_text), limit=2000)]

    note = payload.get("message")
    # The free-form message is only shown when there is nothing more specific.
    if note and not stdout_text and not stderr_text:
        out.append(str(note))
    return _truncate_text("\n".join(out), limit=7000)
|
||||
|
||||
|
||||
def _format_delegate_result(result: Optional[str]) -> Optional[str]:
    """Render a delegate_task result as one section per delegated task.

    Returns a single failure line when the payload has an ``error`` but no
    ``results`` list, ``None`` when there is nothing to render, and otherwise
    a header followed by per-task status, summary, error and tool names.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None

    outcomes = payload.get("results")
    if not isinstance(outcomes, list):
        if payload.get("error"):
            return f"Delegation failed: {payload.get('error')}"
        return None

    plural = "s" if len(outcomes) != 1 else ""
    banner = f"Delegation results: {len(outcomes)} task{plural}"
    elapsed = payload.get("total_duration_seconds")
    if elapsed is not None:
        banner += f" in {elapsed}s"
    out = [banner]

    glyphs = {"completed": "✅", "failed": "✗", "error": "✗", "timeout": "⏱", "interrupted": "⚠"}
    for outcome in outcomes:
        if not isinstance(outcome, dict):
            out.append(f"- {outcome}")
            continue

        position = outcome.get("task_index")
        state = str(outcome.get("status") or "unknown")
        # task_index is zero-based in the payload; show it one-based.
        number = position + 1 if isinstance(position, int) else "?"
        title = f"{glyphs.get(state, '•')} Task {number}: {state}"

        extras = []
        model_name = outcome.get("model")
        child_role = outcome.get("_child_role")
        seconds = outcome.get("duration_seconds")
        if model_name:
            extras.append(str(model_name))
        if child_role:
            extras.append(f"role={child_role}")
        if seconds is not None:
            extras.append(f"{seconds}s")
        if extras:
            title += " (" + ", ".join(extras) + ")"
        out += ["", title]

        recap = str(outcome.get("summary") or "").strip()
        failure = str(outcome.get("error") or "").strip()
        if recap:
            out.append(_truncate_text(recap, limit=1200))
        if failure:
            out.append("Error: " + _truncate_text(failure, limit=800))

        calls = outcome.get("tool_trace")
        if isinstance(calls, list) and calls:
            tool_names = [str(call.get("tool") or "?") for call in calls if isinstance(call, dict)]
            if tool_names:
                shown = "Tools: " + ", ".join(tool_names[:12])
                if len(tool_names) > 12:
                    shown += f" (+{len(tool_names)-12})"
                out.append(shown)
    return _truncate_text("\n".join(out), limit=8000)
|
||||
|
||||
|
||||
def _format_session_search_result(result: Optional[str]) -> Optional[str]:
    """Render a session_search tool result as a compact Markdown list.

    Returns ``None`` when *result* does not decode to a dict or has no
    ``results`` list.  A payload with ``success: false`` becomes a one-line
    failure message.  Otherwise each session dict is rendered as a bullet
    (title, id, metadata) followed by an optional whitespace-collapsed
    summary; the whole text is truncated to 7000 characters.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return None
    if data.get("success") is False:
        # ``dict.get(key, default)`` only uses the default for a *missing*
        # key; an explicit ``"error": null`` used to render as "None".
        return f"Session search failed: {data.get('error') or 'unknown error'}"
    results = data.get("results")
    if not isinstance(results, list):
        return None
    mode = data.get("mode") or "search"
    query = data.get("query")
    if mode == "recent":
        heading = "Recent sessions"
    else:
        heading = "Session search results" + (f" for `{query}`" if query else "")
    lines = [heading]
    if not results:
        lines.append(str(data.get("message") or "No matching sessions found."))
        return "\n".join(lines)
    for item in results:
        if not isinstance(item, dict):
            continue
        sid = str(item.get("session_id") or "?")
        title = str(item.get("title") or item.get("when") or "Untitled session").strip()
        when = str(item.get("last_active") or item.get("started_at") or item.get("when") or "").strip()
        count = item.get("message_count")
        source = str(item.get("source") or "").strip()
        # Only non-empty metadata pieces are joined.
        meta = ", ".join(str(x) for x in [when, source, f"{count} msgs" if count is not None else ""] if x)
        lines.append(f"- **{title}** (`{sid}`)" + (f" — {meta}" if meta else ""))
        summary = str(item.get("summary") or item.get("preview") or "").strip()
        if summary:
            # Collapse internal whitespace so multi-line previews stay on one line.
            lines.append(" " + _truncate_text(" ".join(summary.split()), limit=500))
    return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_memory_result(result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render a ``memory`` tool result without dumping stored entries.

    On failure the error text and up to five candidate matches are listed.
    On success a short confirmation is built from ``message``,
    ``entry_count`` and ``usage``, plus a preview taken from the call
    arguments.  Returns ``None`` for non-dict payloads.
    """
    payload = _json_loads_maybe(result)
    if not isinstance(payload, dict):
        return None

    call_args = args or {}
    action = str(call_args.get("action") or "memory").strip() or "memory"
    target = str(payload.get("target") or call_args.get("target") or "memory")

    if payload.get("success") is False:
        out = [
            f"✗ Memory {action} failed ({target})",
            str(payload.get("error") or "unknown error"),
        ]
        candidates = payload.get("matches")
        if isinstance(candidates, list) and candidates:
            out.append("Matches:")
            for candidate in candidates[:5]:
                out.append(f"- {_truncate_text(str(candidate), 160)}")
        return "\n".join(out)

    out = [f"✅ Memory {action} saved ({target})"]
    note = payload.get("message")
    if note:
        out.append(str(note))
    total = payload.get("entry_count")
    if total is not None:
        out.append(f"Entries: {total}")
    usage = payload.get("usage")
    if usage:
        out.append(f"Usage: {usage}")

    # Show only the value supplied in the call, never the full memory store.
    snippet = str(call_args.get("content") or call_args.get("old_text") or "").strip()
    if snippet:
        out.append("Preview: " + _truncate_text(snippet, limit=300))
    return "\n".join(out)
|
||||
|
||||
|
||||
def _format_edit_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render the result of a file-editing tool (``write_file`` / ``patch``).

    Args:
        tool_name: Name of the tool, used verbatim in the rendered text.
        result: Raw tool result; decoded with ``_json_loads_maybe``.
        args: Tool call arguments; ``path`` is used for the file label.

    Returns:
        A failure line when the decoded dict reports ``success: false`` or an
        ``error``; a success summary for any other dict; the truncated raw
        text for a non-blank non-dict result; otherwise a bare confirmation.
    """
    data = _json_loads_maybe(result)
    path = str((args or {}).get("path") or "file").strip()
    if isinstance(data, dict):
        if data.get("success") is False or data.get("error"):
            # ``get(key, default)`` keeps an explicit null, which rendered as
            # "None"; fall back on any falsy error value instead.
            return f"{tool_name} failed for {path}: {data.get('error') or 'unknown error'}"
        message = str(data.get("message") or "").strip()
        # Take the first key that is actually present so a legitimate count
        # of 0 is still reported (``a or b`` silently discarded it).
        replacements = data.get("replacements")
        if replacements is None:
            replacements = data.get("replacement_count")
        lines = [f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")]
        if message:
            lines.append(message)
        if replacements is not None:
            lines.append(f"Replacements: {replacements}")
        files = data.get("files_modified")
        if files and isinstance(files, list):
            lines.append("Files: " + ", ".join(f"`{f}`" for f in files[:8]))
        return "\n".join(lines)
    if isinstance(result, str) and result.strip():
        return _truncate_text(result, limit=3000)
    return f"✅ {tool_name} completed" + (f" for `{path}`" if path else "")
|
||||
|
||||
|
||||
def _format_browser_result(tool_name: str, result: Optional[str], args: Optional[Dict[str, Any]]) -> Optional[str]:
    """Render the result of a ``browser_*`` tool.

    Args:
        tool_name: Tool name; ``browser_get_images`` gets an image listing.
        result: Raw tool result; decoded with ``_json_loads_maybe``.
        args: Tool call arguments (accepted for signature parity; not read).

    Returns:
        The raw text for a non-blank non-dict result, a failure line when the
        payload reports an error, an image list for ``browser_get_images``,
        or a title/URL/text summary truncated to 7000 characters.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return result if isinstance(result, str) and result.strip() else None
    if data.get("success") is False or data.get("error"):
        # ``get(key, default)`` keeps an explicit null, which rendered as
        # "None"; fall back on any falsy error value instead.
        return f"{tool_name} failed: {data.get('error') or 'unknown error'}"
    if tool_name == "browser_get_images":
        images = data.get("images") or data.get("data")
        if isinstance(images, list):
            lines = [f"Images found: {len(images)}"]
            for img in images[:12]:
                if isinstance(img, dict):
                    alt = str(img.get("alt") or "").strip()
                    url = str(img.get("url") or img.get("src") or "").strip()
                    lines.append(f"- {alt or 'image'}" + (f" — {url}" if url else ""))
            return _truncate_text("\n".join(lines), limit=5000)
        # No usable image list: fall through to the generic page summary.
    title = str(data.get("title") or data.get("url") or data.get("status") or tool_name)
    text = str(
        data.get("text")
        or data.get("content")
        or data.get("snapshot")
        or data.get("analysis")
        or data.get("message")
        or ""
    ).strip()
    lines = [title]
    # Avoid printing the URL twice when it already served as the title.
    if data.get("url") and data.get("url") != title:
        lines.append(str(data.get("url")))
    if text:
        lines.extend(["", _truncate_text(text, limit=5000)])
    return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _format_media_or_cron_result(tool_name: str, result: Optional[str]) -> Optional[str]:
    """Render a media or cron tool result as a short key/value list.

    Non-dict results are passed through when they are non-blank strings.
    A payload reporting ``success: false`` or an ``error`` becomes a
    one-line failure; otherwise a fixed set of well-known keys is listed
    when present and truthy.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, dict):
        return result if isinstance(result, str) and result.strip() else None
    if data.get("success") is False or data.get("error"):
        # ``get(key, default)`` keeps an explicit null, which rendered as
        # "None"; fall back on any falsy error value instead.
        return f"{tool_name} failed: {data.get('error') or 'unknown error'}"
    lines = [f"✅ {tool_name} completed"]
    for key in ("file_path", "path", "url", "image_url", "job_id", "id", "status", "message", "next_run"):
        value = data.get(key)
        if value:
            lines.append(f"- **{key}:** {value}")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _format_generic_structured_result(tool_name: str, result: Optional[str]) -> Optional[str]:
    """Fallback renderer for tools without a dedicated formatter.

    Args:
        tool_name: Tool name, used verbatim in headers and failure lines.
        result: Raw tool result; decoded with ``_json_loads_maybe``.

    Returns:
        The raw text for a non-blank result that is neither dict nor list;
        an item listing (first 12) for a list; a failure line for a dict
        reporting an error; otherwise a key/value summary that lists a set of
        priority keys first, then remaining keys until the summary reaches
        14 lines, followed by a truncated ``content`` string if present.
    """
    data = _json_loads_maybe(result)
    if not isinstance(data, (dict, list)):
        return result if isinstance(result, str) and result.strip() else None
    if isinstance(data, list):
        lines = [f"{tool_name}: {len(data)} item{'s' if len(data) != 1 else ''}"]
        for item in data[:12]:
            lines.append(f"- {_truncate_text(str(item), limit=240)}")
        return _truncate_text("\n".join(lines), limit=5000)

    if data.get("success") is False or data.get("error"):
        # ``get(key, default)`` keeps an explicit null, which rendered as
        # "None"; fall back on any falsy error value instead.
        return f"{tool_name} failed: {data.get('error') or 'unknown error'}"

    lines = [f"✅ {tool_name} completed" if data.get("success") is True else f"{tool_name} result"]
    priority_keys = (
        "message", "status", "id", "task_id", "issue_id", "title", "name", "entity_id",
        "state", "service", "url", "path", "file_path", "count", "total", "next_run",
    )
    seen = set()
    for key in priority_keys:
        value = data.get(key)
        if value in (None, "", [], {}):
            continue
        seen.add(key)
        lines.append(f"- **{key}:** {_truncate_text(str(value), limit=500)}")

    for key, value in data.items():
        # Skip keys already shown and bulky/raw keys; "content" is appended
        # separately below.
        if key in seen or key in {"success", "raw", "content", "entries"}:
            continue
        if value in (None, "", [], {}):
            continue
        if isinstance(value, (dict, list)):
            preview = json.dumps(value, ensure_ascii=False, default=str)
        else:
            preview = str(value)
        lines.append(f"- **{key}:** {_truncate_text(preview, limit=500)}")
        # Cap the summary length; only this loop is bounded.
        if len(lines) >= 14:
            break

    content = data.get("content")
    if isinstance(content, str) and content.strip():
        lines.extend(["", _truncate_text(content.strip(), limit=1500)])
    return _truncate_text("\n".join(lines), limit=7000)
|
||||
|
||||
|
||||
def _build_polished_completion_content(
    tool_name: str,
    result: Optional[str],
    function_args: Optional[Dict[str, Any]],
) -> Optional[List[Any]]:
    """Pick the formatter for *tool_name* and wrap its text as tool content.

    Tools with a dedicated formatter use it; any other tool listed in
    ``_POLISHED_TOOLS`` uses the generic structured formatter.  Returns
    ``None`` when no formatter applies or the formatter produced no text.
    """
    if tool_name == "todo":
        rendered = _format_todo_result(result)
    elif tool_name == "read_file":
        rendered = _format_read_file_result(result, function_args)
    elif tool_name in ("write_file", "patch"):
        rendered = _format_edit_result(tool_name, result, function_args)
    elif tool_name == "search_files":
        rendered = _format_search_files_result(result)
    elif tool_name == "execute_code":
        rendered = _format_execute_code_result(result)
    elif tool_name == "process":
        rendered = _format_process_result(result, function_args)
    elif tool_name == "delegate_task":
        rendered = _format_delegate_result(result)
    elif tool_name == "session_search":
        rendered = _format_session_search_result(result)
    elif tool_name == "memory":
        rendered = _format_memory_result(result, function_args)
    elif tool_name == "skill_view":
        rendered = _format_skill_view_result(result)
    elif tool_name == "skill_manage":
        rendered = _format_skill_manage_result(result, function_args)
    elif tool_name == "web_search":
        rendered = _format_web_search_result(result)
    elif tool_name == "web_extract":
        rendered = _format_web_extract_result(result)
    elif tool_name in ("browser_navigate", "browser_snapshot", "browser_vision", "browser_get_images"):
        rendered = _format_browser_result(tool_name, result, function_args)
    elif tool_name in ("vision_analyze", "image_generate", "cronjob"):
        rendered = _format_media_or_cron_result(tool_name, result)
    elif tool_name in _POLISHED_TOOLS:
        rendered = _format_generic_structured_result(tool_name, result)
    else:
        return None

    return [_text(rendered)] if rendered else None
|
||||
|
||||
|
||||
def _build_patch_mode_content(patch_text: str) -> List[Any]:
|
||||
"""Parse V4A patch mode input into ACP diff blocks when possible."""
|
||||
if not patch_text:
|
||||
|
|
@ -258,7 +912,11 @@ def _build_tool_complete_content(
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
return [acp.tool_content(acp.text_block(display_result))]
|
||||
polished_content = _build_polished_completion_content(tool_name, result, function_args)
|
||||
if polished_content:
|
||||
return polished_content
|
||||
|
||||
return [_text(display_result)]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -288,7 +946,6 @@ def build_tool_start(
|
|||
content = _build_patch_mode_content(patch_text)
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "write_file":
|
||||
|
|
@ -297,32 +954,172 @@ def build_tool_start(
|
|||
content = [acp.tool_diff_content(path=path, new_text=file_content)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "terminal":
|
||||
command = arguments.get("command", "")
|
||||
content = [acp.tool_content(acp.text_block(f"$ {command}"))]
|
||||
content = [_text(f"$ {command}")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
if tool_name == "read_file":
|
||||
path = arguments.get("path", "")
|
||||
content = [acp.tool_content(acp.text_block(f"Reading {path}"))]
|
||||
# The title and location already identify the file. Sending a synthetic
|
||||
# "Reading ..." content block makes Zed render an unhelpful Output
|
||||
# section before the real file contents arrive on completion.
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
tool_call_id, title, kind=kind, content=None, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "search_files":
|
||||
pattern = arguments.get("pattern", "")
|
||||
target = arguments.get("target", "content")
|
||||
content = [acp.tool_content(acp.text_block(f"Searching for '{pattern}' ({target})"))]
|
||||
search_path = arguments.get("path")
|
||||
where = f" in {search_path}" if search_path else ""
|
||||
content = [_text(f"Searching for '{pattern}' ({target}){where}")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "todo":
|
||||
items = arguments.get("todos")
|
||||
if isinstance(items, list):
|
||||
preview_lines = ["Updating todo list", ""]
|
||||
for item in items[:8]:
|
||||
if isinstance(item, dict):
|
||||
preview_lines.append(f"- {item.get('status', 'pending')}: {item.get('content', item.get('id', ''))}")
|
||||
if len(items) > 8:
|
||||
preview_lines.append(f"... {len(items) - 8} more")
|
||||
content = [_text("\n".join(preview_lines))]
|
||||
else:
|
||||
content = [_text("Reading todo list")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "skill_view":
|
||||
name = str(arguments.get("name") or "?").strip() or "?"
|
||||
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
content = [_text(f"Loading skill '{name}' ({file_path})")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "skill_manage":
|
||||
action = str(arguments.get("action") or "manage").strip() or "manage"
|
||||
name = str(arguments.get("name") or "?").strip() or "?"
|
||||
file_path = str(arguments.get("file_path") or "SKILL.md").strip() or "SKILL.md"
|
||||
path = f"skills/{name}/{file_path}" if file_path else f"skills/{name}"
|
||||
|
||||
if action == "patch":
|
||||
old = str(arguments.get("old_string") or "")
|
||||
new = str(arguments.get("new_string") or "")
|
||||
content = [acp.tool_diff_content(path=path, old_text=old or None, new_text=new)]
|
||||
elif action in {"edit", "create"}:
|
||||
content = [
|
||||
acp.tool_diff_content(
|
||||
path=path,
|
||||
new_text=str(arguments.get("content") or ""),
|
||||
)
|
||||
]
|
||||
elif action == "write_file":
|
||||
target = str(arguments.get("file_path") or "file")
|
||||
content = [
|
||||
acp.tool_diff_content(
|
||||
path=f"skills/{name}/{target}",
|
||||
new_text=str(arguments.get("file_content") or ""),
|
||||
)
|
||||
]
|
||||
elif action in {"delete", "remove_file"}:
|
||||
target = str(arguments.get("file_path") or file_path or name)
|
||||
content = [_text(f"Removing {target} from skill '{name}'")]
|
||||
else:
|
||||
content = [_text(f"Running skill_manage action '{action}' on skill '{name}' ({file_path})")]
|
||||
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "execute_code":
|
||||
code = str(arguments.get("code") or "").strip()
|
||||
preview = code[:1200] + (f"\n... ({len(code)} chars total, truncated)" if len(code) > 1200 else "")
|
||||
content = [_text(f"Running Python helper script:\n\n```python\n{preview}\n```" if preview else "Running Python helper script")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "web_search":
|
||||
query = str(arguments.get("query") or "").strip()
|
||||
content = [_text(f"Searching the web for: {query}" if query else "Searching the web")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "web_extract":
|
||||
# The title identifies the URL(s). Avoid a duplicate content block so
|
||||
# Zed renders this like read_file: compact start, concise completion.
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=None, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "process":
|
||||
action = str(arguments.get("action") or "").strip() or "manage"
|
||||
sid = str(arguments.get("session_id") or "").strip()
|
||||
data_preview = str(arguments.get("data") or "").strip()
|
||||
text = f"Process action: {action}" + (f"\nSession: {sid}" if sid else "")
|
||||
if data_preview:
|
||||
text += "\nInput: " + _truncate_text(data_preview, limit=500)
|
||||
content = [_text(text)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "delegate_task":
|
||||
tasks = arguments.get("tasks")
|
||||
if isinstance(tasks, list) and tasks:
|
||||
lines = [f"Delegating {len(tasks)} tasks", ""]
|
||||
for i, task in enumerate(tasks[:8], 1):
|
||||
if isinstance(task, dict):
|
||||
goal = str(task.get("goal") or "").strip()
|
||||
role = str(task.get("role") or "").strip()
|
||||
lines.append(f"{i}. " + _truncate_text(goal, limit=160) + (f" ({role})" if role else ""))
|
||||
if len(tasks) > 8:
|
||||
lines.append(f"... {len(tasks) - 8} more")
|
||||
content = [_text("\n".join(lines))]
|
||||
else:
|
||||
goal = str(arguments.get("goal") or "").strip()
|
||||
content = [_text("Delegating task" + (f":\n{_truncate_text(goal, limit=800)}" if goal else ""))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "session_search":
|
||||
query = str(arguments.get("query") or "").strip()
|
||||
content = [_text(f"Searching past sessions for: {query}" if query else "Loading recent sessions")]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name == "memory":
|
||||
action = str(arguments.get("action") or "manage").strip() or "manage"
|
||||
target = str(arguments.get("target") or "memory").strip() or "memory"
|
||||
preview = str(arguments.get("content") or arguments.get("old_text") or "").strip()
|
||||
text = f"Memory {action} ({target})"
|
||||
if preview:
|
||||
text += "\nPreview: " + _truncate_text(preview, limit=500)
|
||||
content = [_text(text)]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
)
|
||||
|
||||
if tool_name in _POLISHED_TOOLS:
|
||||
try:
|
||||
args_text = json.dumps(arguments, indent=2, default=str)
|
||||
except (TypeError, ValueError):
|
||||
args_text = str(arguments)
|
||||
content = [_text(_truncate_text(args_text, limit=1200))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
)
|
||||
|
||||
# Generic fallback
|
||||
|
|
@ -334,7 +1131,7 @@ def build_tool_start(
|
|||
content = [acp.tool_content(acp.text_block(args_text))]
|
||||
return acp.start_tool_call(
|
||||
tool_call_id, title, kind=kind, content=content, locations=locations,
|
||||
raw_input=arguments,
|
||||
raw_input=None if tool_name in _POLISHED_TOOLS else arguments,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -347,18 +1144,22 @@ def build_tool_complete(
|
|||
) -> ToolCallProgress:
|
||||
"""Create a ToolCallUpdate (progress) event for a completed tool call."""
|
||||
kind = get_tool_kind(tool_name)
|
||||
content = _build_tool_complete_content(
|
||||
tool_name,
|
||||
result,
|
||||
function_args=function_args,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
if tool_name == "web_extract":
|
||||
error_text = _format_web_extract_result(result)
|
||||
content = [_text(error_text)] if error_text else None
|
||||
else:
|
||||
content = _build_tool_complete_content(
|
||||
tool_name,
|
||||
result,
|
||||
function_args=function_args,
|
||||
snapshot=snapshot,
|
||||
)
|
||||
return acp.update_tool_call(
|
||||
tool_call_id,
|
||||
kind=kind,
|
||||
status="completed",
|
||||
content=content,
|
||||
raw_output=result,
|
||||
raw_output=None if tool_name in _POLISHED_TOOLS else result,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1241,10 +1241,24 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
|||
if not tools:
|
||||
return []
|
||||
result = []
|
||||
seen_names: set = set()
|
||||
for t in tools:
|
||||
fn = t.get("function", {})
|
||||
name = fn.get("name", "")
|
||||
# Defensive dedup: Anthropic rejects requests with duplicate tool
|
||||
# names. Upstream injection paths already dedup, but this guard
|
||||
# converts a hard API failure into a warning. See: #18478
|
||||
if name and name in seen_names:
|
||||
logger.warning(
|
||||
"convert_tools_to_anthropic: duplicate tool name '%s' "
|
||||
"— dropping second occurrence",
|
||||
name,
|
||||
)
|
||||
continue
|
||||
if name:
|
||||
seen_names.add(name)
|
||||
result.append({
|
||||
"name": fn.get("name", ""),
|
||||
"name": name,
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": _normalize_tool_input_schema(
|
||||
fn.get("parameters", {"type": "object", "properties": {}})
|
||||
|
|
|
|||
|
|
@ -259,13 +259,68 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
|
|||
"kimi-coding-cn",
|
||||
})
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
_OR_HEADERS = {
|
||||
# OpenRouter app attribution headers (base — always sent)
|
||||
_OR_HEADERS_BASE = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
|
||||
# Truthy values for boolean env-var parsing.
|
||||
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
|
||||
|
||||
|
||||
def build_or_headers(or_config: dict | None = None) -> dict:
    """Build OpenRouter headers, optionally including response-cache headers.

    Precedence for the response cache: env var > ``response_cache`` in
    *or_config*.  When the env var is unset and the key is absent from
    *or_config*, this function leaves caching off; any enabled-by-default
    behaviour would have to come from defaults merged into the loaded
    config (not visible here — confirm against ``load_config``).

    Environment variables:
      ``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``)
        enables caching; any other non-empty value disables it.
        Overrides ``openrouter.response_cache`` in config.yaml.
      ``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400).
        Overrides ``openrouter.response_cache_ttl`` in config.yaml.  An
        invalid or out-of-range value results in no TTL header; the config
        value is not consulted in that case.

    *or_config* is the ``openrouter`` section from config.yaml.  When *None*,
    falls back to reading config from disk via ``load_config()``.  A section
    that is not a mapping (e.g. ``openrouter:`` left empty in YAML) is
    treated as empty.

    Returns:
        A new dict: the base attribution headers plus, when caching is
        enabled, ``X-OpenRouter-Cache`` and optionally
        ``X-OpenRouter-Cache-TTL``.
    """
    headers = dict(_OR_HEADERS_BASE)

    # Resolve config from disk if not provided.  Best-effort: header
    # construction must not fail because the config could not be loaded.
    if or_config is None:
        try:
            from hermes_cli.config import load_config
            or_config = load_config().get("openrouter", {})
        except Exception:
            or_config = {}
    # ``.get("openrouter", {})`` still yields None for an explicit null
    # section; guard so the lookups below cannot raise AttributeError.
    if not isinstance(or_config, dict):
        or_config = {}

    # Determine cache enabled: env var overrides config.
    env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
    if env_cache:
        cache_enabled = env_cache in _TRUTHY_ENV_VALUES
    else:
        cache_enabled = or_config.get("response_cache", False)

    if not cache_enabled:
        return headers

    headers["X-OpenRouter-Cache"] = "true"

    # Determine TTL: env var overrides config.
    env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
    if env_ttl:
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript digits that int() rejects with ValueError.
        if env_ttl.isdecimal():
            ttl = int(env_ttl)
            if 1 <= ttl <= 86400:
                headers["X-OpenRouter-Cache-TTL"] = str(ttl)
    else:
        ttl = or_config.get("response_cache_ttl", 300)
        # bool is a subclass of int; ``true`` must not become a 1-second TTL.
        is_number = isinstance(ttl, (int, float)) and not isinstance(ttl, bool)
        if is_number and 1 <= ttl <= 86400:
            headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))

    return headers
|
||||
|
||||
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
|
||||
# referrerUrl and X-Title maps to appName in the gateway's analytics.
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
|
@ -1149,23 +1204,23 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
|
||||
|
||||
|
||||
def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
or_key = _pool_runtime_api_key(entry)
|
||||
or_key = explicit_api_key or _pool_runtime_api_key(entry)
|
||||
if not or_key:
|
||||
return None, None
|
||||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return OpenAI(api_key=or_key, base_url=base_url,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
|
|
@ -1911,7 +1966,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
|
|||
}
|
||||
sync_base_url = str(sync_client.base_url)
|
||||
if base_url_host_matches(sync_base_url, "openrouter.ai"):
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
async_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.copilot_auth import copilot_request_headers
|
||||
|
||||
|
|
@ -2053,9 +2108,9 @@ def resolve_provider_client(
|
|||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── OpenRouter ───────────────────────────────────────────────────
|
||||
# ── OpenRouter ───────────────────────────────────────────
|
||||
if provider == "openrouter":
|
||||
client, default = _try_openrouter()
|
||||
client, default = _try_openrouter(explicit_api_key=explicit_api_key)
|
||||
if client is None:
|
||||
logger.warning(
|
||||
"resolve_provider_client: openrouter requested but %s",
|
||||
|
|
@ -3237,7 +3292,26 @@ def _build_call_kwargs(
|
|||
kwargs["max_tokens"] = max_tokens
|
||||
|
||||
if tools:
|
||||
kwargs["tools"] = tools
|
||||
# Defensive dedup: providers like Google Vertex, Azure, and Bedrock
|
||||
# reject requests with duplicate tool names (HTTP 400). The upstream
|
||||
# injection paths (run_agent.py) already dedup, but this guard
|
||||
# converts a hard API failure into a warning if an upstream regression
|
||||
# reintroduces duplicates. See: #18478
|
||||
_seen: set = set()
|
||||
_deduped: list = []
|
||||
for _t in tools:
|
||||
_tname = (_t.get("function") or {}).get("name", "")
|
||||
if _tname and _tname in _seen:
|
||||
logger.warning(
|
||||
"_build_call_kwargs: duplicate tool name '%s' removed "
|
||||
"(provider=%s model=%s)",
|
||||
_tname, provider, model,
|
||||
)
|
||||
continue
|
||||
if _tname:
|
||||
_seen.add(_tname)
|
||||
_deduped.append(_t)
|
||||
kwargs["tools"] = _deduped
|
||||
|
||||
# Provider-specific extra_body
|
||||
merged_extra = dict(extra_body or {})
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
|
@ -13,7 +14,7 @@ from datetime import datetime
|
|||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.config import get_env_value
|
||||
from hermes_cli.config import get_env_value, load_env
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
|
|
@ -1380,6 +1381,16 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
|
||||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
|
||||
# Prefer ~/.hermes/.env over os.environ — the user's config file is the
|
||||
# authoritative source for Hermes credentials. Stale env vars from parent
|
||||
# processes (Codex CLI, test scripts, etc.) should not override deliberate
|
||||
# changes to the .env file.
|
||||
def _get_env_prefer_dotenv(key: str) -> str:
    """Return the stripped value for ``key``, preferring ``load_env()``.

    The mapping returned by ``load_env()`` is consulted first and
    ``os.environ`` second. Each candidate is stripped *before* the
    fallback decision, so a blank or whitespace-only entry in the first
    source cannot mask a usable value in the second.

    Returns "" when neither source holds a non-blank value.
    """
    # NOTE(review): assumes load_env() returns a str -> str mapping — confirm.
    dotenv_val = (load_env().get(key) or "").strip()
    if dotenv_val:
        return dotenv_val
    return (os.environ.get(key) or "").strip()
|
||||
|
||||
# Honour user suppression — `hermes auth remove <provider> <N>` for an
|
||||
# env-seeded credential marks the env:<VAR> source as suppressed so it
|
||||
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
|
||||
|
|
@ -1391,8 +1402,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
|||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
|
||||
# Prefer ~/.hermes/.env over os.environ
|
||||
token = _get_env_prefer_dotenv("OPENROUTER_API_KEY")
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
|
|
@ -1418,7 +1429,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
|||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
|
||||
env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")
|
||||
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
if provider == "anthropic":
|
||||
|
|
@ -1429,8 +1440,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
|||
]
|
||||
|
||||
for env_var in env_vars:
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value(env_var) or "").strip()
|
||||
# Prefer ~/.hermes/.env over os.environ
|
||||
token = _get_env_prefer_dotenv(env_var)
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
|
|
|
|||
109
agent/curator.py
109
agent/curator.py
|
|
@ -387,6 +387,11 @@ CURATOR_REVIEW_PROMPT = (
|
|||
" - skill_manage action=write_file — add a references/, templates/, "
|
||||
"or scripts/ file under an existing skill (the skill must already "
|
||||
"exist)\n"
|
||||
" - skill_manage action=delete — archive a skill. MUST pass "
|
||||
"`absorbed_into=<umbrella>` when you've merged its content into another "
|
||||
"skill, or `absorbed_into=\"\"` when you're truly pruning with no "
|
||||
"forwarding target. This drives cron-job skill-reference migration — "
|
||||
"guessing from your YAML summary after the fact is fragile.\n"
|
||||
" - terminal — mv a sibling into the archive "
|
||||
"OR move its content into a support subfile\n\n"
|
||||
"'keep' is a legitimate decision ONLY when the skill is already a "
|
||||
|
|
@ -637,15 +642,76 @@ def _parse_structured_summary(
|
|||
return out
|
||||
|
||||
|
||||
def _extract_absorbed_into_declarations(
|
||||
tool_calls: List[Dict[str, Any]],
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
"""Walk this run's tool calls and extract model-declared absorption targets.
|
||||
|
||||
The curator prompt requires every ``skill_manage(action='delete')`` call
|
||||
to pass ``absorbed_into=<umbrella>`` when consolidating, or
|
||||
``absorbed_into=""`` when truly pruning. This is the single authoritative
|
||||
signal for classification — the model's own declaration at the moment of
|
||||
deletion, which beats both post-hoc YAML summary parsing and substring
|
||||
heuristics on other tool calls.
|
||||
|
||||
Returns ``{skill_name: {"into": "<umbrella>" | "", "declared": True}}``.
|
||||
Entries with ``into == ""`` are explicit prunings.
|
||||
Skills without a ``skill_manage(delete)`` call, or with one that omitted
|
||||
``absorbed_into``, are not in the returned dict — caller falls back to
|
||||
the existing heuristic/YAML logic for those (backward compat with older
|
||||
curator runs and any callers that don't populate the arg).
|
||||
"""
|
||||
out: Dict[str, Dict[str, Any]] = {}
|
||||
for tc in tool_calls or []:
|
||||
if not isinstance(tc, dict):
|
||||
continue
|
||||
if tc.get("name") != "skill_manage":
|
||||
continue
|
||||
raw = tc.get("arguments") or ""
|
||||
args: Dict[str, Any] = {}
|
||||
if isinstance(raw, dict):
|
||||
args = raw
|
||||
elif isinstance(raw, str):
|
||||
try:
|
||||
args = json.loads(raw)
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(args, dict):
|
||||
continue
|
||||
if args.get("action") != "delete":
|
||||
continue
|
||||
name = args.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
continue
|
||||
# absorbed_into must be present (even empty string is meaningful);
|
||||
# missing key means the model didn't declare intent.
|
||||
if "absorbed_into" not in args:
|
||||
continue
|
||||
target = args.get("absorbed_into")
|
||||
if target is None:
|
||||
continue
|
||||
if not isinstance(target, str):
|
||||
continue
|
||||
out[name.strip()] = {"into": target.strip(), "declared": True}
|
||||
return out
|
||||
|
||||
|
||||
def _reconcile_classification(
|
||||
removed: List[str],
|
||||
heuristic: Dict[str, List[Dict[str, Any]]],
|
||||
model_block: Dict[str, List[Dict[str, str]]],
|
||||
destinations: Set[str],
|
||||
absorbed_declarations: Optional[Dict[str, Dict[str, Any]]] = None,
|
||||
) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""Merge heuristic (tool-call evidence) with the model's structured block.
|
||||
|
||||
Rules:
|
||||
Rules (evaluated in order; first match wins):
|
||||
- **Model-declared `absorbed_into` at delete time is authoritative.** Any
|
||||
entry in ``absorbed_declarations`` beats every other signal. This is
|
||||
the model telling us directly, at the moment of deletion, what it did.
|
||||
``into != ""`` and target exists → consolidated. ``into == ""`` →
|
||||
pruned. ``into != ""`` but target doesn't exist → hallucination; fall
|
||||
through to the usual signals.
|
||||
- Model-declared consolidation wins when its ``into`` target exists
|
||||
in ``destinations`` (survived or newly-created). This gives the
|
||||
model authority over intent + rationale.
|
||||
|
|
@ -666,6 +732,8 @@ def _reconcile_classification(
|
|||
model_cons = {e["from"]: e for e in model_block.get("consolidations", [])}
|
||||
model_pruned = {e["name"]: e for e in model_block.get("prunings", [])}
|
||||
|
||||
declared = absorbed_declarations or {}
|
||||
|
||||
consolidated: List[Dict[str, Any]] = []
|
||||
pruned: List[Dict[str, Any]] = []
|
||||
|
||||
|
|
@ -673,6 +741,36 @@ def _reconcile_classification(
|
|||
mc = model_cons.get(name)
|
||||
mp = model_pruned.get(name)
|
||||
hc = heur_cons.get(name)
|
||||
dec = declared.get(name)
|
||||
|
||||
# Authoritative: model declared `absorbed_into` at the delete call.
|
||||
if dec is not None:
|
||||
into_claim = dec.get("into", "")
|
||||
if into_claim and into_claim in destinations:
|
||||
entry: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"into": into_claim,
|
||||
"source": "absorbed_into (model-declared at delete)",
|
||||
"reason": (mc.get("reason") or "") if mc else "",
|
||||
}
|
||||
if hc and hc.get("evidence"):
|
||||
entry["evidence"] = hc["evidence"]
|
||||
consolidated.append(entry)
|
||||
continue
|
||||
if into_claim == "":
|
||||
# Explicit prune declaration
|
||||
pruned.append({
|
||||
"name": name,
|
||||
"source": "absorbed_into=\"\" (model-declared prune)",
|
||||
"reason": (mp.get("reason") or "") if mp else "",
|
||||
})
|
||||
continue
|
||||
# into_claim is non-empty but target doesn't exist: the model
|
||||
# named a nonexistent umbrella at delete time. The tool already
|
||||
# rejects this at the skill_manage layer, so we shouldn't see it
|
||||
# in practice — but if it slips through (e.g. the umbrella was
|
||||
# deleted LATER in the same run), fall through to the usual
|
||||
# signals rather than trusting a broken reference.
|
||||
|
||||
# Model says consolidated — trust it if the destination is real.
|
||||
if mc and mc.get("into") in destinations:
|
||||
|
|
@ -808,11 +906,20 @@ def _write_run_report(
|
|||
)
|
||||
model_block = _parse_structured_summary(llm_meta.get("final", "") or "")
|
||||
destinations = set(after_names) | set(added or [])
|
||||
# Authoritative signal: extract per-delete `absorbed_into` declarations
|
||||
# from this run's tool calls. These beat both the YAML summary block and
|
||||
# the substring heuristic — the model is telling us directly, at the
|
||||
# moment of deletion, whether each archived skill was consolidated
|
||||
# (into=<umbrella>) or pruned (into="").
|
||||
absorbed_declarations = _extract_absorbed_into_declarations(
|
||||
llm_meta.get("tool_calls", []) or []
|
||||
)
|
||||
classification = _reconcile_classification(
|
||||
removed=removed,
|
||||
heuristic=heuristic,
|
||||
model_block=model_block,
|
||||
destinations=destinations,
|
||||
absorbed_declarations=absorbed_declarations,
|
||||
)
|
||||
consolidated = classification["consolidated"]
|
||||
pruned = classification["pruned"]
|
||||
|
|
|
|||
|
|
@ -21,6 +21,18 @@ It DOES include:
|
|||
pointer — otherwise the curator would immediately re-fire on the next
|
||||
tick)
|
||||
- ``.bundled_manifest`` (so protection markers stay consistent)
|
||||
|
||||
Alongside the skills tarball, each snapshot also captures a copy of
|
||||
``~/.hermes/cron/jobs.json`` as ``cron-jobs.json`` when it exists. Cron
|
||||
jobs reference skills by name in their ``skills``/``skill`` fields; the
|
||||
curator's consolidation pass rewrites those in place via
|
||||
``cron.jobs.rewrite_skill_refs()``. Without capturing the pre-run state,
|
||||
rolling back the skills tree would leave cron jobs pointing at the
|
||||
umbrella skills even though the narrow skills they were originally
|
||||
configured with have been restored. We store the whole jobs.json for
|
||||
fidelity but rollback only touches the ``skills``/``skill`` fields — the
|
||||
rest (schedule, next_run_at, enabled, prompt, etc.) is live state and
|
||||
we leave it alone.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -63,6 +75,60 @@ def _skills_dir() -> Path:
|
|||
return get_hermes_home() / "skills"
|
||||
|
||||
|
||||
def _cron_jobs_file() -> Path:
    """Return the path of the live cron jobs store: ``<hermes home>/cron/jobs.json``."""
    cron_dir = get_hermes_home() / "cron"
    return cron_dir / "jobs.json"
|
||||
|
||||
|
||||
CRON_JOBS_FILENAME = "cron-jobs.json"
|
||||
|
||||
|
||||
def _backup_cron_jobs_into(dest: Path) -> Dict[str, Any]:
    """Copy the live cron jobs.json into ``dest`` as ``cron-jobs.json``.

    Returns a small dict describing what was captured so the caller can
    fold it into the manifest:

    - ``backed_up``: True only when the copy was written successfully.
    - ``jobs_count``: number of jobs found (0 when unknown/unparseable).
    - ``reason``: present when ``backed_up`` is False.
    - ``parse_warning``: present when the file could not be parsed as JSON
      (the raw text is still copied).

    Never raises for a missing, unreadable, undecodable or unparseable
    cron file — the snapshot proceeds without cron data (it is still
    useful for rolling back skills).
    """
    src = _cron_jobs_file()
    info: Dict[str, Any] = {"backed_up": False, "jobs_count": 0}
    if not src.exists():
        info["reason"] = "no cron/jobs.json present"
        return info
    try:
        raw = src.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError: without it a
        # jobs.json containing invalid UTF-8 would escape this function
        # and break the "never raises" contract.
        logger.debug("Failed to read cron/jobs.json for backup: %s", e)
        info["reason"] = f"read error: {e}"
        return info
    # Count jobs as a nice diagnostic — but don't fail the snapshot if the
    # file is unparseable; just store the raw text and let rollback deal
    # with it (or not, if it's corrupted). jobs.json wraps the list as
    # `{"jobs": [...], "updated_at": ...}` — we count via that shape, and
    # fall back to bare-list shape just in case the format ever changes.
    try:
        parsed = json.loads(raw)
        if isinstance(parsed, dict):
            inner = parsed.get("jobs")
            if isinstance(inner, list):
                info["jobs_count"] = len(inner)
        elif isinstance(parsed, list):
            info["jobs_count"] = len(parsed)
    except (json.JSONDecodeError, TypeError, RecursionError):
        # RecursionError: pathologically nested JSON must not abort the
        # snapshot either; treat it like any other unparseable file.
        info["jobs_count"] = 0
        info["parse_warning"] = "jobs.json was not valid JSON at snapshot time"
    try:
        (dest / CRON_JOBS_FILENAME).write_text(raw, encoding="utf-8")
    except OSError as e:
        logger.debug("Failed to write cron backup file: %s", e)
        info["reason"] = f"write error: {e}"
        return info
    info["backed_up"] = True
    return info
|
||||
|
||||
|
||||
def _utc_id(now: Optional[datetime] = None) -> str:
|
||||
"""UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
|
||||
if now is None:
|
||||
|
|
@ -116,7 +182,8 @@ def _count_skill_files(base: Path) -> int:
|
|||
|
||||
|
||||
def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
||||
skills_counted: int) -> None:
|
||||
skills_counted: int,
|
||||
cron_info: Optional[Dict[str, Any]] = None) -> None:
|
||||
manifest = {
|
||||
"id": dest.name,
|
||||
"reason": reason,
|
||||
|
|
@ -125,6 +192,15 @@ def _write_manifest(dest: Path, reason: str, archive_path: Path,
|
|||
"archive_bytes": archive_path.stat().st_size,
|
||||
"skill_files": skills_counted,
|
||||
}
|
||||
if cron_info is not None:
|
||||
manifest["cron_jobs"] = {
|
||||
"backed_up": bool(cron_info.get("backed_up", False)),
|
||||
"jobs_count": int(cron_info.get("jobs_count", 0)),
|
||||
}
|
||||
if not cron_info.get("backed_up"):
|
||||
manifest["cron_jobs"]["reason"] = cron_info.get("reason", "not captured")
|
||||
if cron_info.get("parse_warning"):
|
||||
manifest["cron_jobs"]["parse_warning"] = cron_info["parse_warning"]
|
||||
(dest / "manifest.json").write_text(
|
||||
json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
|
||||
)
|
||||
|
|
@ -181,7 +257,14 @@ def snapshot_skills(reason: str = "manual") -> Optional[Path]:
|
|||
# arcname: store paths relative to skills/ so extraction
|
||||
# drops cleanly back into the skills dir.
|
||||
tf.add(str(entry), arcname=entry.name, recursive=True)
|
||||
_write_manifest(dest, reason, archive, _count_skill_files(skills))
|
||||
# Capture cron/jobs.json alongside the tarball. Never fails the
|
||||
# snapshot — the skills side is the core guarantee; cron is
|
||||
# additive. We still record in the manifest whether it was
|
||||
# captured so rollback can surface "no cron data in this snapshot".
|
||||
cron_info = _backup_cron_jobs_into(dest)
|
||||
_write_manifest(dest, reason, archive,
|
||||
_count_skill_files(skills),
|
||||
cron_info=cron_info)
|
||||
except (OSError, tarfile.TarError) as e:
|
||||
logger.debug("Curator snapshot failed: %s", e, exc_info=True)
|
||||
# Clean up partial snapshot
|
||||
|
|
@ -298,6 +381,149 @@ def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
|
|||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _restore_cron_skill_links(snapshot_dir: Path) -> Dict[str, Any]:
    """Reconcile backed-up cron skill links into the live ``cron/jobs.json``.

    We do NOT overwrite the whole cron file. Only the ``skills`` and
    ``skill`` fields are restored, and only on jobs that still exist in the
    current file (matched by ``id``). Everything else about the job —
    schedule, next_run_at, last_run_at, enabled, prompt, workdir, hooks —
    is live state that the user/scheduler has modified since the snapshot;
    overwriting it would regress unrelated cron activity.

    Rules:
    - Jobs present in backup AND live, with differing skills → skills restored.
    - Jobs present in backup AND live, with matching skills → no-op.
    - Jobs present in backup but gone from live (user deleted the job
      after the snapshot) → skipped, noted in the return report.
    - Jobs present in live but not in backup (user created a new cron
      job after the snapshot) → left untouched.

    Returns a report dict with keys ``attempted`` (bool), ``restored``
    (list of per-job before/after records), ``skipped_missing`` (list),
    ``unchanged`` (int) and ``error`` (str or None).

    Never raises; failures are captured in the return dict. Writes through
    ``cron.jobs`` to pick up the same lock + atomic-write path that tick()
    uses, so we don't race the scheduler.
    """
    report: Dict[str, Any] = {
        "attempted": False,
        "restored": [],
        "skipped_missing": [],
        "unchanged": 0,
        "error": None,
    }
    backup_file = snapshot_dir / CRON_JOBS_FILENAME
    if not backup_file.exists():
        report["error"] = f"snapshot has no {CRON_JOBS_FILENAME}"
        return report

    try:
        backup_text = backup_file.read_text(encoding="utf-8")
        backup_parsed = json.loads(backup_text)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that read_text raises on invalid UTF-8;
        # catching only JSONDecodeError would let the latter escape and
        # break the "never raises" contract.
        report["error"] = f"failed to load backed-up jobs: {e}"
        return report
    # jobs.json on disk is `{"jobs": [...], "updated_at": ...}`; accept both
    # that shape and a bare list for forward compat.
    if isinstance(backup_parsed, dict):
        backup_jobs = backup_parsed.get("jobs")
    elif isinstance(backup_parsed, list):
        backup_jobs = backup_parsed
    else:
        backup_jobs = None
    if not isinstance(backup_jobs, list):
        report["error"] = "backed-up cron-jobs.json has no jobs list"
        return report

    # Build a lookup of the backed-up skill state keyed by job id.
    # We only need the two skill-ish fields (legacy single and modern list).
    backup_by_id: Dict[str, Dict[str, Any]] = {}
    for job in backup_jobs:
        if not isinstance(job, dict):
            continue
        jid = job.get("id")
        if not isinstance(jid, str) or not jid:
            continue
        backup_by_id[jid] = {
            "skills": job.get("skills"),
            "skill": job.get("skill"),
            "name": job.get("name") or jid,
        }

    if not backup_by_id:
        report["attempted"] = True  # we tried but there was nothing to do
        return report

    # Load and rewrite the live jobs under the scheduler's lock.
    try:
        from cron.jobs import load_jobs, save_jobs, _jobs_file_lock
    except ImportError as e:
        report["error"] = f"cron module unavailable: {e}"
        return report

    report["attempted"] = True
    try:
        with _jobs_file_lock:
            live_jobs = load_jobs()
            changed = False

            live_ids = set()
            for live in live_jobs:
                if not isinstance(live, dict):
                    continue
                jid = live.get("id")
                if not isinstance(jid, str) or not jid:
                    continue
                live_ids.add(jid)

                backup = backup_by_id.get(jid)
                if backup is None:
                    continue  # live job didn't exist at snapshot time

                cur_skills = live.get("skills")
                cur_skill = live.get("skill")
                bkp_skills = backup.get("skills")
                bkp_skill = backup.get("skill")

                if cur_skills == bkp_skills and cur_skill == bkp_skill:
                    report["unchanged"] += 1
                    continue

                # Restore. Preserve absence (don't force the key to appear
                # if the backup didn't have it either).
                if bkp_skills is None:
                    live.pop("skills", None)
                else:
                    live["skills"] = bkp_skills
                if bkp_skill is None:
                    live.pop("skill", None)
                else:
                    live["skill"] = bkp_skill

                report["restored"].append({
                    "job_id": jid,
                    "job_name": backup.get("name") or jid,
                    "from": {"skills": cur_skills, "skill": cur_skill},
                    "to": {"skills": bkp_skills, "skill": bkp_skill},
                })
                changed = True

            # Jobs in backup but not in live = user deleted them after snapshot
            for jid, backup in backup_by_id.items():
                if jid not in live_ids:
                    report["skipped_missing"].append({
                        "job_id": jid,
                        "job_name": backup.get("name") or jid,
                    })

            # Only touch the file when at least one job actually changed.
            if changed:
                save_jobs(live_jobs)
    except Exception as e:  # noqa: BLE001 — rollback must not die mid-restore
        logger.debug("Cron skill-link restore failed: %s", e, exc_info=True)
        report["error"] = f"restore failed mid-flight: {e}"

    return report
|
||||
|
||||
|
||||
|
||||
def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
|
||||
"""Restore ``~/.hermes/skills/`` from a snapshot.
|
||||
|
||||
|
|
@ -408,8 +634,35 @@ def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]
|
|||
except OSError:
|
||||
pass
|
||||
|
||||
logger.info("Curator rollback: restored from %s", target.name)
|
||||
return (True, f"restored from snapshot {target.name}", target)
|
||||
# Reconcile cron skill-links. Surgical: only the skills/skill fields
|
||||
# on jobs matched by id. Everything else in jobs.json is live state
|
||||
# (schedule, next_run_at, enabled, prompt, etc.) and we leave it
|
||||
# alone. Failures here don't fail the overall rollback — the skills
|
||||
# tree is already restored, which is the main guarantee.
|
||||
cron_report = _restore_cron_skill_links(target)
|
||||
|
||||
summary_bits = [f"restored from snapshot {target.name}"]
|
||||
if cron_report.get("attempted"):
|
||||
restored_n = len(cron_report.get("restored") or [])
|
||||
skipped_n = len(cron_report.get("skipped_missing") or [])
|
||||
if cron_report.get("error"):
|
||||
summary_bits.append(f"cron links: error — {cron_report['error']}")
|
||||
elif restored_n == 0 and skipped_n == 0 and cron_report.get("unchanged", 0) == 0:
|
||||
# Attempted but nothing matched — empty snapshot or no overlapping ids.
|
||||
pass
|
||||
else:
|
||||
parts = []
|
||||
if restored_n:
|
||||
parts.append(f"{restored_n} job(s) had skill links restored")
|
||||
if skipped_n:
|
||||
parts.append(f"{skipped_n} backed-up job(s) no longer exist (skipped)")
|
||||
if cron_report.get("unchanged"):
|
||||
parts.append(f"{cron_report['unchanged']} already matched")
|
||||
summary_bits.append("cron links: " + ", ".join(parts))
|
||||
|
||||
logger.info("Curator rollback: restored from %s (cron_report=%s)",
|
||||
target.name, cron_report)
|
||||
return (True, "; ".join(summary_bits), target)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ can invoke skills via /skill-name commands.
|
|||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
|
@ -20,10 +21,35 @@ from agent.skill_preprocessing import (
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_skill_commands_platform: Optional[str] = None
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
|
||||
def _resolve_skill_commands_platform() -> Optional[str]:
|
||||
"""Return the current platform scope used for disabled-skill filtering.
|
||||
|
||||
Used to detect when the active platform has shifted so
|
||||
:func:`get_skill_commands` can drop a stale cache that was populated
|
||||
for a different platform's ``skills.platform_disabled`` view (#14536).
|
||||
|
||||
Resolves from (in order) ``HERMES_PLATFORM`` env var and
|
||||
``HERMES_SESSION_PLATFORM`` from the gateway session context. Returns
|
||||
``None`` when no platform scope is active (e.g. classic CLI, RL
|
||||
rollouts, standalone scripts).
|
||||
"""
|
||||
try:
|
||||
from gateway.session_context import get_session_env
|
||||
|
||||
resolved_platform = (
|
||||
os.getenv("HERMES_PLATFORM")
|
||||
or get_session_env("HERMES_SESSION_PLATFORM")
|
||||
)
|
||||
except Exception:
|
||||
resolved_platform = os.getenv("HERMES_PLATFORM")
|
||||
return resolved_platform or None
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
|
|
@ -218,7 +244,8 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
|||
Returns:
|
||||
Dict mapping "/skill-name" to {name, description, skill_md_path, skill_dir}.
|
||||
"""
|
||||
global _skill_commands
|
||||
global _skill_commands, _skill_commands_platform
|
||||
_skill_commands_platform = _resolve_skill_commands_platform()
|
||||
_skill_commands = {}
|
||||
try:
|
||||
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
|
||||
|
|
@ -278,8 +305,16 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
|
|||
|
||||
|
||||
def get_skill_commands() -> Dict[str, Dict[str, Any]]:
|
||||
"""Return the current skill commands mapping (scan first if empty)."""
|
||||
if not _skill_commands:
|
||||
"""Return the current skill commands mapping (scan first if empty).
|
||||
|
||||
Rescans when the active platform scope changes (e.g. a gateway
|
||||
process serving Telegram and Discord concurrently) so each platform
|
||||
sees its own ``skills.platform_disabled`` view (#14536).
|
||||
"""
|
||||
if (
|
||||
not _skill_commands
|
||||
or _skill_commands_platform != _resolve_skill_commands_platform()
|
||||
):
|
||||
scan_skill_commands()
|
||||
return _skill_commands
|
||||
|
||||
|
|
|
|||
|
|
@ -121,6 +121,18 @@ model:
|
|||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Response Caching (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
# Cache identical API responses at the OpenRouter edge for free instant replays.
|
||||
# When enabled, identical requests (same model, messages, parameters) return
|
||||
# cached responses with zero billing. Separate from Anthropic prompt caching.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
#
|
||||
# openrouter:
|
||||
# response_cache: true # Enable response caching (default: true)
|
||||
# response_cache_ttl: 300 # Cache TTL in seconds, 1-86400 (default: 300)
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
|
|
|
|||
11
cli.py
11
cli.py
|
|
@ -2928,7 +2928,14 @@ class HermesCLI:
|
|||
|
||||
def _expand_ref(match):
|
||||
path = Path(match.group(1))
|
||||
return path.read_text(encoding="utf-8") if path.exists() else match.group(0)
|
||||
# Use try/except instead of path.exists() to avoid TOCTOU race:
|
||||
# the paste file may be deleted between check and read, causing
|
||||
# the input to be silently dropped (#17666).
|
||||
try:
|
||||
return path.read_text(encoding="utf-8")
|
||||
except (OSError, IOError):
|
||||
logger.warning("Paste file gone or unreadable, returning placeholder: %s", path)
|
||||
return match.group(0)
|
||||
|
||||
return paste_ref_re.sub(_expand_ref, text)
|
||||
|
||||
|
|
@ -11584,7 +11591,7 @@ class HermesCLI:
|
|||
pass # Non-fatal — don't break the main loop
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
logger.warning("process_loop unhandled error (msg may be lost): %s", e)
|
||||
|
||||
# Start processing thread
|
||||
process_thread = threading.Thread(target=process_loop, daemon=True)
|
||||
|
|
|
|||
|
|
@ -123,9 +123,19 @@ _LOCK_FILE = _LOCK_DIR / ".tick.lock"
|
|||
|
||||
|
||||
def _resolve_origin(job: dict) -> Optional[dict]:
|
||||
"""Extract origin info from a job, preserving any extra routing metadata."""
|
||||
"""Extract origin info from a job, preserving any extra routing metadata.
|
||||
|
||||
Treats non-dict origins (free-form provenance strings, ints, lists from
|
||||
migration scripts or hand-edited jobs.json) as missing instead of
|
||||
crashing with ``AttributeError`` on ``origin.get(...)``. Without this
|
||||
guard, a job tagged with e.g. ``"combined-digest-replaces-x-and-y"``
|
||||
crashed every fire attempt with
|
||||
``'str' object has no attribute 'get'`` — ``mark_job_run`` recorded the
|
||||
failure, but the next tick re-loaded the same poisoned origin and
|
||||
crashed identically until the field was patched manually (#18722).
|
||||
"""
|
||||
origin = job.get("origin")
|
||||
if not origin:
|
||||
if not isinstance(origin, dict):
|
||||
return None
|
||||
platform = origin.get("platform")
|
||||
chat_id = origin.get("chat_id")
|
||||
|
|
@ -147,6 +157,19 @@ def _get_home_target_chat_id(platform_name: str) -> str:
|
|||
return value
|
||||
|
||||
|
||||
def _get_home_target_thread_id(platform_name: str) -> Optional[str]:
    """Look up the optional thread/topic ID configured for a platform's home target.

    The platform's home-target env var name is taken from
    ``_HOME_TARGET_ENV_VARS`` (keyed by lower-cased platform name) and
    ``<NAME>_THREAD_ID`` is read from the environment. When that is unset
    or blank, the same suffix is tried on the legacy name from
    ``_LEGACY_HOME_TARGET_ENV_VARS``, if one exists.

    Returns the stripped value, or ``None`` when the platform is unknown
    or no non-blank thread ID is set.
    """
    primary = _HOME_TARGET_ENV_VARS.get(platform_name.lower())
    if not primary:
        return None

    thread_id = os.getenv(f"{primary}_THREAD_ID", "").strip()
    if thread_id:
        return thread_id

    # Current variable is empty — try the legacy spelling, if any.
    fallback = _LEGACY_HOME_TARGET_ENV_VARS.get(primary)
    if not fallback:
        return None
    return os.getenv(f"{fallback}_THREAD_ID", "").strip() or None
|
||||
|
||||
|
||||
def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[dict]:
|
||||
"""Resolve one concrete auto-delivery target for a cron job."""
|
||||
|
||||
|
|
@ -175,7 +198,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
|||
return {
|
||||
"platform": platform_name,
|
||||
"chat_id": chat_id,
|
||||
"thread_id": None,
|
||||
"thread_id": _get_home_target_thread_id(platform_name),
|
||||
}
|
||||
return None
|
||||
|
||||
|
|
@ -229,7 +252,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
|||
return {
|
||||
"platform": platform_name,
|
||||
"chat_id": chat_id,
|
||||
"thread_id": None,
|
||||
"thread_id": _get_home_target_thread_id(platform_name),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -186,18 +186,24 @@ class HomeChannel:
|
|||
Default destination for a platform.
|
||||
|
||||
When a cron job specifies deliver="telegram" without a specific chat ID,
|
||||
messages are sent to this home channel.
|
||||
messages are sent to this home channel. Thread-aware platforms may also
|
||||
store a thread/topic ID so the bare platform target routes to the exact
|
||||
conversation where /sethome was run.
|
||||
"""
|
||||
platform: Platform
|
||||
chat_id: str
|
||||
name: str # Human-readable name for display
|
||||
thread_id: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
result = {
|
||||
"platform": self.platform.value,
|
||||
"chat_id": self.chat_id,
|
||||
"name": self.name,
|
||||
}
|
||||
if self.thread_id:
|
||||
result["thread_id"] = self.thread_id
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "HomeChannel":
|
||||
|
|
@ -205,6 +211,7 @@ class HomeChannel:
|
|||
platform=Platform(data["platform"]),
|
||||
chat_id=str(data["chat_id"]),
|
||||
name=data.get("name", "Home"),
|
||||
thread_id=str(data["thread_id"]) if data.get("thread_id") else None,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1071,6 +1078,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.TELEGRAM,
|
||||
chat_id=telegram_home,
|
||||
name=os.getenv("TELEGRAM_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("TELEGRAM_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Discord
|
||||
|
|
@ -1087,6 +1095,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.DISCORD,
|
||||
chat_id=discord_home,
|
||||
name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("DISCORD_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Reply threading mode for Discord (off/first/all)
|
||||
|
|
@ -1108,6 +1117,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.WHATSAPP,
|
||||
chat_id=whatsapp_home,
|
||||
name=os.getenv("WHATSAPP_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WHATSAPP_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Slack
|
||||
|
|
@ -1135,6 +1145,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.SLACK,
|
||||
chat_id=slack_home,
|
||||
name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""),
|
||||
thread_id=os.getenv("SLACK_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Signal
|
||||
|
|
@ -1155,6 +1166,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.SIGNAL,
|
||||
chat_id=signal_home,
|
||||
name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("SIGNAL_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Mattermost
|
||||
|
|
@ -1174,6 +1186,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.MATTERMOST,
|
||||
chat_id=mattermost_home,
|
||||
name=os.getenv("MATTERMOST_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("MATTERMOST_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Matrix
|
||||
|
|
@ -1205,6 +1218,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.MATRIX,
|
||||
chat_id=matrix_home,
|
||||
name=os.getenv("MATRIX_HOME_ROOM_NAME", "Home"),
|
||||
thread_id=os.getenv("MATRIX_HOME_ROOM_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Home Assistant
|
||||
|
|
@ -1238,6 +1252,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.EMAIL,
|
||||
chat_id=email_home,
|
||||
name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
|
||||
thread_id=os.getenv("EMAIL_HOME_ADDRESS_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# SMS (Twilio)
|
||||
|
|
@ -1253,6 +1268,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.SMS,
|
||||
chat_id=sms_home,
|
||||
name=os.getenv("SMS_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("SMS_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# API Server
|
||||
|
|
@ -1315,6 +1331,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.DINGTALK,
|
||||
chat_id=dingtalk_home,
|
||||
name=os.getenv("DINGTALK_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("DINGTALK_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Feishu / Lark
|
||||
|
|
@ -1342,6 +1359,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.FEISHU,
|
||||
chat_id=feishu_home,
|
||||
name=os.getenv("FEISHU_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("FEISHU_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# WeCom (Enterprise WeChat)
|
||||
|
|
@ -1364,6 +1382,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.WECOM,
|
||||
chat_id=wecom_home,
|
||||
name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WECOM_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# WeCom callback mode (self-built apps)
|
||||
|
|
@ -1422,6 +1441,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.WEIXIN,
|
||||
chat_id=weixin_home,
|
||||
name=os.getenv("WEIXIN_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WEIXIN_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# BlueBubbles (iMessage)
|
||||
|
|
@ -1445,6 +1465,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.BLUEBUBBLES,
|
||||
chat_id=bluebubbles_home,
|
||||
name=os.getenv("BLUEBUBBLES_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("BLUEBUBBLES_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# QQ (Official Bot API v2)
|
||||
|
|
@ -1482,6 +1503,11 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.QQBOT,
|
||||
chat_id=qq_home,
|
||||
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
|
||||
thread_id=(
|
||||
os.getenv("QQBOT_HOME_CHANNEL_THREAD_ID")
|
||||
or os.getenv("QQ_HOME_CHANNEL_THREAD_ID")
|
||||
or None
|
||||
),
|
||||
)
|
||||
|
||||
# Yuanbao — YUANBAO_APP_ID preferred
|
||||
|
|
@ -1512,6 +1538,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
|||
platform=Platform.YUANBAO,
|
||||
chat_id=yuanbao_home,
|
||||
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("YUANBAO_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
|
||||
if yuanbao_dm_policy:
|
||||
|
|
|
|||
84
gateway/platforms/_http_client_limits.py
Normal file
84
gateway/platforms/_http_client_limits.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
"""Shared HTTP client factory for long-lived platform adapters.
|
||||
|
||||
Gateway messaging platforms (QQ Bot, Feishu, WeCom, DingTalk, Signal,
|
||||
BlueBubbles, WeCom-callback) keep a persistent ``httpx.AsyncClient``
|
||||
alive for the adapter's lifetime. That amortises TLS/connection setup
|
||||
across many API calls, but it also means the process's file-descriptor
|
||||
pressure is sensitive to how aggressively the pool recycles idle keep-
|
||||
alive connections.
|
||||
|
||||
httpx's default ``keepalive_expiry`` is 5 seconds. On macOS behind
|
||||
Cloudflare Warp (and other transparent proxies), peer-initiated FIN can
|
||||
sit in ``CLOSE_WAIT`` longer than that before the local socket actually
|
||||
drains — which, multiplied across 7 long-lived adapters plus the LLM
|
||||
client and MCP clients, walks straight into the default 256 fd limit.
|
||||
See #18451.
|
||||
|
||||
``platform_httpx_limits()`` returns a tighter ``httpx.Limits`` the
|
||||
adapter factories use instead of the httpx default. The values chosen:
|
||||
|
||||
* ``max_keepalive_connections=10`` — plenty for any single adapter;
|
||||
platform APIs rarely parallelise beyond this.
|
||||
* ``keepalive_expiry=2.0`` — close idle sockets aggressively so a
|
||||
proxy's lingering CLOSE_WAIT window can't starve the process.
|
||||
|
||||
Override via ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` /
|
||||
``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` env vars when tuning under load.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError: # pragma: no cover — optional dep
|
||||
httpx = None # type: ignore[assignment]
|
||||
|
||||
|
||||
_DEFAULT_KEEPALIVE_EXPIRY_S = 2.0
|
||||
_DEFAULT_MAX_KEEPALIVE = 10
|
||||
|
||||
|
||||
def platform_httpx_limits() -> "httpx.Limits | None":
    """Build the ``httpx.Limits`` used by persistent platform-adapter clients.

    The keep-alive idle expiry and the keep-alive pool size default to
    ``_DEFAULT_KEEPALIVE_EXPIRY_S`` and ``_DEFAULT_MAX_KEEPALIVE``. They can
    be overridden with ``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` (parsed as
    float) and ``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` (parsed as int). An
    override that is blank, unparsable, or not strictly positive is ignored
    and the default is used instead.

    Returns:
        The tuned limits, or ``None`` when httpx is not importable.
    """
    if httpx is None:
        return None

    def _positive_from_env(name, default, parse):
        """Read env var *name* via *parse*; return *default* if unusable."""
        text = os.environ.get(name, "").strip()
        if not text:
            return default
        try:
            parsed = parse(text)
        except (TypeError, ValueError):
            return default
        if parsed > 0:
            return parsed
        return default

    expiry_seconds = _positive_from_env(
        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", _DEFAULT_KEEPALIVE_EXPIRY_S, float,
    )
    keepalive_pool_size = _positive_from_env(
        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", _DEFAULT_MAX_KEEPALIVE, int,
    )

    # max_connections is intentionally not passed, so httpx's own default
    # (100) stays in effect.
    return httpx.Limits(
        max_keepalive_connections=keepalive_pool_size,
        keepalive_expiry=expiry_seconds,
    )
|
||||
|
|
@ -2489,15 +2489,20 @@ class BasePlatformAdapter(ABC):
|
|||
|
||||
try:
|
||||
response = await self._message_handler(event)
|
||||
# Old adapter task (if any) is cancelled AFTER the runner has
|
||||
# fully handled the command — keeps ordering deterministic.
|
||||
await self.cancel_session_processing(
|
||||
session_key,
|
||||
release_guard=False,
|
||||
discard_pending=False,
|
||||
)
|
||||
_text, _eph_ttl = self._unwrap_ephemeral(response)
|
||||
# Send the response BEFORE cancelling the old task so the send
|
||||
# cannot be affected by task-cancellation side effects (race
|
||||
# condition fix — issue #18912). Previously the send happened
|
||||
# after cancel_session_processing, which could silently drop the
|
||||
# "/new" confirmation when an agent was actively running.
|
||||
if _text:
|
||||
logger.info(
|
||||
"[%s] Sending command '/%s' response (%d chars) to %s",
|
||||
self.name,
|
||||
cmd,
|
||||
len(_text),
|
||||
event.source.chat_id,
|
||||
)
|
||||
_r = await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=_text,
|
||||
|
|
@ -2510,6 +2515,13 @@ class BasePlatformAdapter(ABC):
|
|||
message_id=_r.message_id,
|
||||
ttl_seconds=_eph_ttl,
|
||||
)
|
||||
# Old adapter task (if any) is cancelled AFTER the response has
|
||||
# been sent — keeps ordering deterministic and avoids the race.
|
||||
await self.cancel_session_processing(
|
||||
session_key,
|
||||
release_guard=False,
|
||||
discard_pending=False,
|
||||
)
|
||||
except Exception:
|
||||
# On failure, restore the original guard if one still exists so
|
||||
# we don't leave the session in a half-reset state.
|
||||
|
|
|
|||
|
|
@ -162,7 +162,9 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
|||
return False
|
||||
from aiohttp import web
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
|
||||
try:
|
||||
await self._api_get("/api/v1/ping")
|
||||
info = await self._api_get("/api/v1/server/info")
|
||||
|
|
|
|||
|
|
@ -228,7 +228,11 @@ class DingTalkAdapter(BasePlatformAdapter):
|
|||
return False
|
||||
|
||||
try:
|
||||
self._http_client = httpx.AsyncClient(timeout=30.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(
|
||||
timeout=30.0, limits=platform_httpx_limits(),
|
||||
)
|
||||
|
||||
credential = dingtalk_stream.Credential(
|
||||
self._client_id, self._client_secret
|
||||
|
|
|
|||
|
|
@ -497,6 +497,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
self._ready_event = asyncio.Event()
|
||||
self._allowed_user_ids: set = set() # For button approval authorization
|
||||
self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering
|
||||
self.gateway_runner = None # Set by gateway/run.py for cross-platform delivery
|
||||
# Voice channel state (per-guild)
|
||||
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
|
||||
self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave
|
||||
|
|
@ -613,6 +614,21 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# so LLM output or echoed user content can't ping the whole
|
||||
# server; override per DISCORD_ALLOW_MENTION_* env vars or the
|
||||
# discord.allow_mentions.* block in config.yaml.
|
||||
|
||||
# Close any existing client to prevent zombie websocket connections
|
||||
# on reconnect (see #18187). Without this, the old client remains
|
||||
# connected to Discord gateway and both fire on_message, causing
|
||||
# double responses.
|
||||
if self._client is not None:
|
||||
try:
|
||||
if not self._client.is_closed():
|
||||
await self._client.close()
|
||||
except Exception:
|
||||
logger.debug("[%s] Failed to close previous Discord client", self.name)
|
||||
finally:
|
||||
self._client = None
|
||||
self._ready_event.clear()
|
||||
|
||||
self._client = commands.Bot(
|
||||
command_prefix="!", # Not really used, we handle raw messages
|
||||
intents=intents,
|
||||
|
|
@ -1914,6 +1930,225 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
return True
|
||||
return False
|
||||
|
||||
# ── Slash command authorization ─────────────────────────────────────
|
||||
# Slash commands (``_run_simple_slash`` and ``_handle_thread_create_slash``)
|
||||
# are a separate Discord interaction surface from regular messages and
|
||||
# historically ran with NO authorization check — bypassing every gate
|
||||
# ``on_message`` enforces (DISCORD_ALLOWED_USERS, DISCORD_ALLOWED_ROLES,
|
||||
# DISCORD_ALLOWED_CHANNELS, DISCORD_IGNORED_CHANNELS). Any guild member
|
||||
# could invoke ``/background``, ``/restart``, ``/sethome``, etc. as the
|
||||
# operator. ``_check_slash_authorization`` mirrors the on_message gates
|
||||
# one-for-one so the slash surface honors the same trust boundary.
|
||||
#
|
||||
# By design, this is a no-op for deployments with no allowlist env vars
|
||||
# set — ``_is_allowed_user`` returns True and the channel checks early-out
|
||||
# — preserving the existing "single-tenant, all guild members trusted"
|
||||
# default. Deployments that DO set any DISCORD_ALLOWED_* var get slash
|
||||
# parity with on_message.
|
||||
|
||||
def _evaluate_slash_authorization(
    self, interaction: "discord.Interaction",
) -> Tuple[bool, Optional[str]]:
    """Decide whether a slash interaction passes the channel and user gates.

    This is a pure evaluation: nothing is sent, logged, or scheduled, so it
    can back both the responding wrapper and quiet callers such as an
    autocomplete callback.

    Checks run in this order:

    1. Channel scope, skipped when the interaction's channel is a
       ``discord.DMChannel``. The channel id (plus the parent channel id
       when the channel is a ``discord.Thread``) is matched against
       ``DISCORD_ALLOWED_CHANNELS`` and then ``DISCORD_IGNORED_CHANNELS``.
       A ``*`` entry in the allowlist admits every channel; a ``*`` entry
       in the ignore list rejects every channel with a resolvable id. An
       ignore match rejects even when the allowlist matched.
    2. User / role allowlist, delegated to ``self._is_allowed_user``.

    Malformed payloads fail closed: a missing channel id with a non-wildcard
    channel allowlist configured is rejected, and a missing user (or user
    id) is rejected whenever a user or role allowlist is configured. With no
    user/role allowlist, a missing user is allowed.

    Args:
        interaction: The Discord interaction being authorized.

    Returns:
        ``(True, None)`` when allowed, otherwise ``(False, reason)`` where
        ``reason`` is a short human-readable explanation.
    """

    def _csv_set(raw: str) -> set:
        """Split a comma-separated env value into a set of non-empty tokens."""
        return {token.strip() for token in raw.split(",") if token.strip()}

    channel = getattr(interaction, "channel", None)
    is_dm = channel is not None and isinstance(channel, discord.DMChannel)

    # ── Channel scope ── (DMs are not channel-gated here)
    if not is_dm:
        raw_channel_id = getattr(interaction, "channel_id", None) or getattr(
            channel, "id", None,
        )
        candidate_ids: set = set()
        if raw_channel_id is not None:
            candidate_ids.add(str(raw_channel_id))
        # Threads are also matched by their parent channel so per-channel
        # allow/deny lists cover the threads underneath them.
        if isinstance(channel, discord.Thread):
            parent_channel_id = self._get_parent_channel_id(channel)
            if parent_channel_id:
                candidate_ids.add(str(parent_channel_id))

        allow_env = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
        if allow_env:
            allow_ids = _csv_set(allow_env)
            if "*" not in allow_ids:
                if not candidate_ids:
                    # A channel policy exists but no channel id could be
                    # resolved from the payload: fail closed.
                    return (
                        False,
                        "channel id missing with DISCORD_ALLOWED_CHANNELS configured",
                    )
                if candidate_ids.isdisjoint(allow_ids):
                    return (False, "channel not in DISCORD_ALLOWED_CHANNELS")

        # Ignored beats allowed: an ignore entry on the thread or on its
        # parent rejects even if the allowlist matched above.
        ignore_env = os.getenv("DISCORD_IGNORED_CHANNELS", "")
        if ignore_env and candidate_ids:
            ignore_ids = _csv_set(ignore_env)
            if "*" in ignore_ids or not candidate_ids.isdisjoint(ignore_ids):
                return (False, "channel in DISCORD_IGNORED_CHANNELS")

    # ── User / role allowlist ──
    user = getattr(interaction, "user", None)
    user_allowlist = getattr(self, "_allowed_user_ids", set()) or set()
    role_allowlist = getattr(self, "_allowed_role_ids", set()) or set()
    if user is None or getattr(user, "id", None) is None:
        # No identifiable invoker. Reject when any allowlist is configured;
        # otherwise keep the "no allowlist means everyone" behaviour.
        if user_allowlist or role_allowlist:
            return (False, "missing interaction.user with allowlist configured")
        return (True, None)

    if self._is_allowed_user(str(user.id), author=user):
        return (True, None)
    return (
        False,
        "user not in DISCORD_ALLOWED_USERS / DISCORD_ALLOWED_ROLES",
    )
|
||||
|
||||
async def _check_slash_authorization(
|
||||
self, interaction: "discord.Interaction", command_text: str,
|
||||
) -> bool:
|
||||
"""Mirror on_message's user/role/channel gates onto a slash invocation.
|
||||
|
||||
Returns True to proceed. Returns False *after* sending an ephemeral
|
||||
rejection, logging a warning, and scheduling a cross-platform admin
|
||||
alert — the caller must stop on False (the interaction has already
|
||||
been responded to).
|
||||
"""
|
||||
allowed, reason = self._evaluate_slash_authorization(interaction)
|
||||
if allowed:
|
||||
return True
|
||||
return await self._reject_slash(
|
||||
interaction, command_text, reason=reason or "unauthorized",
|
||||
)
|
||||
|
||||
async def _reject_slash(
|
||||
self, interaction: "discord.Interaction", command_text: str, *, reason: str,
|
||||
) -> bool:
|
||||
"""Send ephemeral reject + log warning + schedule admin alert. Returns False.
|
||||
|
||||
Tolerates a missing ``interaction.user`` -- the fail-closed branch
|
||||
in ``_evaluate_slash_authorization`` deliberately routes here for
|
||||
malformed payloads (no user) when an allowlist is configured, and
|
||||
``str(interaction.user.id)`` would raise AttributeError before the
|
||||
ephemeral rejection could be sent.
|
||||
"""
|
||||
user = getattr(interaction, "user", None)
|
||||
if user is not None:
|
||||
user_id = str(getattr(user, "id", "?"))
|
||||
user_name = getattr(user, "name", "?")
|
||||
else:
|
||||
user_id = "?"
|
||||
user_name = "?"
|
||||
chan_id = getattr(interaction, "channel_id", None) or getattr(
|
||||
getattr(interaction, "channel", None), "id", None,
|
||||
)
|
||||
guild_id = getattr(interaction, "guild_id", None)
|
||||
|
||||
logger.warning(
|
||||
"[Discord] Unauthorized slash attempt: user=%s id=%s channel=%s "
|
||||
"guild=%s cmd=%r reason=%r",
|
||||
user_name, user_id, chan_id, guild_id, command_text, reason,
|
||||
)
|
||||
|
||||
try:
|
||||
await interaction.response.send_message(
|
||||
"You're not authorized to use this command.",
|
||||
ephemeral=True,
|
||||
)
|
||||
except Exception as e:
|
||||
# Interaction may already be responded to (e.g. caller deferred
|
||||
# before the auth check, or Discord retried). Best-effort only.
|
||||
logger.debug("[Discord] Could not send unauthorized ephemeral: %s", e)
|
||||
|
||||
# Fire-and-forget: don't block the interaction handler on Telegram I/O.
|
||||
try:
|
||||
asyncio.create_task(self._notify_unauthorized_slash(
|
||||
user_name, user_id, chan_id, guild_id, command_text, reason,
|
||||
))
|
||||
except Exception as e:
|
||||
logger.debug("[Discord] Could not schedule admin notify task: %s", e)
|
||||
|
||||
return False
|
||||
|
||||
async def _notify_unauthorized_slash(
    self, user_name: str, user_id: str, chan_id, guild_id,
    command_text: str, reason: str,
) -> None:
    """Relay an unauthorized-slash alert to the operator on another platform.

    Platforms are tried in order: ``Platform.TELEGRAM``, then
    ``Platform.SLACK``. A platform is skipped when the gateway runner has no
    adapter for it or no home channel with a ``chat_id``. The method stops
    at the first send that does not report failure.

    A send counts as failed, and the next platform is tried, when it raises
    or when its result carries ``success`` set to exactly ``False``. A
    result without a ``success`` attribute is treated as delivered. Does
    nothing when ``self.gateway_runner`` is missing or falsy.

    Args:
        user_name: Display name of the invoker, or ``"?"``.
        user_id: Id of the invoker as a string, or ``"?"``.
        chan_id: Channel id of the attempt, if known.
        guild_id: Guild id of the attempt, if known.
        command_text: The command that was attempted.
        reason: Why the attempt was rejected.
    """
    runner = getattr(self, "gateway_runner", None)
    if not runner:
        return

    # The text is the same for every platform, so build it once.
    alert_text = (
        "⚠️ Unauthorized Discord slash attempt\n"
        f"User: {user_name} ({user_id})\n"
        f"Channel: {chan_id} (guild {guild_id})\n"
        f"Command: {command_text}\n"
        f"Reason: {reason}"
    )

    for platform in (Platform.TELEGRAM, Platform.SLACK):
        try:
            relay = runner.adapters.get(platform)
            if not relay:
                continue
            home = runner.config.get_home_channel(platform)
            if not home or not getattr(home, "chat_id", None):
                continue
            outcome = await relay.send(str(home.chat_id), alert_text)
            # Anything other than an explicit success=False counts as
            # delivered and ends the fallback chain.
            if getattr(outcome, "success", None) is not False:
                return
            logger.debug(
                "[Discord] Admin notify via %s returned success=False"
                " (error=%r); falling through",
                platform, getattr(outcome, "error", None),
            )
        except Exception as e:
            logger.debug("[Discord] Admin notify via %s failed: %s", platform, e)
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -2301,6 +2536,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
except Exception:
|
||||
pass # logging must never block command dispatch
|
||||
|
||||
# Auth gate — must run before defer() so an ephemeral rejection can
|
||||
# be delivered on the still-unresponded interaction.
|
||||
if not await self._check_slash_authorization(interaction, command_text):
|
||||
return
|
||||
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
event = self._build_slash_event(interaction, command_text)
|
||||
await self.handle_message(event)
|
||||
|
|
@ -2445,7 +2685,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
message: str = "",
|
||||
auto_archive_duration: int = 1440,
|
||||
):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
# defer() is performed inside the handler *after* the auth gate
|
||||
# so a rejected invoker can receive an ephemeral rejection.
|
||||
await self._handle_thread_create_slash(interaction, name, message, auto_archive_duration)
|
||||
|
||||
@tree.command(name="queue", description="Queue a prompt for the next turn (doesn't interrupt)")
|
||||
|
|
@ -2566,6 +2807,54 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# supporting up to 25 categories × 25 skills = 625 skills.
|
||||
self._register_skill_group(tree)
|
||||
|
||||
# Optional defense-in-depth: hide every slash command from non-admin
|
||||
# guild members in Discord's slash picker. Server-side authorization
|
||||
# (``_check_slash_authorization``) is the actual gate; this is purely
|
||||
# UX so users don't see commands they can't invoke. Off by default
|
||||
# to preserve the slash UX for deployments that intentionally allow
|
||||
# everyone in the guild.
|
||||
if os.getenv("DISCORD_HIDE_SLASH_COMMANDS", "false").strip().lower() in (
|
||||
"true", "1", "yes", "on",
|
||||
):
|
||||
self._apply_owner_only_visibility(tree)
|
||||
|
||||
def _apply_owner_only_visibility(self, tree) -> None:
    """Assign an empty default-permission set to every command in *tree*.

    Each command returned by ``tree.get_commands()`` gets
    ``default_permissions = discord.Permissions(0)``. Per Discord's
    application-command permission model, an empty default set is meant to
    leave the command usable only by guild administrators unless a server
    admin configures an explicit override (NOTE(review): confirm against the
    Discord docs for the library version in use).

    This only affects what Discord shows and permits client-side; per-call
    authorization in the slash handlers remains the actual gate.

    Failures are never raised. If the permission object cannot be built, a
    warning is logged and nothing is changed. If one command rejects the
    assignment, it is logged at debug level and skipped.

    Args:
        tree: The slash-command tree whose commands are updated in place.
    """
    try:
        hidden_perms = discord.Permissions(0)
    except Exception as e:
        logger.warning(
            "[Discord] _apply_owner_only_visibility: cannot build Permissions(0): %s",
            e,
        )
        return

    hidden_count = 0
    for command in tree.get_commands():
        try:
            command.default_permissions = hidden_perms
        except Exception as e:
            logger.debug(
                "[Discord] Could not set default_permissions on %r: %s",
                getattr(command, "name", "?"), e,
            )
        else:
            hidden_count += 1

    logger.info(
        "[Discord] Hid %d slash command(s) from non-admin guild members "
        "(opt-in defense in depth via DISCORD_HIDE_SLASH_COMMANDS).",
        hidden_count,
    )
|
||||
|
||||
def _register_skill_group(self, tree) -> None:
|
||||
"""Register a single ``/skill`` command with autocomplete on the name.
|
||||
|
||||
|
|
@ -2584,40 +2873,32 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
hidden skills. The slash picker also becomes more discoverable —
|
||||
Discord live-filters by the user's typed prefix against both the
|
||||
skill name and its description.
|
||||
|
||||
The entries list and lookup dict are stored on ``self`` rather
|
||||
than captured in closure variables so :meth:`refresh_skill_group`
|
||||
can repopulate them when the user runs ``/reload-skills`` without
|
||||
needing to touch the Discord slash-command tree or trigger a
|
||||
``tree.sync()`` call.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.commands import discord_skill_commands_by_category
|
||||
|
||||
existing_names = set()
|
||||
try:
|
||||
existing_names = {cmd.name for cmd in tree.get_commands()}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Reuse the existing collector for consistent filtering
|
||||
# (per-platform disabled, hub-excluded, name clamping), then
|
||||
# flatten — the category grouping was only useful for the
|
||||
# nested layout.
|
||||
categories, uncategorized, hidden = discord_skill_commands_by_category(
|
||||
reserved_names=existing_names,
|
||||
)
|
||||
entries: list[tuple[str, str, str]] = list(uncategorized)
|
||||
for cat_skills in categories.values():
|
||||
entries.extend(cat_skills)
|
||||
# Populate the instance-level entries/lookup so the
|
||||
# autocomplete + handler callbacks below always read the
|
||||
# freshest state. refresh_skill_group() re-runs the same
|
||||
# collector and mutates these two attributes in place.
|
||||
self._skill_entries: list[tuple[str, str, str]] = []
|
||||
self._skill_lookup: dict[str, tuple[str, str]] = {}
|
||||
self._skill_group_reserved_names: set[str] = set(existing_names)
|
||||
self._refresh_skill_catalog_state()
|
||||
|
||||
if not entries:
|
||||
if not self._skill_entries:
|
||||
return
|
||||
|
||||
# Stable alphabetical order so the autocomplete suggestion
|
||||
# list is predictable across restarts.
|
||||
entries.sort(key=lambda t: t[0])
|
||||
|
||||
# name -> (description, cmd_key) — used by both the autocomplete
|
||||
# callback and the handler for O(1) dispatch.
|
||||
skill_lookup: dict[str, tuple[str, str]] = {
|
||||
n: (d, k) for n, d, k in entries
|
||||
}
|
||||
|
||||
async def _autocomplete_name(
|
||||
interaction: "discord.Interaction", current: str,
|
||||
) -> list:
|
||||
|
|
@ -2627,10 +2908,29 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
"/skill pdf" surfaces skills whose description mentions
|
||||
PDFs even if the name doesn't. Discord caps this list at
|
||||
25 entries per query.
|
||||
|
||||
Authorization: a quiet pre-check evaluates the slash
|
||||
allowlists and returns ``[]`` for unauthorized users so
|
||||
the installed skill catalog is not leaked to anyone who
|
||||
can see the command in the picker. Returning a generic
|
||||
empty list here is intentional — sending a per-keystroke
|
||||
ephemeral rejection would produce a barrage of error
|
||||
popups during typing.
|
||||
|
||||
Reads ``self._skill_entries`` so a ``/reload-skills`` run
|
||||
since process start shows up on the very next keystroke.
|
||||
"""
|
||||
try:
|
||||
allowed, _reason = self._evaluate_slash_authorization(interaction)
|
||||
except Exception:
|
||||
# Defensive: never raise from autocomplete. Fail
|
||||
# closed by returning an empty suggestion list.
|
||||
return []
|
||||
if not allowed:
|
||||
return []
|
||||
q = (current or "").strip().lower()
|
||||
choices: list = []
|
||||
for name, desc, _key in entries:
|
||||
for name, desc, _key in self._skill_entries:
|
||||
if not q or q in name.lower() or (desc and q in desc.lower()):
|
||||
if desc:
|
||||
label = f"{name} — {desc}"
|
||||
|
|
@ -2654,7 +2954,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
async def _skill_handler(
|
||||
interaction: "discord.Interaction", name: str, args: str = "",
|
||||
):
|
||||
entry = skill_lookup.get(name)
|
||||
# Authorize BEFORE any skill lookup so that known and
|
||||
# unknown skill names produce identical rejections for
|
||||
# unauthorized users (no probing the installed catalog
|
||||
# via "Unknown skill: <name>" responses).
|
||||
if not await self._check_slash_authorization(interaction, "/skill"):
|
||||
return
|
||||
entry = self._skill_lookup.get(name)
|
||||
if not entry:
|
||||
await interaction.response.send_message(
|
||||
f"Unknown skill: `{name}`. Start typing for "
|
||||
|
|
@ -2676,16 +2982,74 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
|
||||
logger.info(
|
||||
"[%s] Registered /skill command with %d skill(s) via autocomplete",
|
||||
self.name, len(entries),
|
||||
self.name, len(self._skill_entries),
|
||||
)
|
||||
if hidden:
|
||||
if self._skill_group_hidden_count:
|
||||
logger.info(
|
||||
"[%s] %d skill(s) filtered out of /skill (name clamp / reserved)",
|
||||
self.name, hidden,
|
||||
self.name, self._skill_group_hidden_count,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] Failed to register /skill command: %s", self.name, exc)
|
||||
|
||||
def _refresh_skill_catalog_state(self) -> None:
    """Re-scan disk for skills and repopulate ``self._skill_entries``.

    Called once from :meth:`_register_skill_group` at startup and
    again from :meth:`refresh_skill_group` whenever the user runs
    ``/reload-skills``. No Discord API calls are made — autocomplete
    and the handler both read from these instance attributes
    directly, so an in-place mutation is sufficient.

    Side effects:
        Rebinds ``self._skill_entries`` (sorted list of
        ``(name, description, cmd_key)`` tuples), ``self._skill_lookup``
        (``name -> (description, cmd_key)``) and
        ``self._skill_group_hidden_count``. The three attributes are only
        assigned after the scan has completed, so an exception raised by
        the scan leaves the previous state untouched.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    # getattr-with-default: the reserved-name set is seeded by
    # _register_skill_group; fall back to "nothing reserved" if this runs
    # before that attribute exists.
    reserved = getattr(self, "_skill_group_reserved_names", set())
    categories, uncategorized, hidden = discord_skill_commands_by_category(
        reserved_names=set(reserved),  # pass a copy, not the live set
    )

    # Flatten: uncategorized skills plus every category's skills.
    entries: list[tuple[str, str, str]] = list(uncategorized)
    for cat_skills in categories.values():
        entries.extend(cat_skills)

    # Stable alphabetical order so the autocomplete suggestion
    # list is predictable across restarts.
    entries.sort(key=lambda t: t[0])

    self._skill_entries = entries
    self._skill_lookup = {n: (d, k) for n, d, k in entries}
    self._skill_group_hidden_count = hidden
|
||||
|
||||
def refresh_skill_group(self) -> tuple[int, int]:
    """Rescan skills and update the live ``/skill`` autocomplete state.

    Invoked by :meth:`gateway.run.GatewayOrchestrator._handle_reload_skills_command`
    after :func:`agent.skill_commands.reload_skills` has refreshed
    the in-process skill-command registry. Without this call, the
    ``/skill`` autocomplete dropdown keeps showing the list captured
    at process start — new skills stay invisible and deleted skills
    return an "Unknown skill" error when clicked.

    Because autocomplete options are fetched dynamically by Discord,
    we only need to mutate the entries/lookup attributes read by the
    callbacks — no ``tree.sync()`` is required.

    Returns:
        ``(new_count, hidden_count)``. If the rescan raises, the failure
        is logged as a warning and ``(current_entry_count, 0)`` is
        returned instead of propagating the exception.
    """
    try:
        self._refresh_skill_catalog_state()
    except Exception as exc:
        # Best-effort: a failed rescan must not break /reload-skills.
        logger.warning(
            "[%s] Failed to refresh /skill autocomplete after reload: %s",
            self.name, exc,
        )
        # _skill_entries may not exist yet if the very first scan failed.
        return (len(getattr(self, "_skill_entries", [])), 0)

    logger.info(
        "[%s] Refreshed /skill autocomplete: %d skill(s) available (%d filtered)",
        self.name,
        len(self._skill_entries),
        self._skill_group_hidden_count,
    )
    return (len(self._skill_entries), self._skill_group_hidden_count)
|
||||
|
||||
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
|
||||
"""Build a MessageEvent from a Discord slash command interaction."""
|
||||
is_dm = isinstance(interaction.channel, discord.DMChannel)
|
||||
|
|
@ -2743,6 +3107,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
auto_archive_duration: int = 1440,
|
||||
) -> None:
|
||||
"""Create a Discord thread from a slash command and start a session in it."""
|
||||
if not await self._check_slash_authorization(interaction, "/thread"):
|
||||
return
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
result = await self._create_thread(
|
||||
interaction,
|
||||
name=name,
|
||||
|
|
@ -3037,6 +3404,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
view = ExecApprovalView(
|
||||
session_key=session_key,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
|
|
@ -3075,6 +3443,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
session_key=session_key,
|
||||
confirm_id=confirm_id,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
|
|
@ -3109,6 +3478,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
view = UpdatePromptView(
|
||||
session_key=session_key,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
return SendResult(success=True, message_id=str(msg.id))
|
||||
|
|
@ -3166,6 +3536,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
session_key=session_key,
|
||||
on_model_selected=on_model_selected,
|
||||
allowed_user_ids=self._allowed_user_ids,
|
||||
allowed_role_ids=self._allowed_role_ids,
|
||||
)
|
||||
|
||||
msg = await channel.send(embed=embed, view=view)
|
||||
|
|
@ -3721,6 +4092,72 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||
# Discord UI Components (outside the adapter class)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _component_check_auth(
|
||||
interaction,
|
||||
allowed_user_ids: Optional[set],
|
||||
allowed_role_ids: Optional[set],
|
||||
) -> bool:
|
||||
"""Shared user-or-role OR semantics for component view button clicks.
|
||||
|
||||
Mirrors ``DiscordAdapter._is_allowed_user`` / the slash and on_message
|
||||
gates so every Discord interaction surface honors the same trust
|
||||
boundary. Component views (ExecApprovalView, SlashConfirmView,
|
||||
UpdatePromptView, ModelPickerView) used to receive only
|
||||
``allowed_user_ids``: in role-only deployments
|
||||
(DISCORD_ALLOWED_ROLES set, DISCORD_ALLOWED_USERS empty) the user
|
||||
set was empty and the legacy "no allowlist = allow everyone" branch
|
||||
let any guild member click the buttons -- approving exec commands,
|
||||
cancelling slash confirmations, switching the model.
|
||||
|
||||
Behavior:
|
||||
|
||||
- both allowlists empty -> allow (preserves existing no-allowlist
|
||||
deployments, no regression)
|
||||
- user is in user allowlist -> allow
|
||||
- role allowlist set + user has a role in it -> allow
|
||||
- role allowlist set + interaction.user has no resolvable
|
||||
``roles`` attribute (e.g. DM context with a role policy active)
|
||||
-> reject (fail closed)
|
||||
- otherwise -> reject
|
||||
"""
|
||||
user_set = allowed_user_ids or set()
|
||||
role_set = allowed_role_ids or set()
|
||||
has_users = bool(user_set)
|
||||
has_roles = bool(role_set)
|
||||
if not has_users and not has_roles:
|
||||
return True
|
||||
|
||||
user = getattr(interaction, "user", None)
|
||||
if user is None:
|
||||
return False
|
||||
|
||||
if has_users:
|
||||
try:
|
||||
uid = str(user.id)
|
||||
except AttributeError:
|
||||
uid = ""
|
||||
if uid and uid in user_set:
|
||||
return True
|
||||
|
||||
if has_roles:
|
||||
roles_attr = getattr(user, "roles", None)
|
||||
if roles_attr is None:
|
||||
# Role policy is configured but the interaction doesn't
|
||||
# carry role data (DM-context Member, raw User payload).
|
||||
# Fail closed: a user without a resolvable role list cannot
|
||||
# satisfy a role allowlist.
|
||||
return False
|
||||
try:
|
||||
user_role_ids = {getattr(r, "id", None) for r in roles_attr}
|
||||
except TypeError:
|
||||
return False
|
||||
if user_role_ids & role_set:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
if DISCORD_AVAILABLE:
|
||||
|
||||
class ExecApprovalView(discord.ui.View):
|
||||
|
|
@ -3733,17 +4170,23 @@ if DISCORD_AVAILABLE:
|
|||
Only users in the allowed list can click. Times out after 5 minutes.
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, allowed_user_ids: set):
|
||||
def __init__(
|
||||
self,
|
||||
session_key: str,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=300) # 5-minute timeout
|
||||
self.session_key = session_key
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
"""Verify the user clicking is authorized."""
|
||||
if not self.allowed_user_ids:
|
||||
return True # No allowlist = anyone can approve
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, choice: str,
|
||||
|
|
@ -3835,17 +4278,24 @@ if DISCORD_AVAILABLE:
|
|||
5 minutes (matches the gateway primitive's timeout).
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
|
||||
def __init__(
|
||||
self,
|
||||
session_key: str,
|
||||
confirm_id: str,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=300)
|
||||
self.session_key = session_key
|
||||
self.confirm_id = confirm_id
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
async def _resolve(
|
||||
self, interaction: discord.Interaction, choice: str,
|
||||
|
|
@ -3923,16 +4373,22 @@ if DISCORD_AVAILABLE:
|
|||
5-minute timeout on its side).
|
||||
"""
|
||||
|
||||
def __init__(self, session_key: str, allowed_user_ids: set):
|
||||
def __init__(
|
||||
self,
|
||||
session_key: str,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=300)
|
||||
self.session_key = session_key
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
async def _respond(
|
||||
self, interaction: discord.Interaction, answer: str,
|
||||
|
|
@ -4009,6 +4465,7 @@ if DISCORD_AVAILABLE:
|
|||
session_key: str,
|
||||
on_model_selected,
|
||||
allowed_user_ids: set,
|
||||
allowed_role_ids: Optional[set] = None,
|
||||
):
|
||||
super().__init__(timeout=120)
|
||||
self.providers = providers
|
||||
|
|
@ -4017,15 +4474,16 @@ if DISCORD_AVAILABLE:
|
|||
self.session_key = session_key
|
||||
self.on_model_selected = on_model_selected
|
||||
self.allowed_user_ids = allowed_user_ids
|
||||
self.allowed_role_ids = allowed_role_ids or set()
|
||||
self.resolved = False
|
||||
self._selected_provider: str = ""
|
||||
|
||||
self._build_provider_select()
|
||||
|
||||
def _check_auth(self, interaction: discord.Interaction) -> bool:
|
||||
if not self.allowed_user_ids:
|
||||
return True
|
||||
return str(interaction.user.id) in self.allowed_user_ids
|
||||
return _component_check_auth(
|
||||
interaction, self.allowed_user_ids, self.allowed_role_ids,
|
||||
)
|
||||
|
||||
def _build_provider_select(self):
|
||||
"""Build the provider dropdown menu."""
|
||||
|
|
|
|||
|
|
@ -2922,13 +2922,18 @@ class FeishuAdapter(BasePlatformAdapter):
|
|||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
# Snapshot Content-Type and body while the client context is
|
||||
# still active so pooled connections fully release on exit.
|
||||
# See #18451.
|
||||
content_type_hdr = str(response.headers.get("Content-Type", ""))
|
||||
body = response.content
|
||||
filename = self._derive_remote_filename(
|
||||
file_url,
|
||||
content_type=str(response.headers.get("Content-Type", "")),
|
||||
content_type=content_type_hdr,
|
||||
default_name=preferred_name,
|
||||
default_ext=default_ext,
|
||||
)
|
||||
cached_path = cache_document_from_bytes(response.content, filename)
|
||||
cached_path = cache_document_from_bytes(body, filename)
|
||||
return cached_path, filename
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):
|
|||
|
||||
async def _ws_connect(self) -> bool:
|
||||
"""Establish WebSocket connection and authenticate."""
|
||||
ws_url = self._hass_url.replace("http://", "ws://").replace("https://", "wss://")
|
||||
ws_url = self._hass_url.replace("https://", "wss://").replace("http://", "ws://")
|
||||
ws_url = f"{ws_url}/api/websocket"
|
||||
|
||||
self._session = aiohttp.ClientSession(
|
||||
|
|
|
|||
|
|
@ -243,10 +243,14 @@ class QQAdapter(BasePlatformAdapter):
|
|||
return False
|
||||
|
||||
try:
|
||||
# Tighter keepalive pool so idle CLOSE_WAIT sockets drain
|
||||
# faster behind proxies like Cloudflare Warp (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(
|
||||
timeout=30.0,
|
||||
follow_redirects=True,
|
||||
event_hooks={"response": [_ssrf_redirect_guard]},
|
||||
limits=platform_httpx_limits(),
|
||||
)
|
||||
|
||||
# 1. Get access token
|
||||
|
|
|
|||
|
|
@ -248,7 +248,9 @@ class SignalAdapter(BasePlatformAdapter):
|
|||
except Exception as e:
|
||||
logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)
|
||||
|
||||
self.client = httpx.AsyncClient(timeout=30.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self.client = httpx.AsyncClient(timeout=30.0, limits=platform_httpx_limits())
|
||||
try:
|
||||
# Health check — verify signal-cli daemon is reachable
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -528,6 +528,21 @@ class SlackAdapter(BasePlatformAdapter):
|
|||
return False
|
||||
lock_acquired = True
|
||||
|
||||
# Close any previous handler before creating a new one so that
|
||||
# calling connect() a second time (e.g. during a gateway restart or
|
||||
# in-process reconnect attempt) does not leave a zombie Socket Mode
|
||||
# connection alive. Both the old and new connections would otherwise
|
||||
# receive every Slack event and dispatch it twice, producing double
|
||||
# responses — the same bug that affected DiscordAdapter (#18187).
|
||||
if self._handler is not None:
|
||||
try:
|
||||
await self._handler.close_async()
|
||||
except Exception:
|
||||
logger.debug("[%s] Failed to close previous Slack handler", self.name)
|
||||
finally:
|
||||
self._handler = None
|
||||
self._app = None
|
||||
|
||||
# First token is the primary — used for AsyncApp / Socket Mode
|
||||
primary_token = bot_tokens[0]
|
||||
self._app = AsyncApp(token=primary_token)
|
||||
|
|
|
|||
|
|
@ -512,6 +512,17 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self.name, attempt,
|
||||
)
|
||||
self._polling_network_error_count = 0
|
||||
# start_polling() returning is necessary but not sufficient:
|
||||
# PTB's Updater can be left in a state where `running` is True
|
||||
# but the underlying long-poll task is wedged on a stale httpx
|
||||
# connection and never makes progress. No error_callback fires
|
||||
# in that state, so the reconnect ladder won't advance on its
|
||||
# own. Schedule a deferred probe to detect the wedge and
|
||||
# re-enter the ladder if needed.
|
||||
if not self.has_fatal_error:
|
||||
probe = asyncio.ensure_future(self._verify_polling_after_reconnect())
|
||||
self._background_tasks.add(probe)
|
||||
probe.add_done_callback(self._background_tasks.discard)
|
||||
except Exception as retry_err:
|
||||
logger.warning("[%s] Telegram polling reconnect failed: %s", self.name, retry_err)
|
||||
# start_polling failed — polling is dead and no further error
|
||||
|
|
@ -523,6 +534,50 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self._background_tasks.add(task)
|
||||
task.add_done_callback(self._background_tasks.discard)
|
||||
|
||||
async def _verify_polling_after_reconnect(self) -> None:
    """Heartbeat probe scheduled after a successful reconnect.

    PTB's Updater can survive a botched stop()+start_polling() cycle
    with `running=True` but a wedged consumer task. No error callback
    fires, so the reconnect ladder doesn't advance on its own. This
    probe detects the wedge by:

    1. Sleeping HEARTBEAT_PROBE_DELAY so a healthy long-poll has time
       to complete at least one cycle.
    2. Verifying `Updater.running` is still True.
    3. Probing the bot endpoint with a tight asyncio timeout. A
       wedged httpx pool fails this probe; a healthy one returns
       well under the timeout.

    On any failure, re-enter the reconnect ladder so the existing
    MAX_NETWORK_RETRIES path can ultimately escalate to fatal-error.

    Returns:
        ``None``. Failures are reported by awaiting
        ``self._handle_polling_network_error`` rather than by raising.
    """
    HEARTBEAT_PROBE_DELAY = 60  # seconds to wait before probing
    PROBE_TIMEOUT = 10  # seconds allowed for the get_me() round trip

    await asyncio.sleep(HEARTBEAT_PROBE_DELAY)

    # The adapter may have gone fatal while we slept; nothing to verify.
    if self.has_fatal_error:
        return

    if not (self._app and self._app.updater and self._app.updater.running):
        logger.warning(
            "[%s] Updater not running %ds after reconnect — treating as wedged",
            self.name, HEARTBEAT_PROBE_DELAY,
        )
        await self._handle_polling_network_error(
            RuntimeError("Updater not running after reconnect heartbeat")
        )
        return

    try:
        await asyncio.wait_for(self._app.bot.get_me(), PROBE_TIMEOUT)
    except Exception as probe_err:
        # Covers both the wait_for timeout and any error from get_me().
        logger.warning(
            "[%s] Polling heartbeat probe failed %ds after reconnect: %s",
            self.name, HEARTBEAT_PROBE_DELAY, probe_err,
        )
        await self._handle_polling_network_error(probe_err)
|
||||
|
||||
async def _handle_polling_conflict(self, error: Exception) -> None:
|
||||
if self.has_fatal_error and self.fatal_error_code == "telegram_polling_conflict":
|
||||
return
|
||||
|
|
|
|||
|
|
@ -206,7 +206,11 @@ class WeComAdapter(BasePlatformAdapter):
|
|||
return False
|
||||
|
||||
try:
|
||||
self._http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(
|
||||
timeout=30.0, follow_redirects=True, limits=platform_httpx_limits(),
|
||||
)
|
||||
await self._open_connection()
|
||||
self._mark_connected()
|
||||
self._listen_task = asyncio.create_task(self._listen_loop())
|
||||
|
|
|
|||
|
|
@ -119,7 +119,9 @@ class WecomCallbackAdapter(BasePlatformAdapter):
|
|||
pass
|
||||
|
||||
try:
|
||||
self._http_client = httpx.AsyncClient(timeout=20.0)
|
||||
# Tighter keepalive so idle CLOSE_WAIT drains promptly (#18451).
|
||||
from gateway.platforms._http_client_limits import platform_httpx_limits
|
||||
self._http_client = httpx.AsyncClient(timeout=20.0, limits=platform_httpx_limits())
|
||||
self._app = web.Application()
|
||||
self._app.router.add_get("/health", self._handle_health)
|
||||
self._app.router.add_get(self._path, self._handle_verify)
|
||||
|
|
|
|||
|
|
@ -2030,7 +2030,9 @@ async def send_weixin_direct(
|
|||
|
||||
live_adapter = _LIVE_ADAPTERS.get(resolved_token)
|
||||
send_session = getattr(live_adapter, '_send_session', None)
|
||||
if live_adapter is not None and send_session is not None and not send_session.closed:
|
||||
if (live_adapter is not None and send_session is not None
|
||||
and not send_session.closed
|
||||
and send_session._loop is asyncio.get_running_loop()):
|
||||
last_result: Optional[SendResult] = None
|
||||
cleaned = live_adapter.format_message(message)
|
||||
if cleaned:
|
||||
|
|
|
|||
|
|
@ -185,6 +185,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
self._bridge_log: Optional[Path] = None
|
||||
self._poll_task: Optional[asyncio.Task] = None
|
||||
self._http_session: Optional["aiohttp.ClientSession"] = None
|
||||
# Set to True by disconnect() before we SIGTERM our child bridge so
|
||||
# _check_managed_bridge_exit() can distinguish an intentional
|
||||
# shutdown-time exit (returncode -15 / -2 / 0) from a real crash.
|
||||
# Without this, every graceful gateway shutdown/restart would log
|
||||
# "Fatal whatsapp adapter error" plus dispatch a fatal-error
|
||||
# notification before the normal "✓ whatsapp disconnected" fires.
|
||||
self._shutting_down: bool = False
|
||||
|
||||
def _whatsapp_require_mention(self) -> bool:
|
||||
configured = self.config.extra.get("require_mention")
|
||||
|
|
@ -555,6 +562,21 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
if returncode is None:
|
||||
return None
|
||||
|
||||
# Planned shutdown: disconnect() sets _shutting_down before it sends
|
||||
# SIGTERM to the bridge, so a returncode of -15 (SIGTERM), -2 (SIGINT),
|
||||
# or 0 (clean exit) at that point is expected, not a crash. Treat it
|
||||
# as informational and skip the fatal-error path.
|
||||
# getattr-with-default keeps tests that construct the adapter via
|
||||
# ``WhatsAppAdapter.__new__`` (bypassing __init__) working without
|
||||
# every _make_adapter() helper having to seed the attribute.
|
||||
if getattr(self, "_shutting_down", False) and returncode in (0, -2, -15):
|
||||
logger.info(
|
||||
"[%s] Bridge exited during shutdown (code %d).",
|
||||
self.name,
|
||||
returncode,
|
||||
)
|
||||
return None
|
||||
|
||||
message = f"WhatsApp bridge process exited unexpectedly (code {returncode})."
|
||||
if not self.has_fatal_error:
|
||||
logger.error("[%s] %s", self.name, message)
|
||||
|
|
@ -565,6 +587,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
|
||||
async def disconnect(self) -> None:
|
||||
"""Stop the WhatsApp bridge and clean up any orphaned processes."""
|
||||
# Flip the shutdown flag BEFORE signalling the child so the exit-check
|
||||
# path (which runs from other tasks like send() and the poll loop)
|
||||
# doesn't race us and report the intentional termination as fatal.
|
||||
self._shutting_down = True
|
||||
if self._bridge_process:
|
||||
try:
|
||||
try:
|
||||
|
|
@ -876,11 +902,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
|||
try:
|
||||
import aiohttp
|
||||
|
||||
await self._http_session.post(
|
||||
# Must wrap in `async with` — a bare `await session.post(...)`
|
||||
# leaves the response object alive until GC, holding its TCP
|
||||
# socket in CLOSE_WAIT. See #18451.
|
||||
async with self._http_session.post(
|
||||
f"http://127.0.0.1:{self._bridge_port}/typing",
|
||||
json={"chatId": chat_id},
|
||||
timeout=aiohttp.ClientTimeout(total=5)
|
||||
)
|
||||
):
|
||||
pass
|
||||
except Exception:
|
||||
pass # Ignore typing indicator failures
|
||||
|
||||
|
|
|
|||
393
gateway/run.py
393
gateway/run.py
|
|
@ -15,6 +15,7 @@ Usage:
|
|||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -282,6 +283,16 @@ def _home_target_env_var(platform_name: str) -> str:
|
|||
)
|
||||
|
||||
|
||||
def _home_thread_env_var(platform_name: str) -> str:
    """Return the optional thread/topic env var for a platform home target.

    The name is the platform's home-target env var (as produced by
    ``_home_target_env_var``) with ``_THREAD_ID`` appended.
    """
    return f"{_home_target_env_var(platform_name)}_THREAD_ID"
|
||||
|
||||
|
||||
def _restart_notification_pending() -> bool:
    """Return True when a /restart completion marker is waiting to be delivered.

    The marker is the file ``.restart_notify.json`` inside the Hermes home
    directory (module-level ``_hermes_home``); only its existence is checked.
    """
    return (_hermes_home / ".restart_notify.json").exists()
|
||||
|
||||
|
||||
_ensure_ssl_certs()
|
||||
|
||||
# Add parent directory to path
|
||||
|
|
@ -406,37 +417,37 @@ if _config_path.exists():
|
|||
os.environ[_env_map["base_url"]] = _base_url
|
||||
if _api_key:
|
||||
os.environ[_env_map["api_key"]] = _api_key
|
||||
# config.yaml is the documented, authoritative source for these
|
||||
# settings — it unconditionally wins over .env values. Previously
|
||||
# the guards below read `if X not in os.environ` and let stale
|
||||
# .env entries (e.g. HERMES_MAX_ITERATIONS=60 written by an old
|
||||
# `hermes setup` run) silently shadow the user's current config.
|
||||
# See PR #18413 / the 60-vs-500 max_turns incident.
|
||||
_agent_cfg = _cfg.get("agent", {})
|
||||
if _agent_cfg and isinstance(_agent_cfg, dict):
|
||||
if "max_turns" in _agent_cfg:
|
||||
os.environ["HERMES_MAX_ITERATIONS"] = str(_agent_cfg["max_turns"])
|
||||
# Bridge agent.gateway_timeout → HERMES_AGENT_TIMEOUT env var.
|
||||
# Env var from .env takes precedence (already in os.environ).
|
||||
if "gateway_timeout" in _agent_cfg and "HERMES_AGENT_TIMEOUT" not in os.environ:
|
||||
if "gateway_timeout" in _agent_cfg:
|
||||
os.environ["HERMES_AGENT_TIMEOUT"] = str(_agent_cfg["gateway_timeout"])
|
||||
if "gateway_timeout_warning" in _agent_cfg and "HERMES_AGENT_TIMEOUT_WARNING" not in os.environ:
|
||||
if "gateway_timeout_warning" in _agent_cfg:
|
||||
os.environ["HERMES_AGENT_TIMEOUT_WARNING"] = str(_agent_cfg["gateway_timeout_warning"])
|
||||
if "gateway_notify_interval" in _agent_cfg and "HERMES_AGENT_NOTIFY_INTERVAL" not in os.environ:
|
||||
if "gateway_notify_interval" in _agent_cfg:
|
||||
os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
|
||||
if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
|
||||
if "restart_drain_timeout" in _agent_cfg:
|
||||
os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
|
||||
if (
|
||||
"gateway_auto_continue_freshness" in _agent_cfg
|
||||
and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ
|
||||
):
|
||||
if "gateway_auto_continue_freshness" in _agent_cfg:
|
||||
os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
|
||||
_agent_cfg["gateway_auto_continue_freshness"]
|
||||
)
|
||||
_display_cfg = _cfg.get("display", {})
|
||||
if _display_cfg and isinstance(_display_cfg, dict):
|
||||
if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
|
||||
if "busy_input_mode" in _display_cfg:
|
||||
os.environ["HERMES_GATEWAY_BUSY_INPUT_MODE"] = str(_display_cfg["busy_input_mode"])
|
||||
if "busy_ack_enabled" in _display_cfg and "HERMES_GATEWAY_BUSY_ACK_ENABLED" not in os.environ:
|
||||
if "busy_ack_enabled" in _display_cfg:
|
||||
os.environ["HERMES_GATEWAY_BUSY_ACK_ENABLED"] = str(_display_cfg["busy_ack_enabled"])
|
||||
# Timezone: bridge config.yaml → HERMES_TIMEZONE env var.
|
||||
# HERMES_TIMEZONE from .env takes precedence (already in os.environ).
|
||||
_tz_cfg = _cfg.get("timezone", "")
|
||||
if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ:
|
||||
if _tz_cfg and isinstance(_tz_cfg, str):
|
||||
os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip()
|
||||
# Security settings
|
||||
_security_cfg = _cfg.get("security", {})
|
||||
|
|
@ -444,8 +455,24 @@ if _config_path.exists():
|
|||
_redact = _security_cfg.get("redact_secrets")
|
||||
if _redact is not None:
|
||||
os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower()
|
||||
except Exception:
|
||||
pass # Non-fatal; gateway can still run with .env values
|
||||
except Exception as _bridge_err:
|
||||
# Previously this was silent (`except Exception: pass`), which
|
||||
# hid partial bridge failures and let .env defaults shadow
|
||||
# config.yaml values — users observed max_turns=500 in config
|
||||
# but a 60-iteration cap in practice. Surface the failure to
|
||||
# stderr so operators see it even though `logger` is not yet
|
||||
# initialized at module-import time (logger is defined further
|
||||
# down this module).
|
||||
print(
|
||||
f" Warning: config.yaml → env bridge failed: "
|
||||
f"{type(_bridge_err).__name__}: {_bridge_err}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" Gateway will fall back to .env values, which may not match "
|
||||
"your current config.yaml. Run `hermes doctor` to investigate.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Apply IPv4 preference if configured (before any HTTP clients are created).
|
||||
try:
|
||||
|
|
@ -490,6 +517,8 @@ from gateway.config import (
|
|||
Platform,
|
||||
_BUILTIN_PLATFORM_VALUES,
|
||||
GatewayConfig,
|
||||
HomeChannel,
|
||||
PlatformConfig,
|
||||
load_gateway_config,
|
||||
)
|
||||
from gateway.session import (
|
||||
|
|
@ -673,11 +702,69 @@ def _is_control_interrupt_message(message: Optional[str]) -> bool:
|
|||
return normalized in _CONTROL_INTERRUPT_MESSAGES
|
||||
|
||||
|
||||
def _skill_slug_from_frontmatter(skill_md: Path) -> tuple[str | None, str | None]:
|
||||
"""Derive the /command slug and declared frontmatter name from a SKILL.md.
|
||||
|
||||
Matches the exact normalization used by
|
||||
:func:`agent.skill_commands.scan_skill_commands` so the slug here is the
|
||||
same string a user types after the leading ``/`` (e.g. a skill with
|
||||
frontmatter ``name: Stable Diffusion Image Generation`` resolves to
|
||||
``stable-diffusion-image-generation`` — NOT the parent directory name,
|
||||
which is commonly shorter/different, e.g. ``stable-diffusion``).
|
||||
|
||||
Using the directory name silently broke :func:`_check_unavailable_skill`
|
||||
for every skill whose directory name drifted from its frontmatter name
|
||||
(19 such skills on a standard install as of 2026-05), causing a generic
|
||||
"unknown command" response where a "disabled — enable with …" or
|
||||
"not installed — install with …" hint was expected.
|
||||
|
||||
Returns ``(slug, declared_name)`` or ``(None, None)`` when the file
|
||||
can't be read or lacks a ``name:`` in its frontmatter.
|
||||
"""
|
||||
try:
|
||||
content = skill_md.read_text(encoding="utf-8", errors="replace")
|
||||
except Exception:
|
||||
return None, None
|
||||
if not content.startswith("---"):
|
||||
return None, None
|
||||
end = content.find("\n---", 3)
|
||||
if end < 0:
|
||||
return None, None
|
||||
declared_name: str | None = None
|
||||
for line in content[3:end].splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("name:"):
|
||||
raw = line.split(":", 1)[1].strip()
|
||||
# Strip YAML quote wrappers if present
|
||||
if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
|
||||
raw = raw[1:-1]
|
||||
declared_name = raw.strip()
|
||||
break
|
||||
if not declared_name:
|
||||
return None, None
|
||||
slug = declared_name.lower().replace(" ", "-").replace("_", "-")
|
||||
# Mirror _SKILL_INVALID_CHARS and _SKILL_MULTI_HYPHEN from skill_commands
|
||||
import re as _re
|
||||
slug = _re.sub(r"[^a-z0-9-]", "", slug)
|
||||
slug = _re.sub(r"-{2,}", "-", slug).strip("-")
|
||||
if not slug:
|
||||
return None, declared_name
|
||||
return slug, declared_name
|
||||
|
||||
|
||||
def _check_unavailable_skill(command_name: str) -> str | None:
|
||||
"""Check if a command matches a known-but-inactive skill.
|
||||
|
||||
Returns a helpful message if the skill exists but is disabled or only
|
||||
available as an optional install. Returns None if no match found.
|
||||
|
||||
The slug for each on-disk skill is derived from its frontmatter ``name:``
|
||||
(via :func:`_skill_slug_from_frontmatter`), NOT from its containing
|
||||
directory name — because the two can differ (e.g. directory
|
||||
``stable-diffusion`` + frontmatter ``Stable Diffusion Image Generation``
|
||||
yields slug ``stable-diffusion-image-generation``). Matching on
|
||||
directory name would miss that slug entirely and fall through to the
|
||||
generic "unknown command" path.
|
||||
"""
|
||||
# Normalize: command uses hyphens, skill names may use hyphens or underscores
|
||||
normalized = command_name.lower().replace("_", "-")
|
||||
|
|
@ -693,8 +780,12 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
|||
for skill_md in skills_dir.rglob("SKILL.md"):
|
||||
if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
|
||||
continue
|
||||
name = skill_md.parent.name.lower().replace("_", "-")
|
||||
if name == normalized and name in disabled:
|
||||
slug, declared_name = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug or not declared_name:
|
||||
continue
|
||||
# disabled is keyed by the declared frontmatter name (what
|
||||
# skills.disabled / skills.platform_disabled store).
|
||||
if slug == normalized and declared_name in disabled:
|
||||
return (
|
||||
f"The **{command_name}** skill is installed but disabled.\n"
|
||||
f"Enable it with: `hermes skills config`"
|
||||
|
|
@ -706,8 +797,10 @@ def _check_unavailable_skill(command_name: str) -> str | None:
|
|||
optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
|
||||
if optional_dir.exists():
|
||||
for skill_md in optional_dir.rglob("SKILL.md"):
|
||||
name = skill_md.parent.name.lower().replace("_", "-")
|
||||
if name == normalized:
|
||||
slug, _declared = _skill_slug_from_frontmatter(skill_md)
|
||||
if not slug:
|
||||
continue
|
||||
if slug == normalized:
|
||||
# Build install path: official/<category>/<name>
|
||||
rel = skill_md.parent.relative_to(optional_dir)
|
||||
parts = list(rel.parts)
|
||||
|
|
@ -2176,15 +2269,13 @@ class GatewayRunner:
|
|||
logger.debug("Failed interrupting agent during shutdown: %s", e)
|
||||
|
||||
async def _notify_active_sessions_of_shutdown(self) -> None:
|
||||
"""Send a notification to every chat with an active agent.
|
||||
"""Send shutdown/restart notifications to active chats and home channels.
|
||||
|
||||
Called at the very start of stop() — adapters are still connected so
|
||||
messages can be delivered. Best-effort: individual send failures are
|
||||
messages can be delivered. Best-effort: individual send failures are
|
||||
logged and swallowed so they never block the shutdown sequence.
|
||||
"""
|
||||
active = self._snapshot_running_agents()
|
||||
if not active:
|
||||
return
|
||||
|
||||
action = "restarting" if self._restart_requested else "shutting down"
|
||||
hint = (
|
||||
|
|
@ -2195,7 +2286,7 @@ class GatewayRunner:
|
|||
)
|
||||
msg = f"⚠️ Gateway {action} — {hint}"
|
||||
|
||||
notified: set = set()
|
||||
notified: set[tuple[str, str, Optional[str]]] = set()
|
||||
for session_key in active:
|
||||
source = None
|
||||
try:
|
||||
|
|
@ -2212,7 +2303,7 @@ class GatewayRunner:
|
|||
|
||||
if source is not None:
|
||||
platform_str = source.platform.value
|
||||
chat_id = source.chat_id
|
||||
chat_id = str(source.chat_id)
|
||||
thread_id = source.thread_id
|
||||
else:
|
||||
# Fall back to parsing the session key when no persisted
|
||||
|
|
@ -2224,9 +2315,10 @@ class GatewayRunner:
|
|||
chat_id = _parsed["chat_id"]
|
||||
thread_id = _parsed.get("thread_id")
|
||||
|
||||
# Deduplicate: one notification per chat, even if multiple
|
||||
# sessions (different users/threads) share the same chat.
|
||||
dedup_key = (platform_str, chat_id)
|
||||
# Deduplicate only identical delivery targets. Thread/topic-aware
|
||||
# platforms can share a parent chat while still routing to distinct
|
||||
# destinations via metadata.
|
||||
dedup_key = (platform_str, chat_id, str(thread_id) if thread_id else None)
|
||||
if dedup_key in notified:
|
||||
continue
|
||||
|
||||
|
|
@ -2240,10 +2332,19 @@ class GatewayRunner:
|
|||
# correct forum topic / thread.
|
||||
metadata = {"thread_id": thread_id} if thread_id else None
|
||||
|
||||
await adapter.send(chat_id, msg, metadata=metadata)
|
||||
result = await adapter.send(chat_id, msg, metadata=metadata)
|
||||
if result is not None and getattr(result, "success", True) is False:
|
||||
logger.debug(
|
||||
"Failed to send shutdown notification to %s:%s: %s",
|
||||
platform_str,
|
||||
chat_id,
|
||||
getattr(result, "error", "send returned success=False"),
|
||||
)
|
||||
continue
|
||||
|
||||
notified.add(dedup_key)
|
||||
logger.info(
|
||||
"Sent shutdown notification to %s:%s",
|
||||
"Sent shutdown notification to active chat %s:%s",
|
||||
platform_str, chat_id,
|
||||
)
|
||||
except Exception as e:
|
||||
|
|
@ -2252,6 +2353,44 @@ class GatewayRunner:
|
|||
platform_str, chat_id, e,
|
||||
)
|
||||
|
||||
for platform, adapter in self.adapters.items():
|
||||
home = self.config.get_home_channel(platform)
|
||||
if not home or not home.chat_id:
|
||||
continue
|
||||
|
||||
dedup_key = (platform.value, str(home.chat_id), str(home.thread_id) if home.thread_id else None)
|
||||
if dedup_key in notified:
|
||||
continue
|
||||
|
||||
try:
|
||||
metadata = {"thread_id": home.thread_id} if home.thread_id else None
|
||||
if metadata:
|
||||
result = await adapter.send(str(home.chat_id), msg, metadata=metadata)
|
||||
else:
|
||||
result = await adapter.send(str(home.chat_id), msg)
|
||||
if result is not None and getattr(result, "success", True) is False:
|
||||
logger.debug(
|
||||
"Failed to send shutdown notification to home channel %s:%s: %s",
|
||||
platform.value,
|
||||
home.chat_id,
|
||||
getattr(result, "error", "send returned success=False"),
|
||||
)
|
||||
continue
|
||||
|
||||
notified.add(dedup_key)
|
||||
logger.info(
|
||||
"Sent shutdown notification to home channel %s:%s",
|
||||
platform.value,
|
||||
home.chat_id,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Failed to send shutdown notification to home channel %s:%s: %s",
|
||||
platform.value,
|
||||
home.chat_id,
|
||||
e,
|
||||
)
|
||||
|
||||
def _finalize_shutdown_agents(self, active_agents: Dict[str, Any]) -> None:
|
||||
for agent in active_agents.values():
|
||||
try:
|
||||
|
|
@ -2519,6 +2658,18 @@ class GatewayRunner:
|
|||
"""
|
||||
logger.info("Starting Hermes Gateway...")
|
||||
logger.info("Session storage: %s", self.config.sessions_dir)
|
||||
# Log the resolved max_iterations budget so operators can verify the
|
||||
# config.yaml → env bridge did the right thing at a glance (instead
|
||||
# of silently running at a stale .env value for weeks).
|
||||
try:
|
||||
_effective_max_iter = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||||
logger.info(
|
||||
"Agent budget: max_iterations=%d (agent.max_turns from config.yaml, "
|
||||
"or HERMES_MAX_ITERATIONS from .env, or default 90)",
|
||||
_effective_max_iter,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.profiles import get_active_profile_name
|
||||
_profile = get_active_profile_name()
|
||||
|
|
@ -2662,7 +2813,7 @@ class GatewayRunner:
|
|||
try:
|
||||
suspended = self.session_store.suspend_recently_active()
|
||||
if suspended:
|
||||
logger.info("Suspended %d in-flight session(s) from previous run", suspended)
|
||||
logger.info("Marked %d in-flight session(s) as resumable from previous run", suspended)
|
||||
except Exception as e:
|
||||
logger.warning("Session suspension on startup failed: %s", e)
|
||||
|
||||
|
|
@ -2860,8 +3011,28 @@ class GatewayRunner:
|
|||
):
|
||||
self._schedule_update_notification_watch()
|
||||
|
||||
# Give freshly connected platform adapters a brief moment to settle
|
||||
# before sending restart/startup lifecycle messages. In practice this
|
||||
# helps Discord thread deliveries right after reconnect.
|
||||
if connected_count > 0:
|
||||
await asyncio.sleep(1.0)
|
||||
|
||||
# Notify the chat that initiated /restart that the gateway is back.
|
||||
await self._send_restart_notification()
|
||||
restart_notification_pending = _restart_notification_pending()
|
||||
delivered_restart_target = await self._send_restart_notification()
|
||||
|
||||
# Broadcast a lightweight "gateway is back" message to configured
|
||||
# home channels only when this startup is resuming from /restart. If a
|
||||
# /restart requester already received a direct completion notice in the
|
||||
# same chat, skip the generic broadcast there to avoid duplicates while
|
||||
# still allowing a home-channel fallback when the direct send fails.
|
||||
if restart_notification_pending or delivered_restart_target is not None:
|
||||
skip_home_targets = (
|
||||
{delivered_restart_target} if delivered_restart_target else None
|
||||
)
|
||||
await self._send_home_channel_startup_notifications(
|
||||
skip_targets=skip_home_targets,
|
||||
)
|
||||
|
||||
# Drain any recovered process watchers (from crash recovery checkpoint)
|
||||
try:
|
||||
|
|
@ -3889,7 +4060,9 @@ class GatewayRunner:
|
|||
if not check_discord_requirements():
|
||||
logger.warning("Discord: discord.py not installed")
|
||||
return None
|
||||
return DiscordAdapter(config)
|
||||
adapter = DiscordAdapter(config)
|
||||
adapter.gateway_runner = self # For cross-platform admin alerts on unauthorized slash
|
||||
return adapter
|
||||
|
||||
elif platform == Platform.WHATSAPP:
|
||||
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
|
||||
|
|
@ -7792,24 +7965,33 @@ class GatewayRunner:
|
|||
msg = decision.get("message") or ""
|
||||
|
||||
# Send the status line back to the user so they see the judge's
|
||||
# verdict. Fire-and-forget via the adapter.
|
||||
# verdict. Fire-and-forget via the adapter's ``send()`` method —
|
||||
# adapters expose ``send(chat_id, content, reply_to, metadata)``,
|
||||
# not a ``send_message(source, msg)`` wrapper, so an earlier
|
||||
# ``hasattr(adapter, "send_message")`` gate here was dead code and
|
||||
# users never saw ``✓ Goal achieved`` / ``⏸ budget exhausted``
|
||||
# verdicts.
|
||||
if msg and source is not None:
|
||||
try:
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter and hasattr(adapter, "send_message"):
|
||||
if adapter is not None and hasattr(adapter, "send"):
|
||||
import asyncio as _asyncio
|
||||
coro = adapter.send_message(source, msg)
|
||||
thread_meta = (
|
||||
{"thread_id": source.thread_id} if source.thread_id else None
|
||||
)
|
||||
coro = adapter.send(
|
||||
chat_id=source.chat_id,
|
||||
content=msg,
|
||||
metadata=thread_meta,
|
||||
)
|
||||
if _asyncio.iscoroutine(coro):
|
||||
try:
|
||||
loop = _asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
loop.create_task(coro)
|
||||
else:
|
||||
loop.run_until_complete(coro)
|
||||
loop = _asyncio.get_running_loop()
|
||||
loop.create_task(coro)
|
||||
except RuntimeError:
|
||||
# No event loop in this thread — schedule on the main one.
|
||||
# No running loop in this thread — best effort.
|
||||
try:
|
||||
_asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||
_asyncio.run(coro)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
|
|
@ -7872,14 +8054,33 @@ class GatewayRunner:
|
|||
chat_name = source.chat_name or chat_id
|
||||
|
||||
env_key = _home_target_env_var(platform_name)
|
||||
thread_env_key = _home_thread_env_var(platform_name)
|
||||
thread_id = source.thread_id
|
||||
|
||||
# Save to .env so it persists across restarts
|
||||
try:
|
||||
from hermes_cli.config import save_env_value
|
||||
save_env_value(env_key, str(chat_id))
|
||||
# Keep thread/topic routing explicit and clear stale values when
|
||||
# /sethome is run from the parent chat instead of a thread.
|
||||
save_env_value(thread_env_key, str(thread_id or ""))
|
||||
except Exception as e:
|
||||
return f"Failed to save home channel: {e}"
|
||||
|
||||
# Keep the running gateway config in sync too. The pre-restart
|
||||
# notification path reads self.config before the process reloads env.
|
||||
if source.platform:
|
||||
platform_config = self.config.platforms.setdefault(
|
||||
source.platform,
|
||||
PlatformConfig(enabled=True),
|
||||
)
|
||||
platform_config.home_channel = HomeChannel(
|
||||
platform=source.platform,
|
||||
chat_id=str(chat_id),
|
||||
name=chat_name,
|
||||
thread_id=str(thread_id) if thread_id else None,
|
||||
)
|
||||
|
||||
return (
|
||||
f"✅ Home channel set to **{chat_name}** (ID: {chat_id}).\n"
|
||||
f"Cron jobs and cross-platform messages will be delivered here."
|
||||
|
|
@ -9623,6 +9824,28 @@ class GatewayRunner:
|
|||
removed = result.get("removed", []) # [{"name", "description"}, ...]
|
||||
total = result.get("total", 0)
|
||||
|
||||
# Let each connected adapter refresh any platform-side state
|
||||
# that cached the skill list at startup. Today that's the
|
||||
# Discord /skill autocomplete (registered once per connect);
|
||||
# without this call, new skills stay invisible in the
|
||||
# dropdown and deleted skills error out when clicked. Other
|
||||
# adapters that don't override refresh_skill_group (Telegram's
|
||||
# BotCommand menu, Slack subcommand map, etc.) are silently
|
||||
# skipped — the in-process reload above is enough for them.
|
||||
for adapter in list(self.adapters.values()):
|
||||
refresh = getattr(adapter, "refresh_skill_group", None)
|
||||
if not callable(refresh):
|
||||
continue
|
||||
try:
|
||||
maybe = refresh()
|
||||
if inspect.isawaitable(maybe):
|
||||
await maybe
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Adapter %s refresh_skill_group raised: %s",
|
||||
getattr(adapter, "name", adapter), exc,
|
||||
)
|
||||
|
||||
lines = ["🔄 **Skills Reloaded**\n"]
|
||||
if not added and not removed:
|
||||
lines.append("No new skills detected.")
|
||||
|
|
@ -10341,11 +10564,11 @@ class GatewayRunner:
|
|||
|
||||
return True
|
||||
|
||||
async def _send_restart_notification(self) -> None:
|
||||
async def _send_restart_notification(self) -> Optional[tuple[str, str, Optional[str]]]:
|
||||
"""Notify the chat that initiated /restart that the gateway is back."""
|
||||
notify_path = _hermes_home / ".restart_notify.json"
|
||||
if not notify_path.exists():
|
||||
return
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(notify_path.read_text())
|
||||
|
|
@ -10354,7 +10577,7 @@ class GatewayRunner:
|
|||
thread_id = data.get("thread_id")
|
||||
|
||||
if not platform_str or not chat_id:
|
||||
return
|
||||
return None
|
||||
|
||||
platform = Platform(platform_str)
|
||||
adapter = self.adapters.get(platform)
|
||||
|
|
@ -10363,24 +10586,94 @@ class GatewayRunner:
|
|||
"Restart notification skipped: %s adapter not connected",
|
||||
platform_str,
|
||||
)
|
||||
return
|
||||
return None
|
||||
|
||||
metadata = {"thread_id": thread_id} if thread_id else None
|
||||
await adapter.send(
|
||||
chat_id,
|
||||
result = await adapter.send(
|
||||
str(chat_id),
|
||||
"♻ Gateway restarted successfully. Your session continues.",
|
||||
metadata=metadata,
|
||||
)
|
||||
# adapter.send() catches provider errors (e.g. "Chat not found")
|
||||
# and returns SendResult(success=False) rather than raising, so
|
||||
# we must inspect the result before claiming success — otherwise
|
||||
# the log line is misleading and hides real delivery failures.
|
||||
if result is not None and getattr(result, "success", True) is False:
|
||||
logger.warning(
|
||||
"Restart notification to %s:%s was not delivered: %s",
|
||||
platform_str,
|
||||
chat_id,
|
||||
getattr(result, "error", "send returned success=False"),
|
||||
)
|
||||
return None
|
||||
|
||||
logger.info(
|
||||
"Sent restart notification to %s:%s",
|
||||
platform_str,
|
||||
chat_id,
|
||||
)
|
||||
return str(platform_str), str(chat_id), str(thread_id) if thread_id else None
|
||||
except Exception as e:
|
||||
logger.warning("Restart notification failed: %s", e)
|
||||
return None
|
||||
finally:
|
||||
notify_path.unlink(missing_ok=True)
|
||||
|
||||
async def _send_home_channel_startup_notifications(
    self,
    *,
    skip_targets: Optional[set[tuple[str, str, Optional[str]]]] = None,
) -> set[tuple[str, str, Optional[str]]]:
    """Notify configured home channels that the gateway is back online.

    The notification is best-effort and sent at most once per connected
    platform home channel: a failed or rejected send is logged as a
    warning and never raised to the caller.

    Args:
        skip_targets: ``(platform, chat_id, thread_id)`` targets that must
            not be messaged, so startup can avoid a duplicate when a more
            specific restart notification already covers the same chat.
            ``None`` (or an empty set) skips nothing.

    Returns:
        The ``(platform, chat_id, thread_id)`` targets whose send completed
        without reporting ``success=False``.
    """
    delivered: set[tuple[str, str, Optional[str]]] = set()
    skipped = skip_targets or set()
    message = "♻️ Gateway online — Hermes is back and ready."

    # Iterate over a snapshot. Every send below awaits, and if the adapter
    # map is mutated while we are suspended the live dict iterator raises
    # RuntimeError *outside* the per-target try block, which would abort
    # the remaining notifications and propagate to the caller.
    for platform, adapter in list(self.adapters.items()):
        home = self.config.get_home_channel(platform)
        if not home or not home.chat_id:
            continue

        target = (
            platform.value,
            str(home.chat_id),
            str(home.thread_id) if home.thread_id else None,
        )
        if target in skipped or target in delivered:
            continue

        try:
            # Only pass ``metadata`` when there is a thread to route to;
            # the parent-chat case keeps the plain two-argument call.
            metadata = {"thread_id": home.thread_id} if home.thread_id else None
            if metadata:
                result = await adapter.send(str(home.chat_id), message, metadata=metadata)
            else:
                result = await adapter.send(str(home.chat_id), message)

            # A send may report failure through its result object instead
            # of raising, so inspect it before recording the delivery.
            if result is not None and getattr(result, "success", True) is False:
                logger.warning(
                    "Home-channel startup notification failed for %s:%s: %s",
                    platform.value,
                    home.chat_id,
                    getattr(result, "error", "send returned success=False"),
                )
                continue

            delivered.add(target)
            logger.info(
                "Sent home-channel startup notification to %s:%s",
                platform.value,
                home.chat_id,
            )
        except Exception as exc:
            logger.warning(
                "Home-channel startup notification failed for %s:%s: %s",
                platform.value,
                home.chat_id,
                exc,
            )

    return delivered
|
||||
|
||||
def _set_session_env(self, context: SessionContext) -> list:
|
||||
"""Set session context variables for the current async task.
|
||||
|
||||
|
|
|
|||
|
|
@ -1086,19 +1086,22 @@ class SessionStore:
|
|||
return len(removed_keys)
|
||||
|
||||
def suspend_recently_active(self, max_age_seconds: int = 120) -> int:
|
||||
"""Mark recently-active sessions as suspended.
|
||||
"""Mark recently-active sessions as resumable after an unexpected exit.
|
||||
|
||||
Called on gateway startup to prevent sessions that were likely
|
||||
in-flight when the gateway last exited from being blindly resumed
|
||||
(#7536). Only suspends sessions updated within *max_age_seconds*
|
||||
to avoid resetting long-idle sessions that are harmless to resume.
|
||||
Returns the number of sessions that were suspended.
|
||||
Called on gateway startup after a crash or fast restart to preserve
|
||||
in-flight sessions instead of destroying their conversation history
|
||||
(#7536). Only marks sessions updated within *max_age_seconds* to
|
||||
avoid touching long-idle sessions. Sets ``resume_pending=True`` so
|
||||
the next incoming message on the same session_key auto-resumes from
|
||||
the existing transcript.
|
||||
|
||||
Entries flagged ``resume_pending=True`` are skipped — those were
|
||||
marked intentionally by the drain-timeout path as recoverable.
|
||||
Terminal escalation for genuinely stuck ``resume_pending`` sessions
|
||||
is handled by the existing ``.restart_failure_counts`` stuck-loop
|
||||
counter, which runs after this method on startup.
|
||||
Entries already flagged ``resume_pending=True`` are skipped. Entries
|
||||
explicitly ``suspended=True`` (from /stop or stuck-loop escalation)
|
||||
are also skipped. Terminal escalation for genuinely stuck sessions
|
||||
is still handled by the existing ``.restart_failure_counts`` counter
|
||||
(threshold 3), which runs after this method and sets ``suspended=True``.
|
||||
|
||||
Returns the number of sessions marked resumable.
|
||||
"""
|
||||
from datetime import timedelta
|
||||
|
||||
|
|
@ -1110,7 +1113,9 @@ class SessionStore:
|
|||
if entry.resume_pending:
|
||||
continue
|
||||
if not entry.suspended and entry.updated_at >= cutoff:
|
||||
entry.suspended = True
|
||||
entry.resume_pending = True
|
||||
entry.resume_reason = "restart_interrupted"
|
||||
entry.last_resume_marked_at = _now()
|
||||
count += 1
|
||||
if count:
|
||||
self._save()
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ To add an alias: set ``aliases=("short",)`` on the existing ``CommandDef``.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
|
|
@ -21,6 +22,8 @@ from typing import Any
|
|||
|
||||
from utils import is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# prompt_toolkit is an optional CLI dependency — only needed for
|
||||
# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test
|
||||
# environments that lack it must still be able to import this module
|
||||
|
|
@ -499,9 +502,9 @@ def _sanitize_telegram_name(raw: str) -> str:
|
|||
|
||||
|
||||
def _clamp_command_names(
|
||||
entries: list[tuple[str, str]],
|
||||
entries: list[tuple[str, ...]],
|
||||
reserved: set[str],
|
||||
) -> list[tuple[str, str]]:
|
||||
) -> list[tuple[str, ...]]:
|
||||
"""Enforce 32-char command name limit with collision avoidance.
|
||||
|
||||
Both Telegram and Discord cap slash command names at 32 characters.
|
||||
|
|
@ -509,10 +512,15 @@ def _clamp_command_names(
|
|||
(against *reserved* names or earlier entries in the same batch), the name is
|
||||
shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
|
||||
If all 10 digit slots are taken the entry is silently dropped.
|
||||
|
||||
Accepts tuples of any length >= 2. Extra elements beyond ``(name, desc)``
|
||||
(e.g. ``cmd_key``) are passed through unchanged, so callers can attach
|
||||
metadata that survives the rename.
|
||||
"""
|
||||
used: set[str] = set(reserved)
|
||||
result: list[tuple[str, str]] = []
|
||||
for name, desc in entries:
|
||||
result: list[tuple] = []
|
||||
for entry in entries:
|
||||
name, desc, *extra = entry
|
||||
if len(name) > _CMD_NAME_LIMIT:
|
||||
candidate = name[:_CMD_NAME_LIMIT]
|
||||
if candidate in used:
|
||||
|
|
@ -528,7 +536,7 @@ def _clamp_command_names(
|
|||
if name in used:
|
||||
continue
|
||||
used.add(name)
|
||||
result.append((name, desc))
|
||||
result.append((name, desc, *extra))
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -611,13 +619,26 @@ def _collect_gateway_skill_entries(
|
|||
try:
|
||||
from agent.skill_commands import get_skill_commands
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
_skills_dir = str(SKILLS_DIR.resolve())
|
||||
_hub_dir = str((SKILLS_DIR / ".hub").resolve())
|
||||
_hub_dir = str((SKILLS_DIR / ".hub").resolve()).rstrip("/") + "/"
|
||||
# Build set of allowed directory prefixes: local skills dir + any
|
||||
# user-configured ``skills.external_dirs``. Ensure each prefix ends
|
||||
# with ``/`` so ``/my-skills`` does not also match ``/my-skills-extra``.
|
||||
# Without this widening, external skills are visible in
|
||||
# ``hermes skills list`` and the agent's ``/skill-name`` dispatch but
|
||||
# silently excluded from gateway slash menus (#8110).
|
||||
_allowed_prefixes = [_skills_dir.rstrip("/") + "/"]
|
||||
_allowed_prefixes.extend(
|
||||
str(d).rstrip("/") + "/" for d in get_external_skills_dirs()
|
||||
)
|
||||
skill_cmds = get_skill_commands()
|
||||
for cmd_key in sorted(skill_cmds):
|
||||
info = skill_cmds[cmd_key]
|
||||
skill_path = info.get("skill_md_path", "")
|
||||
if not skill_path.startswith(_skills_dir):
|
||||
if not skill_path:
|
||||
continue
|
||||
if not any(skill_path.startswith(prefix) for prefix in _allowed_prefixes):
|
||||
continue
|
||||
if skill_path.startswith(_hub_dir):
|
||||
continue
|
||||
|
|
@ -635,17 +656,15 @@ def _collect_gateway_skill_entries(
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Clamp names; _clamp_command_names works on (name, desc) pairs so we
|
||||
# need to zip/unzip.
|
||||
skill_pairs = [(n, d) for n, d, _ in skill_triples]
|
||||
key_by_pair = {(n, d): k for n, d, k in skill_triples}
|
||||
skill_pairs = _clamp_command_names(skill_pairs, reserved_names)
|
||||
# Clamp names; cmd_key is passed through as extra payload so it survives
|
||||
# any clamp-induced renames.
|
||||
skill_triples = _clamp_command_names(skill_triples, reserved_names)
|
||||
|
||||
# Skills fill remaining slots — only tier that gets trimmed
|
||||
remaining = max(0, max_slots - len(all_entries))
|
||||
hidden_count = max(0, len(skill_pairs) - remaining)
|
||||
for n, d in skill_pairs[:remaining]:
|
||||
all_entries.append((n, d, key_by_pair.get((n, d), "")))
|
||||
hidden_count = max(0, len(skill_triples) - remaining)
|
||||
for n, d, k in skill_triples[:remaining]:
|
||||
all_entries.append((n, d, k))
|
||||
|
||||
return all_entries[:max_slots], hidden_count
|
||||
|
||||
|
|
@ -721,24 +740,40 @@ def discord_skill_commands(
|
|||
def discord_skill_commands_by_category(
|
||||
reserved_names: set[str],
|
||||
) -> tuple[dict[str, list[tuple[str, str, str]]], list[tuple[str, str, str]], int]:
|
||||
"""Return skill entries organized by category for Discord ``/skill`` subcommand groups.
|
||||
"""Return skill entries organized by category for Discord ``/skill`` autocomplete.
|
||||
|
||||
Skills whose directory is nested at least 2 levels under ``SKILLS_DIR``
|
||||
Skills whose directory is nested at least 2 levels under a scan root
|
||||
(e.g. ``creative/ascii-art/SKILL.md``) are grouped by their top-level
|
||||
category. Root-level skills (e.g. ``dogfood/SKILL.md``) are returned as
|
||||
*uncategorized* — the caller should register them as direct subcommands
|
||||
of the ``/skill`` group.
|
||||
*uncategorized*.
|
||||
|
||||
The same filtering as :func:`discord_skill_commands` is applied: hub
|
||||
skills excluded, per-platform disabled excluded, names clamped.
|
||||
Scan roots include the local ``SKILLS_DIR`` **and** any configured
|
||||
``skills.external_dirs`` — matching the widened filter applied to the
|
||||
flat ``discord_skill_commands()`` collector in #18741. Without this
|
||||
parity, external-dir skills are visible via ``hermes skills list`` and
|
||||
the agent's ``/skill-name`` dispatch but silently absent from Discord's
|
||||
``/skill`` autocomplete.
|
||||
|
||||
Filtering mirrors :func:`discord_skill_commands`: hub skills excluded,
|
||||
per-platform disabled excluded, names clamped to 32 chars, descriptions
|
||||
clamped to 100 chars.
|
||||
|
||||
The legacy 25-group × 25-subcommand caps (from the old nested
|
||||
``/skill <cat> <name>`` layout) are **not** applied — the live caller
|
||||
(``_register_skill_group`` in ``gateway/platforms/discord.py``, refactored
|
||||
in PR #11580) flattens these results and feeds them into a single
|
||||
autocomplete callback, which scales to thousands of entries without any
|
||||
per-command payload concerns. ``hidden_count`` is retained in the return
|
||||
tuple for backward compatibility and still reports skills dropped for
|
||||
other reasons (32-char clamp collision vs a reserved name).
|
||||
|
||||
Returns:
|
||||
``(categories, uncategorized, hidden_count)``
|
||||
|
||||
- *categories*: ``{category_name: [(name, description, cmd_key), ...]}``
|
||||
- *uncategorized*: ``[(name, description, cmd_key), ...]``
|
||||
- *hidden_count*: skills dropped due to Discord group limits
|
||||
(25 subcommand groups, 25 subcommands per group)
|
||||
- *hidden_count*: skills dropped due to name clamp collisions
|
||||
against already-registered command names.
|
||||
"""
|
||||
from pathlib import Path as _P
|
||||
|
||||
|
|
@ -752,14 +787,33 @@ def discord_skill_commands_by_category(
|
|||
# Collect raw skill data --------------------------------------------------
|
||||
categories: dict[str, list[tuple[str, str, str]]] = {}
|
||||
uncategorized: list[tuple[str, str, str]] = []
|
||||
_names_used: set[str] = set(reserved_names)
|
||||
# Map clamped-32-char-name → what it came from, so we can emit an
|
||||
# actionable warning on collision. Reserved (gateway-builtin) command
|
||||
# names are marked with a sentinel so the warning distinguishes
|
||||
# "skill collided with a reserved command" from "two skills collided
|
||||
# on the 32-char clamp" — the latter is the rename-worthy case.
|
||||
_names_used: dict[str, str] = {n: "<reserved>" for n in reserved_names}
|
||||
hidden = 0
|
||||
|
||||
try:
|
||||
from agent.skill_commands import get_skill_commands
|
||||
from agent.skill_utils import get_external_skills_dirs
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
|
||||
_skills_dir = SKILLS_DIR.resolve()
|
||||
_hub_dir = (SKILLS_DIR / ".hub").resolve()
|
||||
# Build list of (resolved_root, is_local) tuples. Each external dir
|
||||
# becomes its own scan root for category derivation — a skill at
|
||||
# ``<external>/mlops/foo/SKILL.md`` is still categorized as "mlops".
|
||||
_scan_roots: list[_P] = [_skills_dir]
|
||||
try:
|
||||
for ext in get_external_skills_dirs():
|
||||
try:
|
||||
_scan_roots.append(_P(ext).resolve())
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
skill_cmds = get_skill_commands()
|
||||
|
||||
for cmd_key in sorted(skill_cmds):
|
||||
|
|
@ -768,33 +822,72 @@ def discord_skill_commands_by_category(
|
|||
if not skill_path:
|
||||
continue
|
||||
sp = _P(skill_path).resolve()
|
||||
# Skip skills outside SKILLS_DIR or from the hub
|
||||
if not str(sp).startswith(str(_skills_dir)):
|
||||
continue
|
||||
# Hub skills are loaded via the skill hub, not surfaced as
|
||||
# slash commands.
|
||||
if str(sp).startswith(str(_hub_dir)):
|
||||
continue
|
||||
# Accept skill if it lives under any scan root; record the
|
||||
# matching root so we can derive the category correctly.
|
||||
matched_root: _P | None = None
|
||||
for root in _scan_roots:
|
||||
try:
|
||||
sp.relative_to(root)
|
||||
except ValueError:
|
||||
continue
|
||||
matched_root = root
|
||||
break
|
||||
if matched_root is None:
|
||||
continue
|
||||
|
||||
skill_name = info.get("name", "")
|
||||
if skill_name in _platform_disabled:
|
||||
continue
|
||||
|
||||
raw_name = cmd_key.lstrip("/")
|
||||
# Clamp to 32 chars (Discord limit)
|
||||
# Clamp to 32 chars (Discord per-command name limit)
|
||||
discord_name = raw_name[:32]
|
||||
if discord_name in _names_used:
|
||||
# Two skills whose first 32 chars are identical. One wins
|
||||
# (the first one seen, which is alphabetical because the
|
||||
# caller iterates ``sorted(skill_cmds)``); the other is
|
||||
# dropped from Discord's /skill autocomplete.
|
||||
#
|
||||
# Silently counting this as ``hidden`` (the old behavior)
|
||||
# meant skill authors had no way to discover the drop —
|
||||
# their skill just didn't appear in the picker. Emit a
|
||||
# WARNING naming both sides so the author can rename the
|
||||
# losing skill's frontmatter name to something with a
|
||||
# distinct 32-char prefix.
|
||||
prior = _names_used[discord_name]
|
||||
if prior == "<reserved>":
|
||||
logger.warning(
|
||||
"Discord /skill: %r (from %r) collides on its 32-char "
|
||||
"clamp with a reserved gateway command name %r — the "
|
||||
"skill will not appear in the /skill autocomplete. "
|
||||
"Rename the skill's frontmatter ``name:`` to differ "
|
||||
"in its first 32 chars.",
|
||||
discord_name, cmd_key, discord_name,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Discord /skill: %r and %r both clamp to %r on "
|
||||
"Discord's 32-char command-name limit — only %r "
|
||||
"will appear in the /skill autocomplete. Rename "
|
||||
"one skill's frontmatter ``name:`` to differ in "
|
||||
"its first 32 chars.",
|
||||
prior, cmd_key, discord_name, prior,
|
||||
)
|
||||
hidden += 1
|
||||
continue
|
||||
_names_used.add(discord_name)
|
||||
_names_used[discord_name] = cmd_key
|
||||
|
||||
desc = info.get("description", "")
|
||||
if len(desc) > 100:
|
||||
desc = desc[:97] + "..."
|
||||
|
||||
# Determine category from the relative path within SKILLS_DIR.
|
||||
# e.g. creative/ascii-art/SKILL.md → parts = ("creative", "ascii-art")
|
||||
try:
|
||||
rel = sp.parent.relative_to(_skills_dir)
|
||||
except ValueError:
|
||||
continue
|
||||
# Determine category from the relative path within the matched
|
||||
# scan root. e.g. creative/ascii-art/SKILL.md → ("creative", ...)
|
||||
rel = sp.parent.relative_to(matched_root)
|
||||
parts = rel.parts
|
||||
if len(parts) >= 2:
|
||||
cat = parts[0]
|
||||
|
|
@ -804,28 +897,7 @@ def discord_skill_commands_by_category(
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Enforce Discord limits: 25 subcommand groups, 25 subcommands each ------
|
||||
_MAX_GROUPS = 25
|
||||
_MAX_PER_GROUP = 25
|
||||
|
||||
trimmed_categories: dict[str, list[tuple[str, str, str]]] = {}
|
||||
group_count = 0
|
||||
for cat in sorted(categories):
|
||||
if group_count >= _MAX_GROUPS:
|
||||
hidden += len(categories[cat])
|
||||
continue
|
||||
entries = categories[cat][:_MAX_PER_GROUP]
|
||||
hidden += max(0, len(categories[cat]) - _MAX_PER_GROUP)
|
||||
trimmed_categories[cat] = entries
|
||||
group_count += 1
|
||||
|
||||
# Uncategorized skills also count against the 25 top-level limit
|
||||
remaining_slots = _MAX_GROUPS - group_count
|
||||
if len(uncategorized) > remaining_slots:
|
||||
hidden += len(uncategorized) - remaining_slots
|
||||
uncategorized = uncategorized[:remaining_slots]
|
||||
|
||||
return trimmed_categories, uncategorized, hidden
|
||||
return categories, uncategorized, hidden
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -400,7 +400,12 @@ DEFAULT_CONFIG = {
|
|||
# The gateway stops accepting new work, waits for running agents
|
||||
# to finish, then interrupts any remaining runs after the timeout.
|
||||
# 0 = no drain, interrupt immediately.
|
||||
"restart_drain_timeout": 60,
|
||||
#
|
||||
# 180s is calibrated for realistic in-flight agent turns: a typical
|
||||
# coding conversation mid-reasoning runs 60–150s per call, so a 60s
|
||||
# budget routinely interrupted legitimate work on /restart. Raise
|
||||
# further in config.yaml if you run very-long-reasoning models.
|
||||
"restart_drain_timeout": 180,
|
||||
# Max app-level retry attempts for API errors (connection drops,
|
||||
# provider timeouts, 5xx, etc.) before the agent surfaces the
|
||||
# failure. The OpenAI SDK already does its own low-level retries
|
||||
|
|
@ -639,6 +644,18 @@ DEFAULT_CONFIG = {
|
|||
"cache_ttl": "5m",
|
||||
},
|
||||
|
||||
# OpenRouter-specific settings.
|
||||
# response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header).
|
||||
# When enabled, identical requests return cached responses for free (zero billing).
|
||||
# This is separate from Anthropic prompt caching and works alongside it.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
# response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
|
||||
# Default 300 (5 minutes). Only used when response_cache is enabled.
|
||||
"openrouter": {
|
||||
"response_cache": True,
|
||||
"response_cache_ttl": 300,
|
||||
},
|
||||
|
||||
# AWS Bedrock provider configuration.
|
||||
# Only used when model.provider is "bedrock".
|
||||
"bedrock": {
|
||||
|
|
@ -825,7 +842,7 @@ DEFAULT_CONFIG = {
|
|||
# Voices: alloy, echo, fable, onyx, nova, shimmer
|
||||
},
|
||||
"xai": {
|
||||
"voice_id": "eve",
|
||||
"voice_id": "eve", # or custom voice ID — see https://docs.x.ai/developers/model-capabilities/audio/custom-voices
|
||||
"language": "en",
|
||||
"sample_rate": 24000,
|
||||
"bit_rate": 128000,
|
||||
|
|
|
|||
|
|
@ -302,9 +302,21 @@ def _cmd_rollback(args) -> int:
|
|||
print(f" reason: {manifest.get('reason', '?')}")
|
||||
print(f" created_at: {manifest.get('created_at', '?')}")
|
||||
print(f" skill files: {manifest.get('skill_files', '?')}")
|
||||
cron = manifest.get("cron_jobs") or {}
|
||||
if isinstance(cron, dict):
|
||||
if cron.get("backed_up"):
|
||||
print(
|
||||
f" cron jobs: {cron.get('jobs_count', 0)} "
|
||||
f"(will be restored for skill-link fields only)"
|
||||
)
|
||||
else:
|
||||
reason = cron.get("reason", "not captured")
|
||||
print(f" cron jobs: not in snapshot ({reason})")
|
||||
print(
|
||||
"\nThis will replace the current ~/.hermes/skills/ tree (a safety "
|
||||
"snapshot of the current state is taken first so this is undoable)."
|
||||
"snapshot of the current state is taken first so this is undoable). "
|
||||
"Cron jobs that still exist will have their skills/skill fields "
|
||||
"restored from the snapshot; all other cron fields are left alone."
|
||||
)
|
||||
|
||||
if not getattr(args, "yes", False):
|
||||
|
|
|
|||
|
|
@ -263,8 +263,11 @@ def run_doctor(args):
|
|||
if env_path.exists():
|
||||
check_ok(f"{_DHH}/.env file exists")
|
||||
|
||||
# Check for common issues
|
||||
content = env_path.read_text()
|
||||
# Check for common issues. Pin encoding to UTF-8 because .env files are
|
||||
# written as UTF-8 everywhere in the codebase, while Path.read_text()
|
||||
# defaults to the system locale — which crashes on non-UTF-8 Windows
|
||||
# locales (e.g. GBK) as soon as the file contains any non-ASCII byte.
|
||||
content = env_path.read_text(encoding="utf-8")
|
||||
if _has_provider_env_config(content):
|
||||
check_ok("API key or custom endpoint configured")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -289,7 +289,7 @@ def _has_any_provider_configured() -> bool:
|
|||
env_file = get_env_path()
|
||||
if env_file.exists():
|
||||
try:
|
||||
for line in env_file.read_text().splitlines():
|
||||
for line in env_file.read_text(encoding="utf-8").splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -361,7 +361,7 @@ def _write_env_vars(env_path: Path, env_writes: dict) -> None:
|
|||
|
||||
existing_lines = []
|
||||
if env_path.exists():
|
||||
existing_lines = env_path.read_text().splitlines()
|
||||
existing_lines = env_path.read_text(encoding="utf-8").splitlines()
|
||||
|
||||
updated_keys = set()
|
||||
new_lines = []
|
||||
|
|
|
|||
|
|
@ -1057,6 +1057,45 @@ def list_authenticated_providers(
|
|||
if normed:
|
||||
_builtin_endpoints.add(normed)
|
||||
|
||||
def _has_fast_aws_sdk_signal() -> bool:
|
||||
"""Return True when explicit AWS auth config is present.
|
||||
|
||||
This intentionally avoids botocore's full credential chain. Provider
|
||||
picker/model-switch discovery can run for non-Bedrock providers, and
|
||||
botocore may otherwise probe EC2 IMDS (169.254.169.254) on local
|
||||
machines before returning no credentials.
|
||||
"""
|
||||
if os.environ.get("AWS_BEARER_TOKEN_BEDROCK", "").strip():
|
||||
return True
|
||||
if (
|
||||
os.environ.get("AWS_ACCESS_KEY_ID", "").strip()
|
||||
and os.environ.get("AWS_SECRET_ACCESS_KEY", "").strip()
|
||||
):
|
||||
return True
|
||||
return any(
|
||||
os.environ.get(name, "").strip()
|
||||
for name in (
|
||||
"AWS_PROFILE",
|
||||
"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
|
||||
"AWS_CONTAINER_CREDENTIALS_FULL_URI",
|
||||
"AWS_WEB_IDENTITY_TOKEN_FILE",
|
||||
)
|
||||
)
|
||||
|
||||
def _has_aws_sdk_creds_for_listing(slug: str) -> bool:
|
||||
"""Credential check for AWS SDK providers in non-runtime discovery."""
|
||||
slug_norm = str(slug or "").strip().lower()
|
||||
current_norm = str(current_provider or "").strip().lower()
|
||||
if _has_fast_aws_sdk_signal():
|
||||
return True
|
||||
if slug_norm != current_norm:
|
||||
return False
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials
|
||||
return bool(has_aws_credentials())
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
data = fetch_models_dev()
|
||||
|
||||
# Build curated model lists keyed by hermes provider ID
|
||||
|
|
@ -1184,7 +1223,9 @@ def list_authenticated_providers(
|
|||
|
||||
# Check if credentials exist
|
||||
has_creds = False
|
||||
if overlay.extra_env_vars:
|
||||
if overlay.auth_type == "aws_sdk":
|
||||
has_creds = _has_aws_sdk_creds_for_listing(hermes_slug)
|
||||
elif overlay.extra_env_vars:
|
||||
has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
|
||||
# Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
|
||||
if not has_creds and overlay.auth_type == "api_key":
|
||||
|
|
@ -1324,11 +1365,7 @@ def list_authenticated_providers(
|
|||
# credentials come from the boto3 credential chain (env vars,
|
||||
# ~/.aws/credentials, instance roles, etc.)
|
||||
if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials
|
||||
_cp_has_creds = has_aws_credentials()
|
||||
except Exception:
|
||||
pass
|
||||
_cp_has_creds = _has_aws_sdk_creds_for_listing(_cp.slug)
|
||||
|
||||
if not _cp_has_creds:
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -1190,6 +1190,13 @@ def _setup_tts_provider(config: dict):
|
|||
"Falling back to Edge TTS."
|
||||
)
|
||||
selected = "edge"
|
||||
if selected == "xai":
|
||||
print()
|
||||
voice_id = prompt("xAI voice_id (Enter for 'eve', or paste a custom voice ID)")
|
||||
if voice_id and voice_id.strip():
|
||||
config.setdefault("tts", {}).setdefault("xai", {})["voice_id"] = voice_id.strip()
|
||||
print_success(f"xAI voice_id set to: {voice_id.strip()}")
|
||||
|
||||
|
||||
elif selected == "minimax":
|
||||
existing = get_env_value("MINIMAX_API_KEY")
|
||||
|
|
@ -1643,7 +1650,11 @@ def setup_terminal_backend(config: dict):
|
|||
def _apply_default_agent_settings(config: dict):
|
||||
"""Apply recommended defaults for all agent settings without prompting."""
|
||||
config.setdefault("agent", {})["max_turns"] = 90
|
||||
save_env_value("HERMES_MAX_ITERATIONS", "90")
|
||||
# config.yaml is the authoritative source for max_turns; the gateway
|
||||
# bridges it into HERMES_MAX_ITERATIONS at startup. We no longer write
|
||||
# to .env to avoid the dual-source inconsistency that caused the
|
||||
# 60-vs-500 bug (stale .env entry silently shadowing config.yaml).
|
||||
remove_env_value("HERMES_MAX_ITERATIONS")
|
||||
|
||||
config.setdefault("display", {})["tool_progress"] = "all"
|
||||
|
||||
|
|
@ -1673,9 +1684,10 @@ def setup_agent_settings(config: dict):
|
|||
print()
|
||||
|
||||
# ── Max Iterations ──
|
||||
current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
|
||||
cfg_get(config, "agent", "max_turns", default=90)
|
||||
)
|
||||
# config.yaml is authoritative; read from there. If a legacy .env
|
||||
# entry is still around (from pre-PR#18413 setups), prefer the
|
||||
# config value so we don't surface a stale number to the user.
|
||||
current_max = str(cfg_get(config, "agent", "max_turns", default=90))
|
||||
print_info("Maximum tool-calling iterations per conversation.")
|
||||
print_info("Higher = more complex tasks, but costs more tokens.")
|
||||
print_info(
|
||||
|
|
@ -1686,9 +1698,13 @@ def setup_agent_settings(config: dict):
|
|||
try:
|
||||
max_iter = int(max_iter_str)
|
||||
if max_iter > 0:
|
||||
save_env_value("HERMES_MAX_ITERATIONS", str(max_iter))
|
||||
# Write to config.yaml (authoritative) only. Also clean up any
|
||||
# stale .env entry from earlier setup runs — the gateway's
|
||||
# bridge in gateway/run.py now unconditionally derives
|
||||
# HERMES_MAX_ITERATIONS from agent.max_turns at startup.
|
||||
config.setdefault("agent", {})["max_turns"] = max_iter
|
||||
config.pop("max_turns", None)
|
||||
remove_env_value("HERMES_MAX_ITERATIONS")
|
||||
print_success(f"Max iterations set to {max_iter}")
|
||||
except ValueError:
|
||||
print_warning("Invalid number, keeping current value")
|
||||
|
|
|
|||
|
|
@ -1822,7 +1822,7 @@ def _reconfigure_tool(config: dict):
|
|||
cat = TOOL_CATEGORIES.get(ts_key)
|
||||
reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
|
||||
if cat or reqs:
|
||||
if _toolset_has_keys(ts_key, config):
|
||||
if _toolset_has_keys(ts_key, config) or _toolset_enabled_for_reconfigure(ts_key, config):
|
||||
configurable.append((ts_key, ts_label))
|
||||
|
||||
if not configurable:
|
||||
|
|
@ -1848,6 +1848,28 @@ def _reconfigure_tool(config: dict):
|
|||
save_config(config)
|
||||
|
||||
|
||||
def _toolset_enabled_for_reconfigure(ts_key: str, config: dict) -> bool:
    """Report whether ``ts_key`` is enabled on at least one platform.

    Reconfigure must also offer categories that are enabled but not yet
    configured, so users can finish provider/API-key setup without first
    disabling and re-enabling the toolset.

    Args:
        ts_key: Toolset key to look for.
        config: Loaded configuration passed through to ``_get_platform_tools``.

    Returns:
        True as soon as one platform that allows the toolset reports it as
        enabled; False if none does.
    """

    def _enabled_on(platform) -> bool:
        try:
            active_tools = _get_platform_tools(
                config,
                platform,
                include_default_mcp_servers=False,
            )
        except Exception:
            # A platform whose tool list cannot be resolved is simply skipped.
            return False
        return ts_key in active_tools

    return any(
        _enabled_on(platform)
        for platform in PLATFORMS
        if _toolset_allowed_for_platform(ts_key, platform)
    )
|
||||
|
||||
|
||||
def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
|
||||
"""Reconfigure a tool category - provider selection + API key update."""
|
||||
icon = cat.get("icon", "")
|
||||
|
|
|
|||
|
|
@ -8,14 +8,64 @@ import os
|
|||
from pathlib import Path
|
||||
|
||||
|
||||
# Set to True once the "HERMES_HOME unset but a profile is active" warning has
# been emitted, so the warning is printed at most once per process.
_profile_fallback_warned: bool = False


def get_hermes_home() -> Path:
    """Return the Hermes home directory (default: ~/.hermes).

    Reads the ``HERMES_HOME`` env var and falls back to ``~/.hermes``.
    This is the single source of truth — all other copies should import this.

    When ``HERMES_HOME`` is unset but ``~/.hermes/active_profile`` names a
    non-default profile, a loud one-shot warning is written to stderr so
    cross-profile data corruption is diagnosable instead of silent.
    Behavior is unchanged otherwise — ``~/.hermes`` is still returned —
    because raising here would brick 30+ module-level callers that import
    this at load time. Subprocess spawners are expected to propagate
    ``HERMES_HOME`` explicitly (see the systemd template in
    ``hermes_cli/gateway.py`` and the kanban dispatcher in
    ``hermes_cli/kanban_db.py``).
    See https://github.com/NousResearch/hermes-agent/issues/18594.

    Returns:
        ``Path(HERMES_HOME)`` when the variable holds a non-blank value,
        otherwise ``Path.home() / ".hermes"``.
    """
    global _profile_fallback_warned

    val = os.environ.get("HERMES_HOME", "").strip()
    if val:
        return Path(val)

    # Inline the default-root resolution instead of calling
    # get_default_hermes_root() to stay import-safe: this function is called
    # from module scope in 30+ files and must not trigger logging setup.
    default_home = Path.home() / ".hermes"

    # Guard: if a non-default profile is sticky-active, warn once that the
    # fallback to the default profile is almost certainly wrong.
    if not _profile_fallback_warned:
        try:
            # Pin UTF-8 so the result does not depend on the system locale.
            # A missing file raises FileNotFoundError, which is an OSError.
            active = (
                (default_home / "active_profile")
                .read_text(encoding="utf-8")
                .strip()
            )
        except (UnicodeDecodeError, OSError):
            active = ""
        if active and active != "default":
            _profile_fallback_warned = True
            # Write directly to stderr. We intentionally do NOT route this
            # through ``logging`` because (a) this function is called at
            # module-import time from 30+ sites, often before logging is
            # configured, and (b) root-logger propagation would double-emit
            # on consoles where a StreamHandler is already attached.
            import sys

            msg = (
                "[HERMES_HOME fallback] HERMES_HOME is unset but active "
                f"profile is {active!r}. Falling back to ~/.hermes, which "
                f"is the DEFAULT profile — not {active!r}. Any data this "
                "process writes will land in the wrong profile. The "
                "subprocess spawner should pass HERMES_HOME explicitly "
                "(see issue #18594)."
            )
            try:
                sys.stderr.write(msg + "\n")
                sys.stderr.flush()
            except Exception:
                # Deliberate best-effort: a closed or missing stderr must
                # never prevent callers from resolving the home directory.
                pass

    return default_home
|
||||
|
||||
|
||||
def get_default_hermes_root() -> Path:
|
||||
|
|
|
|||
206
optional-skills/creative/kanban-video-orchestrator/SKILL.md
Normal file
206
optional-skills/creative/kanban-video-orchestrator/SKILL.md
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
---
|
||||
name: kanban-video-orchestrator
|
||||
description: Plan, set up, and monitor a multi-agent video production pipeline backed by Hermes Kanban. Use when the user wants to make ANY video — narrative film, product/marketing, music video, explainer, ASCII/terminal art, abstract/generative loop, comic, 3D, real-time/installation — and the work warrants decomposition into specialized profiles (writer, designer, animator, renderer, voice, editor, etc.) coordinated through a kanban board. Performs adaptive discovery to scope the brief, designs an appropriate team for the requested style, generates the setup script that creates Hermes profiles + initial kanban task, then helps monitor execution and intervene when tasks stall or fail. Routes scenes to whichever Hermes rendering / audio / design skill fits each beat (`ascii-video`, `manim-video`, `p5js`, `comfyui`, `touchdesigner-mcp`, `blender-mcp`, `pixel-art`, `baoyu-comic`, `claude-design`, `excalidraw`, `songsee`, `heartmula`, …) plus external APIs for TTS, image-gen, and image-to-video as needed.
|
||||
version: 1.0.0
|
||||
author: [SHL0MS, alt-glitch]
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [video, kanban, multi-agent, orchestration, production-pipeline]
|
||||
related_skills: [kanban-orchestrator, kanban-worker, ascii-video, manim-video, p5js, comfyui, touchdesigner-mcp, blender-mcp, pixel-art, ascii-art, songwriting-and-ai-music, heartmula, songsee, spotify, youtube-content, claude-design, excalidraw, architecture-diagram, concept-diagrams, baoyu-comic, baoyu-infographic, humanizer, gif-search, meme-generation]
|
||||
credits: |
|
||||
The single-project workspace layout, profile-config patching pattern,
|
||||
SOUL.md-per-profile model, TEAM.md task-graph convention, and
|
||||
`--workspace dir:<path>` discipline are adapted from alt-glitch's
|
||||
original multi-agent video pipeline at
|
||||
https://github.com/NousResearch/kanban-video-pipeline.
|
||||
---
|
||||
|
||||
# Kanban Video Orchestrator
|
||||
|
||||
Wrap any video request — from a 15-second product teaser to a 5-minute narrative
|
||||
short to a music video to an ASCII loop — in a Hermes Kanban pipeline that
|
||||
decomposes the work to specialized agent profiles.
|
||||
|
||||
This skill does **not** render anything itself. It is a meta-pipeline that:
|
||||
|
||||
1. **Scopes** the request through targeted discovery
|
||||
2. **Designs** an appropriate team (which roles, which tools per role) based on the style
|
||||
3. **Generates** a setup script that creates Hermes profiles, project workspace, and the initial kanban task
|
||||
4. **Hands off** to the director profile, which decomposes via the kanban
|
||||
5. **Monitors** execution, helps intervene when tasks stall or fail
|
||||
|
||||
The actual rendering happens inside the kanban once it's running, via whichever
|
||||
existing skills + tools fit the scenes — `ascii-video`, `manim-video`, `p5js`,
|
||||
`comfyui`, `touchdesigner-mcp`, `blender-mcp`, `songwriting-and-ai-music`,
|
||||
`heartmula`, external APIs, or plain Python with PIL + ffmpeg.
|
||||
|
||||
## When NOT to use this skill
|
||||
|
||||
- The video is one continuous procedural project that needs no specialists. Just write the code directly.
|
||||
- The user wants a quick one-shot conversion (e.g. "convert this mp4 to a GIF") — use ffmpeg directly.
|
||||
- The output is a static image, GIF, or audio-only artifact — use the matching specific skill (`ascii-art`, `gif-search`, `meme-generation`, `songwriting-and-ai-music`).
|
||||
- The work fits a single existing skill cleanly (e.g. a pure ASCII video — just use `ascii-video`).
|
||||
|
||||
## Workflow
|
||||
|
||||
```
|
||||
DISCOVER → BRIEF → TEAM DESIGN → SETUP → EXECUTE → MONITOR
|
||||
```
|
||||
|
||||
### Step 1 — Discover (ask the right questions)
|
||||
|
||||
The discovery process is **adaptive**: ask only what is actually needed. Always
|
||||
start with three questions to identify the broad shape:
|
||||
|
||||
- **What is the video?** (one-sentence brief)
|
||||
- **How long?** (5-30s teaser / 30-90s short / 90s-3min explainer / 3-10min film / longer)
|
||||
- **What aspect ratio + target platform?** (1:1 / 9:16 / 16:9; X, IG, YouTube, internal, etc.)
|
||||
|
||||
From the answers, classify the style category. The style determines which
|
||||
follow-up questions to ask. **Do not ask all questions at once.** Ask 2-4 at a
|
||||
time, listen, then proceed. Make reasonable assumptions whenever the user
|
||||
implies an answer.
|
||||
|
||||
For complete intake patterns and per-style question banks, see
|
||||
**[references/intake.md](references/intake.md)**.
|
||||
|
||||
### Step 2 — Brief
|
||||
|
||||
Once enough is known, produce a structured `brief.md` using the template in
|
||||
`assets/brief.md.tmpl`. Stages:
|
||||
|
||||
1. **Concept** — the one-sentence pitch + emotional north star
|
||||
2. **Scope** — duration, aspect, platform, deadline
|
||||
3. **Style** — visual references, brand constraints, tone
|
||||
4. **Scenes** — beat-by-beat breakdown (durations, content, target tool)
|
||||
5. **Audio** — narration / music / SFX / silent (per scene if needed)
|
||||
6. **Deliverables** — file format, resolution, optional alternates (vertical cut, GIF, etc.)
|
||||
|
||||
Show the brief to the user for confirmation before designing the team. **The
|
||||
brief is the contract** — every downstream task references it.
|
||||
|
||||
### Step 3 — Team design
|
||||
|
||||
Pick role archetypes from the library that fit this video. **Compose, don't
|
||||
clone.** Most videos need 4-7 profiles. The director is always present; the
|
||||
rest are picked by what the brief actually requires.
|
||||
|
||||
For the role library and per-style team compositions, see
|
||||
**[references/role-archetypes.md](references/role-archetypes.md)**.
|
||||
|
||||
For mapping role → which Hermes skills + toolsets it loads, see
|
||||
**[references/tool-matrix.md](references/tool-matrix.md)**.
|
||||
|
||||
### Step 4 — Setup
|
||||
|
||||
Generate a setup script (`setup.sh`) and run it. The script:
|
||||
|
||||
1. Creates the project workspace (`~/projects/video-pipeline/<slug>/`)
|
||||
2. Copies any provided assets into `taste/`, `audio/`, `assets/`
|
||||
3. Creates each Hermes profile via `hermes profile create --clone`
|
||||
4. Writes per-profile `SOUL.md` (personality + role definition)
|
||||
5. Configures profile YAML (toolsets, always_load skills, cwd)
|
||||
6. Writes `brief.md`, `TEAM.md`, and `taste/` content
|
||||
7. Fires the initial `hermes kanban create` task assigned to the director
|
||||
|
||||
Use `scripts/bootstrap_pipeline.py` to generate setup.sh from a brief +
|
||||
team-design JSON. See **[references/kanban-setup.md](references/kanban-setup.md)**
|
||||
for the setup script structure, profile config patterns, and the critical
|
||||
"shared workspace" rule.
|
||||
|
||||
### Step 5 — Execute
|
||||
|
||||
Once `setup.sh` has run (Step 4), provide the user with monitoring commands:
|
||||
|
||||
```bash
|
||||
hermes kanban watch --tenant <project-tenant> # live events
|
||||
hermes kanban list --tenant <project-tenant> # board snapshot
|
||||
hermes dashboard # visual board UI
|
||||
```
|
||||
|
||||
The director profile takes over from here, decomposing the work and routing
|
||||
tasks to specialist profiles via the kanban toolset.
|
||||
|
||||
### Step 6 — Monitor and intervene
|
||||
|
||||
Stay engaged — the kanban runs autonomously but a stuck task or bad output
|
||||
needs human (or AI) judgment.
|
||||
|
||||
Monitoring patterns: poll `kanban list` periodically, inspect any RUNNING task
|
||||
that exceeds its expected duration with `kanban show <id>`, and check
|
||||
heartbeats. When a worker's output fails review, the standard interventions are:
|
||||
|
||||
1. Comment on the worker's task with specific feedback (`kanban_comment`)
|
||||
2. Create a re-run task with the original as parent
|
||||
3. Adjust the brief's scope and let the director re-decompose
|
||||
|
||||
For diagnostic patterns, intervention recipes, and the "task is stuck"
|
||||
playbook, see **[references/monitoring.md](references/monitoring.md)**.
|
||||
|
||||
## Reference: worked examples
|
||||
|
||||
Six concrete pipelines covering very different video styles — narrative film,
|
||||
product/marketing, music video, math/algorithm explainer, ASCII video, real-time
|
||||
installation — showing how the same workflow yields very different teams and
|
||||
task graphs. See **[references/examples.md](references/examples.md)**.
|
||||
|
||||
## Critical rules
|
||||
|
||||
1. **Discovery before action.** Never start generating a brief or team without
|
||||
asking at least the three baseline questions. A bad brief cascades through
|
||||
the entire pipeline.
|
||||
|
||||
2. **Match the team to the video.** Don't reuse the same 4-profile setup for
|
||||
every job. A music video that doesn't have a beat-analysis profile will
|
||||
misfire. A narrative film that doesn't have a writer profile will produce
|
||||
incoherent scenes. See `references/role-archetypes.md`.
|
||||
|
||||
3. **One workspace per project.** All profiles for a given video share the same
|
||||
`dir:` workspace. Tasks pass artifacts via shared filesystem and structured
|
||||
handoffs. **Every** `kanban_create` call passes
|
||||
`workspace_kind="dir"` + `workspace_path="<absolute project path>"`.
|
||||
|
||||
4. **Tenant every project.** Use a project-specific tenant
|
||||
(`--tenant <project-slug>`). Keeps the dashboard scoped and prevents
|
||||
cross-pollination with other ongoing kanbans.
|
||||
|
||||
5. **Respect existing skills.** When a scene fits an existing skill, the
|
||||
relevant renderer should load that skill via `--skill <name>` on its task
|
||||
or `always_load` in its profile. Do not re-derive what a skill already
|
||||
provides.
|
||||
|
||||
6. **The director never executes.** Even with the full `kanban + terminal +
|
||||
file` toolset, the director's `SOUL.md` rules forbid it from executing
|
||||
work itself. It decomposes and routes only — every concrete task becomes
|
||||
a `hermes kanban create` call to a specialist profile. The
|
||||
`kanban-orchestrator` skill spells this out further.
|
||||
|
||||
7. **Don't over-decompose.** A 30-second product video does NOT need 20 tasks.
|
||||
Aim for the smallest task graph that still parallelizes well and exposes the
|
||||
right human-review gates.
|
||||
|
||||
8. **Verify API keys BEFORE firing.** External APIs (TTS, image-gen,
|
||||
image-to-video) need keys in `~/.hermes/.env` or the user's secret store.
|
||||
A worker that hits a missing-key error wastes a task slot. The setup
|
||||
script's `check_key` helper aborts cleanly if a required key is missing.
|
||||
|
||||
## File map
|
||||
|
||||
```
|
||||
SKILL.md ← this file (workflow + rules)
|
||||
references/
|
||||
intake.md ← discovery question banks per style
|
||||
role-archetypes.md ← role library (writer, designer, animator, …)
|
||||
tool-matrix.md ← skill + toolset mapping per role
|
||||
kanban-setup.md ← setup script structure & profile config
|
||||
monitoring.md ← watch + intervene patterns
|
||||
examples.md ← six worked pipelines
|
||||
assets/
|
||||
brief.md.tmpl ← brief skeleton
|
||||
setup.sh.tmpl ← setup script skeleton
|
||||
soul.md.tmpl ← profile personality skeleton
|
||||
scripts/
|
||||
bootstrap_pipeline.py ← generate setup.sh from brief + team JSON
|
||||
monitor.py ← polling + intervention helpers
|
||||
```
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
# Video Brief — {{TITLE}}
|
||||
|
||||
> Slug: `{{SLUG}}` · Tenant: `{{TENANT}}` · Project workspace: `{{WORKSPACE}}`
|
||||
|
||||
## 1. Concept
|
||||
|
||||
**One-line pitch.** {{ONE_LINE_PITCH}}
|
||||
|
||||
**Emotional north star.** {{EMOTIONAL_NORTH_STAR}}
|
||||
*(What should the viewer feel walking away?)*
|
||||
|
||||
## 2. Scope
|
||||
|
||||
| | |
|
||||
|---|---|
|
||||
| Duration | {{DURATION_S}} seconds |
|
||||
| Aspect ratio | {{ASPECT}} |
|
||||
| Resolution | {{RESOLUTION}} |
|
||||
| Frame rate | {{FPS}} fps |
|
||||
| Target platforms | {{PLATFORMS}} |
|
||||
| Deadline | {{DEADLINE}} |
|
||||
| Quality bar | {{QUALITY_BAR}} *(rough draft / polished / archival)* |
|
||||
|
||||
## 3. Style
|
||||
|
||||
**Visual references.** {{VISUAL_REFS}}
|
||||
|
||||
**Tone.** {{TONE}}
|
||||
|
||||
**Brand constraints.** {{BRAND_CONSTRAINTS}}
|
||||
*(colors, typography, motion language; or "n/a")*
|
||||
|
||||
**Aesthetic rules.**
|
||||
{{AESTHETIC_RULES}}
|
||||
|
||||
## 4. Scenes
|
||||
|
||||
Beat-by-beat breakdown. Each scene gets a row.
|
||||
|
||||
| # | Time | Content | Target tool / skill | Audio | Notes |
|
||||
|---|------|---------|---------------------|-------|-------|
|
||||
| 1 | 0:00–0:0X | {{SCENE_1_CONTENT}} | {{SCENE_1_TOOL}} | {{SCENE_1_AUDIO}} | {{SCENE_1_NOTES}} |
|
||||
| 2 | 0:0X–0:0Y | ... | ... | ... | ... |
|
||||
|
||||
## 5. Audio
|
||||
|
||||
**Approach.** {{AUDIO_APPROACH}}
|
||||
*(narration / music-only / synced to track / silent / mixed)*
|
||||
|
||||
**Voiceover.** {{VO_DETAILS}}
|
||||
*(provider, voice, language, script source — "n/a" if no VO)*
|
||||
|
||||
**Music.** {{MUSIC_DETAILS}}
|
||||
*(provided track path / commission via Suno / commission via heartmula /
|
||||
license-free / "n/a")*
|
||||
|
||||
**SFX.** {{SFX_DETAILS}}
|
||||
*(generated, library, or "n/a")*
|
||||
|
||||
## 6. Deliverables
|
||||
|
||||
| Format | Resolution | Notes |
|
||||
|--------|-----------|-------|
|
||||
| {{PRIMARY_FORMAT}} | {{PRIMARY_RES}} | The main output |
|
||||
| {{ALT_FORMAT_1}} | {{ALT_RES_1}} | {{ALT_NOTES_1}} |
|
||||
|
||||
**Final filename.** `output/final.mp4`
|
||||
*(plus optional `output/final-9x16.mp4`, `output/captions.srt`, etc.)*
|
||||
|
||||
## 7. Constraints
|
||||
|
||||
- API keys required: {{API_KEYS_REQUIRED}}
|
||||
- External dependencies: {{EXT_DEPS}}
|
||||
- Source assets to incorporate: {{SOURCE_ASSETS}}
|
||||
|
||||
---
|
||||
|
||||
**This brief is the contract. The director and every downstream profile read
|
||||
it. If the brief changes, the kanban must be re-fired — don't edit live.**
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
#!/usr/bin/env bash
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Video Pipeline Setup — {{TITLE}}
|
||||
#
|
||||
# Generated by kanban-video-orchestrator skill.
|
||||
#
|
||||
# Slug: {{SLUG}}
|
||||
# Workspace: {{WORKSPACE}}
|
||||
# Tenant: {{TENANT}}
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
set -euo pipefail
|
||||
|
||||
PROJECT_SLUG="{{SLUG}}"
|
||||
WORKSPACE="$HOME/projects/video-pipeline/${PROJECT_SLUG}"
|
||||
TENANT="{{TENANT}}"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 1. Verify required API keys
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Checking required API keys ═══"
|
||||
|
||||
check_key() {
  # Verify that an API key is available before any task is fired.
  #   $1  variable name to look for in ~/.hermes/.env
  #   $2  Keychain account (default: hermes)
  #   $3  Keychain service (default: same as $1)
  # Prints a ✓ line and returns 0 when the key is found, otherwise prints a
  # ✗ line and returns 1.
  local key_name="$1"
  local account="${2:-hermes}"
  local service="${3:-$1}"
  local env_file="$HOME/.hermes/.env"

  # First choice: a KEY=value line in the env file whose value is non-empty.
  if grep -q "^${key_name}=" "$env_file" 2>/dev/null; then
    if [ -n "$(grep "^${key_name}=" "$env_file" | cut -d= -f2-)" ]; then
      echo "  ✓ ${key_name} (env)"
      return 0
    fi
  fi

  # Second choice: a generic-password entry, only where the `security`
  # command-line tool is available.
  if ! command -v security >/dev/null 2>&1 || \
     ! security find-generic-password -a "${account}" -s "${service}" -w >/dev/null 2>&1; then
    echo "  ✗ ${key_name} not set in ~/.hermes/.env or Keychain (${account}/${service})"
    return 1
  fi

  echo "  ✓ ${key_name} (Keychain ${account}/${service})"
  return 0
}
|
||||
|
||||
# Customize this list per project — only check keys actually used:
|
||||
{{KEY_CHECKS}}
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 2. Create project workspace
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Creating project workspace ═══"
|
||||
mkdir -p "$WORKSPACE"/{taste,audio/{voiceover,sfx},assets,scenes,checkpoints,tools,output}
|
||||
{{SCENE_DIRS}}
|
||||
echo " ✓ $WORKSPACE"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 3. Create Hermes profiles
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Creating Hermes profiles ═══"
|
||||
|
||||
{{PROFILE_CREATE_COMMANDS}}
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 4. Configure profiles (toolsets, skills, cwd)
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Configuring profiles ═══"
|
||||
|
||||
configure_profile() {
  # Patch one Hermes profile's config.yaml: set `toolsets` and
  # `skills.always_load`, then re-read the file to confirm the patch landed.
  #   $1  profile name (~/.hermes/profiles/<name>/config.yaml must exist)
  #   $2  toolsets as a JSON array string
  #   $3  always-load skills as a JSON array string
  # Exits the whole script with status 1 when the patch cannot be applied.
  local profile="$1"
  local toolsets_json="$2"   # JSON array string, e.g. '["kanban","terminal","file"]'
  local skills_json="$3"     # JSON array string, e.g. '["kanban-worker","ascii-video"]'

  # The patcher runs as the `if` condition on purpose: this script uses
  # `set -e`, so a bare failing command would abort the script before a
  # later `$?` check could print the failure message.
  if ! python3 - "$profile" "$toolsets_json" "$skills_json" "$WORKSPACE" <<'PY'
"""Patch a Hermes profile config.yaml using PyYAML so we don't depend on the
exact default-config string format. Validates the patch took effect and exits
non-zero if anything's off."""
import json
import os
import sys

try:
    import yaml
except ImportError:
    print("ERROR: PyYAML required. pip install pyyaml", file=sys.stderr)
    sys.exit(1)

# `workspace` is accepted for call compatibility but not written anywhere
# (see the note below about per-task workspaces).
profile, toolsets_json, skills_json, workspace = sys.argv[1:5]
toolsets = json.loads(toolsets_json)
skills = json.loads(skills_json)

p = os.path.expanduser(f"~/.hermes/profiles/{profile}/config.yaml")
if not os.path.exists(p):
    print(f"  ✗ profile config not found: {p}", file=sys.stderr)
    sys.exit(1)

# Pin UTF-8 so reading and writing do not depend on the system locale.
with open(p, encoding="utf-8") as f:
    cfg = yaml.safe_load(f) or {}
if not isinstance(cfg, dict):
    print(f"  ✗ {profile}: config root is not a mapping: {p}", file=sys.stderr)
    sys.exit(1)

# Apply our changes — only the keys we actually want to set.
cfg["toolsets"] = toolsets
# An empty `skills:` key loads as None (not a dict); replace it so the
# assignment below cannot fail.
if not isinstance(cfg.get("skills"), dict):
    cfg["skills"] = {}
cfg["skills"]["always_load"] = skills

# Note: we do NOT touch cfg["approvals"] — that's a security-sensitive
# setting (manual confirmation of tool calls). Workspace cwd is overridden
# per-task by `--workspace dir:<path>` on `hermes kanban create`, so we
# don't need to mutate cfg["terminal"]["cwd"] either.

with open(p, "w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)

# Validate by re-reading what was just written.
with open(p, encoding="utf-8") as f:
    after = yaml.safe_load(f) or {}
after_skills = after.get("skills") or {}
errors = []
if after.get("toolsets") != toolsets:
    errors.append(f"toolsets mismatch: {after.get('toolsets')!r}")
if after_skills.get("always_load") != skills:
    errors.append(f"skills.always_load mismatch: {after_skills.get('always_load')!r}")
if errors:
    print(f"  ✗ {profile}: " + "; ".join(errors), file=sys.stderr)
    sys.exit(1)
PY
  then
    echo "  ✗ failed to configure ${profile}" >&2
    exit 1
  fi
  echo "  ✓ ${profile}"
}
|
||||
|
||||
{{PROFILE_CONFIG_COMMANDS}}
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 5. Write SOUL.md per profile
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Writing profile personalities ═══"
|
||||
|
||||
{{SOUL_WRITES}}
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 6. Copy brief, TEAM.md, and any provided assets
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Writing brief + taste ═══"
|
||||
|
||||
cat > "$WORKSPACE/brief.md" <<'BRIEF_EOF'
|
||||
{{BRIEF_CONTENTS}}
|
||||
BRIEF_EOF
|
||||
|
||||
cat > "$WORKSPACE/TEAM.md" <<'TEAM_EOF'
|
||||
{{TEAM_CONTENTS}}
|
||||
TEAM_EOF
|
||||
|
||||
{{TASTE_WRITES}}
|
||||
|
||||
{{ASSET_COPIES}}
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# 7. Fire the initial kanban task
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
echo "═══ Firing initial kanban task ═══"
|
||||
|
||||
hermes kanban create "Direct production of {{TITLE}}" \
|
||||
--assignee director \
|
||||
--workspace dir:"$WORKSPACE" \
|
||||
--tenant "$TENANT" \
|
||||
--priority 2 \
|
||||
--max-runtime 4h \
|
||||
--body "$(cat <<EOF
|
||||
Read brief.md, TEAM.md, and taste/.
|
||||
|
||||
Decompose into the team graph defined in TEAM.md.
|
||||
|
||||
All child tasks MUST use:
|
||||
workspace_kind="dir"
|
||||
workspace_path="$WORKSPACE"
|
||||
tenant="$TENANT"
|
||||
|
||||
Do not execute the work yourself — route every concrete subtask to the
|
||||
appropriate profile via kanban_create.
|
||||
EOF
|
||||
)"
|
||||
|
||||
echo ""
|
||||
echo "═══ Setup complete ═══"
|
||||
echo ""
|
||||
echo "Monitor with:"
|
||||
echo " hermes kanban watch --tenant $TENANT"
|
||||
echo " hermes kanban list --tenant $TENANT"
|
||||
echo " hermes dashboard"
|
||||
echo ""
|
||||
echo "Workspace: $WORKSPACE"
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
# {{ROLE_NAME}}
|
||||
|
||||
You are the **{{ROLE_NAME}}** for this video production.
|
||||
|
||||
## Project context
|
||||
|
||||
- **Brief:** read `brief.md` in your CWD
|
||||
- **Team graph:** read `TEAM.md` in your CWD
|
||||
- **Style spec:** read `taste/brand-guide.md` and `taste/emotional-dna.md` in
|
||||
your CWD
|
||||
|
||||
## What you do
|
||||
|
||||
{{ROLE_RESPONSIBILITIES}}
|
||||
|
||||
## Inputs you read
|
||||
|
||||
{{INPUTS_READ}}
|
||||
|
||||
## Outputs you produce
|
||||
|
||||
{{OUTPUTS_PRODUCED}}
|
||||
|
||||
## Tools and skills available
|
||||
|
||||
- **Toolsets:** {{TOOLSETS}}
|
||||
- **Skills loaded:** {{SKILLS}}
|
||||
- **External APIs / CLIs:** {{EXTERNAL_TOOLS}}
|
||||
|
||||
## Rules
|
||||
|
||||
{{ROLE_RULES}}
|
||||
|
||||
{{COMMON_RULES}}
|
||||
|
||||
## Common reference commands
|
||||
|
||||
{{COMMON_COMMANDS}}
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
# Worked Examples
|
||||
|
||||
Six concrete pipelines covering different video styles. Each shows the team
|
||||
composition, task graph, and skill/tool choices the orchestrator would make
|
||||
for that brief. **These are illustrative, not templates** — adapt to the
|
||||
actual brief.
|
||||
|
||||
## Example 1 — Narrative short film (text-to-image → image-to-video → cut)
|
||||
|
||||
**Brief:** A 90-second noir-style short. A detective walks through a rainy
|
||||
city. Voiceover narration. AI-generated visuals.
|
||||
|
||||
**Team:**
|
||||
- `director` — vision, decomposition, approval
|
||||
- `writer` — script + voiceover copy (loads `humanizer` for natural voice)
|
||||
- `storyboarder` — beat-by-beat shot list (loads `excalidraw`)
|
||||
- `image-generator` — generates each shot's still via local ComfyUI workflows
|
||||
(loads `comfyui`)
|
||||
- `image-to-video-generator` — animates each still (Runway/Kling, OR
|
||||
ComfyUI's AnimateDiff/WAN workflows via `comfyui`)
|
||||
- `voice-talent` — narration via ElevenLabs
|
||||
- `audio-mixer` — VO + ambient pad
|
||||
- `editor` — assembly + transitions
|
||||
- `reviewer` — final QA
|
||||
|
||||
**Task graph:**
|
||||
```
|
||||
T0 director decompose
|
||||
T1 writer script + voiceover.md (parent: T0)
|
||||
T2 storyboarder shot list with framing per beat (parent: T1)
|
||||
T3 image-generator one still per shot (~12 shots) (parent: T2)
|
||||
T4 image-to-video-generator animate each still (parent: T3)
|
||||
T5 voice-talent generate narration audio (parent: T1)
|
||||
T6 audio-mixer mix VO + ambient (parent: T5)
|
||||
T7 editor cut + transitions + audio mux (parents: T4, T6)
|
||||
T8 reviewer final QA (parent: T7)
|
||||
```
|
||||
|
||||
**Key choices:**
|
||||
- Local ComfyUI via `comfyui` skill is preferred over external API for
|
||||
cost/control — but external APIs are fine if ComfyUI isn't installed
|
||||
- `editor` profile is ffmpeg-only, no Hermes skill required beyond
|
||||
`kanban-worker`
|
||||
- Storyboarder produces `storyboard.excalidraw` alongside the markdown
|
||||
|
||||
## Example 2 — Product / marketing teaser
|
||||
|
||||
**Brief:** A 30-second product teaser for a developer tool. Shows code +
|
||||
terminal + UI screen recordings, voiceover, CTA at end. Square 1:1.
|
||||
|
||||
**Team:**
|
||||
- `director`
|
||||
- `copywriter` — taglines, voiceover script, CTA (loads `humanizer`)
|
||||
- `concept-artist` — style frames (loads `claude-design` for UI mockups)
|
||||
- `renderer-motion-graphics` — animated UI sequences (Remotion CLI)
|
||||
- `renderer-ascii` — terminal-style demo scenes (loads `ascii-video`)
|
||||
- `voice-talent` — VO via ElevenLabs
|
||||
- `editor` — assembly + brand-color treatment
|
||||
- `audio-mixer` — VO + light music bed
|
||||
- `captioner` — burned subtitles for muted-autoplay platforms
|
||||
- `masterer` — produces 1:1 + 9:16 + 16:9 variants
|
||||
|
||||
**Task graph:**
|
||||
```
|
||||
T0 director decompose
|
||||
T1 copywriter copy.md + cta + vo script (parent: T0)
|
||||
T2 concept-artist visual-spec.md + style frames (parent: T1)
|
||||
T3a renderer-motion-graphics scene 1: UI sequence (parent: T2)
|
||||
T3b renderer-ascii scene 2: terminal demo (parent: T2)
|
||||
T3c renderer-motion-graphics scene 3: feature highlight (parent: T2)
|
||||
T3d renderer-motion-graphics scene 4: CTA card (parent: T2)
|
||||
T4 voice-talent narration (parent: T1)
|
||||
T5 audio-mixer VO + music bed (parent: T4)
|
||||
T6 editor cut + transitions (parents: T3*, T5)
|
||||
T7 captioner SRT + burned subtitles (parent: T6)
|
||||
T8 masterer 1:1, 9:16, 16:9 variants (parent: T7)
|
||||
```
|
||||
|
||||
**Key choices:**
|
||||
- Multiple specialized renderers (motion-graphics + ASCII) coexist
|
||||
- Captioner is included because muted autoplay is the norm on social
|
||||
- `claude-design` skill for UI mockups maps directly to the product video idiom
|
||||
|
||||
## Example 3 — Music video (synced to provided track)
|
||||
|
||||
**Brief:** A 3-minute music video for a provided lo-fi hip-hop track. Visuals
|
||||
should pulse with the beat. Generative + ASCII hybrid. Vertical 9:16.
|
||||
|
||||
**Team:**
|
||||
- `director`
|
||||
- `music-supervisor` — analyze track, emit `audio/beats.json` (loads `songsee`)
|
||||
- `storyboarder` — beat-aligned shot list (loads `excalidraw`)
|
||||
- `renderer-ascii` — ASCII scenes synced to bass kicks (loads `ascii-video`)
|
||||
- `renderer-p5js` — generative particle scenes synced to highs (loads `p5js`)
|
||||
- `editor` — beat-cut assembly using `beats.json`
|
||||
- `reviewer` — sync QA
|
||||
|
||||
**Task graph:**
|
||||
```
|
||||
T0 director decompose
|
||||
T1 music-supervisor analyze track → beats.json + spectrogram (parent: T0)
|
||||
T2 storyboarder shot list aligned to beats (parents: T1, T0)
|
||||
T3a renderer-ascii scene 1: bass-driven ASCII (parent: T2)
|
||||
T3b renderer-p5js scene 2: high-end particle field (parent: T2)
|
||||
... (more scenes)
|
||||
T4 editor cut to beats + mux track (parents: T3*, T1)
|
||||
T5 reviewer sync QA + final approval (parent: T4)
|
||||
```
|
||||
|
||||
**Key choices:**
|
||||
- `music-supervisor` runs FIRST — `beats.json` gates the renderers
|
||||
- `editor` uses `beats.json` directly to align cuts to bass kicks
|
||||
- No voice-talent — music is the audio
|
||||
- Two specialized renderers (`ascii-video` + `p5js`) for visual variety
|
||||
|
||||
## Example 4 — Math/algorithm explainer
|
||||
|
||||
**Brief:** A 2-minute explainer of an algorithm. 3Blue1Brown-style. Animated
|
||||
diagrams, equations, narration. Square 1:1.
|
||||
|
||||
**Team:**
|
||||
- `director`
|
||||
- `writer` — narration script (loads `humanizer`)
|
||||
- `cinematographer` — visual spec (loads `manim-video`)
|
||||
- `renderer-manim` — all animated scenes (loads `manim-video`)
|
||||
- `voice-talent` — narration via ElevenLabs
|
||||
- `editor` — assembly + audio mux
|
||||
- `captioner` — burned subtitles
|
||||
|
||||
**Task graph:**
|
||||
```
|
||||
T0 director decompose
|
||||
T1 writer script + narration (parent: T0)
|
||||
T2 cinematographer visual spec for all scenes (parent: T1)
|
||||
T3a-Tn renderer-manim scenes 1..N (parents: T2)
|
||||
T4 voice-talent narration audio (parent: T1)
|
||||
T5 editor cut + mux (parents: T3*, T4)
|
||||
T6 captioner SRT + burn (parent: T5)
|
||||
```
|
||||
|
||||
**Key choices:**
|
||||
- `manim-video` skill drives both the cinematographer (visual language) and
|
||||
the renderer (actual scene production)
|
||||
- The `manim-video` skill's reference docs (animation-design-thinking,
|
||||
scene-planning, equations) auto-load when needed via the renderer's pinned skill
|
||||
|
||||
## Example 5 — ASCII video, music-track-only
|
||||
|
||||
**Brief:** A 60-second pure-ASCII video reactive to an existing track. No
|
||||
voiceover, no other tools. Square 1:1.
|
||||
|
||||
**Team:**
|
||||
- `director`
|
||||
- `music-supervisor` — track analysis (loads `songsee`)
|
||||
- `renderer-ascii` — all visuals (loads `ascii-video`)
|
||||
- `editor` — assembly + audio mux
|
||||
|
||||
**Task graph:**
|
||||
```
|
||||
T0 director decompose
|
||||
T1 music-supervisor analyze track (parent: T0)
|
||||
T2a renderer-ascii scene 1 (parents: T1, T0)
|
||||
T2b renderer-ascii scene 2 (parents: T1, T0)
|
||||
T2c renderer-ascii scene 3 (parents: T1, T0)
|
||||
T3 editor stitch + mux audio (parents: T2*)
|
||||
```
|
||||
|
||||
**Key choices:**
|
||||
- Minimal team (4 profiles) for a focused single-tool project
|
||||
- No reviewer — short experimental piece, director approves directly
|
||||
- All scenes run through one `renderer-ascii` profile because the `ascii-video`
|
||||
skill covers everything
|
||||
|
||||
This example illustrates the rule: **don't over-decompose**. Three scenes
|
||||
through one renderer is fine. Don't spawn three renderer profiles.
|
||||
|
||||
## Example 6 — Real-time / installation art
|
||||
|
||||
**Brief:** A 2-minute audio-reactive visual for a gallery installation. Driven
|
||||
by an audio input feed. TouchDesigner-based. 16:9 4K.
|
||||
|
||||
**Team:**
|
||||
- `director`
|
||||
- `cinematographer` — visual language spec (loads `touchdesigner-mcp`)
|
||||
- `renderer-touchdesigner` — all visuals + record-to-disk
|
||||
(loads `touchdesigner-mcp`)
|
||||
- `audio-mixer` — final loudness pass on the captured audio (optional if
|
||||
pre-mixed source)
|
||||
- `editor` — assemble final clip from TouchDesigner recording
|
||||
- `reviewer` — visual QA
|
||||
|
||||
**Task graph:**
|
||||
```
|
||||
T0 director decompose
|
||||
T1 cinematographer TD operator graph spec (parent: T0)
|
||||
T2 renderer-touchdesigner build TD network + record output (parent: T1)
|
||||
T3 editor trim + audio mux (parent: T2)
|
||||
T4 reviewer final QA (parent: T3)
|
||||
```
|
||||
|
||||
**Key choices:**
|
||||
- `touchdesigner-mcp` controls a running TouchDesigner instance — the
|
||||
cinematographer designs the operator graph, renderer builds it
|
||||
- Output is a recording from the running TD network, not a render-to-frames
|
||||
process; editor mostly just trims
|
||||
|
||||
## Pattern recognition
|
||||
|
||||
When the user describes a video, look for these signals to map to an example:
|
||||
|
||||
- **Plot, characters, scripted dialogue** → Example 1 (narrative)
|
||||
- **Specific product, CTA, brand colors, voiceover** → Example 2 (marketing)
|
||||
- **Track file provided, "synced to music"** → Example 3 (music video)
|
||||
- **"Explain how X works", math/algorithm/concept walkthrough** → Example 4 (manim explainer)
|
||||
- **Terminal aesthetic, ASCII, retro pixel** → Example 5 (ASCII)
|
||||
- **"Audio-reactive", "real-time", "installation"** → Example 6 (TouchDesigner)
|
||||
- **Comic-style narrative** → use `renderer-comic` (`baoyu-comic` skill)
|
||||
- **Retro game / pixel-art aesthetic** → use `renderer-pixel` (`pixel-art` skill)
|
||||
- **3D scene, photoreal environment** → use `renderer-3d` (`blender-mcp`)
|
||||
- **Generative art, particle system, shader** → use `renderer-p5js` (`p5js`)
|
||||
- **AI-generated photoreal stills + animation** → use `renderer-comfyui`
|
||||
(`comfyui`) for both stills and image-to-video
|
||||
- **"video about how the system works", recursive demo** → composable from
|
||||
any of the above; the recursion is a rendering technique, not a style
|
||||
|
||||
The actual team should be derived from the specific brief — these examples are
|
||||
starting points, not endpoints.
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
# Intake — Discovery Question Banks
|
||||
|
||||
The discovery process is **adaptive**. Always start with three baseline
|
||||
questions to identify the broad style category, then drill into a per-style
|
||||
question bank. Ask 2-4 questions at a time, listen, then proceed. Make
|
||||
reasonable assumptions whenever the user implies an answer.
|
||||
|
||||
## Tier 0 — Baseline (always ask)
|
||||
|
||||
1. **What is the video?** — One-sentence pitch
|
||||
2. **How long?** — Approximate duration
|
||||
3. **Aspect ratio + target platform?** — 16:9 / 9:16 / 1:1 / 4:5; X, IG, YouTube, internal, etc.
|
||||
|
||||
From these answers, classify the style category and pick the relevant Tier 1
|
||||
follow-ups. **Do not** move on to Tier 1 until you have answers to all three.
|
||||
|
||||
## Style classification
|
||||
|
||||
Map the brief to one of these archetypes (or a hybrid):
|
||||
|
||||
| Archetype | Tells |
|
||||
|-----------|-------|
|
||||
| **Narrative film** | Plot, characters, scenes-with-events, dialogue, location |
|
||||
| **Product / marketing** | A specific product or feature being shown / sold; CTA at end |
|
||||
| **Music video** | A specific track exists; visuals sync to music |
|
||||
| **Explainer / educational** | A concept being taught; voiceover-driven |
|
||||
| **Tutorial / changelog** | Software demo, terminal-heavy, technical |
|
||||
| **ASCII / terminal art** | Retro terminal aesthetic explicit, character-grid |
|
||||
| **Abstract / loop** | Generative, no plot, often perfect-loop |
|
||||
| **Documentary / interview cut** | Real footage, transcription-driven |
|
||||
| **Real-time / installation** | Audio-reactive, gallery installation, VJ output |
|
||||
|
||||
If ambiguous, **ask** which category fits — don't guess. Hybrids are common
|
||||
(e.g., a product video with a narrative arc); decompose into the dominant
|
||||
mode + secondary modifiers.
|
||||
|
||||
**Recursive / meta** ("a video that shows its own production") is a
|
||||
*rendering technique*, not a separate style — compose it from any of the
|
||||
above by adding a two-pass render step where pass 2 uses pass 1's output as
|
||||
texture inside the final scene.
|
||||
|
||||
## Tier 1 — Per-style follow-ups
|
||||
|
||||
### Narrative film
|
||||
|
||||
- **Setting / world?** — When and where the story takes place
|
||||
- **Characters?** — How many, archetypes, who carries dialogue
|
||||
- **Beat list or full script?** — Has the user written the story or do we draft it
|
||||
- **Dialogue language?** — Spoken lines, on-screen subs only, silent
|
||||
- **Visual generation approach?** — Text-to-image (FAL/Midjourney/Imagen) →
|
||||
image-to-video (Runway/Kling), 3D animation (Blender), 2D animation,
|
||||
procedural, or hybrid
|
||||
- **Voice approach?** — TTS (which voice), recorded VO, no dialogue
|
||||
- **Music / score?** — Commissioned (via `songwriting-and-ai-music` Suno
|
||||
prompts, or local `heartmula`), licensed track provided, silent
|
||||
|
||||
### Product / marketing
|
||||
|
||||
- **Product?** — Name, what it does, key feature being shown
|
||||
- **Target audience?** — Who's watching, what they care about
|
||||
- **CTA?** — Visit URL, install, sign up, etc.
|
||||
- **Tone?** — Serious, playful, technical, premium, edgy
|
||||
- **Brand assets available?** — Logo files, color palette, fonts, existing footage
|
||||
- **Animation style?** — Motion graphics (Remotion / AE-style), screen recording,
|
||||
generative, illustrated
|
||||
- **Voiceover?** — Yes (which voice / language) or text-only
|
||||
- **Music?** — Track provided, license-free needed, custom-composed
|
||||
|
||||
### Music video
|
||||
|
||||
- **Track file?** — Path to the audio (essential — we'll analyze BPM + beats)
|
||||
- **Track length to use?** — Full song or a section
|
||||
- **Genre / energy?** — Tells what visual rhythm and density to use
|
||||
- **Lyric / narrative content?** — Are there lyrics to render on screen,
|
||||
or is it purely visual?
|
||||
- **Visual reference style?** — Existing music videos / artists for reference
|
||||
- **Performer footage?** — None, has clips, will provide
|
||||
- **Visual generation approach?** — Per-beat generative, edit-driven cuts of stock
|
||||
footage, illustrated, hybrid
|
||||
|
||||
### Explainer / educational
|
||||
|
||||
- **What concept is being taught?** — One-sentence concept, key takeaway
|
||||
- **Audience expertise?** — Beginner / intermediate / expert
|
||||
- **Diagram density?** — Heavy math / formulas / code / abstract concepts
|
||||
- **Voiceover?** — TTS / recorded / on-screen text only
|
||||
- **Tool preference?** — `manim-video` (math), `p5js` (generative),
|
||||
Remotion (UI motion graphics), `comfyui` (AI-generated visuals),
|
||||
`ascii-video` (technical/retro), hybrid
|
||||
- **Pacing?** — Fast and dense (3Blue1Brown) or slow and contemplative
|
||||
|
||||
### Tutorial / changelog / software demo
|
||||
|
||||
- **Software being demonstrated?** — Name, what it does
|
||||
- **Demo script?** — Sequence of commands / screens to show
|
||||
- **Terminal-only or with GUI?**
|
||||
- **Voiceover for narration?**
|
||||
- **Diagram support needed?** — Often these benefit from a diagram skill
|
||||
alongside the screen-capture/render step (`excalidraw`,
|
||||
`architecture-diagram`, `concept-diagrams`)
|
||||
|
||||
### ASCII / terminal art
|
||||
|
||||
- **Source material?** — Generative / driven by audio / converting existing
|
||||
video / static image starting point
|
||||
- **Color palette?** — Brand-driven (gold/black/blue), Matrix green, full
|
||||
rainbow, monochrome
|
||||
- **Audio reactivity?** — None / loose mood / tight beat sync / FFT-driven
|
||||
- **Character set?** — ASCII only / Unicode block-drawing / mystic glyphs
|
||||
- **Loop or narrative?** — Perfect loop or one-shot
|
||||
|
||||
### Abstract / loop
|
||||
|
||||
- **Mood / emotion?** — One word that captures the feel
|
||||
- **Motion type?** — Zoom-into-itself, particle drift, wave, geometric, organic
|
||||
- **Loop required?** — Perfect loop (Droste-style) or just satisfying ending
|
||||
- **Audio?** — Silent, ambient pad, beat-synced
|
||||
|
||||
### Documentary / interview cut
|
||||
|
||||
- **Source footage?** — Provided clips, length per clip
|
||||
- **Transcript / subtitles?** — Provided or to be generated
|
||||
- **Story structure?** — Chronological / thematic / arc
|
||||
- **B-roll approach?** — Generated, stock library, none
|
||||
|
||||
### Real-time / installation
|
||||
|
||||
- **Output environment?** — Gallery wall, projector, screen, web embed
|
||||
- **Audio source?** — Live audio input, pre-recorded track, both
|
||||
- **Reactivity tightness?** — Mood-level (loose) vs. tight beat-sync vs. live
|
||||
parameter control
|
||||
- **Tool preference?** — `touchdesigner-mcp` for full TD operator graphs;
|
||||
`p5js` for web-canvas; `comfyui` for generative-AI fed by audio features
|
||||
|
||||
## Tier 2 — Always ask near the end
|
||||
|
||||
- **Brand assets path?** — Where logo / color palette / fonts / music library lives
|
||||
- **Output format requirements?** — Codec preference, target file size, accepted
|
||||
alternates (vertical cut, GIF, audio-only)
|
||||
- **Deadline?** — Affects task `max_runtime_seconds` and acceptable scope
|
||||
- **Quality bar?** — Rough draft for review / polished final / archival
|
||||
- **Existing footage / assets to reuse?** — Anything that should appear, not just inform
|
||||
|
||||
## Reasonable assumption defaults
|
||||
|
||||
When the user under-specifies, fill in these defaults rather than asking:
|
||||
|
||||
| Question | Default |
|
||||
|----------|---------|
|
||||
| Frame rate | 30 fps for X / IG; 60 fps for tutorials/explainers; 24 fps for narrative film |
|
||||
| Resolution | 1080×1080 for square, 1920×1080 for 16:9, 1080×1920 for 9:16 |
|
||||
| Codec | H.264 / yuv420p, CRF 18 |
|
||||
| Audio codec | AAC 192 kbps |
|
||||
| Voice | Provider's mid-range neutral voice unless brand calls for distinctive timbre |
|
||||
| Music | Silent (require user to specify if music is wanted) |
|
||||
| Captions | On for explainer/tutorial; off for narrative/abstract unless requested |
|
||||
| Quality bar | Polished final unless user says draft |
|
||||
|
||||
State the assumption explicitly: *"Assuming 30fps and AAC audio unless you say otherwise — proceed?"*
|
||||
|
||||
## Anti-patterns
|
||||
|
||||
- **Asking 10 questions at once.** Maximum 4 per turn.
|
||||
- **Asking for things the brief already implies.** If the user said "music video for my track," do not ask "is there a track?"
|
||||
- **Failing to classify before drilling in.** Tier-1 questions depend on classification; mixing them up wastes turns.
|
||||
- **Treating "make a video" as enough to proceed.** Always confirm the three baseline questions.
|
||||
|
|
@ -0,0 +1,276 @@
|
|||
# Kanban Setup — Project Bootstrap & Profile Configuration
|
||||
|
||||
Once the brief is locked and the team is designed, the next step is producing
|
||||
the actual `setup.sh` that creates the project workspace, configures Hermes
|
||||
profiles, and fires the initial kanban task.
|
||||
|
||||
This file documents the patterns. The companion script
|
||||
`scripts/bootstrap_pipeline.py` automates most of it from a structured input
|
||||
JSON.
|
||||
|
||||
> **Credit:** the single-project-workspace layout, profile-config patching
|
||||
> approach, SOUL.md-per-profile convention, and `--workspace dir:<path>` rule
|
||||
> are adapted from alt-glitch's original multi-agent video pipeline:
|
||||
> [NousResearch/kanban-video-pipeline](https://github.com/NousResearch/kanban-video-pipeline).
|
||||
> This skill generalizes those patterns across video styles and replaces the
|
||||
> string-replacement config patcher with a PyYAML-based one.
|
||||
|
||||
## Project workspace structure
|
||||
|
||||
Every video project gets one workspace under `~/projects/video-pipeline/<slug>/`:
|
||||
|
||||
```
|
||||
~/projects/video-pipeline/<slug>/
|
||||
├── brief.md ← the contract; all tasks reference
|
||||
├── TEAM.md ← team composition + task graph (director reads this)
|
||||
├── taste/
|
||||
│ ├── brand-guide.md ← color, typography, motion rules
|
||||
│ ├── emotional-dna.md ← what the piece should FEEL like
|
||||
│ └── style-frames/ ← optional: visual references
|
||||
├── audio/
|
||||
│ ├── track.mp3 ← provided music (if any)
|
||||
│ ├── voiceover/ ← per-line TTS clips
|
||||
│ └── sfx/ ← sound effects
|
||||
├── assets/
|
||||
│ ├── logos/
|
||||
│ ├── fonts/
|
||||
│ └── existing-footage/ ← reusable provided clips
|
||||
├── scenes/
|
||||
│ ├── scene-01/
|
||||
│ │ ├── VISUAL_SPEC.md ← cinematographer's per-scene spec
|
||||
│ │ ├── render.py ← renderer's code (or sketch.html, etc.)
|
||||
│ │ ├── checkpoints/ ← preview frames for QA
|
||||
│ │ └── clip.mp4 ← the deliverable for this scene
|
||||
│ ├── scene-02/...
|
||||
│ └── ...
|
||||
├── checkpoints/ ← global review frames
|
||||
├── tools/ ← optional project-local helpers
|
||||
└── output/
|
||||
├── final.mp4 ← stitched + audio
|
||||
├── final-noaudio.mp4
|
||||
├── final-9x16.mp4 ← optional: vertical alternate
|
||||
└── captions.srt ← optional: subtitle file
|
||||
```
|
||||
|
||||
**The slug** is derived from the brief title: lowercase, hyphen-separated.
|
||||
Example: `q3-product-teaser`, `ascii-mood-loop`, `interview-cut-2026-q1`.
|
||||
|
||||
## The setup.sh script
|
||||
|
||||
The setup script does six things in order:
|
||||
|
||||
1. **Create workspace tree** — all directories above
|
||||
2. **Create profiles** — `hermes profile create <name> --clone`
|
||||
3. **Configure profiles** — patch each profile's
|
||||
`~/.hermes/profiles/<name>/config.yaml` to set toolsets and always_load skills
|
||||
(`terminal.cwd` is deliberately left untouched; see "Profile config patching")
|
||||
4. **Write SOUL.md per profile** — the personality + role definition
|
||||
5. **Copy any provided assets + write `brief.md`, `TEAM.md`, and `taste/`**
|
||||
6. **Fire the initial kanban task** — `hermes kanban create` assigned to the director
|
||||
|
||||
See `assets/setup.sh.tmpl` for the skeleton.
|
||||
|
||||
### Profile creation pattern
|
||||
|
||||
```bash
|
||||
hermes profile create director --clone 2>/dev/null || true
|
||||
```
|
||||
|
||||
The `--clone` flag clones from the active profile (preserving model, base
|
||||
config). The `|| true` makes the script idempotent — re-running won't error if
|
||||
the profile already exists.
|
||||
|
||||
### Profile config patching
|
||||
|
||||
Each profile has a YAML config at `~/.hermes/profiles/<name>/config.yaml`. The
|
||||
setup script edits exactly two keys:
|
||||
|
||||
1. `toolsets:` — replace the default with the role's required toolsets
|
||||
2. `skills.always_load:` — list the role's must-load skills (may be empty)
|
||||
|
||||
**Do NOT** modify `approvals.mode` (controls user-confirmation of tool calls
|
||||
— a security setting that must stay as the user configured it). **Do NOT**
|
||||
modify `terminal.cwd` — the kanban dispatcher overrides cwd per-task via
|
||||
`--workspace dir:<path>`, so the profile's cwd is irrelevant to the kanban
|
||||
work and changing it could break the user's interactive use of the profile.
|
||||
|
||||
Use **PyYAML**, not string replacement, so the patch is robust against
|
||||
default-config schema drift:
|
||||
|
||||
```bash
|
||||
configure_profile() {
|
||||
local profile="$1"
|
||||
local toolsets_json="$2" # JSON array, e.g. '["kanban","terminal","file"]'
|
||||
local skills_json="$3" # JSON array, e.g. '["kanban-worker","ascii-video"]'
|
||||
python3 - "$profile" "$toolsets_json" "$skills_json" <<'PY'
|
||||
import json, os, sys, yaml
|
||||
profile, ts_json, sk_json = sys.argv[1:4]
|
||||
p = os.path.expanduser(f"~/.hermes/profiles/{profile}/config.yaml")
|
||||
with open(p) as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
cfg["toolsets"] = json.loads(ts_json)
|
||||
cfg.setdefault("skills", {})["always_load"] = json.loads(sk_json)
|
||||
with open(p, "w") as f:
|
||||
yaml.safe_dump(cfg, f, sort_keys=False)
|
||||
PY
|
||||
}
|
||||
```
|
||||
|
||||
PyYAML must be installed in the user's Python (it ships with most Hermes
|
||||
installs). If absent: `pip install pyyaml`.
|
||||
|
||||
The setup script should also **validate** the patch by re-reading the file
|
||||
and comparing — see `assets/setup.sh.tmpl` for the validation pattern.
|
||||
|
||||
### SOUL.md per profile
|
||||
|
||||
Each profile gets a `SOUL.md` at `~/.hermes/profiles/<name>/SOUL.md` that
|
||||
defines its role, voice, and rules. See `assets/soul.md.tmpl` for the
|
||||
template. Customize per role and per project.
|
||||
|
||||
The director's SOUL.md should be the most opinionated — its voice flavors
|
||||
the entire production. **Critical content for the director's SOUL.md:**
|
||||
|
||||
- **Anti-temptation rules:** "Do not execute the work yourself. For every
|
||||
concrete task, create a kanban task and assign it. Decompose, route, comment,
|
||||
approve — that's the whole job." (The `kanban-orchestrator` skill provides
|
||||
the deeper playbook; load it.)
|
||||
- **Decomposition steps:** Read `brief.md`, `TEAM.md`, `taste/`. Use the team
|
||||
graph in `TEAM.md` to fan out tasks.
|
||||
- **The workspace_path rule** (see below).
|
||||
|
||||
Other profiles' SOUL.md is briefer; mostly mechanical: who you are, what you
|
||||
read, what you produce, what skills/tools to use, where to write outputs.
|
||||
Most non-director profiles should `always_load: kanban-worker` for the
|
||||
deeper-than-baseline kanban guidance.
|
||||
|
||||
### Initial kanban task
|
||||
|
||||
The final action of setup.sh is firing the kanban:
|
||||
|
||||
```bash
|
||||
hermes kanban create "Direct production of <video title>" \
|
||||
--assignee director \
|
||||
--workspace dir:"$HOME/projects/video-pipeline/${PROJECT_SLUG}" \
|
||||
--tenant ${PROJECT_SLUG} \
|
||||
--priority 2 \
|
||||
--max-runtime 4h \
|
||||
--body "$(cat <<EOF
|
||||
Read brief.md, TEAM.md, and taste/.
|
||||
Decompose into the team graph defined in TEAM.md.
|
||||
All child tasks MUST use:
|
||||
workspace_kind="dir"
|
||||
workspace_path="$HOME/projects/video-pipeline/${PROJECT_SLUG}"
|
||||
tenant="${PROJECT_SLUG}"
|
||||
EOF
|
||||
)"
|
||||
```
|
||||
|
||||
The `--workspace dir:<path>` flag is **critical** — it tells the kanban that
|
||||
all child tasks share this workspace. Skipping it or using `worktree` instead will
|
||||
isolate profiles and break artifact sharing.
|
||||
|
||||
## The TEAM.md file
|
||||
|
||||
Alongside `brief.md`, write a `TEAM.md` that the director reads. It documents
|
||||
the team composition + task graph the orchestrator should follow. This
|
||||
removes ambiguity and prevents the director from inventing extra steps.
|
||||
|
||||
Example structure (for an ASCII video with a music supervisor and editor):
|
||||
|
||||
```markdown
|
||||
# Team & Task Graph — <video title>
|
||||
|
||||
## Team
|
||||
|
||||
- `director` (this profile) — vision, decomposition, approval
|
||||
- `cinematographer` — visual spec, quality review (loads `ascii-video`)
|
||||
- `renderer-ascii` — ASCII scenes (loads `ascii-video`)
|
||||
- `music-supervisor` — track analysis (loads `songsee`)
|
||||
- `voice-talent` — narration (uses ElevenLabs API)
|
||||
- `audio-mixer` — final mix (ffmpeg)
|
||||
- `editor` — assembly (ffmpeg)
|
||||
- `reviewer` — final QA gate
|
||||
|
||||
## Task Graph
|
||||
|
||||
T0: this task — decompose
|
||||
│
|
||||
├── T1: cinematographer "Design visual language" (parent: T0)
|
||||
│ │
|
||||
│ ├── T2a: renderer-ascii "Scene 1 — title card" (parent: T1)
|
||||
│ ├── T2b: renderer-ascii "Scene 2 — main beat" (parent: T1)
|
||||
│ ├── T2c: renderer-ascii "Scene 3 — outro" (parent: T1)
|
||||
│
|
||||
├── T3: music-supervisor "Analyze track + emit beats.json" (parent: T0)
|
||||
│
|
||||
├── T4: voice-talent "Generate narration" (parent: T0)
|
||||
│
|
||||
├── T5: audio-mixer "Mix VO + bg music" (parents: T3, T4)
|
||||
│
|
||||
├── T6: editor "Assemble cut + mux audio" (parents: T2*, T5)
|
||||
│
|
||||
└── T7: reviewer "Final QA" (parent: T6)
|
||||
```
|
||||
|
||||
The director turns this into actual `kanban_create` calls.
|
||||
|
||||
## API-key prerequisites check
|
||||
|
||||
Before firing the kanban, verify required keys are available. Check both
|
||||
`~/.hermes/.env` and macOS Keychain (if on macOS):
|
||||
|
||||
```bash
|
||||
check_key() {
|
||||
local var="$1"
|
||||
local kc_account="$2"
|
||||
local kc_service="$3"
|
||||
if grep -q "^${var}=" ~/.hermes/.env 2>/dev/null && \
|
||||
[ -n "$(grep "^${var}=" ~/.hermes/.env | cut -d= -f2-)" ]; then
|
||||
return 0
|
||||
fi
|
||||
if command -v security >/dev/null 2>&1 && \
|
||||
security find-generic-password -a "${kc_account}" -s "${kc_service}" -w >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
echo "ERROR: ${var} not set in ~/.hermes/.env or Keychain (${kc_account}/${kc_service})"
|
||||
return 1
|
||||
}
|
||||
|
||||
check_key ELEVENLABS_API_KEY hermes ELEVENLABS_API_KEY || exit 1
|
||||
check_key OPENROUTER_API_KEY hermes OPENROUTER_API_KEY || exit 1
|
||||
# ...
|
||||
```
|
||||
|
||||
If a key is missing, the script aborts with a clear message rather than
|
||||
firing a kanban that will hit credential errors mid-execution.
|
||||
|
||||
## Critical rules
|
||||
|
||||
1. **`workspace_kind="dir"` + `workspace_path="<absolute>"` on every kanban_create.** Otherwise profiles can't share artifacts.
|
||||
|
||||
2. **Tenant every task.** `--tenant <project-slug>` keeps the dashboard scoped
|
||||
and prevents cross-pollination with other ongoing kanbans.
|
||||
|
||||
3. **Idempotency keys.** For tasks that should not duplicate on re-run (e.g.,
|
||||
setup creating profiles), use the `idempotency_key` argument or check
|
||||
existence first.
|
||||
|
||||
4. **`max_runtime_seconds` per task.** Renderers that get stuck eat compute.
|
||||
Standard defaults:
|
||||
- Renderer task: 1800s (30min)
|
||||
- Editor task: 600s (10min)
|
||||
- Voice-talent task: 300s (5min)
|
||||
- Image-generator task: 600s (10min)
|
||||
- Image-to-video-generator task: 900s (15min)
|
||||
|
||||
5. **Heartbeats for long renders.** Tasks expected to run >5min should emit
|
||||
`kanban_heartbeat` periodically with progress. Renderers should report
|
||||
frame counts; the editor should report assembly progress.
|
||||
|
||||
6. **The `audio/` and `taste/` dirs are populated BEFORE firing the kanban.**
|
||||
Don't ask the director's pipeline to source these — copy at setup time.
|
||||
|
||||
7. **`brief.md` is read-only after setup.** If the brief changes during
|
||||
execution, that's a significant pivot — re-fire the kanban rather than edit
|
||||
live.
|
||||
|
|
@ -0,0 +1,180 @@
|
|||
# Monitoring — Watch the Pipeline + Intervene
|
||||
|
||||
After `setup.sh` fires the kanban, the work runs autonomously. The role of
|
||||
this skill in the execution phase is to help the user (and the AI overseeing
|
||||
the session) detect problems early and intervene effectively.
|
||||
|
||||
## Live monitoring commands
|
||||
|
||||
```bash
|
||||
# Live event stream — task spawns, status changes, heartbeats, completions
|
||||
hermes kanban watch --tenant <project-slug>
|
||||
|
||||
# Snapshot of the board
|
||||
hermes kanban list --tenant <project-slug>
|
||||
hermes kanban list --tenant <project-slug> --json # machine-readable
|
||||
|
||||
# Per-status counts + oldest-ready age
|
||||
hermes kanban stats --tenant <project-slug>
|
||||
|
||||
# Visual dashboard (browser)
|
||||
hermes dashboard
|
||||
|
||||
# Inspect a specific task (includes comments + events)
|
||||
hermes kanban show <task-id>
|
||||
|
||||
# Follow a single task's event stream
|
||||
hermes kanban tail <task-id>
|
||||
```
|
||||
|
||||
Verify available subcommands with `hermes kanban --help` — the kanban CLI
|
||||
ships with `init / create / list / show / assign / link / unlink / claim /
|
||||
comment / complete / block / unblock / archive / tail / dispatch / watch /
|
||||
stats / heartbeat / log / runs / context / gc`.
|
||||
|
||||
The companion `scripts/monitor.py` polls the kanban via the CLI and surfaces
|
||||
common issues (stuck tasks, missing heartbeats, repeated retries, dependency
|
||||
deadlocks).
|
||||
|
||||
## What to watch for
|
||||
|
||||
### Healthy pipeline indicators
|
||||
|
||||
- Tasks transition `READY → RUNNING → DONE` in roughly the expected order
|
||||
- Renderers emit periodic `kanban_heartbeat` events with progress (e.g. "frame
|
||||
240/720")
|
||||
- Each task's runtime is well under its `max_runtime_seconds` cap
|
||||
- No task accumulates more than 1 retry
|
||||
- Dependency arrows resolve (children unblock as parents complete)
|
||||
|
||||
### Warning signs
|
||||
|
||||
| Symptom | Likely cause | Action |
|
||||
|---------|--------------|--------|
|
||||
| Task RUNNING but no heartbeat in 2+ min | Worker stuck, infinite loop, blocked on input | `hermes kanban show <id>` — read the worker's last events. The dispatcher SIGTERMs tasks that exceed their `max-runtime`; if you need to stop one earlier, `hermes kanban block <id>` then `hermes kanban archive <id>`, and create a re-run task. |
|
||||
| Same task retried 2+ times | Reproducible failure (missing key, bad spec, broken tool) | `hermes kanban show <id>` to read failure events. Fix root cause before re-running. |
|
||||
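| RUNNING longer than `max_runtime_seconds` | Task is slow but progressing OR genuinely stuck | Check heartbeats with `hermes kanban tail <id>`. If progressing, the dispatcher will SIGTERM it eventually anyway — raise `--max-runtime` on a re-created task. If stuck, stop it with `hermes kanban block <id>` then `hermes kanban archive <id>`, and fix the root cause before re-creating. |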
|
||||
| Child task READY but parents still RUNNING for >2× expected | Cascade slow, dependency miswired | Check the dependency graph. Inspect the parent: sometimes it completed but its handoff fields (summary, metadata) were empty so the child has nothing to consume. |
|
||||
| New tasks not appearing | Director is hung in decomposition | Inspect director task with `kanban show`. Often a malformed `kanban_create` call. |
|
||||
| Specialist tasks completing instantly | Decomposition created tasks without bodies | Director didn't pass enough context. Re-create with explicit body content. |
|
||||
| Tasks created but never picked up | Profile not running, or tenant mismatch, or dispatcher not running | Check `hermes profile list` (profile exists?), `hermes status` (gateway/dispatcher up?), and verify tenant. |
|
||||
| Specific renderer task fails → review note → renderer redoes → fails again | Brief is asking for the impossible | Pivot the brief, not the renderer. |
|
||||
|
||||
## Intervention recipes
|
||||
|
||||
### Rejecting bad output
|
||||
|
||||
When a renderer ships a clip that doesn't pass review:
|
||||
|
||||
```bash
|
||||
# 1. Comment on the renderer's task with specific feedback
|
||||
hermes kanban comment <renderer-task-id> "Scene 3 looks too sparse \
|
||||
— increase visual density. Tighten color palette to brand spec."
|
||||
|
||||
# 2. Create a re-render task with the original as parent
|
||||
hermes kanban create "Scene 3 — re-render with feedback" \
|
||||
--assignee renderer-ascii \
|
||||
--parent <renderer-task-id> \
|
||||
--workspace dir:"$HOME/projects/video-pipeline/<slug>" \
|
||||
--tenant <slug> \
|
||||
--skill ascii-video \
|
||||
--max-runtime 30m
|
||||
```
|
||||
|
||||
### Adding a new dependency mid-flight
|
||||
|
||||
When the editor needs an asset that wasn't originally planned (e.g., a captions
|
||||
file):
|
||||
|
||||
```bash
|
||||
# 1. Create the new task and capture its id
|
||||
NEW_TASK_ID=$(hermes kanban create "Generate SRT captions from voiceover" \
|
||||
--assignee captioner \
|
||||
--workspace dir:"$HOME/projects/video-pipeline/<slug>" \
|
||||
--tenant <slug> \
|
||||
--json | python3 -c "import json,sys;print(json.load(sys.stdin)['id'])")
|
||||
|
||||
# 2. Wire it as a parent of the editor's task with `kanban link`
|
||||
hermes kanban link "$NEW_TASK_ID" <editor-task-id>
|
||||
```
|
||||
|
||||
`kanban link` takes `parent_id child_id` (parent first). Use `kanban unlink`
|
||||
to remove a dependency.
|
||||
|
||||
### Stopping a worker that's stuck
|
||||
|
||||
The kanban dispatcher will SIGTERM (then SIGKILL) any task that exceeds its
|
||||
`--max-runtime` automatically. To stop one sooner:
|
||||
|
||||
```bash
|
||||
# Mark blocked so the dispatcher leaves it alone, then archive
|
||||
hermes kanban block <task-id>
|
||||
hermes kanban archive <task-id>
|
||||
|
||||
# Diagnose what happened
|
||||
hermes kanban show <task-id> # task body, comments, recent events
|
||||
hermes kanban tail <task-id> # follow the live event stream
|
||||
hermes kanban log <task-id> # worker process log
|
||||
```
|
||||
|
||||
After stopping, decide: fix root cause + re-create the task, or skip and
|
||||
adjust dependent tasks.
|
||||
|
||||
### Pivoting the brief
|
||||
|
||||
If during execution the user wants something fundamentally different:
|
||||
|
||||
1. Stop the active director task and all RUNNING children (`hermes kanban block <task-id>`, then `hermes kanban archive <task-id>`)
|
||||
2. Edit `brief.md` and `TEAM.md`
|
||||
3. Re-fire the initial `hermes kanban create` for the director
|
||||
|
||||
Don't try to "edit while running" — the kanban's audit trail makes a clean
|
||||
pivot more legible than mid-stream changes.
|
||||
|
||||
## Periodic check-in script
|
||||
|
||||
A simple polling pattern for hands-off monitoring:
|
||||
|
||||
```bash
|
||||
while true; do
|
||||
clear
|
||||
hermes kanban list --tenant <slug>
|
||||
echo "---"
|
||||
hermes kanban stats --tenant <slug>
|
||||
sleep 30
|
||||
done
|
||||
```
|
||||
|
||||
For a live event feed, run `hermes kanban watch --tenant <slug>` in a
|
||||
separate terminal — it streams task lifecycle events as they happen.
|
||||
|
||||
For automated intervention (auto-restart stuck tasks, auto-create re-render on
|
||||
review failure), see the `scripts/monitor.py` patterns.
|
||||
|
||||
## When to call it done
|
||||
|
||||
The pipeline is finished when:
|
||||
|
||||
1. All renderer tasks are DONE and have passed review
|
||||
2. The editor's `output/final.mp4` exists and `ffprobe` confirms expected
|
||||
duration + streams
|
||||
3. The reviewer (if present) has approved
|
||||
4. Optional masterer variants exist
|
||||
|
||||
At this point, present the final.mp4 path to the user along with any review
|
||||
notes. Do NOT delete the workspace — the user may want to iterate on a single
|
||||
scene without re-running the whole pipeline.
|
||||
|
||||
## Common gotchas
|
||||
|
||||
- **Tenant mismatches.** A task created with the wrong tenant won't appear in
|
||||
monitoring. Always pass `--tenant <slug>` consistently.
|
||||
- **Profile process not running.** Tasks queue indefinitely in READY if no
|
||||
worker for that profile is online. Check `hermes profile list` and start
|
||||
any missing profiles.
|
||||
- **Workspace permissions.** All profiles need read+write to the workspace
|
||||
directory. `chmod -R u+rw <workspace>` if any worker reports permission
|
||||
errors.
|
||||
- **Audio/visual sync.** The editor's clip stitching must match the
|
||||
renderer's actual output durations. Don't hardcode scene durations in
|
||||
the editor — read from the renderer's handoff metadata.
|
||||
|
|
@ -0,0 +1,298 @@
|
|||
# Role Archetypes
|
||||
|
||||
The library of role archetypes for video production. **Compose a team from this
|
||||
list, don't clone a fixed roster.** Most videos need 4-7 profiles. The director
|
||||
is always present; everything else is conditional on the brief.
|
||||
|
||||
Each role's profile name is by convention `kebab-case` (e.g. `music-supervisor`,
|
||||
`image-generator`). Multiple instances of the same role get descriptive suffixes
|
||||
when they need different focus (e.g., `renderer-ascii`, `renderer-3d`).
|
||||
|
||||
For toolset + skill mapping per role, see [tool-matrix.md](tool-matrix.md).
|
||||
|
||||
## Always present
|
||||
|
||||
### director
|
||||
|
||||
The vision-holder. Reads the brief and brand guide, decomposes into a task
|
||||
graph, comments to steer creative direction, approves the final cut.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-orchestrator`. The kanban plugin auto-injects baseline
|
||||
orchestration guidance for free; `kanban-orchestrator` is the deeper
|
||||
decomposition playbook. Add `creative-ideation` if the brief is wide-open
|
||||
and needs framing help.
|
||||
- **Personality:** Tied to the brand voice — see `assets/soul.md.tmpl`
|
||||
|
||||
The director has the same toolsets as the execution roles (kanban, terminal, file), but its `SOUL.md` rules
|
||||
**forbid** execution. The "decompose, don't execute" discipline is enforced
|
||||
by personality + the kanban-orchestrator skill, not by missing tools.
|
||||
|
||||
## Pre-production roles
|
||||
|
||||
Pick based on what the brief needs.
|
||||
|
||||
### writer / screenwriter
|
||||
|
||||
Writes scripts, dialogue, voiceover copy, narration. Use for any video with
|
||||
spoken or written words beyond a tagline.
|
||||
|
||||
- **Toolsets:** kanban, file
|
||||
- **Skills:** `kanban-worker`, `humanizer` (post-process to strip AI-tells)
|
||||
- **Outputs:** `script.md`, `narration.md`, `dialogue/scene-NN.md`
|
||||
|
||||
### copywriter
|
||||
|
||||
Like `writer` but specifically for marketing copy: taglines, CTAs, voiceover
|
||||
scripts for product videos.
|
||||
|
||||
- **Toolsets:** kanban, file
|
||||
- **Skills:** `kanban-worker`, `humanizer`
|
||||
- **Outputs:** `copy.md`
|
||||
|
||||
### concept-artist / visual-designer
|
||||
|
||||
Develops the visual identity: mood board, style frames, color palette
|
||||
rationale, typography choices. Produces a `visual-spec.md` that all generators
|
||||
follow. Often produces still reference frames using image-generation APIs or
|
||||
local skills.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker` plus any project-specific design skill —
|
||||
`claude-design` (UI/web), `sketch` (quick mockup variants),
|
||||
`popular-web-designs` (matching known web aesthetic), `pixel-art` (retro),
|
||||
`ascii-art` (terminal/retro), `excalidraw` (hand-drawn frames),
|
||||
`design-md` (text-based design docs)
|
||||
- **Outputs:** `visual-spec.md`, `taste/style-frames/*.png`
|
||||
|
||||
### storyboarder
|
||||
|
||||
Maps the brief to a beat-by-beat shot list with timing. Critical for narrative
|
||||
film and music video. Often pairs with a diagramming tool.
|
||||
|
||||
- **Toolsets:** kanban, file
|
||||
- **Skills:** `kanban-worker` plus a diagram skill — `excalidraw` (sketch),
|
||||
`architecture-diagram` (technical/system), `concept-diagrams` (educational/
|
||||
scientific)
|
||||
- **Outputs:** `storyboard.md` with one row per scene/shot, optional
|
||||
storyboard sketches
|
||||
|
||||
### cinematographer / dp
|
||||
|
||||
Designs the visual language: framing, color, motion, transitions. Reviews
|
||||
generator output for visual consistency. Hands off per-scene `VISUAL_SPEC.md`.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker` plus the visual skill that matches the project
|
||||
(e.g., `ascii-video` for ASCII work, `manim-video` for explainers,
|
||||
`touchdesigner-mcp` for real-time visuals, etc.)
|
||||
- **Outputs:** `scenes/scene-NN/VISUAL_SPEC.md`, review comments on renderer
|
||||
tasks
|
||||
- **Reviews via:** any media-analysis approach (Gemini multimodal, manual
|
||||
inspection of clip thumbnails, ffprobe summaries)
|
||||
|
||||
## Production roles
|
||||
|
||||
### renderer (generic)
|
||||
|
||||
A worker that produces visual content for one or more scenes. Loaded with
|
||||
whichever creative skill fits the scene's style. Multiple renderers can run in
|
||||
parallel, each pinned to a different skill via `always_load` in their profile
|
||||
or `--skill` on the task.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** one creative skill (see specialized variants below)
|
||||
- **Outputs:** `scenes/scene-NN/clip.mp4`
|
||||
|
||||
### Specialized renderer variants
|
||||
|
||||
When scenes need very different tools, create specialized renderer profiles
|
||||
instead of overloading one. Each loads a different creative skill.
|
||||
|
||||
| Variant | Skill | Best for |
|
||||
|---------|-------|----------|
|
||||
| `renderer-ascii` | `ascii-video` | Terminal aesthetic, retro pixel, audio-reactive grid, video-to-ASCII conversion |
|
||||
| `renderer-manim` | `manim-video` | Math, algorithms, 3Blue1Brown-style explainers, equation derivations |
|
||||
| `renderer-p5js` | `p5js` | Generative art, particles, shaders, organic motion, web-canvas content |
|
||||
| `renderer-comfyui` | `comfyui` | AI-generated stills + video using local ComfyUI workflows (img-to-img, img-to-video, etc.) |
|
||||
| `renderer-touchdesigner` | `touchdesigner-mcp` | Real-time, audio-reactive, installation art, VJ-style content |
|
||||
| `renderer-3d` | `blender-mcp` *(optional)* | 3D modeling, animation, photoreal environments, character animation |
|
||||
| `renderer-pixel` | `pixel-art` | Retro game aesthetic with era-correct palettes |
|
||||
| `renderer-comic` | `baoyu-comic` | Knowledge-comic style narrative scenes |
|
||||
| `renderer-meme` | `meme-generation` *(optional)* | Meme-style stills for satirical/social content |
|
||||
| `renderer-procedural` | (none — Python with PIL + ffmpeg directly) | Custom procedural content where no skill fits |
|
||||
| `renderer-video` | (external image-to-video API: Runway / Kling / Luma) | Animating still images in narrative film |
|
||||
| `renderer-motion-graphics` | (external — Remotion CLI) | Motion graphics, kinetic typography, UI animations |
|
||||
|
||||
For external-API renderers, the profile holds the API client logic; only
|
||||
`kanban-worker` is loaded, plus the terminal toolset and the API key.
|
||||
|
||||
### image-generator
|
||||
|
||||
Specifically for text-to-image generation. Often produces stills that go to
|
||||
`renderer-video` or `image-to-video-generator` for animation.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`, optionally `comfyui` (drives a local
|
||||
ComfyUI install for image generation)
|
||||
- **External APIs (alternative to local ComfyUI):** FAL, Replicate, OpenAI
|
||||
Images, Midjourney
|
||||
- **Outputs:** `scenes/scene-NN/stills/*.png`
|
||||
|
||||
### image-to-video-generator
|
||||
|
||||
Takes still images and animates them via Runway/Kling/Luma APIs, or via
|
||||
ComfyUI's image-to-video workflows locally. Almost always follows
|
||||
`image-generator` in narrative film pipelines.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`, optionally `comfyui` (for local image-to-video
|
||||
workflows like AnimateDiff or WAN)
|
||||
- **External APIs:** Runway, Kling, Luma, Pika
|
||||
- **Outputs:** `scenes/scene-NN/clip.mp4`
|
||||
|
||||
### music-supervisor
|
||||
|
||||
Sources, analyzes, and prepares the music track. For music videos, also
|
||||
produces a beat/BPM map and key-moment timestamps. Uses `songsee` for
|
||||
spectrograms when the editor or renderer needs a visual reference of the
|
||||
audio's energy.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`, `songsee` (audio visualization), plus one of:
|
||||
- `songwriting-and-ai-music` — when commissioning lyrics + Suno prompts
|
||||
- `heartmula` — when generating music with the open-source local model
|
||||
- `spotify` — when sourcing existing tracks
|
||||
- **Outputs:** `audio/track.mp3`, `audio/beats.json`, optional
|
||||
`audio/track-spectrogram.png`
|
||||
|
||||
### voice-talent / narrator
|
||||
|
||||
Generates voiceover audio. Calls a TTS API directly; no Hermes skill required
|
||||
beyond `kanban-worker`. The user can also supply pre-recorded VO instead of
|
||||
generation.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **External APIs:** ElevenLabs, OpenAI TTS, etc.
|
||||
- **Outputs:** `audio/voiceover/line-NN.mp3`, `audio/voiceover/timeline.mp3`
|
||||
|
||||
### foley / sfx-designer
|
||||
|
||||
Sound effects and ambient design. Often optional unless the brief calls for
|
||||
sound design specifically.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`, `songsee` for audio-feature visualization when
|
||||
designing to a track
|
||||
- **Outputs:** `audio/sfx/*.mp3`
|
||||
|
||||
## Post-production roles
|
||||
|
||||
### editor
|
||||
|
||||
Assembles the final cut from clips. Uses ffmpeg for stitching, fades,
|
||||
transitions. Reviews each clip for pacing and quality before assembly.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **External tools:** ffmpeg, ffprobe
|
||||
- **Outputs:** `output/final.mp4`, `output/final-noaudio.mp4`
|
||||
|
||||
### colorist
|
||||
|
||||
Color grading. Usually optional — if the renderers already produce
|
||||
brand-consistent output and the editor just stitches, the colorist is overkill.
|
||||
Worth including for narrative film with hero shots.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **Outputs:** `output/final-graded.mp4`
|
||||
|
||||
### audio-mixer
|
||||
|
||||
Mixes voiceover + music + SFX into a final audio track. Sets levels, ducks
|
||||
music under VO, normalizes loudness (LUFS).
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **External tools:** ffmpeg with `loudnorm` filter, optional `sox`
|
||||
- **Outputs:** `audio/final-mix.mp3`
|
||||
|
||||
### captioner
|
||||
|
||||
Burns subtitles into the video, generates SRT, handles accessibility. Can also
|
||||
generate captions from audio via Whisper.
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **External tools:** Whisper (CLI or API), ffmpeg subtitle filters
|
||||
- **Outputs:** `output/captions.srt`, `output/final-captioned.mp4`
|
||||
|
||||
### masterer
|
||||
|
||||
Final encode + format variants. Produces deliverables for each platform target
|
||||
(square for IG, vertical for TikTok, full HD for YouTube, etc.).
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **Outputs:** `output/final-1080.mp4`, `output/final-9x16.mp4`, etc.
|
||||
|
||||
## QA roles
|
||||
|
||||
### reviewer
|
||||
|
||||
A neutral quality gate. Reads the brief, watches the cut, comments
|
||||
specifically on what's off (pacing, sync, brand alignment, technical
|
||||
quality). Distinct from the cinematographer (who reviews visuals during
|
||||
production) and the editor (who reviews for assembly).
|
||||
|
||||
- **Toolsets:** kanban, terminal, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **External tools:** any media-analysis approach (Gemini multimodal,
|
||||
ffprobe, manual frame extraction)
|
||||
- **Outputs:** `review-notes.md`, comments on tasks
|
||||
|
||||
### brand-cop
|
||||
|
||||
Reviews specifically for brand compliance — colors, typography, voice. Use
|
||||
when the brand guidelines are detailed and a generic reviewer might miss
|
||||
violations.
|
||||
|
||||
- **Toolsets:** kanban, file
|
||||
- **Skills:** `kanban-worker`
|
||||
- **Outputs:** comments + `brand-review.md`
|
||||
|
||||
## Composing teams — heuristics
|
||||
|
||||
- **Always:** director + at least one renderer + editor.
|
||||
- **Add writer** if scripted dialogue / narration / on-screen text exceeds a
|
||||
tagline.
|
||||
- **Add storyboarder** if the brief has more than 5 distinct beats and the
|
||||
director hasn't already laid out a beat list.
|
||||
- **Add cinematographer** if multiple renderer instances need consistent
|
||||
visual language. (For a single-tool video, the renderer's own skill spec
|
||||
is enough.)
|
||||
- **Add image-generator + image-to-video-generator pair** for narrative film
|
||||
with photorealistic visuals.
|
||||
- **Add music-supervisor** when music is provided and rhythm matters
|
||||
(music videos always; explainers sometimes).
|
||||
- **Add voice-talent** for any voiceover / narrative dialogue.
|
||||
- **Add audio-mixer** when there are 2+ audio sources (VO + music, music + SFX).
|
||||
- **Add captioner** for accessibility-priority projects (explainer, tutorial,
|
||||
any platform that defaults to muted playback).
|
||||
- **Add reviewer** for high-stakes projects. Skip for quick experimental loops.
|
||||
- **Add masterer** when multiple platform deliverables are needed.
|
||||
|
||||
## Anti-patterns
|
||||
|
||||
- **One renderer doing everything.** If scenes use very different tools
|
||||
(ASCII + 3D + motion graphics), use specialized renderer variants. The
|
||||
renderer loads ONE creative skill at a time; mixing styles in a single
|
||||
renderer causes thrashing.
|
||||
- **A separate profile per scene.** No. Profiles are per-role, not per-scene.
|
||||
Eight scenes use one or two renderer profiles, not eight.
|
||||
- **A "general" profile that does everything.** Worse than no specialization.
|
||||
The kanban routing breaks down if every task fits every profile.
|
||||
- **No reviewer for important deliverables.** Saves an hour of pipeline time
|
||||
but ships flaws.
|
||||
|
|
@ -0,0 +1,305 @@
|
|||
# Tool Matrix — Skills + Toolsets per Role
|
||||
|
||||
Maps each role archetype to the Hermes skills it should `always_load` and the
|
||||
toolsets it needs. Only references skills that ship in the public hermes-agent
|
||||
repository (under `skills/` or `optional-skills/`). External APIs and CLIs are
|
||||
called from the terminal toolset; they don't appear in `always_load`.
|
||||
|
||||
## Hermes skills relevant to video production
|
||||
|
||||
### Visual / rendering skills (`hermes-agent/skills/creative/`)
|
||||
|
||||
| Skill | What it does | Best fit for |
|
||||
|-------|--------------|--------------|
|
||||
| `ascii-video` | Production pipeline for ASCII art video — generative, audio-reactive, video-to-ASCII | Renderer for ASCII / terminal / retro pixel content; cinematographer for ASCII projects |
|
||||
| `ascii-art` | Static ASCII art generation | Concept artist for ASCII style frames; secondary tool for ASCII renderer |
|
||||
| `manim-video` | Manim CE animations — math, algorithms, 3Blue1Brown-style explainers | Renderer for math, algorithm walkthroughs, technical concept explainers |
|
||||
| `p5js` | p5.js sketches — generative art, shaders, interactive, 3D | Renderer for generative art, particle systems, organic motion, web-canvas content |
|
||||
| `comfyui` | Generate images, video, audio with ComfyUI workflows (image-to-image, image-to-video, etc.) | image-generator, image-to-video-generator, or general renderer for AI-generated content |
|
||||
| `touchdesigner-mcp` | Control a running TouchDesigner instance — real-time visuals, audio-reactive installation art, VJ | Renderer for real-time/audio-reactive content; installation art; live performance |
|
||||
| `blender-mcp` *(optional)* | Control Blender 4.3+ via MCP — 3D modeling, animation, rendering | Renderer for 3D scenes, photoreal environments, character animation |
|
||||
| `pixel-art` | Pixel art with era palettes (NES, Game Boy, PICO-8) | Renderer for retro game aesthetic; concept artist for pixel-style frames |
|
||||
| `baoyu-comic` | Knowledge-comic generation (educational, biography, tutorial) | Renderer for comic-style narrative; explainer in panel form |
|
||||
| `baoyu-infographic` | Infographic generation | Renderer for data-driven explainer scenes |
|
||||
| `meme-generation` *(optional)* | Generate meme images by overlaying text on templates | Generator for satirical/social content; meme-style stills |
|
||||
|
||||
### Design / pre-production skills (`hermes-agent/skills/creative/`)
|
||||
|
||||
| Skill | What it does | Best fit for |
|
||||
|-------|--------------|--------------|
|
||||
| `claude-design` | Design one-off HTML artifacts (landing, deck, prototype) | Concept artist for product video style frames; storyboarder for UI-heavy content |
|
||||
| `design-md` | Design markdown docs | Concept artist documenting visual specs |
|
||||
| `popular-web-designs` | Reference patterns for popular web designs | Concept artist; cinematographer when matching a known UI aesthetic |
|
||||
| `sketch` | Throwaway HTML mockups (2-3 design variants to compare) | Concept artist exploring directions; storyboarder for UI flows |
|
||||
| `excalidraw` | Excalidraw-style hand-drawn diagrams | Storyboarder; concept artist for sketch-style frames |
|
||||
| `architecture-diagram` | Software architecture diagrams | Storyboarder for technical content; explainer scenes about systems |
|
||||
| `concept-diagrams` *(optional)* | Flat, minimal SVG diagrams (educational visual language; physics, chemistry, math, anatomy, etc.) | Renderer / storyboarder for explainer scenes with clean educational diagrams |
|
||||
| `pretext` | Mathematical/scientific content authoring | Writer / cinematographer for technical-explainer pretexts |
|
||||
| `creative-ideation` | Constraint-driven project ideation | Director / cinematographer when the brief is wide-open and needs framing |
|
||||
| `humanizer` | Strip AI-isms from text, add real voice | Writer / copywriter post-process to avoid AI-tells in scripts and VO copy |
|
||||
|
||||
### Audio / media skills (`hermes-agent/skills/creative/` + `skills/media/`)
|
||||
|
||||
| Skill | What it does | Best fit for |
|
||||
|-------|--------------|--------------|
|
||||
| `songwriting-and-ai-music` | Songwriting craft + Suno prompt patterns | Music supervisor when commissioning a track via Suno |
|
||||
| `heartmula` | Open-source music generation (Apache-2.0, Suno-like) | Music supervisor generating bespoke tracks without external APIs |
|
||||
| `songsee` | Spectrograms, mel/chroma/MFCC of audio files | Music supervisor analyzing tracks; foley-designer designing to a beat; editor visualizing a mix |
|
||||
| `spotify` | Spotify control — play, search, queue, manage playlists | Music supervisor sourcing existing tracks; reference research |
|
||||
| `youtube-content` | Fetch transcripts + transform to chapters/summaries/posts | Documentary cut, content adaptation, research for explainers |
|
||||
| `gif-search` | Find existing GIFs | Editor / concept artist sourcing references |
|
||||
| `gifs` | GIF tooling | Masterer producing GIF deliverables |
|
||||
|
||||
### Kanban infrastructure (`hermes-agent/skills/devops/`)
|
||||
|
||||
| Skill | What it does | When to load |
|
||||
|-------|--------------|--------------|
|
||||
| `kanban-orchestrator` | Decomposition playbook + anti-temptation rules for orchestrator profiles | Director only |
|
||||
| `kanban-worker` | Pitfalls, examples, edge cases for kanban workers (deeper than auto-injected guidance) | Any profile — load when handling tricky multi-step workflows |
|
||||
|
||||
The kanban plugin auto-injects baseline orchestration guidance into every
|
||||
worker's system prompt — the `kanban_create` fan-out pattern, claim/handoff
|
||||
lifecycle, and the "decompose, don't execute" rule for orchestrators.
|
||||
`kanban-orchestrator` and `kanban-worker` are deeper playbooks loaded when a
|
||||
profile needs them.
|
||||
|
||||
## External tools (called from terminal toolset)
|
||||
|
||||
These are **not** Hermes skills but external CLIs / APIs that profiles invoke.
|
||||
They don't appear in `always_load`; instead the role's terminal commands hit
|
||||
them directly.
|
||||
|
||||
| Tool | What it does | Profile that uses it |
|
||||
|------|--------------|----------------------|
|
||||
| `ffmpeg` | Video / audio encode, splice, mux | renderer, editor, audio-mixer, masterer |
|
||||
| `ffprobe` | Inspect media | All media-touching profiles |
|
||||
| Whisper (CLI or API) | Speech-to-text for captions | captioner |
|
||||
| Text-to-image API (FAL / Replicate / OpenAI / Midjourney) | Stills generation | image-generator (alternative to local `comfyui`) |
|
||||
| Image-to-video API (Runway / Kling / Luma / Pika) | Animate stills | image-to-video-generator |
|
||||
| Text-to-speech API (ElevenLabs / OpenAI TTS / etc.) | Voiceover generation | voice-talent |
|
||||
| Suno API or web | Track composition (paired with `songwriting-and-ai-music`) | music-supervisor |
|
||||
| Remotion CLI (`npx remotion render`) | React-based motion graphics | renderer-motion-graphics |
|
||||
| Manim CE (`manim`) | Math animation render (driven by `manim-video` skill's recipes) | renderer-manim |
|
||||
| Blender (`blender -b`) | 3D rendering (alternative to `blender-mcp`) | renderer-3d |
|
||||
| Gemini multimodal / Claude vision | AI review of clips | reviewer, cinematographer, editor |
|
||||
|
||||
## Standard toolset configurations per role
|
||||
|
||||
### director
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-orchestrator
|
||||
```
|
||||
|
||||
The director gets terminal access by convention, but its SOUL.md rules forbid
|
||||
it from executing work itself. Audit logs catch violations.
|
||||
|
||||
### writer / copywriter
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
- humanizer # post-process scripts to strip AI-tells
|
||||
```
|
||||
|
||||
No terminal — writers don't need it.
|
||||
|
||||
### concept-artist
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
# plus one or more (style-dependent):
|
||||
# - claude-design (UI / web product video)
|
||||
# - sketch (quick mockup variants)
|
||||
# - excalidraw (hand-drawn frames)
|
||||
# - ascii-art (ASCII style frames)
|
||||
# - pixel-art (retro/game aesthetic)
|
||||
# - popular-web-designs (matching known web aesthetic)
|
||||
# - design-md (text-based design docs)
|
||||
```
|
||||
|
||||
### storyboarder
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
# one of:
|
||||
# - excalidraw (sketch storyboards)
|
||||
# - architecture-diagram (technical/system content)
|
||||
# - concept-diagrams (educational / scientific content)
|
||||
```
|
||||
|
||||
### cinematographer
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
# the visual skill that matches the project, e.g.:
|
||||
# - ascii-video (ASCII projects)
|
||||
# - manim-video (math/explainer)
|
||||
# - p5js (generative)
|
||||
# - comfyui (AI-generated visuals)
|
||||
# - blender-mcp (3D)
|
||||
# - touchdesigner-mcp (real-time/installation)
|
||||
```
|
||||
|
||||
### renderer (specialized variants)
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
# ONE skill per renderer variant (or empty for external-API renderers):
|
||||
# - ascii-video (renderer-ascii)
|
||||
# - manim-video (renderer-manim)
|
||||
# - p5js (renderer-p5js)
|
||||
# - comfyui (renderer-comfyui — img/video AI gen)
|
||||
# - touchdesigner-mcp (renderer-touchdesigner)
|
||||
# - blender-mcp (renderer-3d)
|
||||
# - pixel-art (renderer-pixel)
|
||||
# - baoyu-comic (renderer-comic)
|
||||
# - meme-generation (renderer-meme)
|
||||
```
|
||||
|
||||
For external-API renderers (image-to-video-generator using Runway, voice-talent
|
||||
using ElevenLabs, renderer-motion-graphics using Remotion), `always_load` only
|
||||
contains `kanban-worker` — the role's work is API-driven and the API key +
|
||||
terminal commands suffice.
|
||||
|
||||
For multi-skill renderer setups (rare — usually one variant per skill is
|
||||
cleaner) use `--skill <name>` on individual `kanban_create` calls to override
|
||||
which skill loads for that specific task.
|
||||
|
||||
### image-generator / image-to-video-generator / voice-talent
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
# for image-generator that drives ComfyUI locally:
|
||||
# - comfyui
|
||||
env_required:
|
||||
# populate based on the chosen API:
|
||||
- FAL_KEY # or REPLICATE_API_TOKEN, OPENAI_API_KEY for image-gen
|
||||
- RUNWAY_API_KEY # or KLING_API_KEY, LUMA_API_KEY for image-to-video
|
||||
- ELEVENLABS_API_KEY # or OPENAI_API_KEY for TTS
|
||||
```
|
||||
|
||||
If the user's setup has ComfyUI installed locally, the `comfyui` skill can
|
||||
replace the external image-gen API entirely (cheaper, more control, supports
|
||||
custom workflows for image-to-video too).
|
||||
|
||||
### music-supervisor
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
- songsee # spectrograms / audio analysis
|
||||
# plus (depending on what the project needs):
|
||||
# - songwriting-and-ai-music (commissioning Suno tracks)
|
||||
# - heartmula (commissioning open-source local generation)
|
||||
# - spotify (sourcing existing tracks)
|
||||
```
|
||||
|
||||
### editor / audio-mixer / captioner / masterer
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
```
|
||||
|
||||
These are mostly ffmpeg-driven; no special skill needed beyond `kanban-worker`.
|
||||
For the captioner, add Whisper invocation patterns to its SOUL.md.
|
||||
|
||||
### reviewer / brand-cop
|
||||
|
||||
```yaml
|
||||
toolsets:
|
||||
- kanban
|
||||
- terminal # for media inspection
|
||||
- file
|
||||
skills:
|
||||
always_load:
|
||||
- kanban-worker
|
||||
env_required:
|
||||
- OPENROUTER_API_KEY # if using Gemini multimodal review
|
||||
# or ANTHROPIC_API_KEY if using Claude vision (already required globally)
|
||||
```
|
||||
|
||||
## API key requirements
|
||||
|
||||
Track these in the project setup. The setup script should verify each required
|
||||
key is present in `~/.hermes/.env` (or macOS Keychain) before firing the kanban.
|
||||
|
||||
| Service | Env var | Used by |
|
||||
|---------|---------|---------|
|
||||
| ElevenLabs | `ELEVENLABS_API_KEY` | voice-talent |
|
||||
| OpenAI | `OPENAI_API_KEY` | image-generator (DALL-E), voice-talent (TTS) |
|
||||
| OpenRouter | `OPENROUTER_API_KEY` | reviewer, cinematographer, editor (Gemini multimodal review) |
|
||||
| FAL | `FAL_KEY` | image-generator (FAL flux models) |
|
||||
| Replicate | `REPLICATE_API_TOKEN` | image-generator (alternate provider) |
|
||||
| Runway | `RUNWAY_API_KEY` | image-to-video-generator |
|
||||
| Kling | `KLING_API_KEY` | image-to-video-generator (alternate) |
|
||||
| Luma | `LUMA_API_KEY` | image-to-video-generator (alternate) |
|
||||
| Suno | `SUNO_API_KEY` | music-supervisor (paired with `songwriting-and-ai-music`) |
|
||||
| Spotify | `SPOTIFY_CLIENT_ID` + `SPOTIFY_CLIENT_SECRET` | music-supervisor (paired with `spotify` skill) |
|
||||
| Anthropic | `ANTHROPIC_API_KEY` | every Hermes profile (Claude) |
|
||||
|
||||
If a key is missing, prompt the user to add it. Storage methods, in order of
|
||||
preference: macOS Keychain → `~/.hermes/.env` → environment variable.
|
||||
|
||||
## Skill version pinning
|
||||
|
||||
If a specific skill version is desired, pass it via the per-task
|
||||
`--skill <name>=<version>` flag. The default is whatever's installed.
|
||||
|
||||
## Adding a new skill to the matrix
|
||||
|
||||
When a new Hermes-public video skill ships:
|
||||
|
||||
1. Add a row to the relevant table at the top of this file
|
||||
2. If it warrants a specialized renderer variant, add to `role-archetypes.md`
|
||||
3. Update relevant per-style examples in `examples.md`
|
||||
501
optional-skills/creative/kanban-video-orchestrator/scripts/bootstrap_pipeline.py
Executable file
501
optional-skills/creative/kanban-video-orchestrator/scripts/bootstrap_pipeline.py
Executable file
|
|
@ -0,0 +1,501 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bootstrap a video production kanban from a structured plan JSON.
|
||||
|
||||
Reads a plan.json describing the team + brief, expands templates from
|
||||
../assets/, and writes a setup.sh that creates Hermes profiles and fires the
|
||||
initial kanban task.
|
||||
|
||||
Profile-config patching, SOUL.md-per-profile, TEAM.md task-graph convention,
|
||||
and the `hermes kanban create --workspace dir:` initial-task pattern are
|
||||
adapted from alt-glitch's NousResearch/kanban-video-pipeline.
|
||||
|
||||
Usage:
|
||||
bootstrap_pipeline.py plan.json [--out setup.sh]
|
||||
|
||||
The plan.json schema is documented inline below — see the `validate_plan`
|
||||
function. A minimal example:
|
||||
|
||||
{
|
||||
"title": "Q3 Product Teaser",
|
||||
"slug": "q3-product-teaser",
|
||||
"tenant": "q3-product-teaser",
|
||||
"duration_s": 30,
|
||||
"aspect": "1:1",
|
||||
"resolution": "1080x1080",
|
||||
"fps": 30,
|
||||
"team": [
|
||||
{
|
||||
"profile": "director",
|
||||
"role": "director",
|
||||
"toolsets": ["kanban", "terminal", "file"],
|
||||
"skills": [],
|
||||
"responsibilities": "...",
|
||||
"inputs": "brief.md, TEAM.md, taste/",
|
||||
"outputs": "kanban tasks for the team"
|
||||
},
|
||||
...
|
||||
],
|
||||
"scenes": [
|
||||
{"n": 1, "time": "0:00-0:08", "content": "...", "tool": "renderer-ascii"},
|
||||
...
|
||||
],
|
||||
"audio": {"approach": "voiceover + music bed", "vo": "ElevenLabs Lily",
|
||||
"music": "license-free", "sfx": "n/a"},
|
||||
"deliverables": [
|
||||
{"format": "mp4", "resolution": "1080x1080", "notes": "primary"}
|
||||
],
|
||||
"api_keys_required": ["ELEVENLABS_API_KEY", "OPENROUTER_API_KEY"],
|
||||
"brief_extra": {
|
||||
"concept_one_liner": "...",
|
||||
"emotional_north_star": "...",
|
||||
"visual_refs": "...",
|
||||
"tone": "...",
|
||||
"brand_constraints": "..."
|
||||
}
|
||||
}
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Templates live in the skill's ``assets/`` directory, one level above the
# directory that contains this script.
ASSETS_DIR = Path(__file__).resolve().parent.parent / "assets"


def load_template(name: str) -> str:
    """Return the text of the template file *name* under ``ASSETS_DIR``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding (the rendered output mixes in non-ASCII
    text such as em dashes and check marks).

    Args:
        name: File name of the template, relative to ``ASSETS_DIR``.

    Returns:
        The template contents as a string.

    Raises:
        OSError: If the template file cannot be read.
    """
    return (ASSETS_DIR / name).read_text(encoding="utf-8")
|
||||
|
||||
|
||||
PROFILE_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9-]+$")


def validate_plan(plan: dict) -> list[str]:
    """Return a list of validation error strings; empty list = valid.

    Checks performed:

    * all required top-level keys are present;
    * ``team`` is a non-empty list of objects, includes a ``director`` role,
      and every member has the required keys, a unique profile name matching
      ``PROFILE_NAME_RE``, and ``toolsets`` / ``skills`` that are lists of
      strings;
    * ``slug`` matches ``SLUG_RE``;
    * ``title`` and ``tenant`` are strings, ``scenes`` is a list of objects,
      ``audio`` is an object and ``deliverables`` is a non-empty list of
      objects (the renderers index into these without further checks).

    The function never raises on malformed input; every problem is reported
    as a message so the caller can print them all at once.

    Args:
        plan: The parsed plan.json content.

    Returns:
        Human-readable error messages, in the order the checks run.
    """
    if not isinstance(plan, dict):
        return ["plan must be a JSON object"]

    errors: list[str] = []
    required_top = ("title", "slug", "tenant", "duration_s", "aspect",
                    "resolution", "fps", "team", "scenes", "audio",
                    "deliverables")
    errors.extend(
        f"missing required key: {k}" for k in required_top if k not in plan
    )

    if "team" in plan:
        team = plan["team"]
        if not isinstance(team, list) or not team:
            errors.append("team must be a non-empty list")
        else:
            # Only well-formed members can contribute a role.
            if not any(
                isinstance(t, dict) and t.get("role") == "director"
                for t in team
            ):
                errors.append("team must include a director role")
            seen_profiles = set()
            for i, t in enumerate(team):
                if not isinstance(t, dict):
                    errors.append(f"team[{i}] must be an object")
                    continue
                for k in ("profile", "role", "toolsets", "skills",
                          "responsibilities"):
                    if k not in t:
                        errors.append(f"team[{i}] missing {k}")
                # Profile name must match Hermes's regex (lowercase
                # alphanumeric + hyphens + underscores, up to 64 chars).
                if "profile" in t:
                    profile = t["profile"]
                    is_str = isinstance(profile, str)
                    # fullmatch (not match) so a trailing newline is rejected:
                    # `$` alone would accept "name\n".
                    if not is_str or not PROFILE_NAME_RE.fullmatch(profile):
                        errors.append(
                            f"team[{i}].profile {profile!r} must match "
                            f"[a-z0-9][a-z0-9_-]{{0,63}} per Hermes profile rules"
                        )
                    if is_str:
                        if profile in seen_profiles:
                            errors.append(
                                f"team[{i}].profile {profile!r} is duplicated"
                            )
                        seen_profiles.add(profile)
                # Toolsets / skills must be lists of strings, not bare strings.
                for k in ("toolsets", "skills"):
                    if k not in t:
                        continue
                    value = t[k]
                    if not isinstance(value, list) or not all(
                        isinstance(item, str) for item in value
                    ):
                        errors.append(
                            f"team[{i}].{k} must be a list of strings"
                        )

    if "slug" in plan:
        slug = plan["slug"]
        if not isinstance(slug, str) or not SLUG_RE.fullmatch(slug):
            errors.append("slug must be lowercase, hyphenated, "
                          "starting with [a-z0-9]")

    # title / tenant are substituted into the generated files as text.
    for k in ("title", "tenant"):
        if k in plan and not isinstance(plan[k], str):
            errors.append(f"{k} must be a string")

    if "scenes" in plan:
        scenes = plan["scenes"]
        if not isinstance(scenes, list):
            errors.append("scenes must be a list")
        else:
            errors.extend(
                f"scenes[{i}] must be an object"
                for i, s in enumerate(scenes) if not isinstance(s, dict)
            )

    if "audio" in plan and not isinstance(plan["audio"], dict):
        errors.append("audio must be an object")

    if "deliverables" in plan:
        deliverables = plan["deliverables"]
        if not isinstance(deliverables, list) or not deliverables:
            errors.append("deliverables must be a non-empty list")
        else:
            errors.extend(
                f"deliverables[{i}] must be an object"
                for i, d in enumerate(deliverables) if not isinstance(d, dict)
            )

    return errors
|
||||
|
||||
|
||||
def render_brief(plan: dict) -> str:
    """Render brief.md from the plan.

    Loads ``brief.md.tmpl``, fills its ``{{KEY}}`` placeholders from the plan
    in one pass, then replaces the template's two placeholder scene rows with
    one table row per scene.

    Args:
        plan: Parsed plan.json. Reads ``title``, ``slug``, ``tenant``,
            ``duration_s``, ``aspect``, ``resolution``, ``fps``, ``scenes``,
            ``audio`` and ``deliverables``, plus the optional ``brief_extra``
            and ``api_keys_required``.

    Returns:
        The rendered brief as a string.

    Raises:
        KeyError: If a required top-level plan key is missing.
        OSError: If the template cannot be read.
    """
    tmpl = load_template("brief.md.tmpl")
    extra = plan.get("brief_extra") or {}
    audio = plan["audio"]
    deliverables = plan["deliverables"]
    # Tolerate an empty list / missing keys the same way the table rows do
    # ("?" for unknown) instead of raising IndexError / KeyError.
    primary = deliverables[0] if deliverables else {}
    alt = deliverables[1] if len(deliverables) > 1 else None

    # Scene table rows
    scene_rows = [
        f"| {s.get('n', '?')} | {s.get('time', '?')} | "
        f"{s.get('content', '')} | {s.get('tool', '')} | "
        f"{s.get('audio', '')} | {s.get('notes', '')} |"
        for s in plan["scenes"]
    ]
    scene_table = "\n".join(scene_rows) if scene_rows else "_(none yet)_"

    replacements = {
        "TITLE": plan["title"],
        "SLUG": plan["slug"],
        "TENANT": plan["tenant"],
        "WORKSPACE": f"~/projects/video-pipeline/{plan['slug']}",
        "ONE_LINE_PITCH": extra.get("concept_one_liner", "_(TBD)_"),
        "EMOTIONAL_NORTH_STAR": extra.get("emotional_north_star", "_(TBD)_"),
        "DURATION_S": str(plan["duration_s"]),
        "ASPECT": plan["aspect"],
        "RESOLUTION": plan["resolution"],
        "FPS": str(plan["fps"]),
        "PLATFORMS": extra.get("platforms", "_(TBD)_"),
        "DEADLINE": extra.get("deadline", "_(none)_"),
        "QUALITY_BAR": extra.get("quality_bar", "polished"),
        "VISUAL_REFS": extra.get("visual_refs", "_(none)_"),
        "TONE": extra.get("tone", "_(TBD)_"),
        "BRAND_CONSTRAINTS": extra.get("brand_constraints", "_(none)_"),
        "AESTHETIC_RULES": extra.get("aesthetic_rules", "_(TBD)_"),
        "AUDIO_APPROACH": audio.get("approach", "_(TBD)_"),
        "VO_DETAILS": audio.get("vo", "_(n/a)_"),
        "MUSIC_DETAILS": audio.get("music", "_(n/a)_"),
        "SFX_DETAILS": audio.get("sfx", "_(n/a)_"),
        "PRIMARY_FORMAT": primary.get("format", "?"),
        "PRIMARY_RES": primary.get("resolution", "?"),
        "ALT_FORMAT_1": (alt.get("format", "?")
                         if alt is not None else "_(none)_"),
        "ALT_RES_1": alt.get("resolution", "?") if alt is not None else "",
        "ALT_NOTES_1": alt.get("notes", "") if alt is not None else "",
        "API_KEYS_REQUIRED": ", ".join(plan.get("api_keys_required", [])) or "none",
        "EXT_DEPS": extra.get("ext_deps", "ffmpeg, Python 3.11+"),
        "SOURCE_ASSETS": extra.get("source_assets", "_(none)_"),
    }

    # Genuinely single-pass substitution: each placeholder in the template is
    # looked up once, so a value that itself contains "{{SOMETHING}}" is
    # inserted verbatim rather than expanded by a later replacement.
    # Placeholders not in the table are left untouched.
    out = re.sub(
        r"\{\{([A-Z0-9_]+)\}\}",
        lambda m: str(replacements.get(m.group(1), m.group(0))),
        tmpl,
    )

    # Scene table: replace the placeholder rows in the template. A callable
    # replacement is used so backslashes in scene text are inserted literally
    # instead of being parsed as regex escapes / group references.
    out = re.sub(
        r"\|\s*1\s*\|\s*0:00–0:0X.+?\n\|\s*2\s*\|.+?\n",
        lambda _m: scene_table + "\n",
        out, flags=re.DOTALL,
    )
    return out
|
||||
|
||||
|
||||
def render_team_md(plan: dict) -> str:
    """Render TEAM.md from the team list + scene → tool mapping.

    The document has three parts: a bullet list of team profiles, a
    conventional task graph (``T0`` is always the director's decomposition
    task; later IDs are allocated in order for cinematographer,
    music-supervisor, one task per scene, voice-talent, audio-mixer, editor,
    captioner and reviewer when those roles exist), and the workspace
    arguments every ``kanban_create`` call must pass.

    A task that would otherwise end up with no parents (for example an
    audio-mixer in a team without music-supervisor or voice-talent) is
    parented on ``T0`` so the graph never contains an empty parent list.

    Args:
        plan: Parsed plan.json. Reads ``title``, ``slug``, ``tenant``,
            ``team`` (``profile``, ``role``, ``skills``,
            ``responsibilities``) and ``scenes`` (``n``, ``content``,
            ``tool``).

    Returns:
        The TEAM.md content as a single string.
    """
    lines = [f"# Team & Task Graph — {plan['title']}", "", "## Team", ""]
    for t in plan["team"]:
        skills = (
            f"loads `{', '.join(t['skills'])}`"
            if t["skills"] else "no skills required"
        )
        lines.append(
            f"- `{t['profile']}` — {t['responsibilities']} ({skills})"
        )
    lines.extend(["", "## Task Graph", "", "```"])

    # Build a simple task graph based on conventions.
    profiles_by_role = {t["role"]: t["profile"] for t in plan["team"]}
    director = profiles_by_role.get("director", "director")
    # Pad the ID to the same width as every other row so columns line up.
    lines.append(f"{'T0':5} {director} — decompose")

    next_id = 1

    def add_task(profile, description, parents, label="parents"):
        """Append one task row and return its freshly allocated ID."""
        nonlocal next_id
        task_id = f"T{next_id}"
        next_id += 1
        lines.append(
            f"{task_id:5} {profile} — {description} "
            f"({label}: {', '.join(parents)})"
        )
        return task_id

    parents_for_renderer = ["T0"]
    if "cinematographer" in profiles_by_role:
        cine_id = add_task(
            profiles_by_role["cinematographer"],
            "visual spec for all scenes", ["T0"], label="parent",
        )
        # Renderers wait for the visual spec instead of the raw decomposition.
        parents_for_renderer = [cine_id]

    ms_id = None
    if "music-supervisor" in profiles_by_role:
        ms_id = add_task(
            profiles_by_role["music-supervisor"],
            "track analysis + beats.json", ["T0"], label="parent",
        )

    # Scenes
    scene_parents = parents_for_renderer + ([ms_id] if ms_id else [])
    scene_ids = []
    for s in plan["scenes"]:
        renderer_profile = s.get("tool") or "renderer"
        # The scene's `tool` may name either a role or a profile; resolve it
        # to the first matching team member's profile name.
        for t in plan["team"]:
            if renderer_profile in (t["role"], t["profile"]):
                renderer_profile = t["profile"]
                break
        content = str(s.get("content") or "")[:50]
        scene_ids.append(
            add_task(
                renderer_profile,
                f"scene {s.get('n', '?')}: {content}",
                scene_parents,
            )
        )

    # VO + audio mix
    vo_id = None
    if "voice-talent" in profiles_by_role:
        vo_id = add_task(
            profiles_by_role["voice-talent"], "narration", ["T0"],
            label="parent",
        )

    am_id = None
    if "audio-mixer" in profiles_by_role:
        am_parents = [p for p in (ms_id, vo_id) if p] or ["T0"]
        am_id = add_task(
            profiles_by_role["audio-mixer"], "mix audio", am_parents,
        )

    # Editor
    ed_id = None
    if "editor" in profiles_by_role:
        ed_parents = (
            scene_ids + [p for p in (am_id, vo_id, ms_id) if p]
        ) or ["T0"]
        ed_id = add_task(
            profiles_by_role["editor"], "assemble + mux", ed_parents,
        )

    # Captioner (only meaningful once there is an edit to caption)
    last = ed_id
    if "captioner" in profiles_by_role and ed_id:
        last = add_task(
            profiles_by_role["captioner"], "SRT + burn", [ed_id],
            label="parent",
        )

    # Reviewer
    if "reviewer" in profiles_by_role and last:
        add_task(
            profiles_by_role["reviewer"], "final QA", [last], label="parent",
        )

    lines.append("```")
    lines.extend([
        "",
        "## Per-task workspace requirement",
        "",
        "All `kanban_create` calls MUST pass:",
        "```",
        'workspace_kind="dir"',
        f'workspace_path="$HOME/projects/video-pipeline/{plan["slug"]}"',
        f'tenant="{plan["tenant"]}"',
        "```",
    ])
    return "\n".join(lines)
|
||||
|
||||
|
||||
def render_setup_sh(plan: dict, brief_md: str, team_md: str) -> str:
    """Render setup.sh from the plan.

    Fills ``setup.sh.tmpl`` with generated shell fragments: API-key checks,
    per-scene directories, profile creation / configuration commands,
    heredocs that write each profile's SOUL.md, and the already-rendered
    brief and TEAM documents.

    Args:
        plan: Parsed plan.json. Reads ``title``, ``slug``, ``tenant``,
            ``team``, ``scenes`` and the optional ``api_keys_required``.
        brief_md: Rendered brief.md content to embed.
        team_md: Rendered TEAM.md content to embed.

    Returns:
        The complete setup.sh script text.

    Raises:
        KeyError: If a required plan key is missing.
        OSError: If a template cannot be read.
    """
    # Local import: only this function needs shell quoting.
    import shlex

    tmpl = load_template("setup.sh.tmpl")

    # API key checks. `check_key` is presumably a shell function defined by
    # the template — TODO confirm against setup.sh.tmpl.
    key_checks = [
        f"check_key {shlex.quote(str(key))} hermes {shlex.quote(str(key))} || exit 1"
        for key in plan.get("api_keys_required", [])
    ]
    key_checks_str = "\n".join(key_checks) if key_checks else "# (no API keys required)"

    # Scene dirs. The directory suffix is always a zero-padded integer: a
    # numeric string is converted, and a missing / non-numeric scene number
    # falls back to the scene's 1-based position instead of raising
    # ValueError from the `02d` format spec.
    scene_dir_lines = []
    for position, s in enumerate(plan["scenes"], start=1):
        n = s.get("n")
        if isinstance(n, str) and n.strip().isdecimal():
            n = int(n)
        if not isinstance(n, int):
            n = position
        scene_dir_lines.append(
            f'mkdir -p "$WORKSPACE/scenes/scene-{n:02d}"/checkpoints'
        )
    scene_dirs = "\n".join(scene_dir_lines)

    # Profile create
    profile_creates = [
        f"hermes profile create {shlex.quote(t['profile'])} --clone 2>/dev/null || true"
        for t in plan["team"]
    ]

    # Profile config — emit JSON arrays as single shell words. shlex.quote is
    # used rather than repr(): repr() backslash-escapes single quotes and
    # doubles backslashes (mangling JSON \uXXXX escapes), neither of which is
    # valid inside a bash single-quoted string.
    profile_configs = [
        "configure_profile "
        f"{shlex.quote(t['profile'])} "
        f"{shlex.quote(json.dumps(t['toolsets']))} "
        f"{shlex.quote(json.dumps(t['skills']))}"
        for t in plan["team"]
    ]

    # SOUL writes — one quoted heredoc per profile, so the SOUL text is not
    # subject to shell expansion.
    soul_writes = [
        f'cat > "$HOME/.hermes/profiles/{t["profile"]}/SOUL.md" <<\'SOUL_EOF\'\n'
        f"{render_soul_md(t, plan)}\n"
        f"SOUL_EOF\n"
        f'echo " ✓ SOUL.md for {t["profile"]}"'
        for t in plan["team"]
    ]

    # Taste writes (placeholder; real content optional)
    taste_writes = (
        'cat > "$WORKSPACE/taste/brand-guide.md" <<\'TASTE_EOF\'\n'
        '# Brand Guide\n\n'
        '_(Populate with project-specific colors, typography, motion rules)_\n'
        'TASTE_EOF\n'
        'cat > "$WORKSPACE/taste/emotional-dna.md" <<\'DNA_EOF\'\n'
        '# Emotional DNA\n\n'
        '_(What this piece should FEEL like — populate from the brief.)_\n'
        'DNA_EOF'
    )

    # Asset copies — leave empty by default; user fills in
    asset_copies = "# Add cp/rsync commands here for any provided assets"

    replacements = {
        "TITLE": plan["title"],
        "SLUG": plan["slug"],
        "TENANT": plan["tenant"],
        "WORKSPACE": f"~/projects/video-pipeline/{plan['slug']}",
        "KEY_CHECKS": key_checks_str,
        "SCENE_DIRS": scene_dirs,
        "PROFILE_CREATE_COMMANDS": "\n".join(profile_creates),
        "PROFILE_CONFIG_COMMANDS": "\n".join(profile_configs),
        "SOUL_WRITES": "\n".join(soul_writes),
        "BRIEF_CONTENTS": brief_md,
        "TEAM_CONTENTS": team_md,
        "TASTE_WRITES": taste_writes,
        "ASSET_COPIES": asset_copies,
    }
    # Substitute in one pass over the template so placeholder-looking text
    # inside an inserted value (e.g. "{{TEAM_CONTENTS}}" quoted in the brief)
    # is not expanded by a later replacement. Unknown placeholders are kept.
    return re.sub(
        r"\{\{([A-Z0-9_]+)\}\}",
        lambda m: str(replacements.get(m.group(1), m.group(0))),
        tmpl,
    )
|
||||
|
||||
|
||||
def render_soul_md(team_member: dict, plan: dict) -> str:
    """Render a profile's SOUL.md from a team member dict + plan context.

    Fills ``soul.md.tmpl`` with the member's role, responsibilities, inputs,
    outputs, toolsets, skills, external tools and role rules, plus a shared
    rule list (extended with orchestration rules for the director) and a
    shared block of ffprobe / ffmpeg example commands.

    Args:
        team_member: One entry of ``plan["team"]``. ``role``,
            ``responsibilities``, ``toolsets`` and ``skills`` are required;
            ``inputs``, ``outputs``, ``external_tools`` and ``role_rules``
            are optional.
        plan: The full plan; only ``tenant`` is read here.

    Returns:
        The rendered SOUL.md text.
    """
    doc = load_template("soul.md.tmpl")
    role_name = team_member["role"]

    # Rules shared by every profile, one markdown bullet per entry.
    rule_bullets = [
        "- **Read the brief and team graph** before doing anything else.",
        "- **Pass `workspace_kind=\"dir\"` and `workspace_path` on every "
        "`kanban_create` call.** This keeps the team in one shared workspace.",
        f"- **Use tenant `{plan['tenant']}`** on every kanban call.",
        "- **Write outputs to predictable paths.** Other profiles depend on "
        "your filename conventions.",
        "- **Emit heartbeats** during long-running work. Renderers should "
        "report frame counts; editors should report assembly progress.",
    ]

    # The director only orchestrates, so it gets extra guard rails.
    if role_name == "director":
        rule_bullets += [
            "- **Do not execute the work yourself.** For every concrete task, "
            "create a kanban task and assign it to the appropriate profile.",
            "- **Decompose, route, comment, approve — that's the whole job.**",
            "- **Read TEAM.md** for the canonical task graph. Do not invent "
            "new roles unless the brief truly demands it.",
            "- **Load the `kanban-orchestrator` skill** for the deeper "
            "decomposition playbook beyond the auto-injected baseline.",
        ]
    # Every bullet is newline-terminated, including the last one.
    rules_text = "".join(bullet + "\n" for bullet in rule_bullets)

    # Example media commands shown in every SOUL.md (no trailing newline).
    commands_text = "\n".join([
        "```bash",
        "# Inspect a clip",
        "ffprobe -v quiet -show_entries format=duration -show_entries "
        "stream=codec_name,width,height,r_frame_rate <file.mp4>",
        "",
        "# Extract a frame for QA",
        "ffmpeg -y -i <input.mp4> -vf \"select='eq(n,30)'\" -vsync vfr <out.png>",
        "```",
    ])

    # Placeholders are filled one after another, in this fixed order.
    doc = doc.replace("{{ROLE_NAME}}", role_name)
    doc = doc.replace(
        "{{ROLE_RESPONSIBILITIES}}", team_member["responsibilities"]
    )
    doc = doc.replace(
        "{{INPUTS_READ}}", team_member.get("inputs", "_(see brief)_")
    )
    doc = doc.replace(
        "{{OUTPUTS_PRODUCED}}", team_member.get("outputs", "_(see brief)_")
    )
    doc = doc.replace("{{TOOLSETS}}", ", ".join(team_member["toolsets"]))
    if team_member["skills"]:
        skills_text = ", ".join(team_member["skills"])
    else:
        skills_text = "(none)"
    doc = doc.replace("{{SKILLS}}", skills_text)
    doc = doc.replace(
        "{{EXTERNAL_TOOLS}}",
        team_member.get("external_tools", "ffmpeg, ffprobe (via terminal)"),
    )
    doc = doc.replace(
        "{{ROLE_RULES}}",
        team_member.get("role_rules", "_(see TEAM.md and brief.md)_"),
    )
    doc = doc.replace("{{COMMON_RULES}}", rules_text)
    doc = doc.replace("{{COMMON_COMMANDS}}", commands_text)
    return doc
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: turn a plan JSON file into ``setup.sh``.

    Loads the plan, runs ``validate_plan`` on it and, if any errors are
    reported, lists them on stderr and exits with status 2. Otherwise the
    brief, TEAM.md and setup script are rendered; the setup script is always
    written (and marked executable, mode 0o755), while the brief and TEAM.md
    are written only when ``--brief-out`` / ``--team-out`` are supplied.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("plan_json", help="Path to plan.json")
    parser.add_argument(
        "--out",
        default="setup.sh",
        help="Output path for setup.sh (default: ./setup.sh)",
    )
    parser.add_argument(
        "--brief-out",
        default=None,
        help="Write brief.md alongside (default: skipped)",
    )
    parser.add_argument(
        "--team-out",
        default=None,
        help="Write TEAM.md alongside (default: skipped)",
    )
    opts = parser.parse_args()

    plan = json.loads(Path(opts.plan_json).read_text())

    # Refuse to render anything from a plan that fails validation.
    problems = validate_plan(plan)
    if problems:
        print("Plan validation failed:", file=sys.stderr)
        for problem in problems:
            print(f" - {problem}", file=sys.stderr)
        sys.exit(2)

    brief_text = render_brief(plan)
    team_text = render_team_md(plan)
    setup_text = render_setup_sh(plan, brief_text, team_text)

    Path(opts.out).write_text(setup_text)
    os.chmod(opts.out, 0o755)
    print(f"Wrote {opts.out}")

    # Optional side outputs, in the same order as before: brief, then team.
    for destination, text in ((opts.brief_out, brief_text), (opts.team_out, team_text)):
        if destination:
            Path(destination).write_text(text)
            print(f"Wrote {destination}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
195
optional-skills/creative/kanban-video-orchestrator/scripts/monitor.py
Executable file
195
optional-skills/creative/kanban-video-orchestrator/scripts/monitor.py
Executable file
|
|
@ -0,0 +1,195 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Monitor a running video-production kanban. Polls `hermes kanban list` and
|
||||
`events` for a tenant and surfaces issues (stuck tasks, missing heartbeats,
|
||||
repeated retries, dependency deadlocks).
|
||||
|
||||
Usage:
|
||||
monitor.py --tenant <project-slug> [--interval 30]
|
||||
|
||||
Outputs a periodic snapshot to stdout. Sends alerts via stderr when issues
|
||||
are detected. Designed to run alongside the kanban — kill with Ctrl-C when
|
||||
you're satisfied (or scripted to stop on completion).
|
||||
|
||||
This is best-effort observability. It does not auto-restart tasks; intervention
|
||||
decisions should remain human/AI-overseen.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
def hermes_available() -> bool:
    """Report whether a ``hermes`` executable can be located on ``PATH``."""
    located = shutil.which("hermes")
    return located is not None
|
||||
|
||||
|
||||
def kanban_list(tenant: str) -> list[dict]:
    """Return the tenant's kanban tasks as a list of dict rows.

    First tries ``hermes kanban list --tenant <tenant> --json`` and returns
    the decoded payload when the command succeeds and prints a JSON array.
    Otherwise falls back to parsing the plain-text listing, where a task row
    is expected to look like ``<id> <status> <assignee> <title...>`` with an
    id starting with ``t_``. Rows produced by the fallback carry ``None`` for
    ``started_at``, ``heartbeat_at`` and ``max_runtime_s`` because the text
    listing does not expose them.

    Returns an empty list when the ``hermes`` executable cannot be launched
    or when no task rows are found.
    """
    base_cmd = ["hermes", "kanban", "list", "--tenant", tenant]

    # Preferred path: structured output.
    try:
        out = subprocess.run(
            base_cmd + ["--json"],
            capture_output=True, text=True, check=False,
        )
        if out.returncode == 0 and out.stdout.strip().startswith("["):
            return json.loads(out.stdout)
    except (FileNotFoundError, json.JSONDecodeError):
        pass

    # Fallback: textual parse of `hermes kanban list`. A missing executable
    # is tolerated here just like in the JSON attempt above.
    try:
        out = subprocess.run(
            base_cmd,
            capture_output=True, text=True, check=False,
        )
    except FileNotFoundError:
        return []

    rows = []
    for line in out.stdout.splitlines():
        parts = line.split()
        # Only lines shaped like a task row are kept. Blank lines, comments
        # and the column header never start with "t_", so no separate
        # header filter is needed (a substring test for "STATUS" would also
        # discard real tasks whose title mentions "status").
        if len(parts) < 4 or not parts[0].startswith("t_"):
            continue
        rows.append({
            "id": parts[0],
            "status": parts[1],
            "assignee": parts[2],
            "title": " ".join(parts[3:]),
            "started_at": None,
            "heartbeat_at": None,
            "max_runtime_s": None,
        })
    return rows
|
||||
|
||||
|
||||
def kanban_show(task_id: str) -> dict | None:
    """Fetch one task via ``hermes kanban show <task_id> --json``.

    Returns the decoded JSON payload, or ``None`` when the command exits
    with a non-zero status or its stdout is not valid JSON.
    """
    proc = subprocess.run(
        ["hermes", "kanban", "show", task_id, "--json"],
        capture_output=True,
        text=True,
        check=False,
    )
    if proc.returncode == 0:
        try:
            return json.loads(proc.stdout)
        except json.JSONDecodeError:
            pass
    return None
|
||||
|
||||
|
||||
def _parse_timestamp(value):
    """Parse an ISO-8601 timestamp into a timezone-aware datetime.

    A trailing ``Z`` is read as UTC. Timestamps without any zone information
    are interpreted as local time. Returns ``None`` when the value cannot be
    parsed.
    """
    text = str(value).strip()
    # datetime.fromisoformat() only understands a literal "Z" suffix on
    # Python 3.11+, so spell the UTC offset out explicitly.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            # Naive value: attach the local zone so arithmetic against an
            # aware "now" is well defined.
            parsed = parsed.astimezone()
    except (ValueError, OverflowError, OSError):
        return None
    return parsed


def _task_label(task: dict) -> str:
    """Return the ``<id> (<assignee>)`` prefix used in issue messages."""
    return f"{task.get('id', '?')} ({task.get('assignee', '?')})"


def detect_issues(tasks: list[dict]) -> list[str]:
    """Return a list of issue strings, one per concern.

    Three checks are made, and their findings are reported in this order:

    * ``STUCK``: a running task whose ``heartbeat_at`` is more than two
      minutes old.
    * ``OVERTIME``: a running task that has been going for longer than its
      ``max_runtime_s`` (measured from ``started_at``).
    * ``FLAPPING``: any task with ``retries`` of 2 or more.

    Status matching is case-insensitive. Tasks that lack the fields a check
    needs, or whose fields cannot be parsed, are skipped for that check
    rather than raising.
    """
    # Timezone-aware "now": subtracting parsed timestamps is then correct
    # regardless of which zone each timestamp was written in.
    now = datetime.now().astimezone()
    stale_after = timedelta(minutes=2)
    issues: list[str] = []

    running = [t for t in tasks if str(t.get("status", "?")).lower() == "running"]

    # Stuck tasks: RUNNING with no heartbeat in 2 min
    for t in running:
        hb = t.get("heartbeat_at")
        if not hb:
            continue
        hb_dt = _parse_timestamp(hb)
        if hb_dt is None:
            continue
        silence = now - hb_dt
        if silence > stale_after:
            issues.append(
                f"STUCK: {_task_label(t)} — "
                f"no heartbeat in {silence.total_seconds():.0f}s"
            )

    # Tasks exceeding max_runtime
    for t in running:
        started = t.get("started_at")
        max_rt = t.get("max_runtime_s")
        if not started or not max_rt:
            continue
        started_dt = _parse_timestamp(started)
        if started_dt is None:
            continue
        try:
            cap = float(max_rt)
        except (TypeError, ValueError):
            continue  # unusable cap: nothing to compare against
        elapsed = (now - started_dt).total_seconds()
        if elapsed > cap:
            issues.append(
                f"OVERTIME: {_task_label(t)} — "
                f"running {elapsed:.0f}s, cap was {max_rt}s"
            )

    # Repeated retries
    for t in tasks:
        retries = t.get("retries", 0)
        try:
            retry_count = float(retries or 0)
        except (TypeError, ValueError):
            continue
        if retry_count >= 2:
            issues.append(
                f"FLAPPING: {_task_label(t)} — "
                f"retried {retries}× — fix root cause before next run"
            )

    return issues
|
||||
|
||||
|
||||
def snapshot(tenant: str) -> tuple[list[dict], list[str]]:
    """Take one reading of the tenant's board.

    Returns a ``(tasks, issues)`` pair: the rows from ``kanban_list`` and the
    issue strings ``detect_issues`` derives from those same rows.
    """
    current_tasks = kanban_list(tenant)
    return current_tasks, detect_issues(current_tasks)
|
||||
|
||||
|
||||
def print_snapshot(tasks: list[dict], issues: list[str]):
    """Print a timestamped summary of the board, then any detected issues.

    The summary line (per-status counts) and one line per task go to stdout;
    the issue list, when non-empty, goes to stderr. Task fields that are
    missing or ``None`` are shown with a placeholder instead of raising.
    """
    # Marker shown in front of each task, keyed by lower-cased status.
    glyphs = {"done": "✓", "running": "▶", "ready": "·", "failed": "✗"}

    def cell(value, placeholder: str) -> str:
        # JSON rows may carry null or non-string fields; the width format
        # specs and the title slice below need real strings.
        return placeholder if value is None else str(value)

    counts = defaultdict(int)
    for t in tasks:
        counts[str(t.get("status", "?")).lower()] += 1

    print(f"\n[{datetime.now().strftime('%H:%M:%S')}] "
          f"Total: {len(tasks)} | "
          + " | ".join(f"{k}: {v}" for k, v in sorted(counts.items())))

    for t in tasks:
        bar = glyphs.get(str(t.get("status", "")).lower(), "?")
        task_id = cell(t.get("id"), "?")
        assignee = cell(t.get("assignee"), "?")
        title = cell(t.get("title"), "")
        print(f" {bar} {task_id:14} {assignee:20} {title[:60]}")

    if issues:
        print("\n ⚠ ISSUES:", file=sys.stderr)
        for i in issues:
            print(f" {i}", file=sys.stderr)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the kanban monitor.

    With ``--once`` a single snapshot is printed and the process exits with
    status 0 when no issues were found, 2 otherwise. Without it, snapshots
    are printed every ``--interval`` seconds until interrupted with Ctrl-C.
    Exits with status 1 when the ``hermes`` CLI is not on ``PATH``.
    """
    ap = argparse.ArgumentParser(description=__doc__,
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
    ap.add_argument("--tenant", required=True,
                    help="Project tenant slug to monitor")
    ap.add_argument("--interval", type=int, default=30,
                    help="Poll interval in seconds (default: 30)")
    ap.add_argument("--once", action="store_true",
                    help="Print one snapshot and exit (no polling loop)")
    args = ap.parse_args()

    # The interval only matters for the polling loop. A negative value would
    # make time.sleep() raise ValueError and zero would hammer the CLI in a
    # busy loop, so reject both up front with a normal usage error.
    if not args.once and args.interval < 1:
        ap.error("--interval must be a positive number of seconds")

    if not hermes_available():
        print("ERROR: 'hermes' CLI not found in PATH", file=sys.stderr)
        sys.exit(1)

    if args.once:
        tasks, issues = snapshot(args.tenant)
        print_snapshot(tasks, issues)
        sys.exit(0 if not issues else 2)

    print(f"Monitoring tenant '{args.tenant}' every {args.interval}s. "
          "Ctrl-C to exit.")
    try:
        while True:
            tasks, issues = snapshot(args.tenant)
            print_snapshot(tasks, issues)
            time.sleep(args.interval)
    except KeyboardInterrupt:
        print("\nStopped.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
92
run_agent.py
92
run_agent.py
|
|
@ -1258,6 +1258,10 @@ class AIAgent:
|
|||
# after each API call. Accessed by /usage slash command.
|
||||
self._rate_limit_state: Optional["RateLimitState"] = None
|
||||
|
||||
# OpenRouter response cache hit counter — incremented when
|
||||
# X-OpenRouter-Cache-Status: HIT is seen in streaming response headers.
|
||||
self._or_cache_hits: int = 0
|
||||
|
||||
# Centralized logging — agent.log (INFO+) and errors.log (WARNING+)
|
||||
# both live under ~/.hermes/logs/. Idempotent, so gateway mode
|
||||
# (which creates a new AIAgent per message) won't duplicate handlers.
|
||||
|
|
@ -1421,11 +1425,8 @@ class AIAgent:
|
|||
client_kwargs["args"] = self.acp_args
|
||||
effective_base = base_url
|
||||
if base_url_host_matches(effective_base, "openrouter.ai"):
|
||||
client_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
client_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(effective_base, "api.routermint.com"):
|
||||
client_kwargs["default_headers"] = _routermint_headers()
|
||||
elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
|
||||
|
|
@ -1473,17 +1474,49 @@ class AIAgent:
|
|||
_env_hint = _pcfg.api_key_env_vars[0]
|
||||
except Exception:
|
||||
pass
|
||||
# --- Init-time fallback (#17929) ---
|
||||
_fb_entries = []
|
||||
if isinstance(fallback_model, list):
|
||||
_fb_entries = [
|
||||
f for f in fallback_model
|
||||
if isinstance(f, dict) and f.get("provider") and f.get("model")
|
||||
]
|
||||
elif isinstance(fallback_model, dict) and fallback_model.get("provider") and fallback_model.get("model"):
|
||||
_fb_entries = [fallback_model]
|
||||
_fb_resolved = False
|
||||
for _fb in _fb_entries:
|
||||
_fb_client, _fb_model = resolve_provider_client(
|
||||
_fb["provider"], model=_fb["model"], raw_codex=True,
|
||||
explicit_base_url=_fb.get("base_url"),
|
||||
explicit_api_key=_fb.get("api_key"),
|
||||
)
|
||||
if _fb_client is not None:
|
||||
self.provider = _fb["provider"]
|
||||
self.model = _fb_model or _fb["model"]
|
||||
self._fallback_activated = True
|
||||
client_kwargs = {
|
||||
"api_key": _fb_client.api_key,
|
||||
"base_url": str(_fb_client.base_url),
|
||||
}
|
||||
if _provider_timeout is not None:
|
||||
client_kwargs["timeout"] = _provider_timeout
|
||||
if hasattr(_fb_client, "_default_headers") and _fb_client._default_headers:
|
||||
client_kwargs["default_headers"] = dict(_fb_client._default_headers)
|
||||
_fb_resolved = True
|
||||
break
|
||||
if not _fb_resolved:
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_env_hint} environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
)
|
||||
if not getattr(self, "_fallback_activated", False):
|
||||
# No provider configured — reject with a clear message.
|
||||
raise RuntimeError(
|
||||
f"Provider '{_explicit}' is set in config.yaml but no API key "
|
||||
f"was found. Set the {_env_hint} environment "
|
||||
f"variable, or switch to a different provider with `hermes model`."
|
||||
"No LLM provider configured. Run `hermes model` to "
|
||||
"select a provider, or run `hermes setup` for first-time "
|
||||
"configuration."
|
||||
)
|
||||
# No provider configured — reject with a clear message.
|
||||
raise RuntimeError(
|
||||
"No LLM provider configured. Run `hermes model` to "
|
||||
"select a provider, or run `hermes setup` for first-time "
|
||||
"configuration."
|
||||
)
|
||||
|
||||
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
||||
|
||||
|
|
@ -1536,7 +1569,7 @@ class AIAgent:
|
|||
else:
|
||||
self._fallback_chain = []
|
||||
self._fallback_index = 0
|
||||
self._fallback_activated = False
|
||||
self._fallback_activated = getattr(self, "_fallback_activated", False)
|
||||
# Legacy attribute kept for backward compat (tests, external callers)
|
||||
self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None
|
||||
if self._fallback_chain and not self.quiet_mode:
|
||||
|
|
@ -4548,6 +4581,28 @@ class AIAgent:
|
|||
"""Return the last captured RateLimitState, or None."""
|
||||
return self._rate_limit_state
|
||||
|
||||
def _check_openrouter_cache_status(self, http_response: Any) -> None:
    """Inspect a response's ``X-OpenRouter-Cache-Status`` header and log it.

    A value of ``HIT`` (compared case-insensitively) bumps
    ``self._or_cache_hits`` and is logged at INFO level with the running
    total; any other non-empty value is logged at DEBUG level. Nothing
    happens when the response is ``None``, has no headers, or the header is
    absent or empty.
    """
    response_headers = (
        None if http_response is None
        else getattr(http_response, "headers", None)
    )
    if not response_headers:
        return

    try:
        cache_status = response_headers.get("x-openrouter-cache-status")
        if cache_status:
            if cache_status.upper() != "HIT":
                logger.debug("OpenRouter response cache %s", cache_status.upper())
            else:
                self._or_cache_hits += 1
                logger.info("OpenRouter response cache HIT (total: %d)", self._or_cache_hits)
    except Exception:
        # Best effort only: header parsing must never break the agent loop.
        pass
|
||||
|
||||
def get_activity_summary(self) -> dict:
|
||||
"""Return a snapshot of the agent's current activity for diagnostics.
|
||||
|
||||
|
|
@ -6125,10 +6180,10 @@ class AIAgent:
|
|||
return True
|
||||
|
||||
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
|
||||
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS
|
||||
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers
|
||||
|
||||
if base_url_host_matches(base_url, "openrouter.ai"):
|
||||
self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
self._client_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
|
||||
self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
|
||||
elif base_url_host_matches(base_url, "api.routermint.com"):
|
||||
|
|
@ -6748,6 +6803,9 @@ class AIAgent:
|
|||
# response via .response before any chunks are consumed.
|
||||
self._capture_rate_limits(getattr(stream, "response", None))
|
||||
|
||||
# Log OpenRouter response cache status when present.
|
||||
self._check_openrouter_cache_status(getattr(stream, "response", None))
|
||||
|
||||
content_parts: list = []
|
||||
tool_calls_acc: dict = {}
|
||||
tool_gen_notified: set = set()
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ AUTHOR_MAP = {
|
|||
"leone.parise@gmail.com": "leoneparise",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"159539633+MottledShadow@users.noreply.github.com": "MottledShadow",
|
||||
"aludwin+gh@gmail.com": "adamludwin",
|
||||
"2093036+exiao@users.noreply.github.com": "exiao",
|
||||
"rylen.anil@gmail.com": "rylena",
|
||||
|
|
@ -67,6 +68,7 @@ AUTHOR_MAP = {
|
|||
"274096618+hermes-agent-dhabibi@users.noreply.github.com": "dhabibi",
|
||||
"dejie.guo@gmail.com": "JayGwod",
|
||||
"maxence@groine.fr": "MaxyMoos",
|
||||
"61830395+leprincep35700@users.noreply.github.com": "leprincep35700",
|
||||
# OpenViking viking_read salvage (April 2026)
|
||||
"hitesh@gmail.com": "htsh",
|
||||
"pty819@outlook.com": "pty819",
|
||||
|
|
@ -370,6 +372,10 @@ AUTHOR_MAP = {
|
|||
"xowiekk@gmail.com": "Xowiek",
|
||||
"1243352777@qq.com": "zons-zhaozhy",
|
||||
"e.silacandmr@gmail.com": "Es1la",
|
||||
"h3057183414@gmail.com": "CoreyNoDream",
|
||||
"franksong2702@gmail.com": "franksong2702",
|
||||
"673088860@qq.com": "ambition0802",
|
||||
"beibei1988@proton.me": "beibi9966",
|
||||
# ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
|
||||
# crossref, and GH contributor list matching (April 2026 audit) ──
|
||||
"1115117931@qq.com": "aaronagent",
|
||||
|
|
@ -500,6 +506,10 @@ AUTHOR_MAP = {
|
|||
"michel.belleau@malaiwah.com": "malaiwah",
|
||||
"gnanasekaran.sekareee@gmail.com": "gnanam1990",
|
||||
"jz.pentest@gmail.com": "0xyg3n",
|
||||
"7093928+0xyg3n@users.noreply.github.com": "0xyg3n",
|
||||
"nftpoetrist@gmail.com": "nftpoetrist", # PR #18982
|
||||
"millerc79@users.noreply.github.com": "millerc79", # PR #19033
|
||||
"hermes@example.com": "shellybotmoyer", # PR #18915 (bot-committed)
|
||||
"hypnosis.mda@gmail.com": "Hypn0sis",
|
||||
"ywt000818@gmail.com": "OwenYWT",
|
||||
"dhandhalyabhavik@gmail.com": "v1k22",
|
||||
|
|
@ -668,6 +678,7 @@ AUTHOR_MAP = {
|
|||
"web3blind@gmail.com": "web3blind",
|
||||
"ztzheng@163.com": "chengoak", # PR #17467
|
||||
"24110240104@m.fudan.edu.cn": "YuShu", # co-author only
|
||||
"charliekerfoot@gmail.com": "CharlieKerfoot", # PR #18951
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
216
scripts/whatsapp-bridge/package-lock.json
generated
216
scripts/whatsapp-bridge/package-lock.json
generated
|
|
@ -25,15 +25,15 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@cacheable/memory": {
|
||||
"version": "2.0.7",
|
||||
"resolved": "https://registry.npmjs.org/@cacheable/memory/-/memory-2.0.7.tgz",
|
||||
"integrity": "sha512-RbxnxAMf89Tp1dLhXMS7ceft/PGsDl1Ip7T20z5nZ+pwIAsQ1p2izPjVG69oCLv/jfQ7HDPHTWK0c9rcAWXN3A==",
|
||||
"version": "2.0.8",
|
||||
"resolved": "https://registry.npmjs.org/@cacheable/memory/-/memory-2.0.8.tgz",
|
||||
"integrity": "sha512-FvEb29x5wVwu/Kf93IWwsOOEuhHh6dYCJF3vcKLzXc0KXIW181AOzv6ceT4ZpBHDvAfG60eqb+ekmrnLHIy+jw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@cacheable/utils": "^2.3.3",
|
||||
"@keyv/bigmap": "^1.3.0",
|
||||
"hookified": "^1.14.0",
|
||||
"keyv": "^5.5.5"
|
||||
"@cacheable/utils": "^2.4.0",
|
||||
"@keyv/bigmap": "^1.3.1",
|
||||
"hookified": "^1.15.1",
|
||||
"keyv": "^5.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@cacheable/node-cache": {
|
||||
|
|
@ -51,19 +51,19 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@cacheable/utils": {
|
||||
"version": "2.3.4",
|
||||
"resolved": "https://registry.npmjs.org/@cacheable/utils/-/utils-2.3.4.tgz",
|
||||
"integrity": "sha512-knwKUJEYgIfwShABS1BX6JyJJTglAFcEU7EXqzTdiGCXur4voqkiJkdgZIQtWNFhynzDWERcTYv/sETMu3uJWA==",
|
||||
"version": "2.4.1",
|
||||
"resolved": "https://registry.npmjs.org/@cacheable/utils/-/utils-2.4.1.tgz",
|
||||
"integrity": "sha512-eiFgzCbIneyMlLOmNG4g9xzF7Hv3Mga4LjxjcSC/ues6VYq2+gUbQI8JqNuw/ZM8tJIeIaBGpswAsqV2V7ApgA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"hashery": "^1.3.0",
|
||||
"hashery": "^1.5.1",
|
||||
"keyv": "^5.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@emnapi/runtime": {
|
||||
"version": "1.8.1",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.8.1.tgz",
|
||||
"integrity": "sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==",
|
||||
"version": "1.10.0",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz",
|
||||
"integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
|
|
@ -87,9 +87,9 @@
|
|||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@img/colour": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.0.0.tgz",
|
||||
"integrity": "sha512-A5P/LfWGFSl6nsckYtjw9da+19jB8hkJ6ACTGcDfEJ0aE+l2n2El7dsVM7UVHZQ9s2lmYMWlrS21YLy2IR1LUw==",
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
|
||||
"integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"engines": {
|
||||
|
|
@ -617,9 +617,9 @@
|
|||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/codegen": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.4.tgz",
|
||||
"integrity": "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==",
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz",
|
||||
"integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/eventemitter": {
|
||||
|
|
@ -645,9 +645,9 @@
|
|||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.0.tgz",
|
||||
"integrity": "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==",
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
|
||||
"integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
|
|
@ -663,9 +663,9 @@
|
|||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/utf8": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||
"integrity": "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==",
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz",
|
||||
"integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@tokenizer/inflate": {
|
||||
|
|
@ -714,25 +714,20 @@
|
|||
"integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/long": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@types/long/-/long-4.0.2.tgz",
|
||||
"integrity": "sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "25.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.1.tgz",
|
||||
"integrity": "sha512-hj9YIJimBCipHVfHKRMnvmHg+wfhKc0o4mTtXh9pKBjC8TLJzz0nzGmLi5UJsYAUgSvXFHgb0V2oY10DUFtImw==",
|
||||
"version": "25.6.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz",
|
||||
"integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.18.0"
|
||||
"undici-types": "~7.19.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@whiskeysockets/baileys": {
|
||||
"name": "baileys",
|
||||
"version": "7.0.0-rc.9",
|
||||
"resolved": "git+ssh://git@github.com/WhiskeySockets/Baileys.git#01047debd81beb20da7b7779b08edcb06aa03770",
|
||||
"integrity": "sha512-letWyB96JHD6NdqpAiseOfaUBi13u8AhiRcKSRqcVjc5Vw5xoPTZGvVnw8K/NvGBFAvyLJkwim9Mjvwzhx/SlA==",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
|
@ -807,9 +802,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/body-parser": {
|
||||
"version": "1.20.4",
|
||||
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.4.tgz",
|
||||
"integrity": "sha512-ZTgYYLMOXY9qKU/57FAo8F+HA2dGX7bqGc71txDRC1rS4frdFI5R7NhluHxH6M0YItAP0sHB4uqAOcYKxO6uGA==",
|
||||
"version": "1.20.5",
|
||||
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.5.tgz",
|
||||
"integrity": "sha512-3grm+/2tUOvu2cjJkvsIxrv/wVpfXQW4PsQHYm7yk4vfpu7Ekl6nEsYBoJUL6qDwZUx8wUhQ8tR2qz+ad9c9OA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"bytes": "~3.1.2",
|
||||
|
|
@ -820,7 +815,7 @@
|
|||
"http-errors": "~2.0.1",
|
||||
"iconv-lite": "~0.4.24",
|
||||
"on-finished": "~2.4.1",
|
||||
"qs": "~6.14.0",
|
||||
"qs": "~6.15.1",
|
||||
"raw-body": "~2.5.3",
|
||||
"type-is": "~1.6.18",
|
||||
"unpipe": "~1.0.0"
|
||||
|
|
@ -830,6 +825,21 @@
|
|||
"npm": "1.2.8000 || >= 1.4.16"
|
||||
}
|
||||
},
|
||||
"node_modules/body-parser/node_modules/qs": {
|
||||
"version": "6.15.1",
|
||||
"resolved": "https://registry.npmjs.org/qs/-/qs-6.15.1.tgz",
|
||||
"integrity": "sha512-6YHEFRL9mfgcAvql/XhwTvf5jKcOiiupt2FiJxHkiX1z4j7WL8J/jRHYLluORvc1XxB5rV20KoeK00gVJamspg==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"side-channel": "^1.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.6"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/bytes": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
|
||||
|
|
@ -840,16 +850,16 @@
|
|||
}
|
||||
},
|
||||
"node_modules/cacheable": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/cacheable/-/cacheable-2.3.2.tgz",
|
||||
"integrity": "sha512-w+ZuRNmex9c1TR9RcsxbfTKCjSL0rh1WA5SABbrWprIHeNBdmyQLSYonlDy9gpD+63XT8DgZ/wNh1Smvc9WnJA==",
|
||||
"version": "2.3.4",
|
||||
"resolved": "https://registry.npmjs.org/cacheable/-/cacheable-2.3.4.tgz",
|
||||
"integrity": "sha512-djgxybDbw9fL/ZWMI3+CE8ZilNxcwFkVtDc1gJ+IlOSSWkSMPQabhV/XCHTQ6pwwN6aivXPZ43omTooZiX06Ew==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@cacheable/memory": "^2.0.7",
|
||||
"@cacheable/utils": "^2.3.3",
|
||||
"@cacheable/memory": "^2.0.8",
|
||||
"@cacheable/utils": "^2.4.0",
|
||||
"hookified": "^1.15.0",
|
||||
"keyv": "^5.5.5",
|
||||
"qified": "^0.6.0"
|
||||
"keyv": "^5.6.0",
|
||||
"qified": "^0.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/call-bind-apply-helpers": {
|
||||
|
|
@ -1212,21 +1222,21 @@
|
|||
}
|
||||
},
|
||||
"node_modules/hashery": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/hashery/-/hashery-1.5.0.tgz",
|
||||
"integrity": "sha512-nhQ6ExaOIqti2FDWoEMWARUqIKyjr2VcZzXShrI+A3zpeiuPWzx6iPftt44LhP74E5sW36B75N6VHbvRtpvO6Q==",
|
||||
"version": "1.5.1",
|
||||
"resolved": "https://registry.npmjs.org/hashery/-/hashery-1.5.1.tgz",
|
||||
"integrity": "sha512-iZyKG96/JwPz1N55vj2Ie2vXbhu440zfUfJvSwEqEbeLluk7NnapfGqa7LH0mOsnDxTF85Mx8/dyR6HfqcbmbQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"hookified": "^1.14.0"
|
||||
"hookified": "^1.15.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
}
|
||||
},
|
||||
"node_modules/hasown": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
|
||||
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.3.tgz",
|
||||
"integrity": "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"function-bind": "^1.1.2"
|
||||
|
|
@ -1327,44 +1337,6 @@
|
|||
"protobufjs": "6.8.8"
|
||||
}
|
||||
},
|
||||
"node_modules/libsignal/node_modules/@types/node": {
|
||||
"version": "10.17.60",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-10.17.60.tgz",
|
||||
"integrity": "sha512-F0KIgDJfy2nA3zMLmWGKxcH2ZVEtCZXHHdOQs2gSaQ27+lNeEfGxzkIw90aXswATX7AZ33tahPbzy6KAfUreVw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/libsignal/node_modules/long": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
|
||||
"integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/libsignal/node_modules/protobufjs": {
|
||||
"version": "6.8.8",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.8.8.tgz",
|
||||
"integrity": "sha512-AAmHtD5pXgZfi7GMpllpO3q1Xw1OYldr+dMUlAnffGTAhqkg72WdmSY71uKBF/JuyiKs8psYbtKrhi0ASCD8qw==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.4",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.0",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.0",
|
||||
"@types/long": "^4.0.0",
|
||||
"@types/node": "^10.1.0",
|
||||
"long": "^4.0.0"
|
||||
},
|
||||
"bin": {
|
||||
"pbjs": "bin/pbjs",
|
||||
"pbts": "bin/pbts"
|
||||
}
|
||||
},
|
||||
"node_modules/long": {
|
||||
"version": "5.3.2",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
|
||||
|
|
@ -1372,9 +1344,9 @@
|
|||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/lru-cache": {
|
||||
"version": "11.2.6",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz",
|
||||
"integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==",
|
||||
"version": "11.3.5",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.3.5.tgz",
|
||||
"integrity": "sha512-NxVFwLAnrd9i7KUBxC4DrUhmgjzOs+1Qm50D3oF1/oL+r1NpZ4gA7xvG0/zJ8evR7zIKn4vLf7qTNduWFtCrRw==",
|
||||
"license": "BlueOak-1.0.0",
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
|
|
@ -1552,12 +1524,12 @@
|
|||
}
|
||||
},
|
||||
"node_modules/p-queue": {
|
||||
"version": "9.1.0",
|
||||
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-9.1.0.tgz",
|
||||
"integrity": "sha512-O/ZPaXuQV29uSLbxWBGGZO1mCQXV2BLIwUr59JUU9SoH76mnYvtms7aafH/isNSNGwuEfP6W/4xD0/TJXxrizw==",
|
||||
"version": "9.2.0",
|
||||
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-9.2.0.tgz",
|
||||
"integrity": "sha512-dWgLE8AH0HjQ9fe74pUkKkvzzYT18Inp4zra3lKHnnwqGvcfcUBrvF2EAVX+envufDNBOzpPq/IBUONDbI7+3g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"eventemitter3": "^5.0.1",
|
||||
"eventemitter3": "^5.0.4",
|
||||
"p-timeout": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -1648,22 +1620,22 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "7.5.4",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
|
||||
"integrity": "sha512-CvexbZtbov6jW2eXAvLukXjXUW1TzFaivC46BpWc/3BpcCysb5Vffu+B3XHMm8lVEuy2Mm4XGex8hBSg1yapPg==",
|
||||
"version": "7.5.6",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
|
||||
"integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.4",
|
||||
"@protobufjs/codegen": "^2.0.5",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.0",
|
||||
"@protobufjs/inquire": "^1.1.1",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^5.0.0"
|
||||
},
|
||||
|
|
@ -1685,17 +1657,23 @@
|
|||
}
|
||||
},
|
||||
"node_modules/qified": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmjs.org/qified/-/qified-0.6.0.tgz",
|
||||
"integrity": "sha512-tsSGN1x3h569ZSU1u6diwhltLyfUWDp3YbFHedapTmpBl0B3P6U3+Qptg7xu+v+1io1EwhdPyyRHYbEw0KN2FA==",
|
||||
"version": "0.9.1",
|
||||
"resolved": "https://registry.npmjs.org/qified/-/qified-0.9.1.tgz",
|
||||
"integrity": "sha512-n7mar4T0xQ+39dE2vGTAlbxUEpndwPANH0kDef1/MYsB8Bba9wshkybIRx74qgcvKQPEWErf9AqAdYjhzY2Ilg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"hookified": "^1.14.0"
|
||||
"hookified": "^2.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
}
|
||||
},
|
||||
"node_modules/qified/node_modules/hookified": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/hookified/-/hookified-2.2.0.tgz",
|
||||
"integrity": "sha512-p/LgFzRN5FeoD3DLS6bkUapeye6E4SI6yJs6KetENd18S+FBthqYq2amJUWpt5z0EQwwHemidjY5OqJGEKm5uA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/qrcode-terminal": {
|
||||
"version": "0.12.0",
|
||||
"resolved": "https://registry.npmjs.org/qrcode-terminal/-/qrcode-terminal-0.12.0.tgz",
|
||||
|
|
@ -1922,13 +1900,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/side-channel-list": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz",
|
||||
"integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==",
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.1.tgz",
|
||||
"integrity": "sha512-mjn/0bi/oUURjc5Xl7IaWi/OJJJumuoJFQJfDDyO46+hBWsfaVM65TBHq2eoZBhzl9EchxOijpkbRC8SVBQU0w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"es-errors": "^1.3.0",
|
||||
"object-inspect": "^1.13.3"
|
||||
"object-inspect": "^1.13.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 0.4"
|
||||
|
|
@ -2094,9 +2072,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "7.18.2",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz",
|
||||
"integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==",
|
||||
"version": "7.19.2",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz",
|
||||
"integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/unpipe": {
|
||||
|
|
@ -2139,9 +2117,9 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.19.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz",
|
||||
"integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
|
||||
"version": "8.20.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
|
||||
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
|
|
|
|||
|
|
@ -12,5 +12,8 @@
|
|||
"express": "^4.21.0",
|
||||
"qrcode-terminal": "^0.12.0",
|
||||
"pino": "^9.0.0"
|
||||
},
|
||||
"overrides": {
|
||||
"protobufjs": "^7.5.5"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -178,9 +178,10 @@ class TestMcpRegistrationE2E:
|
|||
complete_event = completions[0]
|
||||
assert isinstance(complete_event, ToolCallProgress)
|
||||
assert complete_event.status == "completed"
|
||||
# rawOutput should contain the tool result string
|
||||
assert complete_event.raw_output is not None
|
||||
assert "hello" in str(complete_event.raw_output)
|
||||
# Completion should contain human-readable output rather than forcing raw JSON panes.
|
||||
assert complete_event.content
|
||||
assert "hello" in complete_event.content[0].content.text
|
||||
assert complete_event.raw_output is None
|
||||
|
||||
def test_patch_mode_tool_start_emits_diff_blocks_for_v4a_patch(self):
|
||||
update = build_tool_start(
|
||||
|
|
|
|||
|
|
@ -27,7 +27,10 @@ from acp.schema import (
|
|||
SetSessionModeResponse,
|
||||
SessionInfo,
|
||||
TextContentBlock,
|
||||
ToolCallProgress,
|
||||
ToolCallStart,
|
||||
Usage,
|
||||
UsageUpdate,
|
||||
UserMessageChunk,
|
||||
)
|
||||
from acp_adapter.server import HermesACPAgent, HERMES_VERSION
|
||||
|
|
@ -200,6 +203,8 @@ class TestSessionOps:
|
|||
"context",
|
||||
"reset",
|
||||
"compact",
|
||||
"steer",
|
||||
"queue",
|
||||
"version",
|
||||
]
|
||||
model_cmd = next(
|
||||
|
|
@ -208,6 +213,46 @@ class TestSessionOps:
|
|||
assert model_cmd.input is not None
|
||||
assert model_cmd.input.root.hint == "model name to switch to"
|
||||
|
||||
def test_build_usage_update_for_zed_context_indicator(self, agent, mock_manager):
|
||||
state = mock_manager.create_session(cwd="/tmp")
|
||||
state.history = [{"role": "user", "content": "hello"}]
|
||||
state.agent.context_compressor = MagicMock(context_length=100_000)
|
||||
state.agent._cached_system_prompt = "system"
|
||||
state.agent.tools = [{"type": "function", "function": {"name": "demo"}}]
|
||||
|
||||
with patch(
|
||||
"agent.model_metadata.estimate_request_tokens_rough",
|
||||
return_value=25_000,
|
||||
):
|
||||
update = agent._build_usage_update(state)
|
||||
|
||||
assert isinstance(update, UsageUpdate)
|
||||
assert update.session_update == "usage_update"
|
||||
assert update.size == 100_000
|
||||
assert update.used == 25_000
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_usage_update_to_client(self, agent, mock_manager):
|
||||
state = mock_manager.create_session(cwd="/tmp")
|
||||
state.agent.context_compressor = MagicMock(context_length=100_000)
|
||||
mock_conn = MagicMock(spec=acp.Client)
|
||||
mock_conn.session_update = AsyncMock()
|
||||
agent._conn = mock_conn
|
||||
|
||||
with patch(
|
||||
"agent.model_metadata.estimate_request_tokens_rough",
|
||||
return_value=25_000,
|
||||
):
|
||||
await agent._send_usage_update(state)
|
||||
|
||||
mock_conn.session_update.assert_awaited_once()
|
||||
call = mock_conn.session_update.await_args
|
||||
assert call.kwargs["session_id"] == state.session_id
|
||||
update = call.kwargs["update"]
|
||||
assert isinstance(update, UsageUpdate)
|
||||
assert update.size == 100_000
|
||||
assert update.used == 25_000
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cancel_sets_event(self, agent):
|
||||
resp = await agent.new_session(cwd=".")
|
||||
|
|
@ -238,11 +283,31 @@ class TestSessionOps:
|
|||
{"role": "system", "content": "hidden system"},
|
||||
{"role": "user", "content": "what controls the / slash commands?"},
|
||||
{"role": "assistant", "content": "HermesACPAgent._ADVERTISED_COMMANDS controls them."},
|
||||
{"role": "tool", "content": "tool output should not replay"},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_search_1",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "search_files",
|
||||
"arguments": '{"pattern":"slash commands","path":"."}',
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call_search_1",
|
||||
"content": '{"total_count":1,"matches":[{"path":"cli.py","line":42,"content":"slash commands"}]}',
|
||||
},
|
||||
]
|
||||
|
||||
mock_conn.session_update.reset_mock()
|
||||
resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||
await asyncio.sleep(0)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert isinstance(resp, LoadSessionResponse)
|
||||
calls = mock_conn.session_update.await_args_list
|
||||
|
|
@ -257,6 +322,21 @@ class TestSessionOps:
|
|||
assert isinstance(replay_calls[1].kwargs["update"], AgentMessageChunk)
|
||||
assert replay_calls[1].kwargs["update"].content.text.startswith("HermesACPAgent")
|
||||
|
||||
tool_updates = [
|
||||
call.kwargs["update"]
|
||||
for call in calls
|
||||
if getattr(call.kwargs.get("update"), "session_update", None)
|
||||
in {"tool_call", "tool_call_update"}
|
||||
]
|
||||
assert len(tool_updates) == 2
|
||||
assert isinstance(tool_updates[0], ToolCallStart)
|
||||
assert tool_updates[0].tool_call_id == "call_search_1"
|
||||
assert tool_updates[0].title == "search: slash commands"
|
||||
assert isinstance(tool_updates[1], ToolCallProgress)
|
||||
assert tool_updates[1].tool_call_id == "call_search_1"
|
||||
assert "Search results" in tool_updates[1].content[0].content.text
|
||||
assert "cli.py:42" in tool_updates[1].content[0].content.text
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_session_replays_persisted_history_to_client(self, agent):
|
||||
mock_conn = MagicMock(spec=acp.Client)
|
||||
|
|
@ -269,6 +349,8 @@ class TestSessionOps:
|
|||
|
||||
mock_conn.session_update.reset_mock()
|
||||
resp = await agent.resume_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||
await asyncio.sleep(0)
|
||||
await asyncio.sleep(0)
|
||||
|
||||
assert isinstance(resp, ResumeSessionResponse)
|
||||
updates = [call.kwargs["update"] for call in mock_conn.session_update.await_args_list]
|
||||
|
|
@ -278,6 +360,27 @@ class TestSessionOps:
|
|||
for update in updates
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_load_session_schedules_history_replay_after_response(self, agent):
|
||||
"""Zed only attaches replayed updates after session/load has completed."""
|
||||
new_resp = await agent.new_session(cwd="/tmp")
|
||||
state = agent.session_manager.get_session(new_resp.session_id)
|
||||
state.history = [{"role": "user", "content": "hello from history"}]
|
||||
events = []
|
||||
|
||||
async def replay_after_response(_state):
|
||||
events.append("replay")
|
||||
|
||||
with patch.object(agent, "_replay_session_history", side_effect=replay_after_response):
|
||||
resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||
events.append("returned")
|
||||
|
||||
assert isinstance(resp, LoadSessionResponse)
|
||||
assert events == ["returned"]
|
||||
await asyncio.sleep(0)
|
||||
await asyncio.sleep(0)
|
||||
assert events == ["returned", "replay"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_session_creates_new_if_missing(self, agent):
|
||||
resume_resp = await agent.resume_session(cwd="/tmp", session_id="nonexistent")
|
||||
|
|
@ -522,6 +625,11 @@ class TestPrompt:
|
|||
assert isinstance(resp, PromptResponse)
|
||||
assert resp.stop_reason == "end_turn"
|
||||
state.agent.run_conversation.assert_called_once()
|
||||
assert state.agent.tool_progress_callback is not None
|
||||
assert state.agent.step_callback is not None
|
||||
assert state.agent.stream_delta_callback is not None
|
||||
assert state.agent.reasoning_callback is not None
|
||||
assert state.agent.thinking_callback is None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_updates_history(self, agent):
|
||||
|
|
@ -565,12 +673,40 @@ class TestPrompt:
|
|||
prompt = [TextContentBlock(type="text", text="help me")]
|
||||
await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
|
||||
|
||||
# session_update should have been called with the final message
|
||||
# session_update should include the final message (usage_update may follow it)
|
||||
mock_conn.session_update.assert_called()
|
||||
# Get the last call's update argument
|
||||
last_call = mock_conn.session_update.call_args_list[-1]
|
||||
update = last_call[1].get("update") or last_call[0][1]
|
||||
assert update.session_update == "agent_message_chunk"
|
||||
updates = [
|
||||
call.kwargs.get("update") or call.args[1]
|
||||
for call in mock_conn.session_update.call_args_list
|
||||
]
|
||||
assert any(update.session_update == "agent_message_chunk" for update in updates)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_does_not_duplicate_streamed_final_message(self, agent):
|
||||
"""If ACP already streamed response chunks, final_response should not be sent again."""
|
||||
new_resp = await agent.new_session(cwd=".")
|
||||
state = agent.session_manager.get_session(new_resp.session_id)
|
||||
|
||||
def mock_run(*args, **kwargs):
|
||||
state.agent.stream_delta_callback("streamed answer")
|
||||
return {"final_response": "streamed answer", "messages": []}
|
||||
|
||||
state.agent.run_conversation = mock_run
|
||||
|
||||
mock_conn = MagicMock(spec=acp.Client)
|
||||
mock_conn.session_update = AsyncMock()
|
||||
agent._conn = mock_conn
|
||||
|
||||
prompt = [TextContentBlock(type="text", text="hello")]
|
||||
await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
|
||||
|
||||
updates = [
|
||||
call.kwargs.get("update") or call.args[1]
|
||||
for call in mock_conn.session_update.call_args_list
|
||||
]
|
||||
agent_chunks = [update for update in updates if update.session_update == "agent_message_chunk"]
|
||||
assert len(agent_chunks) == 1
|
||||
assert agent_chunks[0].content.text == "streamed answer"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_auto_titles_session(self, agent):
|
||||
|
|
@ -708,6 +844,43 @@ class TestSlashCommands:
|
|||
assert "2 messages" in result
|
||||
assert "user: 1" in result
|
||||
|
||||
def test_context_shows_usage_and_compression_threshold(self, agent, mock_manager):
|
||||
state = self._make_state(mock_manager)
|
||||
state.history = [{"role": "user", "content": "hello"}]
|
||||
state.agent.context_compressor = MagicMock(
|
||||
context_length=100_000,
|
||||
threshold_tokens=80_000,
|
||||
)
|
||||
state.agent._cached_system_prompt = "system"
|
||||
state.agent.tools = [{"type": "function", "function": {"name": "demo"}}]
|
||||
|
||||
with patch(
|
||||
"agent.model_metadata.estimate_request_tokens_rough",
|
||||
return_value=25_000,
|
||||
):
|
||||
result = agent._handle_slash_command("/context", state)
|
||||
|
||||
assert "Context usage: ~25,000 / 100,000 tokens (25.0%)" in result
|
||||
assert "Compression: ~55,000 tokens until threshold (~80,000, 80%)" in result
|
||||
assert "Tip: run /compact" in result
|
||||
|
||||
def test_context_says_compression_due_when_past_threshold(self, agent, mock_manager):
|
||||
state = self._make_state(mock_manager)
|
||||
state.history = [{"role": "user", "content": "hello"}]
|
||||
state.agent.context_compressor = MagicMock(
|
||||
context_length=100_000,
|
||||
threshold_tokens=80_000,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"agent.model_metadata.estimate_request_tokens_rough",
|
||||
return_value=82_000,
|
||||
):
|
||||
result = agent._handle_slash_command("/context", state)
|
||||
|
||||
assert "Context usage: ~82,000 / 100,000 tokens (82.0%)" in result
|
||||
assert "Compression: due now (threshold ~80,000, 80%). Run /compact." in result
|
||||
|
||||
def test_reset_clears_history(self, agent, mock_manager):
|
||||
state = self._make_state(mock_manager)
|
||||
state.history = [{"role": "user", "content": "hello"}]
|
||||
|
|
@ -787,7 +960,12 @@ class TestSlashCommands:
|
|||
resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
|
||||
|
||||
assert resp.stop_reason == "end_turn"
|
||||
mock_conn.session_update.assert_called_once()
|
||||
updates = [
|
||||
call.kwargs.get("update") or call.args[1]
|
||||
for call in mock_conn.session_update.call_args_list
|
||||
]
|
||||
assert any(update.session_update == "agent_message_chunk" for update in updates)
|
||||
assert any(update.session_update == "usage_update" for update in updates)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager):
|
||||
|
|
|
|||
|
|
@ -52,6 +52,12 @@ class TestToolKindMap:
|
|||
def test_tool_kind_execute_code(self):
|
||||
assert get_tool_kind("execute_code") == "execute"
|
||||
|
||||
def test_tool_kind_todo(self):
|
||||
assert get_tool_kind("todo") == "other"
|
||||
|
||||
def test_tool_kind_skill_view(self):
|
||||
assert get_tool_kind("skill_view") == "read"
|
||||
|
||||
def test_tool_kind_browser_navigate(self):
|
||||
assert get_tool_kind("browser_navigate") == "fetch"
|
||||
|
||||
|
|
@ -110,6 +116,25 @@ class TestBuildToolTitle:
|
|||
title = build_tool_title("web_search", {"query": "python asyncio"})
|
||||
assert "python asyncio" in title
|
||||
|
||||
def test_skill_view_title_includes_skill_name(self):
|
||||
title = build_tool_title("skill_view", {"name": "github-pitfalls"})
|
||||
assert title == "skill view (github-pitfalls)"
|
||||
|
||||
def test_skill_view_title_includes_linked_file(self):
|
||||
title = build_tool_title("skill_view", {"name": "github-pitfalls", "file_path": "references/api.md"})
|
||||
assert title == "skill view (github-pitfalls/references/api.md)"
|
||||
|
||||
def test_execute_code_title_includes_first_code_line(self):
|
||||
title = build_tool_title("execute_code", {"code": "\nfrom hermes_tools import terminal\nprint('done')"})
|
||||
assert title == "python: from hermes_tools import terminal"
|
||||
|
||||
def test_skill_manage_title_includes_action_and_target(self):
|
||||
title = build_tool_title(
|
||||
"skill_manage",
|
||||
{"action": "patch", "name": "hermes-agent-operations", "file_path": "references/acp.md"},
|
||||
)
|
||||
assert title == "skill patch: hermes-agent-operations/references/acp.md"
|
||||
|
||||
def test_unknown_tool_uses_name(self):
|
||||
title = build_tool_title("some_new_tool", {"foo": "bar"})
|
||||
assert title == "some_new_tool"
|
||||
|
|
@ -164,15 +189,23 @@ class TestBuildToolStart:
|
|||
assert "ls -la /tmp" in text
|
||||
|
||||
def test_build_tool_start_for_read_file(self):
|
||||
"""read_file should include the path in content."""
|
||||
"""read_file start should stay compact; completion carries file contents."""
|
||||
args = {"path": "/etc/hosts", "offset": 1, "limit": 50}
|
||||
result = build_tool_start("tc-3", "read_file", args)
|
||||
assert isinstance(result, ToolCallStart)
|
||||
assert result.kind == "read"
|
||||
assert len(result.content) >= 1
|
||||
content_item = result.content[0]
|
||||
assert isinstance(content_item, ContentToolCallContent)
|
||||
assert "/etc/hosts" in content_item.content.text
|
||||
assert result.content is None
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_for_web_extract_is_compact(self):
|
||||
"""web_extract start should stay compact; title identifies URLs."""
|
||||
args = {"urls": ["https://example.com/docs"]}
|
||||
result = build_tool_start("tc-web-start", "web_extract", args)
|
||||
assert isinstance(result, ToolCallStart)
|
||||
assert result.title == "extract: https://example.com/docs"
|
||||
assert result.kind == "fetch"
|
||||
assert result.content is None
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_for_search(self):
|
||||
"""search_files should include pattern in content."""
|
||||
|
|
@ -181,6 +214,48 @@ class TestBuildToolStart:
|
|||
assert isinstance(result, ToolCallStart)
|
||||
assert result.kind == "search"
|
||||
assert "TODO" in result.content[0].content.text
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_for_todo_is_human_readable(self):
|
||||
args = {"todos": [{"id": "one", "content": "Fix ACP rendering", "status": "in_progress"}]}
|
||||
result = build_tool_start("tc-todo", "todo", args)
|
||||
assert result.title == "todo (1 item)"
|
||||
assert "Fix ACP rendering" in result.content[0].content.text
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_for_skill_view_is_human_readable(self):
|
||||
result = build_tool_start("tc-skill", "skill_view", {"name": "github-pitfalls"})
|
||||
assert result.title == "skill view (github-pitfalls)"
|
||||
assert "github-pitfalls" in result.content[0].content.text
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_for_execute_code_shows_code_preview(self):
|
||||
result = build_tool_start("tc-code", "execute_code", {"code": "print('hello')"})
|
||||
assert result.kind == "execute"
|
||||
assert result.title == "python: print('hello')"
|
||||
assert "```python" in result.content[0].content.text
|
||||
assert "print('hello')" in result.content[0].content.text
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_for_skill_manage_patch_shows_diff(self):
|
||||
result = build_tool_start(
|
||||
"tc-skill-manage",
|
||||
"skill_manage",
|
||||
{
|
||||
"action": "patch",
|
||||
"name": "hermes-agent-operations",
|
||||
"file_path": "references/acp.md",
|
||||
"old_string": "old advice",
|
||||
"new_string": "new advice",
|
||||
},
|
||||
)
|
||||
assert result.kind == "edit"
|
||||
assert result.title == "skill patch: hermes-agent-operations/references/acp.md"
|
||||
assert isinstance(result.content[0], FileEditToolCallContent)
|
||||
assert result.content[0].path == "skills/hermes-agent-operations/references/acp.md"
|
||||
assert result.content[0].old_text == "old advice"
|
||||
assert result.content[0].new_text == "new advice"
|
||||
assert result.raw_input is None
|
||||
|
||||
def test_build_tool_start_generic_fallback(self):
|
||||
"""Unknown tools should get a generic text representation."""
|
||||
|
|
@ -205,6 +280,158 @@ class TestBuildToolComplete:
|
|||
content_item = result.content[0]
|
||||
assert isinstance(content_item, ContentToolCallContent)
|
||||
assert "total 42" in content_item.content.text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_todo_is_checklist(self):
|
||||
result = build_tool_complete(
|
||||
"tc-todo",
|
||||
"todo",
|
||||
'{"todos":[{"id":"a","content":"Inspect ACP","status":"completed"},{"id":"b","content":"Patch renderers","status":"in_progress"}],"summary":{"total":2,"pending":0,"in_progress":1,"completed":1,"cancelled":0}}',
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "✅ Inspect ACP" in text
|
||||
assert "- 🔄 Patch renderers" in text
|
||||
assert "**Progress:** 1 completed, 1 in progress, 0 pending" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_skill_view_summarizes_content_without_raw_json(self):
|
||||
result = build_tool_complete(
|
||||
"tc-skill",
|
||||
"skill_view",
|
||||
'{"success":true,"name":"github-pitfalls","description":"GitHub gotchas","content":"# GitHub Pitfalls\\nUse gh carefully.","path":"github/github-pitfalls/SKILL.md"}',
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "**Skill loaded**" in text
|
||||
assert "`github-pitfalls`" in text
|
||||
assert "GitHub gotchas" in text
|
||||
assert "GitHub Pitfalls" in text
|
||||
assert "Use gh carefully" not in text
|
||||
assert "Full skill content is available to the agent" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_execute_code_formats_output(self):
|
||||
result = build_tool_complete("tc-code", "execute_code", '{"output":"hello\\n","exit_code":0}')
|
||||
text = result.content[0].content.text
|
||||
assert "Exit code: 0" in text
|
||||
assert "hello" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_skill_manage_summarizes_without_raw_json(self):
|
||||
result = build_tool_complete(
|
||||
"tc-skill-manage",
|
||||
"skill_manage",
|
||||
'{"success":true,"message":"Patched references/hermes-acp-zed-rendering.md in skill \'hermes-agent-operations\' (1 replacement)."}',
|
||||
function_args={
|
||||
"action": "patch",
|
||||
"name": "hermes-agent-operations",
|
||||
"file_path": "references/hermes-acp-zed-rendering.md",
|
||||
},
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "**✅ Skill updated**" in text
|
||||
assert "`patch`" in text
|
||||
assert "`hermes-agent-operations`" in text
|
||||
assert "references/hermes-acp-zed-rendering.md" in text
|
||||
assert "{\"success\"" not in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_read_file_formats_content(self):
|
||||
result = build_tool_complete(
|
||||
"tc-read",
|
||||
"read_file",
|
||||
'{"content":"1|hello\\n2|world","total_lines":2}',
|
||||
function_args={"path":"README.md","offset":1,"limit":20},
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Read README.md" in text
|
||||
assert "```\n1|hello\n2|world\n```" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_search_files_formats_matches(self):
|
||||
result = build_tool_complete(
|
||||
"tc-search",
|
||||
"search_files",
|
||||
'{"total_count":2,"matches":[{"path":"README.md","line":3,"content":"TODO: fix this"},{"path":"src/app.py","line":9,"content":"needle"}],"truncated":true}\n\n[Hint: Results truncated. Use offset=12 to see more.]',
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Search results" in text
|
||||
assert "Found 2 matches" in text
|
||||
assert "README.md:3" in text
|
||||
assert "TODO: fix this" in text
|
||||
assert "Results truncated" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_process_list_formats_table(self):
|
||||
result = build_tool_complete(
|
||||
"tc-process",
|
||||
"process",
|
||||
'{"processes":[{"session_id":"p1","status":"running","pid":123,"command":"npm run dev"}]}',
|
||||
function_args={"action":"list"},
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Processes: 1" in text
|
||||
assert "`p1`" in text
|
||||
assert "npm run dev" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_delegate_task_summarizes_children(self):
|
||||
result = build_tool_complete(
|
||||
"tc-delegate",
|
||||
"delegate_task",
|
||||
'{"results":[{"task_index":0,"status":"completed","summary":"Reviewed ACP rendering.","model":"gpt-5.5","duration_seconds":3.2,"tool_trace":[{"tool":"read_file"}]}],"total_duration_seconds":3.4}',
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Delegation results: 1 task" in text
|
||||
assert "Reviewed ACP rendering" in text
|
||||
assert "gpt-5.5" in text
|
||||
assert "Tools: read_file" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_session_search_recent(self):
|
||||
result = build_tool_complete(
|
||||
"tc-session",
|
||||
"session_search",
|
||||
'{"success":true,"mode":"recent","results":[{"session_id":"s1","title":"ACP work","last_active":"2026-05-02","message_count":12,"preview":"Polished tool rendering."}],"count":1}',
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Recent sessions" in text
|
||||
assert "ACP work" in text
|
||||
assert "Polished tool rendering" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_memory_avoids_dumping_entries(self):
|
||||
result = build_tool_complete(
|
||||
"tc-memory",
|
||||
"memory",
|
||||
'{"success":true,"target":"user","entries":["private long memory"],"usage":"1% — 19/2000 chars","entry_count":1,"message":"Entry added."}',
|
||||
function_args={"action":"add","target":"user","content":"User likes concise ACP rendering."},
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Memory add saved" in text
|
||||
assert "User likes concise ACP rendering" in text
|
||||
assert "private long memory" not in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_web_extract_success_stays_compact(self):
|
||||
result = build_tool_complete(
|
||||
"tc-web-extract",
|
||||
"web_extract",
|
||||
'{"results":[{"url":"https://example.com","title":"Example","content":"# Intro\\nThis is extracted content."}]}',
|
||||
)
|
||||
assert result.content is None
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_for_web_extract_error_shows_error(self):
|
||||
result = build_tool_complete(
|
||||
"tc-web-extract-error",
|
||||
"web_extract",
|
||||
'{"results":[{"url":"https://example.com","title":"Example","error":"timeout"}]}',
|
||||
)
|
||||
text = result.content[0].content.text
|
||||
assert "Web extract failed" in text
|
||||
assert "https://example.com" in text
|
||||
assert "timeout" in text
|
||||
assert result.raw_output is None
|
||||
|
||||
def test_build_tool_complete_truncates_large_output(self):
|
||||
"""Very large outputs should be truncated."""
|
||||
|
|
|
|||
|
|
@ -1836,3 +1836,55 @@ class TestResolveMessagesMaxTokens:
|
|||
result = _resolve_anthropic_messages_max_tokens(0.5, "claude-opus-4-6")
|
||||
assert result > 0
|
||||
assert result != 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# convert_tools_to_anthropic — tool dedup at API boundary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestConvertToolsToAnthropicDedup:
|
||||
"""convert_tools_to_anthropic must deduplicate tool names.
|
||||
|
||||
Anthropic rejects requests with duplicate tool names. This guard converts
|
||||
a hard failure into a warning log. See:
|
||||
https://github.com/NousResearch/hermes-agent/issues/18478
|
||||
"""
|
||||
|
||||
def _make_openai_tool(self, name: str) -> dict:
|
||||
return {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": name,
|
||||
"description": f"Tool {name}",
|
||||
"parameters": {"type": "object", "properties": {}},
|
||||
},
|
||||
}
|
||||
|
||||
def test_unique_tools_pass_through(self):
|
||||
tools = [self._make_openai_tool("alpha"), self._make_openai_tool("beta")]
|
||||
result = convert_tools_to_anthropic(tools)
|
||||
assert len(result) == 2
|
||||
names = [t["name"] for t in result]
|
||||
assert names == ["alpha", "beta"]
|
||||
|
||||
def test_duplicate_tool_names_are_deduplicated(self):
|
||||
"""RED test — must fail until dedup guard is added."""
|
||||
tools = [
|
||||
self._make_openai_tool("lcm_grep"),
|
||||
self._make_openai_tool("lcm_describe"),
|
||||
self._make_openai_tool("lcm_grep"), # duplicate
|
||||
self._make_openai_tool("lcm_expand"),
|
||||
self._make_openai_tool("lcm_describe"), # duplicate
|
||||
]
|
||||
result = convert_tools_to_anthropic(tools)
|
||||
names = [t["name"] for t in result]
|
||||
assert len(names) == len(set(names)), (
|
||||
f"Duplicate tool names found: {names}"
|
||||
)
|
||||
assert len(result) == 3 # lcm_grep, lcm_describe, lcm_expand
|
||||
|
||||
def test_empty_tools_returns_empty(self):
|
||||
assert convert_tools_to_anthropic([]) == []
|
||||
|
||||
def test_none_tools_returns_empty(self):
|
||||
assert convert_tools_to_anthropic(None) == []
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ from agent.auxiliary_client import (
|
|||
auxiliary_max_tokens_param,
|
||||
call_llm,
|
||||
async_call_llm,
|
||||
_build_call_kwargs,
|
||||
_read_codex_access_token,
|
||||
_get_provider_chain,
|
||||
_is_payment_error,
|
||||
|
|
@ -1752,3 +1753,143 @@ class TestVisionAutoSkipsKimiCoding:
|
|||
"kimi-coding",
|
||||
"kimi-coding-cn",
|
||||
})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _build_call_kwargs — tool dedup at API boundary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildCallKwargsToolDedup:
    """Check that _build_call_kwargs drops repeated tool names at the API boundary.

    Background recorded with these tests: providers like Google Vertex,
    Azure, and Bedrock reject requests with duplicate tool names (HTTP 400).
    The guard is meant to turn that hard failure into a warning log so agent
    turns still succeed if an upstream injection path regresses.
    See: https://github.com/NousResearch/hermes-agent/issues/18478
    """

    def _make_tool(self, name: str) -> dict:
        """Return a minimal OpenAI-style function tool schema named *name*."""
        function_spec = {
            "name": name,
            "description": f"Tool {name}",
            "parameters": {"type": "object", "properties": {}},
        }
        return {"type": "function", "function": function_spec}

    def test_unique_tools_pass_through_unchanged(self):
        """Distinct tool names come back complete and in their original order."""
        call_kwargs = _build_call_kwargs(
            provider="openai",
            model="gpt-4o",
            messages=[],
            tools=[self._make_tool(tool_name) for tool_name in ("alpha", "beta")],
        )
        sent_tools = call_kwargs["tools"]
        assert len(sent_tools) == 2
        assert [entry["function"]["name"] for entry in sent_tools] == ["alpha", "beta"]

    def test_duplicate_tool_names_are_deduplicated(self):
        """Five tools with only three distinct names must come back as three tools."""
        requested = (
            "lcm_grep",
            "lcm_describe",
            "lcm_grep",      # repeats the first entry
            "lcm_expand",
            "lcm_describe",  # repeats the second entry
        )
        call_kwargs = _build_call_kwargs(
            provider="google",
            model="gemini-2.5-pro",
            messages=[],
            tools=[self._make_tool(tool_name) for tool_name in requested],
        )
        result_tools = call_kwargs["tools"]
        names = [entry["function"]["name"] for entry in result_tools]
        # No name may appear twice in what is handed to the provider.
        distinct = set(names)
        assert len(names) == len(distinct), (
            f"Duplicate tool names found: {names}"
        )
        # One survivor each for lcm_grep, lcm_describe, lcm_expand.
        assert len(result_tools) == 3

    def test_empty_tools_unchanged(self):
        """An empty tool list yields either no "tools" key or an empty list."""
        call_kwargs = _build_call_kwargs(
            provider="openai",
            model="gpt-4o",
            messages=[],
            tools=[],
        )
        assert "tools" not in call_kwargs or call_kwargs.get("tools") == []

    def test_none_tools_unchanged(self):
        """tools=None must not produce a "tools" key at all."""
        call_kwargs = _build_call_kwargs(
            provider="openai",
            model="gpt-4o",
            messages=[],
            tools=None,
        )
        assert "tools" not in call_kwargs
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider-related environment variables before each test.

    Missing variables are tolerated (``raising=False``).
    """
    provider_vars = ["OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY"]
    for var_name in provider_vars:
        monkeypatch.delenv(var_name, raising=False)
|
||||
|
||||
|
||||
class TestOpenRouterExplicitApiKey:
    """Check which API key resolve_provider_client() gives the OpenRouter client."""

    def test_resolve_provider_client_passes_explicit_api_key_to_openrouter(
        self, monkeypatch
    ):
        """An explicit_api_key must be the key the OpenAI client is built with.

        OPENROUTER_API_KEY is set to a different value so that an accidental
        fallback to the environment shows up as the wrong key.
        """
        monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key")

        # Swap the OpenAI class for a mock so its constructor kwargs are recorded.
        mock_openai = MagicMock(return_value=MagicMock(name="openrouter-client"))
        with patch("agent.auxiliary_client.OpenAI", mock_openai):
            client, _model = resolve_provider_client(
                provider="openrouter",
                explicit_api_key="explicit-pool-key",
            )

        # A client object came back and the constructor ran exactly once.
        assert client is not None
        mock_openai.assert_called_once()

        # The recorded keyword arguments must carry the explicit key.
        _positional, call_kwargs = mock_openai.call_args
        assert call_kwargs["api_key"] == "explicit-pool-key", (
            f"Expected explicit_api_key to be passed, got: {call_kwargs['api_key']}"
        )
        assert call_kwargs["api_key"] != "env-fallback-key", (
            "Should NOT fall back to OPENROUTER_API_KEY when explicit_api_key is provided"
        )

    def test_resolve_provider_client_without_explicit_api_key_falls_back_to_env(
        self, monkeypatch
    ):
        """With explicit_api_key=None the OPENROUTER_API_KEY env value is used."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "env-fallback-key")

        # Swap the OpenAI class for a mock so its constructor kwargs are recorded.
        mock_openai = MagicMock(return_value=MagicMock(name="openrouter-client"))
        with patch("agent.auxiliary_client.OpenAI", mock_openai):
            client, _model = resolve_provider_client(
                provider="openrouter",
                explicit_api_key=None,
            )

        assert client is not None
        mock_openai.assert_called_once()

        _positional, call_kwargs = mock_openai.call_args
        assert call_kwargs["api_key"] == "env-fallback-key", (
            f"Expected env fallback key to be used when explicit_api_key is None, got: {call_kwargs['api_key']}"
        )
|
||||
|
|
|
|||
|
|
@ -348,6 +348,64 @@ def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
|
|||
assert entry.access_token == "sk-or-seeded"
|
||||
|
||||
|
||||
|
||||
def test_load_pool_prefers_dotenv_over_stale_os_environ(tmp_path, monkeypatch):
    """Regression for #18254: the key in ~/.hermes/.env beats a stale shell export.

    A stale OPENROUTER_API_KEY inherited through os.environ must not shadow
    the fresh key in the .env file when the credential pool is seeded.
    According to the original report, `get_env_value()` used to prefer
    os.environ and wrote the stale value into auth.json, causing persistent
    401 errors after key rotation.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # The buggy situation: the parent shell still exports an old key ...
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-STALE-from-shell")
    # ... while the user has already put the new key into the .env file.
    dotenv_file = hermes_home / ".env"
    dotenv_file.write_text(
        "OPENROUTER_API_KEY=sk-or-FRESH-from-dotenv\n"
    )

    _write_auth_store(tmp_path, {"version": 1, "providers": {}})

    from agent.credential_pool import load_pool

    entry = load_pool("openrouter").select()

    assert entry is not None
    assert entry.source == "env:OPENROUTER_API_KEY"
    # The .env value has to win over the exported one.
    assert entry.access_token == "sk-or-FRESH-from-dotenv", (
        f"Expected .env to win, got {entry.access_token!r}"
    )
|
||||
|
||||
|
||||
def test_load_pool_falls_back_to_os_environ_when_dotenv_empty(tmp_path, monkeypatch):
    """Seeding still reads os.environ when .env lacks OPENROUTER_API_KEY.

    This mirrors deployments that inject the key at runtime (the original
    note names Docker / K8s / systemd) and guards against a regression that
    would ignore such runtime-injected variables.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir()
    for var_name, var_value in (
        ("HERMES_HOME", str(hermes_home)),
        ("OPENROUTER_API_KEY", "sk-or-from-runtime-env"),
    ):
        monkeypatch.setenv(var_name, var_value)

    # The .env file is present but only defines an unrelated variable.
    dotenv_file = hermes_home / ".env"
    dotenv_file.write_text("SOME_OTHER_VAR=unrelated\n")

    _write_auth_store(tmp_path, {"version": 1, "providers": {}})

    from agent.credential_pool import load_pool

    entry = load_pool("openrouter").select()

    assert entry is not None
    assert entry.access_token == "sk-or-from-runtime-env"
|
||||
|
||||
|
||||
def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
|
|
|||
|
|
@ -314,3 +314,281 @@ def test_dry_run_skips_snapshot(backup_env, monkeypatch):
|
|||
assert not any(r.get("reason") == "pre-curator-run" for r in rows), (
|
||||
"dry-run must not create a pre-run snapshot"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cron-jobs backup + rollback (the part issue #18671's follow-up adds)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_cron_jobs(home: Path, jobs: list) -> Path:
|
||||
"""Write a synthetic cron/jobs.json under HERMES_HOME. Returns the path.
|
||||
Mirrors cron.jobs.save_jobs() wrapper shape: `{"jobs": [...], "updated_at": ...}`.
|
||||
"""
|
||||
cron_dir = home / "cron"
|
||||
cron_dir.mkdir(parents=True, exist_ok=True)
|
||||
path = cron_dir / "jobs.json"
|
||||
path.write_text(
|
||||
json.dumps({"jobs": jobs, "updated_at": "2026-05-01T00:00:00Z"}, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
return path
|
||||
|
||||
|
||||
def _reload_cron_jobs(home: Path):
    """Return the ``cron.jobs`` module after refreshing it for the test HOME.

    ``hermes_constants`` is reloaded first; ``cron.jobs`` is then reloaded if
    it had already been imported, or imported fresh otherwise. The original
    helper explains the goal as letting the module-level HERMES_DIR pick up
    the temporary HOME. The *home* argument is not read by this body.
    """
    import hermes_constants

    importlib.reload(hermes_constants)

    # Decide before importing: a first-time import needs no extra reload.
    was_loaded = "cron.jobs" in sys.modules
    import cron.jobs as cron_jobs

    if was_loaded:
        importlib.reload(cron_jobs)
    return cron_jobs
|
||||
|
||||
|
||||
def test_snapshot_includes_cron_jobs(backup_env):
    """A snapshot taken while cron/jobs.json exists copies it and records it in the manifest."""
    backup = backup_env["cb"]
    _write_skill(backup_env["skills"], "alpha")
    seeded_jobs = [
        {"id": "job-a", "name": "a", "schedule": "every 1h", "skills": ["alpha"]},
        {"id": "job-b", "name": "b", "schedule": "every 2h", "skill": "alpha"},
    ]
    _write_cron_jobs(backup_env["home"], seeded_jobs)

    snapshot_dir = backup.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    cron_copy = snapshot_dir / backup.CRON_JOBS_FILENAME
    assert cron_copy.exists()

    manifest_text = (snapshot_dir / "manifest.json").read_text(encoding="utf-8")
    manifest = json.loads(manifest_text)
    assert manifest["cron_jobs"]["backed_up"] is True
    assert manifest["cron_jobs"]["jobs_count"] == 2
|
||||
|
||||
|
||||
def test_snapshot_without_cron_jobs_file_still_succeeds(backup_env):
    """Without cron/jobs.json the snapshot still works and the manifest says why nothing was backed up."""
    backup = backup_env["cb"]
    # Only a skill is written; no cron/jobs.json is created on purpose.
    _write_skill(backup_env["skills"], "alpha")

    snapshot_dir = backup.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    cron_copy = snapshot_dir / backup.CRON_JOBS_FILENAME
    assert not cron_copy.exists()

    manifest_text = (snapshot_dir / "manifest.json").read_text(encoding="utf-8")
    manifest = json.loads(manifest_text)
    assert manifest["cron_jobs"]["backed_up"] is False
    assert "cron/jobs.json" in manifest["cron_jobs"]["reason"]
|
||||
|
||||
|
||||
def test_snapshot_cron_jobs_malformed_json_still_captured(backup_env):
    """Malformed jobs.json is still copied to the snapshot (fidelity over
    validation); the manifest notes the parse warning.

    The manifest is expected to report the file as backed up, with a job
    count of 0 and a ``parse_warning`` entry.
    """
    cb = backup_env["cb"]
    _write_skill(backup_env["skills"], "alpha")
    (backup_env["home"] / "cron").mkdir()
    (backup_env["home"] / "cron" / "jobs.json").write_text("{oh no", encoding="utf-8")

    snap = cb.snapshot_skills(reason="test")
    assert snap is not None
    # Raw file was copied even though we couldn't parse it. Read it back with
    # the same explicit encoding it was written with instead of relying on
    # the platform's locale default.
    assert (snap / cb.CRON_JOBS_FILENAME).read_text(encoding="utf-8") == "{oh no"

    mf = json.loads((snap / "manifest.json").read_text(encoding="utf-8"))
    assert mf["cron_jobs"]["backed_up"] is True
    assert mf["cron_jobs"]["jobs_count"] == 0
    assert "parse_warning" in mf["cron_jobs"]
|
||||
|
||||
|
||||
def test_rollback_restores_cron_skill_links(backup_env):
    """End-to-end rollback of a cron job's skill list.

    A job referencing [alpha, beta] is snapshotted, rewritten in place to
    [umbrella] the way the curator would after consolidation, and must read
    [alpha, beta] again after rollback.
    """
    backup = backup_env["cb"]
    home = backup_env["home"]
    for skill_name in ("alpha", "beta", "umbrella"):
        _write_skill(backup_env["skills"], skill_name)

    cron_jobs = _reload_cron_jobs(home)
    cron_jobs.create_job(
        name="weekly",
        prompt="p",
        schedule="every 7d",
        skills=["alpha", "beta"],
    )

    snapshot_dir = backup.snapshot_skills(reason="pre-curator-run")
    assert snapshot_dir is not None

    # Stand-in for the curator's in-place cron rewrite after consolidation.
    merge_map = {"alpha": "umbrella", "beta": "umbrella"}
    cron_jobs.rewrite_skill_refs(consolidated=merge_map, pruned=[])
    rewritten = cron_jobs.load_jobs()
    assert rewritten[0]["skills"] == ["umbrella"]

    # Roll back to the snapshot taken above.
    ok, msg, _ = backup.rollback(backup_id=snapshot_dir.name)
    assert ok, msg
    assert "cron links" in msg

    # The skills list is back to its pre-curator value.
    restored = cron_jobs.load_jobs()
    assert restored[0]["skills"] == ["alpha", "beta"]
|
||||
|
||||
|
||||
def test_rollback_only_touches_skill_fields(backup_env):
    """Rollback may change skills/skill only; every other field keeps its live value.

    Name, schedule, prompt and timestamps are live state that must survive
    the rollback exactly as they are at rollback time.
    """
    backup = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")

    # jobs.json is written by hand (not via create_job) for exact field control.
    snapshot_time_job = {
        "id": "stable-id",
        "name": "original-name",
        "prompt": "original prompt",
        "schedule": "every 1h",
        "skills": ["alpha"],
        "enabled": True,
        "last_run_at": "2026-04-01T00:00:00Z",
    }
    _write_cron_jobs(home, [snapshot_time_job])
    snapshot_dir = backup.snapshot_skills(reason="pre-curator-run")
    assert snapshot_dir is not None

    # Activity after the snapshot: rename, reschedule, a newer timestamp, and
    # a curator-style rewrite of the skills list.
    cron_jobs = _reload_cron_jobs(home)
    live_jobs = cron_jobs.load_jobs()
    live_jobs[0].update({
        "name": "renamed-since-snapshot",
        "schedule": "every 30m",
        "last_run_at": "2026-05-01T12:00:00Z",
        "skills": ["umbrella"],
    })
    cron_jobs.save_jobs(live_jobs)

    ok, _, _ = backup.rollback(backup_id=snapshot_dir.name)
    assert ok

    job = cron_jobs.load_jobs()[0]
    # The skills list is restored from the snapshot ...
    assert job["skills"] == ["alpha"]
    # ... while all other fields keep their current live values.
    assert job["name"] == "renamed-since-snapshot"
    assert job["schedule"] == "every 30m"
    assert job["last_run_at"] == "2026-05-01T12:00:00Z"
    assert job["prompt"] == "original prompt"
|
||||
|
||||
|
||||
def test_rollback_skips_jobs_the_user_deleted(backup_env):
    """If the user deleted a cron job after the snapshot, rollback must
    NOT resurrect it — the user's delete is a later, explicit choice."""
    cb = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")

    _write_cron_jobs(home, [
        {"id": "keep-me", "name": "keep", "schedule": "every 1h", "skills": ["alpha"]},
        {"id": "delete-me", "name": "gone", "schedule": "every 1h", "skills": ["alpha"]},
    ])
    snap = cb.snapshot_skills(reason="pre-curator-run")
    # Fail here with a clear assertion if no snapshot was produced, rather
    # than with an AttributeError on ``snap.name`` further down.
    assert snap is not None

    # User deletes one job after the snapshot
    cj = _reload_cron_jobs(home)
    cj.save_jobs([j for j in cj.load_jobs() if j["id"] != "delete-me"])

    ok, _, _ = cb.rollback(backup_id=snap.name)
    assert ok

    live_after = cj.load_jobs()
    live_ids = {j["id"] for j in live_after}
    assert "keep-me" in live_ids
    assert "delete-me" not in live_ids  # not resurrected
|
||||
|
||||
|
||||
def test_rollback_leaves_new_jobs_untouched(backup_env):
    """Jobs created AFTER the snapshot must pass through rollback unchanged."""
    cb = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")
    _write_cron_jobs(home, [
        {"id": "original", "name": "o", "schedule": "every 1h", "skills": ["alpha"]},
    ])
    snap = cb.snapshot_skills(reason="pre-curator-run")
    # Fail here with a clear assertion if no snapshot was produced, rather
    # than with an AttributeError on ``snap.name`` further down.
    assert snap is not None

    # Append a job the snapshot has never seen.
    cj = _reload_cron_jobs(home)
    jobs = cj.load_jobs()
    jobs.append({"id": "new-after-snapshot", "name": "new",
                 "schedule": "every 15m", "skills": ["brand-new-skill"]})
    cj.save_jobs(jobs)

    ok, _, _ = cb.rollback(backup_id=snap.name)
    assert ok

    live = cj.load_jobs()
    by_id = {j["id"]: j for j in live}
    assert "new-after-snapshot" in by_id
    # New job's fields completely preserved
    assert by_id["new-after-snapshot"]["skills"] == ["brand-new-skill"]
    assert by_id["new-after-snapshot"]["schedule"] == "every 15m"
|
||||
|
||||
|
||||
def test_rollback_with_snapshot_missing_cron_succeeds(backup_env):
    """Rollback from a snapshot that has no cron-jobs.json must still succeed.

    This stands in for older snapshots created before cron backup existed:
    the rollback reports success and the live jobs.json is left as it is.
    """
    backup = backup_env["cb"]
    home = backup_env["home"]
    _write_skill(backup_env["skills"], "alpha")

    # No cron/jobs.json exists yet, so the snapshot has no cron copy.
    snapshot_dir = backup.snapshot_skills(reason="test")
    assert snapshot_dir is not None
    cron_copy = snapshot_dir / backup.CRON_JOBS_FILENAME
    assert not cron_copy.exists()

    # A cron job appears only after the snapshot was taken.
    later_job = {"id": "later-job", "name": "l", "schedule": "every 1h", "skills": ["x"]}
    _write_cron_jobs(home, [later_job])

    ok, msg, _ = backup.rollback(backup_id=snapshot_dir.name)
    # The rollback as a whole still reports success.
    assert ok, msg

    # With nothing to restore from, the live job is exactly as written.
    cron_jobs = _reload_cron_jobs(home)
    first_job = cron_jobs.load_jobs()[0]
    assert first_job["id"] == "later-job"
    assert first_job["skills"] == ["x"]
|
||||
|
||||
|
||||
def test_restore_cron_skill_links_standalone(backup_env):
    """Exercise _restore_cron_skill_links directly and check its report.

    The snapshot holds three jobs. Of those, one was rewritten in the live
    file, one is unchanged and one no longer exists. The live file also has
    a job the snapshot never saw.
    """
    backup = backup_env["cb"]
    home = backup_env["home"]

    # Hand-build a snapshot directory that contains only cron-jobs.json.
    snapshot_dir = home / "skills" / ".curator_backups" / "fake-id"
    snapshot_dir.mkdir(parents=True)
    snapshot_jobs = [
        {"id": "job-1", "name": "one", "skills": ["narrow-a", "narrow-b"]},
        {"id": "job-2", "name": "two", "skill": "legacy-single"},
        {"id": "job-gone", "name": "deleted", "skills": ["whatever"]},
    ]
    snapshot_file = snapshot_dir / backup.CRON_JOBS_FILENAME
    snapshot_file.write_text(json.dumps(snapshot_jobs), encoding="utf-8")

    # Live state: job-1 rewritten, job-2 as before, job-gone absent, job-new added.
    live_jobs = [
        {"id": "job-1", "name": "one", "skills": ["umbrella"], "schedule": "every 1h"},
        {"id": "job-2", "name": "two", "skill": "legacy-single", "schedule": "every 1h"},
        {"id": "job-new", "name": "new", "skills": ["x"], "schedule": "every 1h"},
    ]
    _write_cron_jobs(home, live_jobs)
    _reload_cron_jobs(home)

    report = backup._restore_cron_skill_links(snapshot_dir)
    assert report["attempted"] is True
    assert report["error"] is None
    # job-2 already matched the snapshot.
    assert report["unchanged"] == 1
    # job-1 is the only job whose skills were put back.
    restored = report["restored"]
    assert len(restored) == 1
    assert restored[0]["job_id"] == "job-1"
    assert restored[0]["to"]["skills"] == ["narrow-a", "narrow-b"]
    # job-gone is reported as missing from the live file.
    missing = report["skipped_missing"]
    assert len(missing) == 1
    assert missing[0]["job_id"] == "job-gone"
|
||||
|
|
|
|||
|
|
@ -548,3 +548,266 @@ def test_reconcile_model_block_visible_in_full_report(curator_env):
|
|||
md = (run_dir / "REPORT.md").read_text()
|
||||
assert "duplicate content, now a subsection" in md
|
||||
assert "pre-curator junk" in md
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_absorbed_into_declarations — authoritative signal from delete calls
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_extract_absorbed_into_picks_up_consolidation(curator_env):
    """A skill_manage delete carrying absorbed_into=<umbrella> yields one declaration."""
    delete_args = {
        "action": "delete",
        "name": "narrow-skill",
        "absorbed_into": "umbrella",
    }
    tool_calls = [{"name": "skill_manage", "arguments": json.dumps(delete_args)}]

    declarations = curator_env._extract_absorbed_into_declarations(tool_calls)

    expected = {"narrow-skill": {"into": "umbrella", "declared": True}}
    assert declarations == expected
|
||||
|
||||
|
||||
def test_extract_absorbed_into_empty_string_is_explicit_prune(curator_env):
    """An empty absorbed_into string is kept as a declaration with an empty target."""
    delete_args = {
        "action": "delete",
        "name": "stale",
        "absorbed_into": "",
    }
    tool_calls = [{"name": "skill_manage", "arguments": json.dumps(delete_args)}]

    declarations = curator_env._extract_absorbed_into_declarations(tool_calls)

    expected = {"stale": {"into": "", "declared": True}}
    assert declarations == expected
|
||||
|
||||
|
||||
def test_extract_absorbed_into_missing_arg_ignored(curator_env):
    """A delete call that omits absorbed_into produces no declaration at all."""
    delete_args = {"action": "delete", "name": "legacy-skill"}
    tool_calls = [{"name": "skill_manage", "arguments": json.dumps(delete_args)}]

    declarations = curator_env._extract_absorbed_into_declarations(tool_calls)

    assert declarations == {}
|
||||
|
||||
|
||||
def test_extract_absorbed_into_ignores_non_delete_actions(curator_env):
    """An absorbed_into value on a non-delete action (here: patch) is not recorded."""
    patch_args = {
        "action": "patch",
        "name": "umbrella",
        "old_string": "...",
        "new_string": "...",
        # Out of place on a patch call; the extractor must not pick it up.
        "absorbed_into": "something",
    }
    tool_calls = [{"name": "skill_manage", "arguments": json.dumps(patch_args)}]

    declarations = curator_env._extract_absorbed_into_declarations(tool_calls)

    assert declarations == {}
|
||||
|
||||
|
||||
def test_extract_absorbed_into_accepts_dict_arguments(curator_env):
    """Arguments supplied as a dict rather than a JSON string are handled too."""
    delete_args = {
        "action": "delete",
        "name": "narrow",
        "absorbed_into": "umbrella",
    }
    # No json.dumps here: the dict is passed through as-is.
    tool_calls = [{"name": "skill_manage", "arguments": delete_args}]

    declarations = curator_env._extract_absorbed_into_declarations(tool_calls)

    expected = {"narrow": {"into": "umbrella", "declared": True}}
    assert declarations == expected
|
||||
|
||||
|
||||
def test_extract_absorbed_into_strips_whitespace(curator_env):
    """Surrounding whitespace in name and absorbed_into is removed from the result."""
    delete_args = {
        "action": "delete",
        "name": "  narrow  ",
        "absorbed_into": "  umbrella  ",
    }
    tool_calls = [{"name": "skill_manage", "arguments": json.dumps(delete_args)}]

    declarations = curator_env._extract_absorbed_into_declarations(tool_calls)

    expected = {"narrow": {"into": "umbrella", "declared": True}}
    assert declarations == expected
|
||||
|
||||
|
||||
def test_extract_absorbed_into_ignores_non_skill_manage_calls(curator_env):
    """Calls to tools other than skill_manage contribute no declarations."""
    terminal_call = {"name": "terminal", "arguments": json.dumps({"command": "ls"})}
    read_call = {"name": "read_file", "arguments": json.dumps({"path": "/tmp/x"})}

    declarations = curator_env._extract_absorbed_into_declarations(
        [terminal_call, read_call]
    )

    assert declarations == {}
|
||||
|
||||
|
||||
def test_extract_absorbed_into_handles_malformed_arguments(curator_env):
    """Unparseable, None, or absent arguments are tolerated and yield nothing."""
    broken_json = {"name": "skill_manage", "arguments": "{not json"}
    null_arguments = {"name": "skill_manage", "arguments": None}
    no_arguments_key = {"name": "skill_manage"}

    declarations = curator_env._extract_absorbed_into_declarations(
        [broken_json, null_arguments, no_arguments_key]
    )

    assert declarations == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _reconcile_classification with absorbed_into declarations (authoritative)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_reconcile_absorbed_into_beats_everything_else(curator_env):
    """A declared absorbed_into wins when YAML and heuristic say otherwise.

    Scenario from the #18671 regression as described with the original
    test: the model emits no YAML summary block and the heuristic classifies
    the skill as pruned (its substring match misses because the umbrella's
    patch content does not literally contain the old slug). That used to end
    in a 'no-evidence fallback' prune, which dropped the cron ref instead of
    rewriting it. With the declaration present the skill must be reported as
    consolidated into the declared umbrella.
    """
    heuristic = {"consolidated": [], "pruned": [{"name": "pr-review-format"}]}
    # The model left out the YAML block, so both lists are empty.
    model_block = {"consolidations": [], "prunings": []}
    declared = {
        "pr-review-format": {"into": "hermes-agent-dev", "declared": True},
    }

    out = curator_env._reconcile_classification(
        removed=["pr-review-format"],
        heuristic=heuristic,
        model_block=model_block,
        destinations={"hermes-agent-dev"},
        absorbed_declarations=declared,
    )

    assert len(out["consolidated"]) == 1
    assert out["pruned"] == []
    entry = out["consolidated"][0]
    assert entry["name"] == "pr-review-format"
    assert entry["into"] == "hermes-agent-dev"
    assert "absorbed_into" in entry["source"]
|
||||
|
||||
|
||||
def test_reconcile_absorbed_into_empty_is_explicit_prune(curator_env):
    """A declaration with an empty target lands in pruned as a model-declared prune."""
    heuristic = {"consolidated": [], "pruned": [{"name": "stale"}]}
    model_block = {"consolidations": [], "prunings": []}
    declared = {"stale": {"into": "", "declared": True}}

    out = curator_env._reconcile_classification(
        removed=["stale"],
        heuristic=heuristic,
        model_block=model_block,
        destinations=set(),
        absorbed_declarations=declared,
    )

    assert out["consolidated"] == []
    pruned = out["pruned"]
    assert len(pruned) == 1
    assert "model-declared prune" in pruned[0]["source"]
|
||||
|
||||
|
||||
def test_reconcile_absorbed_into_nonexistent_target_falls_through(curator_env):
    """A declared umbrella missing from destinations is ignored in favour of the heuristic.

    The original note says the tool validates the target at delete time, so
    this should not occur in practice; the reconciler is checked defensively.
    """
    heuristic = {
        "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}],
        "pruned": [],
    }
    model_block = {"consolidations": [], "prunings": []}
    # "ghost-umbrella" is not among the destinations passed below.
    declared = {"thing": {"into": "ghost-umbrella", "declared": True}}

    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic,
        model_block=model_block,
        destinations={"real-umbrella"},
        absorbed_declarations=declared,
    )

    consolidated = out["consolidated"]
    assert len(consolidated) == 1
    assert consolidated[0]["into"] == "real-umbrella"
    assert "tool-call audit" in consolidated[0]["source"]
|
||||
|
||||
|
||||
def test_reconcile_declaration_preserves_yaml_reason(curator_env):
    """With both a declaration and a YAML entry, the YAML reason is kept.

    The source stays attributed to absorbed_into while the reason text from
    the model's YAML block is carried into the result (the original note
    says this is so REPORT.md still has it).
    """
    yaml_entry = {
        "from": "narrow",
        "into": "umbrella",
        "reason": "duplicate of umbrella's main content",
    }
    model_block = {"consolidations": [yaml_entry], "prunings": []}
    declared = {"narrow": {"into": "umbrella", "declared": True}}

    out = curator_env._reconcile_classification(
        removed=["narrow"],
        heuristic={"consolidated": [], "pruned": []},
        model_block=model_block,
        destinations={"umbrella"},
        absorbed_declarations=declared,
    )

    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "umbrella"
    assert "absorbed_into" in entry["source"]
    assert entry["reason"] == "duplicate of umbrella's main content"
|
||||
|
||||
|
||||
def test_reconcile_without_declarations_preserves_legacy_behavior(curator_env):
    """Calling without absorbed_declarations still classifies from the heuristic."""
    heuristic = {
        "consolidated": [{"name": "thing", "into": "umbrella", "evidence": "..."}],
        "pruned": [],
    }
    model_block = {"consolidations": [], "prunings": []}

    # absorbed_declarations is left out on purpose to exercise the default.
    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic,
        model_block=model_block,
        destinations={"umbrella"},
    )

    consolidated = out["consolidated"]
    assert len(consolidated) == 1
    assert consolidated[0]["into"] == "umbrella"
|
||||
|
||||
|
||||
def test_reconcile_mixed_declarations_and_legacy_calls(curator_env):
    """A run mixing declared and undeclared deletes classifies each by its own path.

    Skills with an absorbed_into declaration take the declared outcome; the
    others are classified from the heuristic input.
    """
    removed = ["declared-cons", "declared-prune", "legacy-cons", "legacy-prune"]
    heuristic = {
        "consolidated": [
            {"name": "legacy-cons", "into": "umbrella-a", "evidence": "..."},
        ],
        "pruned": [{"name": "legacy-prune"}],
    }
    model_block = {"consolidations": [], "prunings": []}
    declared = {
        "declared-cons": {"into": "umbrella-b", "declared": True},
        "declared-prune": {"into": "", "declared": True},
    }

    out = curator_env._reconcile_classification(
        removed=removed,
        heuristic=heuristic,
        model_block=model_block,
        destinations={"umbrella-a", "umbrella-b"},
        absorbed_declarations=declared,
    )

    consolidated = {entry["name"]: entry for entry in out["consolidated"]}
    pruned = {entry["name"]: entry for entry in out["pruned"]}

    # Declared consolidation: target and source come from the declaration.
    assert "declared-cons" in consolidated
    assert consolidated["declared-cons"]["into"] == "umbrella-b"
    assert "absorbed_into" in consolidated["declared-cons"]["source"]

    # Undeclared consolidation: taken from the heuristic entry.
    assert "legacy-cons" in consolidated
    assert consolidated["legacy-cons"]["into"] == "umbrella-a"
    assert "tool-call audit" in consolidated["legacy-cons"]["source"]

    # Declared prune (empty target).
    assert "declared-prune" in pruned
    assert "model-declared prune" in pruned["declared-prune"]["source"]

    # Undeclared prune: ends up in the fallback category.
    assert "legacy-prune" in pruned
    assert "no-evidence fallback" in pruned["legacy-prune"]["source"]
|
||||
|
|
|
|||
284
tests/agent/test_openrouter_response_cache.py
Normal file
284
tests/agent/test_openrouter_response_cache.py
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
"""Tests for OpenRouter response caching header injection."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_or_headers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildOrHeaders:
    """Test the build_or_headers() helper in agent/auxiliary_client.py."""

    @pytest.fixture(autouse=True)
    def _clear_cache_env(self, monkeypatch):
        """Remove the cache env vars so each test sees only its ``or_config``.

        The env-override tests in this module show HERMES_OPENROUTER_CACHE
        taking precedence over the config dict, so a value inherited from the
        invoking shell would otherwise flip the config-driven assertions here.
        """
        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)

    def test_base_attribution_always_present(self):
        """Attribution headers must always be included regardless of cache setting."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": False})
        assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
        assert headers["X-OpenRouter-Title"] == "Hermes Agent"
        assert headers["X-OpenRouter-Categories"] == "productivity,cli-agent"

    def test_cache_enabled(self):
        """When response_cache is True, X-OpenRouter-Cache header is set."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True})
        assert headers["X-OpenRouter-Cache"] == "true"

    def test_cache_disabled(self):
        """When response_cache is False, no cache header is sent."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": False})
        assert "X-OpenRouter-Cache" not in headers
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_cache_disabled_by_default_empty_config(self):
        """Empty config dict means no cache headers (response_cache defaults to False)."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={})
        assert "X-OpenRouter-Cache" not in headers

    def test_ttl_default(self):
        """Default TTL (300) is included when cache is enabled."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 300})
        assert headers["X-OpenRouter-Cache-TTL"] == "300"

    def test_ttl_custom(self):
        """Custom TTL values within range are sent."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 3600})
        assert headers["X-OpenRouter-Cache-TTL"] == "3600"

    def test_ttl_max(self):
        """Maximum TTL (86400) is accepted."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 86400})
        assert headers["X-OpenRouter-Cache-TTL"] == "86400"

    def test_ttl_out_of_range_too_high(self):
        """TTL above 86400 is silently ignored (no TTL header sent)."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 100000})
        assert "X-OpenRouter-Cache-TTL" not in headers
        # But cache is still enabled
        assert headers["X-OpenRouter-Cache"] == "true"

    def test_ttl_out_of_range_zero(self):
        """TTL of 0 is below minimum — no TTL header sent."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 0})
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_ttl_negative(self):
        """Negative TTL is ignored."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": -5})
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_ttl_not_a_number(self):
        """Non-numeric TTL is ignored."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": "five"})
        assert "X-OpenRouter-Cache-TTL" not in headers

    def test_ttl_float_truncated(self):
        """Float TTL values are truncated to int."""
        from agent.auxiliary_client import build_or_headers

        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600.7})
        assert headers["X-OpenRouter-Cache-TTL"] == "600"

    def test_returns_fresh_dict(self):
        """Each call returns a new dict so mutations don't leak."""
        from agent.auxiliary_client import build_or_headers

        cfg = {"response_cache": True}
        h1 = build_or_headers(or_config=cfg)
        h2 = build_or_headers(or_config=cfg)
        assert h1 is not h2
        assert h1 == h2

    def test_none_config_falls_back_to_load_config(self):
        """When or_config is None, build_or_headers reads from load_config()."""
        from agent.auxiliary_client import build_or_headers

        fake_cfg = {
            "openrouter": {"response_cache": True, "response_cache_ttl": 900},
        }
        with patch("hermes_cli.config.load_config", return_value=fake_cfg):
            headers = build_or_headers(or_config=None)
        assert headers["X-OpenRouter-Cache"] == "true"
        assert headers["X-OpenRouter-Cache-TTL"] == "900"

    def test_none_config_load_config_fails_gracefully(self):
        """When load_config() fails, build_or_headers still returns base headers."""
        from agent.auxiliary_client import build_or_headers

        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
            headers = build_or_headers(or_config=None)
        # Should have base attribution but no cache headers
        assert "HTTP-Referer" in headers
        assert "X-OpenRouter-Cache" not in headers
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Environment variable overrides
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEnvVarOverrides:
    """Test env var precedence over config.yaml for response caching."""

    def test_env_enables_cache(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE=true enables cache even when config disables it."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
        headers = build_or_headers(or_config={"response_cache": False})
        assert headers["X-OpenRouter-Cache"] == "true"

    def test_env_disables_cache(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE=false disables cache even when config enables it."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "false")
        headers = build_or_headers(or_config={"response_cache": True})
        assert "X-OpenRouter-Cache" not in headers

    @pytest.mark.parametrize("value", ["1", "true", "TRUE", "yes", "Yes", "on"])
    def test_truthy_values(self, monkeypatch, value):
        """Various truthy strings enable caching."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
        headers = build_or_headers(or_config={})
        assert headers["X-OpenRouter-Cache"] == "true"

    @pytest.mark.parametrize("value", ["0", "false", "no", "off", "maybe", ""])
    def test_non_truthy_values(self, monkeypatch, value):
        """Non-truthy strings do not enable caching (empty falls through to config)."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
        # Empty string falls through to config; others are explicitly non-truthy
        if value == "":
            # Empty env var falls through to config default (False)
            headers = build_or_headers(or_config={"response_cache": False})
        else:
            headers = build_or_headers(or_config={"response_cache": True})
        assert "X-OpenRouter-Cache" not in headers

    def test_env_ttl_overrides_config(self, monkeypatch):
        """HERMES_OPENROUTER_CACHE_TTL overrides config TTL."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", "1800")
        headers = build_or_headers(or_config={"response_cache_ttl": 300})
        assert headers["X-OpenRouter-Cache-TTL"] == "1800"

    @pytest.mark.parametrize("ttl", ["0", "86401", "abc", "-1", "12.5"])
    def test_invalid_env_ttl_dropped(self, monkeypatch, ttl):
        """Invalid TTL env values are ignored; cache still enabled without TTL."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "1")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
        headers = build_or_headers(or_config={})
        assert headers["X-OpenRouter-Cache"] == "true"
        assert "X-OpenRouter-Cache-TTL" not in headers

    @pytest.mark.parametrize("ttl", ["1", "300", "86400"])
    def test_valid_env_ttl_boundaries(self, monkeypatch, ttl):
        """Boundary TTL values (1, 300, 86400) are accepted."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "yes")
        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
        assert build_or_headers(or_config={})["X-OpenRouter-Cache-TTL"] == ttl

    def test_no_env_vars_falls_through_to_config(self, monkeypatch):
        """Without env vars, config.yaml controls behavior."""
        from agent.auxiliary_client import build_or_headers

        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)
        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600})
        assert headers["X-OpenRouter-Cache"] == "true"
        assert headers["X-OpenRouter-Cache-TTL"] == "600"
|
||||
|
||||
class TestDefaultConfig:
    """Verify the openrouter config section is in DEFAULT_CONFIG."""

    def test_openrouter_section_exists(self):
        """DEFAULT_CONFIG carries an ``openrouter`` section with the expected cache defaults."""
        from hermes_cli.config import DEFAULT_CONFIG

        assert "openrouter" in DEFAULT_CONFIG
        or_cfg = DEFAULT_CONFIG["openrouter"]
        assert or_cfg["response_cache"] is True
        assert or_cfg["response_cache_ttl"] == 300
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _check_openrouter_cache_status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCheckOpenrouterCacheStatus:
    """Test the _check_openrouter_cache_status method on AIAgent."""

    def _make_agent(self):
        """Create a minimal AIAgent-like object with just the method under test."""
        from run_agent import AIAgent

        # Use object.__new__ to skip __init__, then set the attributes we need
        agent = object.__new__(AIAgent)
        agent._or_cache_hits = 0
        return agent

    def test_hit_increments_counter(self):
        """A ``HIT`` status header bumps the hit counter once per call."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "HIT"})
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 1
        # Second hit increments
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 2

    def test_miss_does_not_increment(self):
        """A ``MISS`` status header leaves the hit counter at zero."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "MISS"})
        agent._check_openrouter_cache_status(resp)
        assert getattr(agent, "_or_cache_hits", 0) == 0

    def test_no_header_is_noop(self):
        """A response without the status header leaves the hit counter at zero."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={})
        agent._check_openrouter_cache_status(resp)
        assert getattr(agent, "_or_cache_hits", 0) == 0

    def test_none_response_is_safe(self):
        """Passing ``None`` must not raise and must not count as a hit."""
        agent = self._make_agent()
        agent._check_openrouter_cache_status(None)  # no crash
        assert getattr(agent, "_or_cache_hits", 0) == 0

    def test_no_headers_attr_is_safe(self):
        """A response object lacking ``.headers`` must not raise or count as a hit."""
        agent = self._make_agent()
        agent._check_openrouter_cache_status(object())  # no crash
        assert getattr(agent, "_or_cache_hits", 0) == 0

    def test_case_insensitive(self):
        """A lowercase ``hit`` status value is counted as a hit."""
        agent = self._make_agent()
        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "hit"})
        agent._check_openrouter_cache_status(resp)
        assert agent._or_cache_hits == 1
|
||||
|
|
@ -125,6 +125,58 @@ class TestScanSkillCommands:
|
|||
assert "/knowledge-brain" in result
|
||||
assert result["/knowledge-brain"]["name"] == "knowledge-brain"
|
||||
|
||||
def test_get_skill_commands_rescans_when_platform_scope_changes(self, tmp_path):
    """Platform-specific disabled-skill caches must not leak across platforms.

    Regression test for #14536: a gateway process serving Telegram
    and Discord concurrently would seed the process-global cache
    with whichever platform scanned first, and subsequent
    ``get_skill_commands()`` calls from the other platform silently
    inherited that filter.
    """
    import agent.skill_commands as sc_mod
    from agent.skill_commands import get_skill_commands

    def _disabled_skills():
        # Each platform disables only its own "<platform>-only" skill, so the
        # set of visible commands identifies which platform's scan was served.
        platform = os.getenv("HERMES_PLATFORM")
        if platform == "telegram":
            return {"telegram-only"}
        if platform == "discord":
            return {"discord-only"}
        return set()

    with (
        patch("tools.skills_tool.SKILLS_DIR", tmp_path),
        patch("tools.skills_tool._get_disabled_skill_names", side_effect=_disabled_skills),
        patch.object(sc_mod, "_skill_commands", {}),
        patch.object(sc_mod, "_skill_commands_platform", None),
    ):
        _make_skill(tmp_path, "shared")
        _make_skill(tmp_path, "telegram-only")
        _make_skill(tmp_path, "discord-only")

        with patch.dict(os.environ, {"HERMES_PLATFORM": "telegram"}):
            telegram_commands = dict(get_skill_commands())

        assert "/shared" in telegram_commands
        assert "/discord-only" in telegram_commands
        assert "/telegram-only" not in telegram_commands

        with patch.dict(os.environ, {"HERMES_PLATFORM": "discord"}):
            discord_commands = dict(get_skill_commands())

        assert "/shared" in discord_commands
        assert "/telegram-only" in discord_commands
        assert "/discord-only" not in discord_commands

        # Switching back to telegram must also rescan — not re-serve
        # the discord view that was just cached.
        with patch.dict(os.environ, {"HERMES_PLATFORM": "telegram"}):
            telegram_again = dict(get_skill_commands())

        assert "/telegram-only" not in telegram_again
        assert "/discord-only" in telegram_again
|
||||
|
||||
|
||||
def test_special_chars_stripped_from_cmd_key(self, tmp_path):
|
||||
"""Skill names with +, /, or other special chars produce clean cmd keys."""
|
||||
|
|
|
|||
|
|
@ -46,6 +46,29 @@ class TestResolveOrigin:
|
|||
job = {"origin": {}}
|
||||
assert _resolve_origin(job) is None
|
||||
|
||||
@pytest.mark.parametrize(
    "non_dict_origin",
    [
        "combined-digest-replaces-x-and-y-20260503",
        123,
        ["telegram", "12345"],
        ("platform", "chat_id"),
        42.0,
    ],
)
def test_non_dict_origin_returns_none_instead_of_crashing(self, non_dict_origin):
    """Non-dict origins (provenance strings from hand-edited or migrated
    jobs.json) must be treated as missing instead of crashing the
    scheduler tick on ``origin.get('platform')`` with
    ``'str' object has no attribute 'get'`` (#18722).

    Before this guard a job in this state crashed every fire attempt
    forever; ``mark_job_run`` recorded the error but the next tick
    re-loaded the poisoned origin and crashed identically.
    """
    job = {"origin": non_dict_origin}
    assert _resolve_origin(job) is None
|
||||
|
||||
|
||||
class TestResolveDeliveryTarget:
|
||||
def test_origin_delivery_preserves_thread_id(self):
|
||||
|
|
@ -118,6 +141,16 @@ class TestResolveDeliveryTarget:
|
|||
"thread_id": None,
|
||||
}
|
||||
|
||||
def test_bare_platform_delivery_preserves_home_thread_id(self, monkeypatch):
    """A bare ``deliver: "discord"`` target resolves to the home channel and
    carries the thread id from ``DISCORD_HOME_CHANNEL_THREAD_ID``."""
    monkeypatch.setenv("DISCORD_HOME_CHANNEL", "parent-42")
    monkeypatch.setenv("DISCORD_HOME_CHANNEL_THREAD_ID", "topic-7")

    assert _resolve_delivery_target({"deliver": "discord"}) == {
        "platform": "discord",
        "chat_id": "parent-42",
        "thread_id": "topic-7",
    }
|
||||
|
||||
def test_explicit_telegram_topic_target_with_thread_id(self):
|
||||
"""deliver: 'telegram:chat_id:thread_id' parses correctly."""
|
||||
job = {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ class RestartTestAdapter(BasePlatformAdapter):
|
|||
def __init__(self):
    """Set up an enabled Telegram adapter plus the lists that record sends."""
    super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
    # Message bodies only, in send order.
    self.sent: list[str] = []
    # Full (chat_id, content, metadata) triples, in send order.
    self.sent_calls: list[tuple[str, str, object]] = []
|
||||
|
||||
async def connect(self):
|
||||
return True
|
||||
|
|
@ -21,6 +22,7 @@ class RestartTestAdapter(BasePlatformAdapter):
|
|||
|
||||
async def send(self, chat_id, content, reply_to=None, metadata=None):
    """Record the outgoing message instead of delivering it.

    Appends ``content`` to ``self.sent`` and ``(chat_id, content, metadata)``
    to ``self.sent_calls``; ``reply_to`` is accepted but not recorded.
    Always returns a successful ``SendResult`` with message id ``"1"``.
    """
    self.sent.append(content)
    self.sent_calls.append((chat_id, content, metadata))
    return SendResult(success=True, message_id="1")
|
||||
|
||||
async def send_typing(self, chat_id, metadata=None):
|
||||
|
|
@ -30,12 +32,17 @@ class RestartTestAdapter(BasePlatformAdapter):
|
|||
return {"id": chat_id}
|
||||
|
||||
|
||||
def make_restart_source(
    chat_id: str = "123456",
    chat_type: str = "dm",
    thread_id: str | None = None,
) -> SessionSource:
    """Build a Telegram ``SessionSource`` for restart tests.

    Args:
        chat_id: Chat identifier placed on the source.
        chat_type: Chat type placed on the source (defaults to ``"dm"``).
        thread_id: Optional thread/topic identifier; ``None`` leaves it unset.

    Returns:
        A ``SessionSource`` on ``Platform.TELEGRAM`` with ``user_id="u1"``.
    """
    return SessionSource(
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type=chat_type,
        user_id="u1",
        thread_id=thread_id,
    )
|
||||
|
||||
|
||||
|
|
@ -81,6 +88,15 @@ def make_restart_runner(
|
|||
runner._handle_restart_command = GatewayRunner._handle_restart_command.__get__(
|
||||
runner, GatewayRunner
|
||||
)
|
||||
runner._handle_set_home_command = GatewayRunner._handle_set_home_command.__get__(
|
||||
runner, GatewayRunner
|
||||
)
|
||||
runner._send_restart_notification = GatewayRunner._send_restart_notification.__get__(
|
||||
runner, GatewayRunner
|
||||
)
|
||||
runner._send_home_channel_startup_notifications = (
|
||||
GatewayRunner._send_home_channel_startup_notifications.__get__(runner, GatewayRunner)
|
||||
)
|
||||
runner._status_action_label = GatewayRunner._status_action_label.__get__(
|
||||
runner, GatewayRunner
|
||||
)
|
||||
|
|
|
|||
|
|
@ -49,9 +49,10 @@ class TestSuspendRecentlyActive:
|
|||
count = store.suspend_recently_active()
|
||||
assert count == 1
|
||||
|
||||
# Re-fetch — should be suspended now
|
||||
# Re-fetch — should be resume_pending (preserved, not wiped)
|
||||
refreshed = store.get_or_create_session(source)
|
||||
assert refreshed.was_auto_reset
|
||||
assert refreshed.resume_pending
|
||||
assert refreshed.session_id == entry.session_id # same session preserved
|
||||
|
||||
def test_does_not_suspend_old_sessions(self, tmp_path):
|
||||
store = _make_store(tmp_path)
|
||||
|
|
@ -66,21 +67,22 @@ class TestSuspendRecentlyActive:
|
|||
count = store.suspend_recently_active(max_age_seconds=120)
|
||||
assert count == 0
|
||||
|
||||
def test_already_resume_pending_not_double_counted(self, tmp_path):
    """A second ``suspend_recently_active()`` call must not re-count a session
    that the first call already marked."""
    store = _make_store(tmp_path)
    source = _make_source()
    entry = store.get_or_create_session(source)

    # Mark resume_pending once
    count1 = store.suspend_recently_active()
    assert count1 == 1

    # Re-fetch returns the SAME session (preserved, not reset)
    entry2 = store.get_or_create_session(source)
    assert entry2.session_id == entry.session_id

    # Second call skips already-resume_pending entries
    count2 = store.suspend_recently_active()
    assert count2 == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -180,11 +182,11 @@ class TestCleanShutdownMarker:
|
|||
else:
|
||||
store.suspend_recently_active()
|
||||
|
||||
# Session SHOULD be suspended (crash recovery)
|
||||
# Session SHOULD be resume_pending (crash recovery preserves history)
|
||||
with store._lock:
|
||||
store._ensure_loaded_locked()
|
||||
suspended_count = sum(1 for e in store._entries.values() if e.suspended)
|
||||
assert suspended_count == 1, "Session should be suspended after crash (no marker)"
|
||||
resume_count = sum(1 for e in store._entries.values() if e.resume_pending)
|
||||
assert resume_count == 1, "Session should be resume_pending after crash (no marker)"
|
||||
|
||||
def test_marker_written_on_restart_stop(self, tmp_path, monkeypatch):
|
||||
"""stop(restart=True) should also write the marker."""
|
||||
|
|
|
|||
166
tests/gateway/test_config_env_bridge_authority.py
Normal file
166
tests/gateway/test_config_env_bridge_authority.py
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
"""Regression tests for the config.yaml → env var bridge in gateway/run.py.
|
||||
|
||||
Guards against the 60-vs-500 bug where a stale `.env HERMES_MAX_ITERATIONS=60`
|
||||
entry silently shadowed `agent.max_turns: 500` in config.yaml because the
|
||||
bridge used `if X not in os.environ` guards. After PR#18413 the bridge
|
||||
treats config.yaml as authoritative and unconditionally overwrites .env
|
||||
values for `agent.*`, `display.*`, `timezone`, and `security.*` keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def _run_gateway_import(hermes_home: Path, initial_env: dict[str, str]) -> dict[str, str]:
    """Import gateway.run in a clean subprocess and return the post-import env.

    The bridge runs at module-import time, so simply importing is enough
    to exercise it.  Running in a subprocess isolates the test from other
    import side effects and makes the "what ends up in os.environ" check
    deterministic.

    Args:
        hermes_home: Directory exported to the child as ``HERMES_HOME``.
        initial_env: Environment the child starts with; ``HERMES_HOME`` and a
            few interpreter-related variables are layered on top of a copy.

    Returns:
        The subset of bridged variables the child printed, as ``{name: value}``.
        Variables that are unset in the child are absent from the dict.
    """
    # Doubled braces are literal braces in the child script; single braces are
    # interpolated here, in the parent.
    script = textwrap.dedent(
        f"""
        import os, sys
        sys.path.insert(0, {str(PROJECT_ROOT)!r})

        try:
            from gateway import run  # noqa: F401 — module import triggers bridge
        except Exception as exc:
            print(f"IMPORT_ERROR:{{type(exc).__name__}}:{{exc}}", file=sys.stderr)
            sys.exit(2)

        for k in (
            "HERMES_MAX_ITERATIONS",
            "HERMES_AGENT_TIMEOUT",
            "HERMES_AGENT_TIMEOUT_WARNING",
            "HERMES_GATEWAY_BUSY_INPUT_MODE",
            "HERMES_TIMEZONE",
        ):
            v = os.environ.get(k)
            if v is not None:
                print(f"{{k}}={{v}}")
        """
    )
    env = dict(initial_env)
    env["HERMES_HOME"] = str(hermes_home)
    # Keep PATH / PYTHONPATH so venv imports resolve.
    for k in ("PATH", "PYTHONPATH", "VIRTUAL_ENV", "HOME"):
        if k in os.environ and k not in env:
            env[k] = os.environ[k]

    result = subprocess.run(
        [sys.executable, "-c", script],
        env=env,
        capture_output=True,
        text=True,
        timeout=60,
    )
    if result.returncode != 0:
        pytest.fail(
            f"gateway.run import failed (rc={result.returncode})\n"
            f"stderr:\n{result.stderr}\nstdout:\n{result.stdout}"
        )
    out: dict[str, str] = {}
    for line in result.stdout.splitlines():
        # Split on the first "=" only, so values containing "=" stay intact;
        # lines without "=" are unrelated import-time output and are skipped.
        key, sep, value = line.partition("=")
        if sep:
            out[key] = value
    return out
|
||||
|
||||
|
||||
def _write_config(home: Path, agent_cfg: dict | None = None, display_cfg: dict | None = None,
                  timezone: str | None = None) -> None:
    """Write ``config.yaml`` into *home* containing only the sections supplied.

    A section is emitted only when its argument is truthy, so an empty dict
    (or ``None``) leaves that key out of the file entirely.

    Args:
        home: Directory that receives ``config.yaml``.
        agent_cfg: Mapping stored under the ``agent`` key.
        display_cfg: Mapping stored under the ``display`` key.
        timezone: Value stored under the ``timezone`` key.
    """
    import yaml

    cfg: dict = {}
    if agent_cfg:
        cfg["agent"] = agent_cfg
    if display_cfg:
        cfg["display"] = display_cfg
    if timezone:
        cfg["timezone"] = timezone
    # Explicit encoding keeps the file independent of the platform locale.
    (home / "config.yaml").write_text(yaml.safe_dump(cfg), encoding="utf-8")
|
||||
|
||||
|
||||
def _write_env(home: Path, entries: dict[str, str]) -> None:
|
||||
lines = [f"{k}={v}\n" for k, v in entries.items()]
|
||||
(home / ".env").write_text("".join(lines))
|
||||
|
||||
|
||||
@pytest.fixture
def hermes_home(tmp_path: Path) -> Path:
    """Create and return an empty ``.hermes`` directory under pytest's ``tmp_path``."""
    home = tmp_path / ".hermes"
    home.mkdir()
    return home
|
||||
|
||||
|
||||
def test_config_max_turns_wins_over_stale_env(hermes_home: Path) -> None:
    """Regression: config.yaml:agent.max_turns=500 must beat .env=60."""
    _write_config(hermes_home, agent_cfg={"max_turns": 500})
    _write_env(hermes_home, {"HERMES_MAX_ITERATIONS": "60"})

    env = _run_gateway_import(hermes_home, initial_env={})

    assert env.get("HERMES_MAX_ITERATIONS") == "500", (
        f"expected config.yaml max_turns=500 to win; got {env.get('HERMES_MAX_ITERATIONS')!r}. "
        "Stale .env value is shadowing config — the bridge lost its override."
    )
|
||||
|
||||
|
||||
def test_config_gateway_timeout_wins_over_stale_env(hermes_home: Path) -> None:
    """Every agent.* bridge key must be config-authoritative, not .env-authoritative."""
    _write_config(hermes_home, agent_cfg={
        "gateway_timeout": 1800,
        "gateway_timeout_warning": 900,
    })
    _write_env(hermes_home, {
        "HERMES_AGENT_TIMEOUT": "60",
        "HERMES_AGENT_TIMEOUT_WARNING": "30",
    })

    env = _run_gateway_import(hermes_home, initial_env={})

    assert env.get("HERMES_AGENT_TIMEOUT") == "1800"
    assert env.get("HERMES_AGENT_TIMEOUT_WARNING") == "900"
|
||||
|
||||
|
||||
def test_config_display_busy_input_mode_wins_over_stale_env(hermes_home: Path) -> None:
    """config.yaml ``display.busy_input_mode`` must override the value in ``.env``."""
    _write_config(hermes_home, display_cfg={"busy_input_mode": "interrupt"})
    _write_env(hermes_home, {"HERMES_GATEWAY_BUSY_INPUT_MODE": "queue"})

    env = _run_gateway_import(hermes_home, initial_env={})

    assert env.get("HERMES_GATEWAY_BUSY_INPUT_MODE") == "interrupt"
|
||||
|
||||
|
||||
def test_config_timezone_wins_over_stale_env(hermes_home: Path) -> None:
    """config.yaml ``timezone`` must override ``HERMES_TIMEZONE`` from ``.env``."""
    _write_config(hermes_home, timezone="America/Los_Angeles")
    _write_env(hermes_home, {"HERMES_TIMEZONE": "UTC"})

    env = _run_gateway_import(hermes_home, initial_env={})

    assert env.get("HERMES_TIMEZONE") == "America/Los_Angeles"
|
||||
|
||||
|
||||
def test_env_value_survives_when_config_omits_key(hermes_home: Path) -> None:
    """If config.yaml doesn't set max_turns, .env value must still pass through.

    The bridge only overwrites when the config key is present — an absent
    config key should NOT clobber the .env value.
    """
    _write_config(hermes_home, agent_cfg={})  # no max_turns
    _write_env(hermes_home, {"HERMES_MAX_ITERATIONS": "123"})

    env = _run_gateway_import(hermes_home, initial_env={})

    assert env.get("HERMES_MAX_ITERATIONS") == "123"
|
||||
230
tests/gateway/test_discord_component_auth.py
Normal file
230
tests/gateway/test_discord_component_auth.py
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
"""Security regression tests: Discord component views honor role allowlists.
|
||||
|
||||
The four interactive component views (ExecApprovalView, SlashConfirmView,
|
||||
UpdatePromptView, ModelPickerView) historically accepted only
|
||||
``allowed_user_ids``. Deployments that configure DISCORD_ALLOWED_ROLES
|
||||
without DISCORD_ALLOWED_USERS therefore had a wide-open component
|
||||
surface: any guild member who could see the prompt could approve exec
|
||||
commands, cancel slash confirmations, or switch the model -- even when
|
||||
the same user would be rejected at the slash and on_message gates.
|
||||
|
||||
These tests pin the user-or-role OR semantics and the fail-closed
|
||||
behavior on missing role data so the parity cannot regress.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
# Trigger the shared discord mock from tests/gateway/conftest.py before
|
||||
# importing the production module.
|
||||
from gateway.platforms.discord import ( # noqa: E402
|
||||
ExecApprovalView,
|
||||
ModelPickerView,
|
||||
SlashConfirmView,
|
||||
UpdatePromptView,
|
||||
_component_check_auth,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Direct helper coverage -- the four views all delegate to this helper, so
|
||||
# pinning the helper's contract pins all four call sites.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _interaction(user_id, role_ids=None, *, drop_user=False, drop_roles=False):
|
||||
"""Build a mock interaction with the requested user/role shape.
|
||||
|
||||
drop_user simulates a payload whose .user attribute is None.
|
||||
drop_roles simulates a payload where .user has no .roles attribute
|
||||
at all (DM-context Member, raw User payload).
|
||||
"""
|
||||
if drop_user:
|
||||
return SimpleNamespace(user=None)
|
||||
|
||||
user_kwargs = {"id": user_id}
|
||||
if not drop_roles:
|
||||
user_kwargs["roles"] = [SimpleNamespace(id=r) for r in (role_ids or [])]
|
||||
return SimpleNamespace(user=SimpleNamespace(**user_kwargs))
|
||||
|
||||
|
||||
# ── back-compat: empty allowlists -> allow everyone ────────────────────────
|
||||
|
||||
|
||||
def test_component_check_empty_allowlists_allows_everyone():
    """SECURITY-CRITICAL backwards-compat: deployments without any
    DISCORD_ALLOWED_* env vars set must continue to allow component
    interactions from anyone (no regression for unconfigured setups)."""
    interaction = _interaction(11111)
    assert _component_check_auth(interaction, set(), set()) is True
    assert _component_check_auth(interaction, None, None) is True
|
||||
|
||||
|
||||
# ── user allowlist ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_component_check_user_in_user_allowlist_passes():
    """A user whose id is in the user allowlist is authorized."""
    interaction = _interaction(11111)
    assert _component_check_auth(interaction, {"11111"}, set()) is True
|
||||
|
||||
|
||||
def test_component_check_user_not_in_user_allowlist_rejected():
    """A user whose id is absent from a non-empty user allowlist is rejected."""
    interaction = _interaction(99999)
    assert _component_check_auth(interaction, {"11111"}, set()) is False
|
||||
|
||||
|
||||
# ── role allowlist OR semantics ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_component_check_role_only_user_with_matching_role_passes():
    """Role-only deployment (DISCORD_ALLOWED_ROLES set, DISCORD_ALLOWED_USERS
    empty) where the user is not in the empty user list but DOES carry a
    matching role: must pass. This is the regression that prompted the
    fix -- previously _check_auth allowed everyone when the user set was
    empty, ignoring the role allowlist."""
    interaction = _interaction(99999, role_ids=[42])
    assert _component_check_auth(interaction, set(), {42}) is True
|
||||
|
||||
|
||||
def test_component_check_role_only_user_without_matching_role_rejected():
    """Role-only deployment where the user has no matching role: reject.
    Previously this allowed everyone because allowed_user_ids was empty."""
    interaction = _interaction(99999, role_ids=[7, 8])
    assert _component_check_auth(interaction, set(), {42}) is False
|
||||
|
||||
|
||||
def test_component_check_user_or_role_user_match():
    """Both allowlists set; user matches user allowlist: pass."""
    interaction = _interaction(11111, role_ids=[7])
    assert _component_check_auth(interaction, {"11111"}, {42}) is True
|
||||
|
||||
|
||||
def test_component_check_user_or_role_role_match():
    """Both allowlists set; user not in user list but in role list: pass."""
    interaction = _interaction(99999, role_ids=[42])
    assert _component_check_auth(interaction, {"11111"}, {42}) is True
|
||||
|
||||
|
||||
def test_component_check_user_or_role_neither_match():
    """Both allowlists set; user matches neither: reject."""
    interaction = _interaction(99999, role_ids=[7])
    assert _component_check_auth(interaction, {"11111"}, {42}) is False
|
||||
|
||||
|
||||
# ── fail-closed on missing role data ───────────────────────────────────────
|
||||
|
||||
|
||||
def test_component_check_role_policy_with_no_roles_attr_rejects():
    """Role allowlist configured but interaction.user has no .roles
    attribute (DM-context Member, raw User payload): must reject. A user
    without resolvable roles cannot satisfy a role allowlist."""
    interaction = _interaction(11111, drop_roles=True)
    assert _component_check_auth(interaction, set(), {42}) is False
|
||||
|
||||
|
||||
def test_component_check_missing_user_with_allowlist_rejects():
    """``interaction.user`` missing while any allowlist is set: deny, don't raise.

    Both a user-only and a role-only policy are exercised; each must
    return ``False`` rather than blow up with ``AttributeError``.
    """
    userless = _interaction(0, drop_user=True)
    policies = (
        ({"11111"}, set()),  # user allowlist only
        (set(), {42}),       # role allowlist only
    )
    for allowed_users, allowed_roles in policies:
        assert _component_check_auth(userless, allowed_users, allowed_roles) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# View construction: every view must accept allowed_role_ids and route
|
||||
# through the shared helper. Default value preserves prior call-sites.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_exec_approval_view_accepts_role_allowlist():
    """ExecApprovalView takes ``allowed_role_ids`` and applies it in ``_check_auth``."""
    view = ExecApprovalView(
        session_key="sess-1",
        allowed_user_ids={"11111"},
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])

    # Not on the user list, but holds an allowed role.
    assert view._check_auth(role_holder) is True
    # Matches neither the user list nor the role list.
    assert view._check_auth(outsider) is False
|
||||
|
||||
|
||||
def test_exec_approval_view_role_default_is_empty_set():
    """Omitting ``allowed_role_ids`` keeps the legacy user-only behaviour.

    Call sites that pass only ``allowed_user_ids`` get an empty role set,
    and authorization is decided by the user allowlist alone.
    """
    view = ExecApprovalView(session_key="sess-1", allowed_user_ids={"11111"})
    assert view.allowed_role_ids == set()

    listed_user = _interaction(11111)
    unlisted_user = _interaction(99999)
    assert view._check_auth(listed_user) is True
    assert view._check_auth(unlisted_user) is False
|
||||
|
||||
|
||||
def test_slash_confirm_view_accepts_role_allowlist():
    """SlashConfirmView with a role-only policy admits role holders only."""
    view = SlashConfirmView(
        session_key="sess-1",
        confirm_id="c1",
        allowed_user_ids=set(),
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])
    assert view._check_auth(role_holder) is True
    assert view._check_auth(outsider) is False
|
||||
|
||||
|
||||
def test_update_prompt_view_accepts_role_allowlist():
    """UpdatePromptView with a role-only policy admits role holders only."""
    view = UpdatePromptView(
        session_key="sess-1",
        allowed_user_ids=set(),
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])
    assert view._check_auth(role_holder) is True
    assert view._check_auth(outsider) is False
|
||||
|
||||
|
||||
def test_model_picker_view_accepts_role_allowlist():
    """ModelPickerView with a role-only policy admits role holders only."""

    async def _noop(*_a, **_k):
        # Selection callback is required by the constructor but never invoked here.
        return ""

    view = ModelPickerView(
        providers=[],
        current_model="m",
        current_provider="p",
        session_key="sess-1",
        on_model_selected=_noop,
        allowed_user_ids=set(),
        allowed_role_ids={42},
    )
    role_holder = _interaction(99999, role_ids=[42])
    outsider = _interaction(99999, role_ids=[7])
    assert view._check_auth(role_holder) is True
    assert view._check_auth(outsider) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Empty allowlists across views: legacy "allow everyone" must hold.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "view_factory",
    [
        lambda: ExecApprovalView(session_key="s", allowed_user_ids=set()),
        lambda: SlashConfirmView(session_key="s", confirm_id="c", allowed_user_ids=set()),
        lambda: UpdatePromptView(session_key="s", allowed_user_ids=set()),
    ],
)
def test_views_empty_allowlists_allow_everyone(view_factory):
    """A view built with an empty user allowlist and no role allowlist admits anyone."""
    anyone = _interaction(99999)
    verdict = view_factory()._check_auth(anyone)
    assert verdict is True
|
||||
|
||||
|
||||
def test_model_picker_view_empty_allowlists_allow_everyone():
    """ModelPickerView without any allowlist defaults roles to ``set()`` and admits anyone."""

    async def _noop(*_a, **_k):
        # Selection callback is required by the constructor but never invoked here.
        return ""

    view = ModelPickerView(
        providers=[],
        current_model="m",
        current_provider="p",
        session_key="s",
        on_model_selected=_noop,
        allowed_user_ids=set(),
    )
    assert view.allowed_role_ids == set()

    anyone = _interaction(99999)
    assert view._check_auth(anyone) is True
|
||||
|
|
@ -172,6 +172,69 @@ async def test_connect_only_requests_members_intent_when_needed(monkeypatch, all
|
|||
await adapter.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_reconnect_closes_previous_client_to_prevent_zombie_websocket(monkeypatch):
    """Regression for #18187: a second ``connect()`` must close the first client.

    Calling ``connect()`` twice with no ``disconnect()`` in between (for
    example an in-process reconnect attempt) has to close the old
    ``commands.Bot`` before a new one is created. Otherwise two websockets
    stay alive, both fire ``on_message``, and the user gets two differently
    worded replies.
    """
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

    # Token-lock helpers are stubbed so the adapter always "acquires" the lock.
    def _always_acquire(scope, identity, metadata=None):
        return (True, None)

    def _release_noop(scope, identity):
        return None

    monkeypatch.setattr("gateway.status.acquire_scoped_lock", _always_acquire)
    monkeypatch.setattr("gateway.status.release_scoped_lock", _release_noop)

    intents = SimpleNamespace(
        message_content=False,
        dm_messages=False,
        guild_messages=False,
        members=False,
        voice_states=False,
    )
    monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents)

    class TrackedBot(FakeBot):
        """FakeBot that records close() calls and reports open/closed state."""

        _closed = False

        def is_closed(self):
            return self._closed

        async def close(self):
            self._closed = True

    spawned = []

    def fake_bot_factory(*, command_prefix, intents, proxy=None, allowed_mentions=None, **_):
        # Every bot the adapter builds is recorded so the test can inspect it later.
        bot = TrackedBot(intents=intents, allowed_mentions=allowed_mentions)
        spawned.append(bot)
        return bot

    monkeypatch.setattr(discord_platform.commands, "Bot", fake_bot_factory)
    monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())

    # First connect: fresh adapter, no prior client.
    assert await adapter.connect() is True
    assert len(spawned) == 1
    first_bot = spawned[0]
    assert first_bot._closed is False, "first bot should still be open after connect()"

    # Second connect with no disconnect in between: an in-process reconnect.
    # A still-open first bot here would be the zombie that causes the
    # duplicate responses.
    assert await adapter.connect() is True
    assert len(spawned) == 2
    second_bot = spawned[1]

    # The old client has to be closed and the new one installed.
    assert first_bot._closed is True, (
        "First Discord client must be closed on re-entry of connect() to prevent "
        "zombie websocket (#18187)"
    )
    assert second_bot._closed is False, "second bot should still be open"
    assert adapter._client is second_bot

    await adapter.disconnect()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_connect_releases_token_lock_on_timeout(monkeypatch):
|
||||
adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
|
||||
|
|
|
|||
737
tests/gateway/test_discord_slash_auth.py
Normal file
737
tests/gateway/test_discord_slash_auth.py
Normal file
|
|
@ -0,0 +1,737 @@
|
|||
"""Security regression tests: slash commands honor on_message authorization gates.
|
||||
|
||||
Slash invocations (``_run_simple_slash``, ``_handle_thread_create_slash``)
|
||||
historically bypassed every gate ``on_message`` enforces — DISCORD_ALLOWED_USERS,
|
||||
DISCORD_ALLOWED_ROLES, DISCORD_ALLOWED_CHANNELS, DISCORD_IGNORED_CHANNELS.
|
||||
Any guild member could invoke ``/background``, ``/restart``, etc. as the
|
||||
operator. ``_check_slash_authorization`` mirrors all four gates one-for-one.
|
||||
|
||||
These tests pin the security-correct behavior so the bypass cannot regress.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Discord module mock — borrowed from test_discord_slash_commands.py so this
|
||||
# file runs on machines without discord.py installed.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _ensure_discord_mock():
    """Register a stand-in ``discord`` package in ``sys.modules`` if none is loaded.

    Nothing happens when ``sys.modules`` already holds a ``discord`` entry,
    whether that is the real package (it has ``__file__``) or a stub that
    was registered earlier. Otherwise a ``MagicMock`` module is installed,
    carrying plain classes for the pieces these tests touch (channel types,
    ``Permissions`` and a minimal ``app_commands`` namespace), together with
    ``discord.ext`` and ``discord.ext.commands`` entries.
    """
    if sys.modules.get("discord") is not None:
        # Real discord.py or a previously registered stub: leave it alone.
        return

    class _FakePermissions:
        def __init__(self, value=0, **_):
            self.value = value

    class _FakeGroup:
        def __init__(self, *, name, description, parent=None):
            self.name = name
            self.description = description
            self.parent = parent
            self._children: dict[str, object] = {}
            if parent is not None:
                parent.add_command(self)

        def add_command(self, cmd):
            self._children[cmd.name] = cmd

    class _FakeCommand:
        def __init__(self, *, name, description, callback, parent=None):
            self.name = name
            self.description = description
            self.callback = callback
            self.parent = parent
            self.default_permissions = None

    def _identity_decorator(**kwargs):
        # describe/choices/autocomplete only need to hand the function back.
        return lambda fn: fn

    def _choice(**kwargs):
        return SimpleNamespace(**kwargs)

    discord_mod = MagicMock()
    discord_mod.Intents.default.return_value = MagicMock()
    # Real (empty) classes so isinstance checks against them behave sensibly.
    for type_name in ("DMChannel", "Thread", "ForumChannel"):
        setattr(discord_mod, type_name, type(type_name, (), {}))
    discord_mod.Interaction = object
    discord_mod.Permissions = _FakePermissions
    discord_mod.app_commands = SimpleNamespace(
        describe=_identity_decorator,
        choices=_identity_decorator,
        autocomplete=_identity_decorator,
        Choice=_choice,
        Group=_FakeGroup,
        Command=_FakeCommand,
    )

    commands_mod = MagicMock()
    commands_mod.Bot = MagicMock
    ext_mod = MagicMock()
    ext_mod.commands = commands_mod

    sys.modules["discord"] = discord_mod
    # Existing discord.ext entries are kept; only missing ones are filled in.
    sys.modules.setdefault("discord.ext", ext_mod)
    sys.modules.setdefault("discord.ext.commands", commands_mod)
|
||||
|
||||
|
||||
_ensure_discord_mock()
|
||||
|
||||
from gateway.platforms.discord import DiscordAdapter # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolate_discord_env(monkeypatch):
    """Remove every Discord policy env var so each test starts from a clean slate."""
    policy_vars = (
        "DISCORD_ALLOWED_USERS",
        "DISCORD_ALLOWED_ROLES",
        "DISCORD_ALLOWED_CHANNELS",
        "DISCORD_IGNORED_CHANNELS",
        "DISCORD_HIDE_SLASH_COMMANDS",
        "DISCORD_ALLOW_BOTS",
    )
    for name in policy_vars:
        # raising=False: a variable that is already absent is not an error.
        monkeypatch.delenv(name, raising=False)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _stub_discord_permissions(monkeypatch):
    """Swap ``discord.Permissions`` for a minimal class exposing ``.value``.

    Tests assert on the permission bitfield, and this keeps that assertion
    valid whether the loaded ``discord`` is the real library or a MagicMock
    installed by a sibling test module.
    """
    import discord

    class _Perm:
        def __init__(self, value=0, **_):
            # Extra keyword arguments are accepted and ignored.
            self.value = value

    monkeypatch.setattr(discord, "Permissions", _Perm)
|
||||
|
||||
|
||||
@pytest.fixture
def adapter():
    """Return a DiscordAdapter whose client is a bare namespace (bot user, no guilds)."""
    discord_adapter = DiscordAdapter(PlatformConfig(enabled=True, token="***"))
    bot_user = SimpleNamespace(id=99999, name="HermesBot")
    discord_adapter._client = SimpleNamespace(user=bot_user, guilds=[])
    return discord_adapter
|
||||
|
||||
|
||||
_SENTINEL = object()
|
||||
|
||||
|
||||
def _make_interaction(
    user_id,
    *,
    channel_id=12345,
    guild_id=42,
    in_dm=False,
    in_thread=False,
    parent_channel_id=None,
    user=_SENTINEL,
):
    """Build a fake Discord Interaction whose response has not been sent yet.

    The channel is chosen in this order: ``in_dm`` gives a
    ``discord.DMChannel``; ``in_thread`` gives a ``discord.Thread`` with
    ``id`` and ``parent_id`` set; ``channel_id=None`` gives no channel at
    all (a guild payload without a resolvable channel id, for fail-closed
    tests); anything else gives a plain namespace carrying the id.

    Leaving ``user`` at its default builds a user from ``user_id``; passing
    ``user=None`` simulates a payload with no user object.
    """
    import discord

    response = SimpleNamespace(send_message=AsyncMock(), defer=AsyncMock())

    if in_dm:
        channel = discord.DMChannel()
    elif in_thread:
        channel = discord.Thread()
        channel.id = channel_id
        channel.parent_id = parent_channel_id
    elif channel_id is None:
        channel = None
    else:
        channel = SimpleNamespace(id=channel_id)

    # The sentinel tells "not supplied" apart from an explicit user=None.
    user_obj = (
        SimpleNamespace(id=int(user_id), name=f"user_{user_id}")
        if user is _SENTINEL
        else user
    )

    return SimpleNamespace(
        user=user_obj,
        guild=SimpleNamespace(owner_id=999),
        guild_id=guild_id,
        channel_id=channel_id,
        channel=channel,
        response=response,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backwards-compat: empty allowlist → everything passes (matches on_message)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_no_allowlist_allows_everyone(adapter):
    """SECURITY-CRITICAL backwards-compat: no allowlist means no restriction.

    Deployments that set none of the allowlist env vars must see zero
    behaviour change. on_message lets everyone through in that case, so
    the slash gate must too -- and without sending a rejection.
    """
    interaction = _make_interaction("999999999")
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is True
    interaction.response.send_message.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_no_allowlist_dm_also_allowed(adapter):
    """DMs follow the same rule: with no allowlist configured, nothing is blocked."""
    dm_interaction = _make_interaction("999999999", in_dm=True)
    allowed = await adapter._check_slash_authorization(dm_interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# User allowlist (DISCORD_ALLOWED_USERS) parity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_allowed_user_passes(adapter):
    """A user on the user allowlist is let through with no rejection message."""
    adapter._allowed_user_ids = {"100200300"}
    interaction = _make_interaction("100200300")
    allowed = await adapter._check_slash_authorization(interaction, "/background hi")
    assert allowed is True
    interaction.response.send_message.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_disallowed_user_rejected_with_ephemeral(adapter, caplog):
    """A user off the allowlist is denied, told so ephemerally, and logged.

    The rejection must be sent exactly once with ``ephemeral=True`` and
    mention "not authorized"; the warning log must name both the attempt
    and the gate that blocked it.
    """
    adapter._allowed_user_ids = {"100200300"}
    interaction = _make_interaction("999999999")

    with caplog.at_level(logging.WARNING):
        assert await adapter._check_slash_authorization(interaction, "/background hi") is False

    send_message = interaction.response.send_message
    send_message.assert_awaited_once()
    args, kwargs = send_message.call_args
    assert kwargs.get("ephemeral") is True
    # The text may arrive positionally or as content=...
    reply_text = args[0] if args else kwargs.get("content", "")
    assert "not authorized" in reply_text.lower()

    assert any("Unauthorized slash attempt" in r.message for r in caplog.records)
    assert any("DISCORD_ALLOWED_USERS" in r.message for r in caplog.records)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Role allowlist (DISCORD_ALLOWED_ROLES) parity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_role_member_passes(adapter):
    """A caller whose ``roles`` include an allowed role id passes the gate."""
    adapter._allowed_role_ids = {1234}
    interaction = _make_interaction("999999999")
    interaction.user.roles = [SimpleNamespace(id=1234)]
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_role_non_member_rejected(adapter):
    """With only a role allowlist set, a caller lacking every allowed role is denied."""
    adapter._allowed_role_ids = {1234}
    interaction = _make_interaction("999999999")
    # Holds a role, just not an allowed one.
    interaction.user.roles = [SimpleNamespace(id=9999)]
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Channel allowlist (DISCORD_ALLOWED_CHANNELS) parity — the gate prajer used
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_channel_not_in_allowlist_rejected(adapter, monkeypatch, caplog):
    """A slash command from a channel outside DISCORD_ALLOWED_CHANNELS is denied.

    on_message already blocks such channels; slash commands must match.
    This is the exact bypass that was exploited. The warning log must
    name the channel allowlist as the reason.
    """
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111,2222")
    interaction = _make_interaction("100200300", channel_id=9999)

    with caplog.at_level(logging.WARNING):
        assert await adapter._check_slash_authorization(interaction, "/background hi") is False

    assert any("DISCORD_ALLOWED_CHANNELS" in r.message for r in caplog.records)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_channel_in_allowlist_passes(adapter, monkeypatch):
    """A slash command from a channel listed in DISCORD_ALLOWED_CHANNELS passes."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111,2222")
    interaction = _make_interaction("100200300", channel_id=1111)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_channel_allowlist_wildcard_passes(adapter, monkeypatch):
    """``*`` in DISCORD_ALLOWED_CHANNELS admits every channel, as on_message does."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "*")
    interaction = _make_interaction("100200300", channel_id=9999)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_channel_allowlist_does_not_apply_to_dms(adapter, monkeypatch):
    """The channel allowlist does not gate DMs; a DM passes even when one is set."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111")
    dm_interaction = _make_interaction("100200300", in_dm=True)
    allowed = await adapter._check_slash_authorization(dm_interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Channel blocklist (DISCORD_IGNORED_CHANNELS) parity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ignored_channel_rejected(adapter, monkeypatch, caplog):
    """A channel listed in DISCORD_IGNORED_CHANNELS is denied, and the log says why."""
    monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "9999")
    interaction = _make_interaction("100200300", channel_id=9999)

    with caplog.at_level(logging.WARNING):
        assert await adapter._check_slash_authorization(interaction, "/help") is False

    assert any("DISCORD_IGNORED_CHANNELS" in r.message for r in caplog.records)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ignored_channel_wildcard_blocks_all(adapter, monkeypatch):
    """``*`` in DISCORD_IGNORED_CHANNELS blocks slash commands from any channel."""
    monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "*")
    interaction = _make_interaction("100200300", channel_id=9999)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cross-platform admin notification
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_unauthorized_attempt_notifies_telegram(adapter):
    """A denied slash attempt is reported to the Telegram home channel.

    The alert must go to the configured home chat and carry the user id,
    the attempted command and the gate that blocked it.
    """
    from gateway.session import Platform

    telegram_adapter = SimpleNamespace(send=AsyncMock())
    home = SimpleNamespace(chat_id="987654321")

    def _home_for(p):
        # Only Telegram has a home channel in this scenario.
        return home if p is Platform.TELEGRAM else None

    adapter.gateway_runner = SimpleNamespace(
        adapters={Platform.TELEGRAM: telegram_adapter},
        config=SimpleNamespace(get_home_channel=_home_for),
    )
    adapter._allowed_user_ids = {"100200300"}

    interaction = _make_interaction("999999999")
    await adapter._check_slash_authorization(interaction, "/background hi")

    # The notification is fire-and-forget; yield twice so the scheduled task runs.
    for _ in range(2):
        await asyncio.sleep(0)

    telegram_adapter.send.assert_awaited_once()
    chat_id, msg = telegram_adapter.send.call_args.args
    assert chat_id == "987654321"
    for fragment in ("Unauthorized", "999999999", "/background hi", "DISCORD_ALLOWED_USERS"):
        assert fragment in msg
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_notify_silently_no_ops_without_runner(adapter):
    """With no gateway runner attached, the notifier must return without raising."""
    adapter.gateway_runner = None
    notify_args = ("u", "1", 2, 3, "/x", "reason")
    await adapter._notify_unauthorized_slash(*notify_args)  # must not raise
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_notify_falls_back_to_slack_if_no_telegram(adapter):
    """When only Slack is connected, the unauthorized-attempt alert goes to Slack."""
    from gateway.session import Platform

    slack_adapter = SimpleNamespace(send=AsyncMock())
    home_slack = SimpleNamespace(chat_id="C12345")

    def _home_for(p):
        # Slack is the only platform with a home channel here.
        return home_slack if p is Platform.SLACK else None

    adapter.gateway_runner = SimpleNamespace(
        adapters={Platform.SLACK: slack_adapter},
        config=SimpleNamespace(get_home_channel=_home_for),
    )

    await adapter._notify_unauthorized_slash("u", "1", 2, 3, "/x", "reason")
    slack_adapter.send.assert_awaited_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Opt-in visibility hide
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_visibility_hide_off_by_default_is_noop(adapter, monkeypatch):
    """With DISCORD_HIDE_SLASH_COMMANDS unset, the opt-in hide is off.

    The env-gated branch lives at the call site of
    ``_apply_owner_only_visibility`` and cannot be simulated cleanly from
    here, so this test only pins two facts: the variable really is absent
    by default, and the helper can still be called directly without
    raising. Calling it directly does apply the change -- the gating is
    the caller's job.
    """
    # Imported locally so this test does not depend on the module-level
    # ``import os`` that sits further down the file.
    import os

    cmd = SimpleNamespace(name="x", default_permissions="UNCHANGED")
    tree = SimpleNamespace(get_commands=lambda: [cmd])

    assert os.environ.get("DISCORD_HIDE_SLASH_COMMANDS") is None

    # The helper itself is not env-gated and must still work when invoked directly.
    adapter._apply_owner_only_visibility(tree)
|
||||
|
||||
|
||||
def test_visibility_hide_helper_zeroes_perms(adapter):
    """The helper gives every command in the tree a zero-valued permissions object."""
    cmd_a = SimpleNamespace(name="a", default_permissions=None)
    cmd_b = SimpleNamespace(name="b", default_permissions=None)
    tree = SimpleNamespace(get_commands=lambda: [cmd_a, cmd_b])

    adapter._apply_owner_only_visibility(tree)

    assert cmd_a.default_permissions is not None
    assert cmd_b.default_permissions is not None
    assert cmd_a.default_permissions.value == 0
    assert cmd_b.default_permissions.value == 0
|
||||
|
||||
|
||||
def test_visibility_hide_tolerates_unsetable_command(adapter, caplog):
    """A command that rejects attribute assignment must not stop the others.

    ``_Frozen`` only has a ``name`` slot, so ``default_permissions``
    cannot be set on it. It is listed first; the ordinary command after
    it must still end up with zeroed permissions.
    """

    class _Frozen:
        __slots__ = ("name",)

        def __init__(self, name):
            self.name = name

    cmd_bad = _Frozen("bad")
    cmd_ok = SimpleNamespace(name="ok", default_permissions=None)
    tree = SimpleNamespace(get_commands=lambda: [cmd_bad, cmd_ok])

    with caplog.at_level(logging.DEBUG):
        adapter._apply_owner_only_visibility(tree)

    assert cmd_ok.default_permissions.value == 0
|
||||
|
||||
|
||||
# os import for test_visibility_hide_off_by_default_is_noop
|
||||
import os # noqa: E402
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fail-closed parity on malformed slash auth context
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_missing_channel_id_rejected_when_channel_policy_configured(
    adapter, monkeypatch,
):
    """No resolvable channel id plus a channel allowlist: fail closed.

    Without this guard the whole channel-policy block was silently
    skipped for such guild interactions. The caller must also receive
    exactly one rejection message.
    """
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "1111,2222")
    interaction = _make_interaction("100200300", channel_id=None)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is False
    interaction.response.send_message.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_missing_channel_id_allowed_when_no_channel_policy(adapter):
    """Missing channel id with no channel allowlist configured still passes.

    This matches the no-allowlist default: no channel policy, no gate.
    """
    interaction = _make_interaction("100200300", channel_id=None)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_missing_user_rejected_when_allowlist_configured(adapter):
    """``interaction.user`` is None while a user allowlist is active: fail closed.

    The gate must return ``False`` and send one rejection rather than
    raise ``AttributeError``.
    """
    adapter._allowed_user_ids = {"100200300"}
    interaction = _make_interaction("100200300", user=None)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is False
    interaction.response.send_message.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_missing_user_allowed_when_no_allowlist_configured(adapter):
    """``interaction.user`` is None and no allowlist is configured: allow.

    This preserves the no-allowlist back-compat rule -- when no policy
    is in effect, anyone is allowed.
    """
    interaction = _make_interaction("100200300", user=None)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Thread parent channel allowlist parity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_thread_parent_in_allowlist_passes(adapter, monkeypatch):
    """A thread passes when its parent channel is allowed, even if the thread id is not."""
    monkeypatch.setenv("DISCORD_ALLOWED_CHANNELS", "5555")
    thread_interaction = _make_interaction(
        "100200300",
        channel_id=9999,
        in_thread=True,
        parent_channel_id=5555,
    )
    allowed = await adapter._check_slash_authorization(thread_interaction, "/help")
    assert allowed is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_thread_parent_in_ignorelist_rejects(adapter, monkeypatch):
    """A thread is denied when its parent channel is ignored, even if the thread id is not."""
    monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "5555")
    thread_interaction = _make_interaction(
        "100200300",
        channel_id=9999,
        in_thread=True,
        parent_channel_id=5555,
    )
    allowed = await adapter._check_slash_authorization(thread_interaction, "/help")
    assert allowed is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ignored_beats_allowed(adapter, monkeypatch):
    """A channel listed as both allowed and ignored is denied: ignored wins.

    Otherwise adding a channel to the ignore list would do nothing
    whenever it was also explicitly allowed.
    """
    for env_var in ("DISCORD_ALLOWED_CHANNELS", "DISCORD_IGNORED_CHANNELS"):
        monkeypatch.setenv(env_var, "1111")
    interaction = _make_interaction("100200300", channel_id=1111)
    allowed = await adapter._check_slash_authorization(interaction, "/help")
    assert allowed is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Admin notify soft-fail fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_notify_falls_back_to_slack_on_telegram_soft_fail(adapter):
    """A Telegram send reporting ``success=False`` must not end the fallback chain.

    If a soft failure counted as delivered, a Telegram outage would
    swallow alerts silently. Both Telegram and Slack must be tried.
    """
    from gateway.session import Platform

    soft_fail = SimpleNamespace(success=False, error="rate limited")
    telegram_adapter = SimpleNamespace(send=AsyncMock(return_value=soft_fail))
    slack_adapter = SimpleNamespace(send=AsyncMock())
    homes = {
        Platform.TELEGRAM: SimpleNamespace(chat_id="987654321"),
        Platform.SLACK: SimpleNamespace(chat_id="C12345"),
    }

    def _home_for(p):
        return homes.get(p)

    adapter.gateway_runner = SimpleNamespace(
        adapters={
            Platform.TELEGRAM: telegram_adapter,
            Platform.SLACK: slack_adapter,
        },
        config=SimpleNamespace(get_home_channel=_home_for),
    )

    await adapter._notify_unauthorized_slash("u", "1", 2, 3, "/x", "reason")

    telegram_adapter.send.assert_awaited_once()
    slack_adapter.send.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_notify_returns_on_telegram_truthy_success(adapter):
    """A Telegram send reporting ``success=True`` stops the chain at Telegram.

    This guards against the soft-fail handling over-correcting: once
    Telegram has delivered, Slack must not be contacted.
    """
    from gateway.session import Platform

    ok = SimpleNamespace(success=True, message_id="m1")
    telegram_adapter = SimpleNamespace(send=AsyncMock(return_value=ok))
    slack_adapter = SimpleNamespace(send=AsyncMock())
    homes = {
        Platform.TELEGRAM: SimpleNamespace(chat_id="987654321"),
        Platform.SLACK: SimpleNamespace(chat_id="C12345"),
    }

    def _home_for(p):
        return homes.get(p)

    adapter.gateway_runner = SimpleNamespace(
        adapters={
            Platform.TELEGRAM: telegram_adapter,
            Platform.SLACK: slack_adapter,
        },
        config=SimpleNamespace(get_home_channel=_home_for),
    )

    await adapter._notify_unauthorized_slash("u", "1", 2, 3, "/x", "reason")

    telegram_adapter.send.assert_awaited_once()
    slack_adapter.send.assert_not_awaited()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /skill autocomplete + callback gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _capture_skill_registration(adapter, monkeypatch, entries):
    """Register ``/skill`` against a stubbed catalog and return its two callbacks.

    ``_register_skill_group`` is run with
    ``hermes_cli.commands.discord_skill_commands_by_category`` replaced by
    a stub that reports ``entries`` as uncategorized skills, and with
    ``discord.app_commands.autocomplete`` replaced by a recorder that
    keeps the callback passed as ``name=...``.

    Returns ``(handler_callback, autocomplete_callback)``, where the
    handler is the ``callback`` of the first command added to the tree.
    """
    import discord
    import hermes_cli.commands as _hc

    captured: dict = {}
    registered: list = []

    def fake_categories(reserved_names):
        # Same tuple shape as discord_skill_commands_by_category:
        # (categories_dict, uncategorized_list, hidden_count)
        return ({}, list(entries), 0)

    def capture_autocomplete(**kwargs):
        # /skill registers a single autocomplete, for the ``name`` option.
        captured["autocomplete"] = kwargs.get("name")
        return lambda fn: fn

    class _Tree:
        """Minimal command tree that records what gets added."""

        def get_commands(self):
            return []

        def add_command(self, cmd):
            registered.append(cmd)

    monkeypatch.setattr(_hc, "discord_skill_commands_by_category", fake_categories)
    monkeypatch.setattr(
        discord.app_commands, "autocomplete", capture_autocomplete, raising=False,
    )

    adapter._register_skill_group(_Tree())

    assert registered, "_register_skill_group did not register a command"
    skill_command = registered[0]
    return skill_command.callback, captured["autocomplete"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_skill_autocomplete_returns_empty_for_unauthorized(
    adapter, monkeypatch,
):
    """Users outside the allowlist must get no autocomplete suggestions.

    With an allowlist configured and the interaction user not on it, the
    autocomplete callback returns ``[]`` so the installed skill catalog is
    not leaked to people who cannot run /skill.
    """
    adapter._allowed_user_ids = {"100200300"}
    catalog = [
        ("alpha", "First skill", "/alpha"),
        ("beta", "Second skill", "/beta"),
    ]
    _, autocomplete = _capture_skill_registration(adapter, monkeypatch, catalog)

    outsider = _make_interaction("999999999")
    suggestions = await autocomplete(outsider, "")

    assert suggestions == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_skill_autocomplete_returns_choices_for_authorized(
    adapter, monkeypatch,
):
    """An allowlisted user receives one autocomplete choice per catalog entry."""
    adapter._allowed_user_ids = {"100200300"}
    catalog = [
        ("alpha", "First skill", "/alpha"),
        ("beta", "Second skill", "/beta"),
    ]
    _, autocomplete = _capture_skill_registration(adapter, monkeypatch, catalog)

    member = _make_interaction("100200300")
    suggestions = await autocomplete(member, "")

    assert len(suggestions) == 2
    offered_values = {choice.value for choice in suggestions}
    assert offered_values == {"alpha", "beta"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_skill_handler_rejects_before_dispatch_for_unauthorized(
    adapter, monkeypatch,
):
    """The /skill handler must call _check_slash_authorization BEFORE
    skill_lookup. Otherwise unknown vs known names produce divergent
    responses ("Unknown skill: foo" vs auth rejection) which is a
    catalog-probing oracle.

    Checks performed for an unauthorized caller asking for a known skill:
    one ephemeral "not authorized" reply is sent, nothing reaches
    ``_run_simple_slash``, and the reply text does not echo the skill name.
    """
    adapter._allowed_user_ids = {"100200300"}
    entries = [("alpha", "First skill", "/alpha")]
    handler, _autocomplete = _capture_skill_registration(
        adapter, monkeypatch, entries,
    )

    # Patch _run_simple_slash so we can detect any leak through it.
    dispatched: list = []

    async def fake_dispatch(_interaction, text):
        dispatched.append(text)

    adapter._run_simple_slash = fake_dispatch  # type: ignore[assignment]

    interaction = _make_interaction("999999999")
    await handler(interaction, "alpha", "")

    interaction.response.send_message.assert_awaited_once()
    args, kwargs = interaction.response.send_message.call_args
    assert kwargs.get("ephemeral") is True

    # The text may arrive positionally or as ``content=``; normalise once.
    message = (args[0] if args else kwargs.get("content", "")).lower()
    assert "not authorized" in message

    # Critically: nothing was dispatched, and the auth message did NOT
    # mention the skill name "alpha" (no catalog leak).
    assert dispatched == []
    assert "alpha" not in message
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_skill_handler_known_and_unknown_produce_same_rejection(
    adapter, monkeypatch,
):
    """Unauthorized callers get an identical reply for real and bogus skill names.

    The handler is invoked once with a registered name and once with an
    unregistered one; the positional and keyword arguments passed to
    ``send_message`` must match exactly between the two calls.
    """
    adapter._allowed_user_ids = {"100200300"}
    catalog = [("alpha", "First skill", "/alpha")]
    handler, _ = _capture_skill_registration(adapter, monkeypatch, catalog)

    adapter._run_simple_slash = AsyncMock()  # type: ignore[assignment]

    probe_known = _make_interaction("999999999")
    probe_unknown = _make_interaction("999999999")
    await handler(probe_known, "alpha", "")
    await handler(probe_unknown, "definitely-not-a-skill", "")

    known_send = probe_known.response.send_message
    unknown_send = probe_unknown.response.send_message
    known_send.assert_awaited_once()
    unknown_send.assert_awaited_once()

    known_args, known_kwargs = known_send.call_args
    unknown_args, unknown_kwargs = unknown_send.call_args
    assert known_args == unknown_args
    assert known_kwargs == unknown_kwargs
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_skill_handler_dispatches_for_authorized(
    adapter, monkeypatch,
):
    """An allowlisted user's /skill call is forwarded to ``_run_simple_slash``.

    The forwarded text is the skill's command key followed by the
    user-supplied arguments.
    """
    adapter._allowed_user_ids = {"100200300"}
    catalog = [("alpha", "First skill", "/alpha")]
    handler, _ = _capture_skill_registration(adapter, monkeypatch, catalog)

    forwarded: list = []

    async def _record_dispatch(_interaction, text):
        forwarded.append(text)

    adapter._run_simple_slash = _record_dispatch  # type: ignore[assignment]

    member = _make_interaction("100200300")
    await handler(member, "alpha", "extra args")

    assert forwarded == ["/alpha extra args"]
|
||||
|
|
@ -107,6 +107,10 @@ def adapter():
|
|||
user=SimpleNamespace(id=99999, name="HermesBot"),
|
||||
)
|
||||
adapter._text_batch_delay_seconds = 0 # disable batching for tests
|
||||
# Slash auth is exercised in test_discord_slash_auth.py — bypass it here
|
||||
# so registration / dispatch / thread behavior tests don't have to
|
||||
# construct a full auth context (allowlist / channel scope).
|
||||
adapter._check_slash_authorization = AsyncMock(return_value=True)
|
||||
return adapter
|
||||
|
||||
|
||||
|
|
@ -117,6 +121,10 @@ def adapter():
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_registers_native_thread_slash_command(adapter):
|
||||
# The /thread slash closure now delegates ALL the work — including
|
||||
# defer() — to _handle_thread_create_slash so the auth gate can send
|
||||
# an ephemeral rejection on the still-unresponded interaction. The
|
||||
# closure should just forward.
|
||||
adapter._handle_thread_create_slash = AsyncMock()
|
||||
adapter._register_slash_commands()
|
||||
|
||||
|
|
@ -127,7 +135,9 @@ async def test_registers_native_thread_slash_command(adapter):
|
|||
|
||||
await command(interaction, name="Planning", message="", auto_archive_duration=1440)
|
||||
|
||||
interaction.response.defer.assert_awaited_once_with(ephemeral=True)
|
||||
# defer is now performed inside _handle_thread_create_slash, AFTER the
|
||||
# auth check passes — not by the closure.
|
||||
interaction.response.defer.assert_not_awaited()
|
||||
adapter._handle_thread_create_slash.assert_awaited_once_with(interaction, "Planning", "", 1440)
|
||||
|
||||
|
||||
|
|
@ -298,6 +308,7 @@ async def test_handle_thread_create_slash_reports_success(adapter):
|
|||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
response=SimpleNamespace(defer=AsyncMock()),
|
||||
)
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "Kickoff", 1440)
|
||||
|
|
@ -326,6 +337,7 @@ async def test_handle_thread_create_slash_dispatches_session_when_message_provid
|
|||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
response=SimpleNamespace(defer=AsyncMock()),
|
||||
)
|
||||
|
||||
adapter._dispatch_thread_session = AsyncMock()
|
||||
|
|
@ -348,6 +360,7 @@ async def test_handle_thread_create_slash_no_dispatch_without_message(adapter):
|
|||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
response=SimpleNamespace(defer=AsyncMock()),
|
||||
)
|
||||
|
||||
adapter._dispatch_thread_session = AsyncMock()
|
||||
|
|
@ -371,6 +384,7 @@ async def test_handle_thread_create_slash_falls_back_to_seed_message(adapter):
|
|||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
guild=SimpleNamespace(name="TestGuild"),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
response=SimpleNamespace(defer=AsyncMock()),
|
||||
)
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "Kickoff", 1440)
|
||||
|
|
@ -395,6 +409,7 @@ async def test_handle_thread_create_slash_reports_failure(adapter):
|
|||
channel_id=123,
|
||||
user=SimpleNamespace(display_name="Jezza", id=42),
|
||||
followup=SimpleNamespace(send=AsyncMock()),
|
||||
response=SimpleNamespace(defer=AsyncMock()),
|
||||
)
|
||||
|
||||
await adapter._handle_thread_create_slash(interaction, "Planning", "", 1440)
|
||||
|
|
|
|||
|
|
@ -1771,6 +1771,69 @@ class TestAdapterBehavior(unittest.TestCase):
|
|||
self.assertIn("GIF downgraded to file", caption)
|
||||
self.assertIn("look", caption)
|
||||
|
||||
def test_download_remote_document_reads_response_before_httpx_client_closes(self):
    """#18451 — snapshot Content-Type + body while the httpx.AsyncClient
    context is still active so pooled connections fully release on
    exit. Otherwise the response is only readable because httpx
    eagerly buffers it; a future refactor to .stream() would silently
    read-after-close."""
    from gateway.config import PlatformConfig
    from gateway.platforms.feishu import FeishuAdapter

    # Ordered log of what the fakes observed; the final assertion compares
    # positions in this list.
    events: list[str] = []

    class _FakeResponse:
        # Stand-in for an httpx response; records when the body is accessed.
        headers = {"Content-Type": "application/octet-stream"}

        def raise_for_status(self) -> None:
            events.append("raise_for_status")

        @property
        def content(self) -> bytes:
            events.append("content_read")
            return b"doc-bytes"

    class _FakeAsyncClient:
        # Stand-in for httpx.AsyncClient; records context entry/exit and GETs.
        def __init__(self, *_a: object, **_k: object) -> None:
            pass

        async def __aenter__(self) -> "_FakeAsyncClient":
            events.append("client_enter")
            return self

        async def __aexit__(self, *exc: object) -> None:
            events.append("client_exit")

        async def get(self, *_a: object, **_k: object) -> _FakeResponse:
            events.append("get")
            return _FakeResponse()

    with tempfile.TemporaryDirectory() as tmp:
        with patch.dict(os.environ, {"HERMES_HOME": tmp}, clear=False):
            adapter = FeishuAdapter(PlatformConfig())

            async def _run() -> tuple[str, str]:
                # URL safety, the HTTP client and the on-disk cache are all
                # stubbed so the download path runs without network or files.
                with patch("tools.url_safety.is_safe_url", return_value=True):
                    with patch("httpx.AsyncClient", _FakeAsyncClient):
                        with patch(
                            "gateway.platforms.feishu.cache_document_from_bytes",
                            return_value="/tmp/cached-doc.bin",
                        ):
                            return await adapter._download_remote_document(
                                "https://example.com/doc.bin",
                                default_ext=".bin",
                                preferred_name="doc",
                            )

            path, filename = asyncio.run(_run())

    self.assertEqual(path, "/tmp/cached-doc.bin")
    self.assertTrue(filename)
    # content_read MUST happen before client_exit — otherwise we're
    # reading response body after the connection pool has been torn
    # down, which only works by accident (httpx's eager buffering).
    self.assertLess(events.index("content_read"), events.index("client_exit"))
|
||||
|
||||
def test_dedup_state_persists_across_adapter_restart(self):
|
||||
from gateway.config import PlatformConfig
|
||||
from gateway.platforms.feishu import FeishuAdapter
|
||||
|
|
|
|||
217
tests/gateway/test_goal_verdict_send.py
Normal file
217
tests/gateway/test_goal_verdict_send.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
"""Tests for gateway /goal verdict-message delivery.
|
||||
|
||||
The judge verdict message ("✓ Goal achieved", "⏸ budget exhausted", etc.)
|
||||
must reach the user after each turn. Before this fix the code checked
|
||||
``hasattr(adapter, "send_message")`` — but adapters expose ``send()``,
|
||||
never ``send_message``, so the check always evaluated False and users
|
||||
never saw verdicts. This test locks in the fix.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||
|
||||
|
||||
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Yield an isolated ``.hermes`` directory under ``tmp_path``.

    ``Path.home`` and ``HERMES_HOME`` are redirected to the temporary
    location, and the ``hermes_cli.goals`` DB cache is cleared both before
    and after the test.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Imported only after the environment is redirected.
    from hermes_cli import goals as goals_module

    goals_module._DB_CACHE.clear()
    yield hermes_dir
    goals_module._DB_CACHE.clear()
|
||||
|
||||
|
||||
def _make_source() -> SessionSource:
    """Build the Telegram DM ``SessionSource`` shared by the tests in this module."""
    source_fields = {
        "platform": Platform.TELEGRAM,
        "user_id": "u1",
        "chat_id": "c1",
        "user_name": "tester",
        "chat_type": "dm",
    }
    return SessionSource(**source_fields)
|
||||
|
||||
|
||||
class _RecordingAdapter:
|
||||
"""Minimal adapter that records send() invocations."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._pending_messages: dict = {}
|
||||
self.sends: list[dict] = []
|
||||
|
||||
async def send(self, chat_id: str, content: str, reply_to=None, metadata=None):
|
||||
self.sends.append({"chat_id": chat_id, "content": content, "metadata": metadata})
|
||||
|
||||
class _R:
|
||||
success = True
|
||||
message_id = "mock-msg"
|
||||
|
||||
return _R()
|
||||
|
||||
|
||||
def _make_runner_with_adapter():
    """Assemble a ``GatewayRunner`` wired to a recording Telegram adapter.

    The runner is created with ``object.__new__`` so ``__init__`` is not
    run; only the attributes set here exist on it. The session store is a
    ``MagicMock`` that always hands back one fixed ``SessionEntry``.

    Returns:
        ``(runner, adapter, session_entry, source)``.
    """
    from gateway.run import GatewayRunner

    source = _make_source()
    recorder = _RecordingAdapter()

    gateway = object.__new__(GatewayRunner)
    gateway.config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")},
    )
    gateway._running_agents = {}
    gateway._running_agents_ts = {}
    gateway._queued_events = {}

    entry = SessionEntry(
        session_key=build_session_key(source),
        session_id="goal-sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )

    store = MagicMock()
    store.get_or_create_session.return_value = entry
    store._generate_session_key.return_value = build_session_key(source)
    gateway.session_store = store

    gateway.adapters = {Platform.TELEGRAM: recorder}
    return gateway, recorder, entry, source
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_goal_verdict_done_sent_via_adapter_send(hermes_home):
    """A "done" verdict produces exactly one ``send()`` carrying the result.

    The message must go to the source chat and contain both the
    "Goal achieved" marker and the judge's reason text.
    """
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager

    GoalManager(session_entry.session_id).set("ship the feature")

    verdict = ("done", "the feature shipped")
    with patch("hermes_cli.goals.judge_goal", return_value=verdict):
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="I shipped the feature.",
        )
        # fire-and-forget create_task — give the loop a tick
        await asyncio.sleep(0.05)

    sent = adapter.sends
    assert len(sent) == 1, f"expected 1 send, got {len(adapter.sends)}: {adapter.sends}"
    delivered = sent[0]
    assert delivered["chat_id"] == "c1"
    assert "Goal achieved" in delivered["content"]
    assert "the feature shipped" in delivered["content"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_goal_verdict_continue_enqueues_continuation(hermes_home):
    """A "continue" verdict sends a status line and queues the next prompt.

    One "Continuing toward goal" message must be sent, and the adapter's
    ``_pending_messages`` must be non-empty afterwards so the goal loop
    proceeds on the next turn.
    """
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager

    GoalManager(session_entry.session_id).set("polish the docs")

    verdict = ("continue", "still needs work")
    with patch("hermes_cli.goals.judge_goal", return_value=verdict):
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="here's a partial edit",
        )
        await asyncio.sleep(0.05)

    # Status line sent back
    assert len(adapter.sends) == 1
    status_text = adapter.sends[0]["content"]
    assert "Continuing toward goal" in status_text
    # Continuation prompt enqueued for next turn
    assert adapter._pending_messages, "continuation prompt must be enqueued in pending_messages"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_goal_verdict_budget_exhausted_sends_pause(hermes_home):
    """With the turn budget already spent, a pause notice replaces continuation.

    The goal is saved with ``turns_used`` equal to its ``max_turns``; even
    though the judge says "continue", one message mentioning "paused" and
    "turns used" is sent and nothing is queued.
    """
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager, save_goal

    session_id = session_entry.session_id
    manager = GoalManager(session_id, default_max_turns=2)
    goal_state = manager.set("tiny goal", max_turns=2)
    goal_state.turns_used = 2
    save_goal(session_id, goal_state)

    verdict = ("continue", "keep going")
    with patch("hermes_cli.goals.judge_goal", return_value=verdict):
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="still partial",
        )
        await asyncio.sleep(0.05)

    assert len(adapter.sends) == 1
    notice = adapter.sends[0]["content"].lower()
    assert "paused" in notice
    assert "turns used" in notice
    # No continuation enqueued when budget is exhausted
    assert not adapter._pending_messages
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_goal_verdict_skipped_when_no_active_goal(hermes_home):
    """Without a goal set, the post-turn hook sends nothing and queues nothing."""
    runner, adapter, session_entry, src = _make_runner_with_adapter()

    hook_kwargs = {
        "session_entry": session_entry,
        "source": src,
        "final_response": "anything",
    }
    runner._post_turn_goal_continuation(**hook_kwargs)
    await asyncio.sleep(0.05)

    assert adapter.sends == []
    assert adapter._pending_messages == {}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_goal_verdict_survives_adapter_without_send(hermes_home):
    """The judge hook must tolerate an adapter that has no ``send`` attribute.

    The test passes as long as neither the hook call nor the following
    event-loop tick raises.
    """
    runner, _adapter, session_entry, src = _make_runner_with_adapter()

    from hermes_cli.goals import GoalManager

    GoalManager(session_entry.session_id).set("survive missing send")

    class _NoSendAdapter:
        """Adapter stub exposing only the pending-message store."""

        def __init__(self):
            self._pending_messages: dict = {}

    runner.adapters[Platform.TELEGRAM] = _NoSendAdapter()

    verdict = ("done", "ok")
    with patch("hermes_cli.goals.judge_goal", return_value=verdict):
        # must not raise
        runner._post_turn_goal_continuation(
            session_entry=session_entry,
            source=src,
            final_response="whatever",
        )
        await asyncio.sleep(0.05)
|
||||
|
|
@ -8,7 +8,7 @@ to env vars nothing read on startup — the home channel appeared to set
|
|||
successfully but was lost on every new gateway session.
|
||||
"""
|
||||
|
||||
from gateway.run import _home_target_env_var
|
||||
from gateway.run import _home_target_env_var, _home_thread_env_var
|
||||
|
||||
|
||||
def test_matrix_home_target_env_var_uses_home_room():
|
||||
|
|
@ -34,3 +34,9 @@ def test_unknown_platform_home_target_env_var_falls_back_to_home_channel():
|
|||
def test_case_insensitive_platform_name():
    """Platform names resolve to the same env var regardless of letter case."""
    expected_by_name = (
        ("MATRIX", "MATRIX_HOME_ROOM"),
        ("Email", "EMAIL_HOME_ADDRESS"),
    )
    for platform_name, env_var in expected_by_name:
        assert _home_target_env_var(platform_name) == env_var
|
||||
|
||||
|
||||
def test_home_thread_env_var_uses_home_target_name_plus_thread_id():
    """The thread env var is the platform's home-target var plus ``_THREAD_ID``."""
    expected_by_platform = (
        ("discord", "DISCORD_HOME_CHANNEL_THREAD_ID"),
        ("matrix", "MATRIX_HOME_ROOM_THREAD_ID"),
        ("email", "EMAIL_HOME_ADDRESS_THREAD_ID"),
    )
    for platform_name, env_var in expected_by_platform:
        assert _home_thread_env_var(platform_name) == env_var
|
||||
|
|
|
|||
114
tests/gateway/test_platform_http_client_limits.py
Normal file
114
tests/gateway/test_platform_http_client_limits.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""Tests for the shared httpx.Limits helper that all long-lived platform
|
||||
adapters use to tighten their keep-alive pool.
|
||||
|
||||
Context: #18451 — on macOS behind Cloudflare Warp, httpx's default
|
||||
keepalive_expiry=5s let idle CLOSE_WAIT sockets accumulate across
|
||||
multiple long-lived gateway adapters (QQ Bot, Feishu, WeCom, DingTalk,
|
||||
Signal, BlueBubbles, WeCom-callback) until the process hit the default
|
||||
256 fd limit. These tests just verify the helper returns sensibly
|
||||
tuned limits and respects env-var overrides; the actual fd-pressure
|
||||
behaviour is only observable at runtime under load.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_env(monkeypatch):
    """Remove both override env vars before every test in this module."""
    override_vars = (
        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY",
        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE",
    )
    for var_name in override_vars:
        monkeypatch.delenv(var_name, raising=False)
|
||||
|
||||
|
||||
def test_returns_none_when_httpx_unavailable(monkeypatch):
    """With the module's ``httpx`` reference set to ``None`` the helper returns ``None``.

    Callers can then fall back to httpx's built-in default limits without
    the helper raising.
    """
    import gateway.platforms._http_client_limits as limits_module

    monkeypatch.setattr(limits_module, "httpx", None)
    result = limits_module.platform_httpx_limits()
    assert result is None
|
||||
|
||||
|
||||
def test_default_limits_tighten_keepalive_below_httpx_default():
    """Default limits use a sub-5s keep-alive expiry and a bounded keep-alive pool."""
    import httpx
    from gateway.platforms._http_client_limits import platform_httpx_limits

    limits = platform_httpx_limits()
    assert isinstance(limits, httpx.Limits)

    # httpx default keepalive_expiry is 5.0 — ours must be shorter so
    # CLOSE_WAIT sockets drain promptly behind proxies like Warp.
    expiry = limits.keepalive_expiry
    assert expiry is not None
    assert expiry < 5.0

    # max_keepalive_connections must be positive and reasonable for a
    # single adapter (platform APIs rarely parallelise beyond ~10).
    pool_size = limits.max_keepalive_connections
    assert pool_size is not None
    assert 1 <= pool_size <= 50
|
||||
|
||||
|
||||
def test_env_override_keepalive_expiry(monkeypatch):
    """``HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY`` sets the keep-alive expiry."""
    from gateway.platforms._http_client_limits import platform_httpx_limits

    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY", "7.5")
    overridden = platform_httpx_limits()
    assert overridden.keepalive_expiry == 7.5
|
||||
|
||||
|
||||
def test_env_override_max_keepalive(monkeypatch):
    """``HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE`` sets the keep-alive pool size."""
    from gateway.platforms._http_client_limits import platform_httpx_limits

    monkeypatch.setenv("HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE", "25")
    overridden = platform_httpx_limits()
    assert overridden.max_keepalive_connections == 25
|
||||
|
||||
|
||||
def test_env_override_rejects_garbage(monkeypatch):
    """Non-numeric or negative overrides still yield positive limits and do not raise."""
    from gateway.platforms._http_client_limits import platform_httpx_limits

    bad_overrides = {
        "HERMES_GATEWAY_HTTPX_KEEPALIVE_EXPIRY": "not-a-number",
        "HERMES_GATEWAY_HTTPX_MAX_KEEPALIVE": "-3",
    }
    for var_name, raw_value in bad_overrides.items():
        monkeypatch.setenv(var_name, raw_value)

    limits = platform_httpx_limits()

    # Non-positive / non-numeric → fell back to defaults (not the override values)
    expiry = limits.keepalive_expiry
    assert expiry is not None and expiry > 0
    pool_size = limits.max_keepalive_connections
    assert pool_size is not None
    assert pool_size > 0
|
||||
|
||||
|
||||
def test_helper_is_importable_from_every_platform_that_uses_it():
    """Every persistent-httpx-client platform adapter imports this helper.
    If any of those modules fails to import, this test surfaces it before
    the regression shows up as a runtime adapter-startup crash.

    The test has no assertions: it passes when every import below
    succeeds and fails with the import error otherwise.
    """
    # Just importing exercises the helper's import path for each adapter.
    # The imported names are intentionally unused (hence ``noqa: F401``).
    import gateway.platforms.qqbot.adapter  # noqa: F401
    import gateway.platforms.wecom  # noqa: F401
    import gateway.platforms.dingtalk  # noqa: F401
    import gateway.platforms.signal  # noqa: F401
    import gateway.platforms.bluebubbles  # noqa: F401
    import gateway.platforms.wecom_callback  # noqa: F401
|
||||
|
||||
|
||||
class TestWhatsappTypingLeakFix:
    """Structural regression test for #18451 in ``WhatsAppAdapter.send_typing``.

    The earlier implementation used a bare
    ``await self._http_session.post(...)``, which held the aiohttp
    response (and its socket, in CLOSE_WAIT) until garbage collection.
    The call must now sit inside ``async with`` so the response is
    released as soon as the call returns.

    The check reads the method's source text instead of running the
    coroutine: exercising it would need a live aiohttp server, and the
    contract being protected is the shape of the code.
    """

    def test_bare_await_removed(self):
        """``send_typing`` uses ``async with`` for the POST and no bare ``await``."""
        import inspect
        import gateway.platforms.whatsapp as whatsapp_module

        send_typing_source = inspect.getsource(
            whatsapp_module.WhatsAppAdapter.send_typing
        )
        wrapped_form = "async with self._http_session.post("
        bare_form = "await self._http_session.post("

        # The fix must be structural: the post() call is inside an
        # `async with`, not a bare `await`.
        assert wrapped_form in send_typing_source, (
            "send_typing must wrap self._http_session.post(...) in "
            "`async with` to release the aiohttp response socket "
            "(#18451). Otherwise the response sits in CLOSE_WAIT "
            "until GC."
        )
        # The old bare-await form must be gone.
        assert bare_form not in send_typing_source
|
||||
244
tests/gateway/test_reload_skills_discord_resync.py
Normal file
244
tests/gateway/test_reload_skills_discord_resync.py
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
"""Tests for `/reload-skills` resyncing the Discord ``/skill`` autocomplete.
|
||||
|
||||
Before this change, ``_register_skill_group`` captured the skill catalog
|
||||
in closure variables (``entries`` and ``skill_lookup``) so that the one
|
||||
``tree.add_command`` call at startup owned the only live copy of the
|
||||
skill list. The closure is never re-entered after startup, so
|
||||
``/reload-skills`` (which rescans the on-disk skill dir and refreshes
|
||||
the in-process registry) had no way to propagate its results into the
|
||||
autocomplete — new skills stayed invisible in the dropdown and deleted
|
||||
skills returned an "Unknown skill" error when the stale autocomplete
|
||||
entry was clicked.
|
||||
|
||||
The fix promotes those two variables to instance attributes
|
||||
(``_skill_entries`` / ``_skill_lookup``) and exposes a
|
||||
``refresh_skill_group()`` method that rescans and mutates them in
|
||||
place. The gateway ``_handle_reload_skills_command`` iterates its
|
||||
connected adapters and calls the method on any that expose it.
|
||||
|
||||
No ``tree.sync()`` is required because Discord fetches autocomplete
|
||||
options dynamically on every keystroke — we only need to rebind the
|
||||
data the live callbacks already read from.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
|
||||
def _make_adapter():
    """Return a bare ``DiscordAdapter`` that never ran ``__init__``.

    Skipping the constructor avoids its token checks; only ``config``
    (a ``MagicMock`` with an empty ``extra`` dict) and ``platform`` are
    set on the instance.
    """
    from gateway.platforms.base import Platform
    from gateway.platforms.discord import DiscordAdapter

    stub_config = MagicMock()
    stub_config.extra = {}

    bare_adapter = object.__new__(DiscordAdapter)
    bare_adapter.config = stub_config
    # ``platform`` is normally assigned by BasePlatformAdapter.__init__,
    # which is bypassed here; the inherited ``.name`` property reads it
    # for log formatting, so it is assigned by hand.
    bare_adapter.platform = Platform.DISCORD
    return bare_adapter
|
||||
|
||||
|
||||
class TestRefreshSkillGroup:
|
||||
def test_refresh_repopulates_entries_after_catalog_change(
|
||||
self, monkeypatch
|
||||
) -> None:
|
||||
"""The initial catalog is replaced wholesale on refresh.
|
||||
|
||||
Mirrors the observable /reload-skills case: a user adds a new
|
||||
skill to ~/.hermes/skills/, runs /reload-skills, and expects
|
||||
the autocomplete to surface it on the very next keystroke.
|
||||
"""
|
||||
adapter = _make_adapter()
|
||||
|
||||
# Start-of-process state: /register built the catalog from the
|
||||
# original collector output.
|
||||
adapter._skill_entries = [
|
||||
("old-skill", "Pre-existing skill", "/old-skill"),
|
||||
]
|
||||
adapter._skill_lookup = {"old-skill": ("Pre-existing skill", "/old-skill")}
|
||||
adapter._skill_group_reserved_names = set()
|
||||
adapter._skill_group_hidden_count = 0
|
||||
|
||||
# User adds new-skill to disk and removes old-skill.
|
||||
def fake_collector(*, reserved_names):
|
||||
return (
|
||||
{"creative": [("new-skill", "Fresh skill", "/new-skill")]}, # categories
|
||||
[], # uncategorized
|
||||
0, # hidden
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.commands.discord_skill_commands_by_category",
|
||||
fake_collector,
|
||||
)
|
||||
|
||||
new_count, hidden = adapter.refresh_skill_group()
|
||||
|
||||
assert new_count == 1
|
||||
assert hidden == 0
|
||||
# Old skill is gone, new skill is present.
|
||||
names = [n for n, _d, _k in adapter._skill_entries]
|
||||
assert names == ["new-skill"]
|
||||
assert "old-skill" not in adapter._skill_lookup
|
||||
assert adapter._skill_lookup["new-skill"] == ("Fresh skill", "/new-skill")
|
||||
|
||||
def test_refresh_sorts_entries_alphabetically(self, monkeypatch) -> None:
|
||||
"""Autocomplete order must be stable and predictable across refreshes."""
|
||||
adapter = _make_adapter()
|
||||
adapter._skill_entries = []
|
||||
adapter._skill_lookup = {}
|
||||
adapter._skill_group_reserved_names = set()
|
||||
adapter._skill_group_hidden_count = 0
|
||||
|
||||
def fake_collector(*, reserved_names):
|
||||
# Intentionally unsorted — the fix must resort.
|
||||
return (
|
||||
{"zzz": [("zebra", "", "/zebra")]},
|
||||
[("alpha", "", "/alpha")],
|
||||
0,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.commands.discord_skill_commands_by_category",
|
||||
fake_collector,
|
||||
)
|
||||
|
||||
adapter.refresh_skill_group()
|
||||
|
||||
names = [n for n, _d, _k in adapter._skill_entries]
|
||||
assert names == sorted(names) == ["alpha", "zebra"]
|
||||
|
||||
def test_refresh_handles_collector_exception_gracefully(self, monkeypatch) -> None:
    """A collector that raises must not break ``refresh_skill_group``.

    The refresh is expected to return the counts for the already-cached
    entries and to leave ``_skill_entries`` untouched, so the live
    autocomplete keeps serving the previous list.
    """
    cached_entries = [("keep", "kept", "/keep")]

    discord_adapter = _make_adapter()
    discord_adapter._skill_entries = [("keep", "kept", "/keep")]
    discord_adapter._skill_lookup = {"keep": ("kept", "/keep")}
    discord_adapter._skill_group_reserved_names = set()
    discord_adapter._skill_group_hidden_count = 0

    def boom(*, reserved_names):
        raise RuntimeError("simulated collector failure")

    monkeypatch.setattr(
        "hermes_cli.commands.discord_skill_commands_by_category",
        boom,
    )

    visible_count, hidden_count = discord_adapter.refresh_skill_group()

    # No crash, counts reflect the cached state, entries are preserved.
    assert visible_count == 1
    assert hidden_count == 0
    assert discord_adapter._skill_entries == cached_entries
|
||||
|
||||
|
||||
class TestRegisterSkillGroupUsesInstanceState:
    """Skill catalog data must live on the adapter, not in closure locals.

    If the callbacks registered by ``_register_skill_group`` captured local
    ``entries`` / ``skill_lookup`` variables instead of reading ``self``,
    a later refresh could never change what autocomplete serves.

    Exercising the full slash-command registration needs the
    ``discord.app_commands`` decorators (``@describe`` / ``@autocomplete`` /
    ``Command``), which are not stubbed in the hermetic test environment.
    The test therefore checks the data side-effects only:
    ``_refresh_skill_catalog_state`` must fill ``_skill_entries`` and
    ``_skill_lookup`` from the collector output.
    """

    def test_refresh_catalog_state_populates_instance_attrs(self, monkeypatch) -> None:
        """``_refresh_skill_catalog_state`` copies collector output onto the adapter."""
        discord_adapter = _make_adapter()
        discord_adapter._skill_group_reserved_names = set()

        def fake_collector(*, reserved_names):
            categorized = {"creative": [("ascii-art", "Make ASCII", "/ascii-art")]}
            return (categorized, [], 0)

        monkeypatch.setattr(
            "hermes_cli.commands.discord_skill_commands_by_category",
            fake_collector,
        )

        discord_adapter._refresh_skill_catalog_state()

        # Both the autocomplete and the handler read these attributes, so
        # populating them on the instance is what makes a refresh visible.
        expected_entries = [("ascii-art", "Make ASCII", "/ascii-art")]
        expected_lookup = {"ascii-art": ("Make ASCII", "/ascii-art")}
        assert discord_adapter._skill_entries == expected_entries
        assert discord_adapter._skill_lookup == expected_lookup
        assert discord_adapter._skill_group_hidden_count == 0
|
||||
|
||||
|
||||
class TestHandleReloadSkillsCallsRefreshSkillGroup:
    """Gateway-side integration: /reload-skills must call refresh on adapters."""

    def test_orchestrator_calls_refresh_skill_group_on_every_adapter(self):
        """Sync + async refresh_skill_group implementations both get awaited/called.

        The orchestrator iterates ``self.adapters`` and calls
        ``refresh_skill_group`` if it exists. Adapters that don't
        implement it (today: everything except Discord) are silently
        skipped without raising.
        """
        import asyncio
        from unittest.mock import patch, MagicMock

        # Import without constructing a real runner — test the method
        # directly against an ``object.__new__`` instance.
        from gateway.run import GatewayRunner
        runner = object.__new__(GatewayRunner)

        sync_refresh = MagicMock(return_value=(5, 0))
        async_called = {"flag": False}

        class AsyncAdapter:
            name = "async-platform"

            async def refresh_skill_group(self):
                async_called["flag"] = True
                return (3, 0)

        class SyncAdapter:
            name = "sync-platform"
            refresh_skill_group = sync_refresh

        class NoOpAdapter:
            name = "other"
            # No refresh_skill_group — must not crash.

        runner.adapters = {
            "discord": AsyncAdapter(),
            "slack": SyncAdapter(),
            "telegram": NoOpAdapter(),
        }

        # Mock reload_skills itself so no disk scan runs.
        fake_result = {"added": [], "removed": [], "total": 7}

        # Drive the coroutine on a private event loop instead of
        # ``asyncio.get_event_loop()``: calling that from synchronous code
        # is deprecated when no loop is current (and errors on recent
        # Python versions), and it would otherwise depend on whatever loop
        # state earlier tests left behind. The loop is always closed.
        loop = asyncio.new_event_loop()
        try:
            with patch(
                "agent.skill_commands.reload_skills", return_value=fake_result
            ):
                event = MagicMock()
                event.source = MagicMock()
                # _session_key_for_source may be called — make it safe.
                runner._session_key_for_source = lambda src: None
                runner._pending_skills_reload_notes = {}

                result = loop.run_until_complete(
                    runner._handle_reload_skills_command(event)
                )
        finally:
            loop.close()

        assert "Skills Reloaded" in result
        assert sync_refresh.called, "sync adapter refresh must be invoked"
        assert async_called["flag"], "async adapter refresh must be awaited"
|
||||
|
|
@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock
|
|||
import pytest
|
||||
|
||||
import gateway.run as gateway_run
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.base import MessageEvent, MessageType
|
||||
from gateway.config import HomeChannel, Platform
|
||||
from gateway.platforms.base import MessageEvent, MessageType, SendResult
|
||||
from gateway.session import build_session_key
|
||||
from tests.gateway.restart_test_helpers import (
|
||||
make_restart_runner,
|
||||
|
|
@ -17,6 +17,22 @@ from tests.gateway.restart_test_helpers import (
|
|||
)
|
||||
|
||||
|
||||
# ── restart marker helpers ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_restart_notification_pending_false_without_marker(tmp_path, monkeypatch):
    """With an empty Hermes home directory, no restart notification is pending."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    pending = gateway_run._restart_notification_pending()

    assert pending is False
|
||||
|
||||
|
||||
def test_restart_notification_pending_true_with_marker(tmp_path, monkeypatch):
    """A ``.restart_notify.json`` file in the Hermes home marks a pending notification."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    marker = tmp_path / ".restart_notify.json"
    marker.write_text("{}")

    pending = gateway_run._restart_notification_pending()

    assert pending is True
|
||||
|
||||
|
||||
# ── _handle_restart_command writes .restart_notify.json ──────────────────
|
||||
|
||||
|
||||
|
|
@ -143,6 +159,184 @@ async def test_restart_command_uses_atomic_json_writes_for_marker_files(tmp_path
|
|||
assert calls[1][1]["platform"] == "telegram"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_sethome_updates_running_config_for_same_process_restart(tmp_path, monkeypatch):
    """/sethome writes the env value and refreshes the runner's in-memory config.

    After handling the command, the persisted ``TELEGRAM_HOME_CHANNEL`` value
    and the home channel reported by ``runner.config`` must both point at the
    chat the command came from.
    """
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    persisted = {}

    def _record_env_value(key, value):
        # Stand-in for the real env writer: just remember what was saved.
        persisted[key] = value

    monkeypatch.setattr("hermes_cli.config.save_env_value", _record_env_value)

    runner, _adapter = make_restart_runner()
    origin = make_restart_source(chat_id="home-42")
    origin.chat_name = "Ops Home"
    sethome_event = MessageEvent(
        text="/sethome",
        message_type=MessageType.TEXT,
        source=origin,
        message_id="m-home",
    )

    reply = await runner._handle_set_home_command(sethome_event)

    configured_home = runner.config.get_home_channel(Platform.TELEGRAM)
    assert "Home channel set" in reply
    assert persisted["TELEGRAM_HOME_CHANNEL"] == "home-42"
    assert configured_home is not None
    assert configured_home.chat_id == "home-42"
    assert configured_home.name == "Ops Home"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_sethome_preserves_thread_target_for_same_process_restart(tmp_path, monkeypatch):
    """/sethome issued inside a topic/thread keeps the thread id on the home target.

    Both the persisted env values and the in-memory home channel must carry
    the parent chat id together with the thread id.
    """
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    persisted = {}

    def _record_env_value(key, value):
        # Stand-in for the real env writer: just remember what was saved.
        persisted[key] = value

    monkeypatch.setattr("hermes_cli.config.save_env_value", _record_env_value)

    runner, _adapter = make_restart_runner()
    origin = make_restart_source(chat_id="parent-42", thread_id="topic-7")
    origin.chat_name = "Ops Topic"
    sethome_event = MessageEvent(
        text="/sethome",
        message_type=MessageType.TEXT,
        source=origin,
        message_id="m-home-thread",
    )

    reply = await runner._handle_set_home_command(sethome_event)

    configured_home = runner.config.get_home_channel(Platform.TELEGRAM)
    assert "Home channel set" in reply
    assert persisted["TELEGRAM_HOME_CHANNEL"] == "parent-42"
    assert persisted["TELEGRAM_HOME_CHANNEL_THREAD_ID"] == "topic-7"
    assert configured_home is not None
    assert configured_home.chat_id == "parent-42"
    assert configured_home.thread_id == "topic-7"
|
||||
|
||||
|
||||
# ── home-channel startup notifications ─────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_to_configured_home(tmp_path, monkeypatch):
    """A configured home channel receives the "gateway online" message.

    With no thread on the home channel, ``send`` is called with only the chat
    id and the text, and the returned set names the delivered target.
    """
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    runner, adapter = make_restart_runner()
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home
    adapter.send = AsyncMock()

    delivered_targets = await runner._send_home_channel_startup_notifications()

    assert delivered_targets == {("telegram", "home-42", None)}
    adapter.send.assert_called_once_with(
        "home-42",
        "♻️ Gateway online — Hermes is back and ready.",
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_preserves_thread_metadata(
    tmp_path, monkeypatch
):
    """A home channel with a thread id is notified with ``thread_id`` metadata.

    The delivered-target tuple must include the thread id, and ``send`` must
    receive it through the ``metadata`` keyword.
    """
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    runner, adapter = make_restart_runner()
    threaded_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="parent-42",
        name="Ops Topic",
        thread_id="topic-7",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = threaded_home
    ok_result = SendResult(success=True, message_id="home")
    adapter.send = AsyncMock(return_value=ok_result)

    delivered_targets = await runner._send_home_channel_startup_notifications()

    assert delivered_targets == {("telegram", "parent-42", "topic-7")}
    adapter.send.assert_called_once_with(
        "parent-42",
        "♻️ Gateway online — Hermes is back and ready.",
        metadata={"thread_id": "topic-7"},
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_skips_restart_target(
    tmp_path, monkeypatch
):
    """A home channel listed in ``skip_targets`` gets no startup message."""
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    runner, adapter = make_restart_runner()
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home
    adapter.send = AsyncMock()

    already_notified = {("telegram", "42", None)}
    delivered_targets = await runner._send_home_channel_startup_notifications(
        skip_targets=already_notified
    )

    assert delivered_targets == set()
    adapter.send.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_does_not_skip_different_thread(
    tmp_path, monkeypatch
):
    """A skip target in another thread of the same chat must not suppress delivery.

    The home channel has no thread id while the skip target names
    ``topic-7``, so the home channel is still notified.
    """
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    runner, adapter = make_restart_runner()
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home
    ok_result = SendResult(success=True, message_id="home")
    adapter.send = AsyncMock(return_value=ok_result)

    other_thread_target = {("telegram", "42", "topic-7")}
    delivered_targets = await runner._send_home_channel_startup_notifications(
        skip_targets=other_thread_target
    )

    assert delivered_targets == {("telegram", "42", None)}
    adapter.send.assert_called_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_home_channel_startup_notification_ignores_false_send_result(
    tmp_path, monkeypatch
):
    """A ``SendResult(success=False)`` is not reported as a delivered target.

    ``send`` is still attempted exactly once, but the returned set stays empty.
    """
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    runner, adapter = make_restart_runner()
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home
    failed_result = SendResult(success=False, error="network down")
    adapter.send = AsyncMock(return_value=failed_result)

    delivered_targets = await runner._send_home_channel_startup_notifications()

    assert delivered_targets == set()
    adapter.send.assert_called_once()
|
||||
|
||||
|
||||
# ── _send_restart_notification ───────────────────────────────────────────
|
||||
|
||||
|
||||
|
|
@ -160,8 +354,9 @@ async def test_send_restart_notification_delivers_and_cleans_up(tmp_path, monkey
|
|||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock()
|
||||
|
||||
await runner._send_restart_notification()
|
||||
delivered_target = await runner._send_restart_notification()
|
||||
|
||||
assert delivered_target == ("telegram", "42", None)
|
||||
adapter.send.assert_called_once()
|
||||
call_args = adapter.send.call_args
|
||||
assert call_args[0][0] == "42" # chat_id
|
||||
|
|
@ -185,8 +380,9 @@ async def test_send_restart_notification_with_thread(tmp_path, monkeypatch):
|
|||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock()
|
||||
|
||||
await runner._send_restart_notification()
|
||||
delivered_target = await runner._send_restart_notification()
|
||||
|
||||
assert delivered_target == ("telegram", "99", "topic_7")
|
||||
call_args = adapter.send.call_args
|
||||
assert call_args[1]["metadata"] == {"thread_id": "topic_7"}
|
||||
assert not notify_path.exists()
|
||||
|
|
@ -240,6 +436,94 @@ async def test_send_restart_notification_cleans_up_on_send_failure(
|
|||
runner, adapter = make_restart_runner()
|
||||
adapter.send = AsyncMock(side_effect=RuntimeError("network down"))
|
||||
|
||||
await runner._send_restart_notification()
|
||||
delivered_target = await runner._send_restart_notification()
|
||||
|
||||
assert not notify_path.exists() # cleaned up despite error
|
||||
# File cleaned up even though send raised.
|
||||
assert delivered_target is None
|
||||
assert not notify_path.exists()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_restart_notification_logs_warning_on_sendresult_failure(
    tmp_path, monkeypatch, caplog
):
    """A ``SendResult(success=False)`` must produce a WARNING, never the INFO success line.

    Regression guard: ``adapter.send()`` can swallow provider errors (for
    example Telegram "Chat not found") and report them through
    ``SendResult(success=False)`` instead of raising. The caller used to
    ignore that return value and always log "Sent restart notification to ..."
    at INFO, hiding real delivery failures behind a fake success line.
    """
    from gateway.platforms.base import SendResult

    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    notify_path = tmp_path / ".restart_notify.json"
    marker_payload = {
        "platform": "telegram",
        "chat_id": "42",
    }
    notify_path.write_text(json.dumps(marker_payload))

    runner, adapter = make_restart_runner()
    failed_result = SendResult(success=False, error="Chat not found")
    adapter.send = AsyncMock(return_value=failed_result)

    with caplog.at_level("DEBUG", logger="gateway.run"):
        delivered_target = await runner._send_restart_notification()

    # Bucket the captured records in one pass: INFO "success" lines versus
    # WARNING lines that mention both the non-delivery and the provider error.
    success_lines = []
    warning_lines = []
    for record in caplog.records:
        if record.levelname == "INFO":
            if "Sent restart notification" in record.getMessage():
                success_lines.append(record)
        elif record.levelname == "WARNING":
            text = record.getMessage()
            if "was not delivered" in text and "Chat not found" in text:
                warning_lines.append(record)

    assert delivered_target is None
    assert not success_lines, (
        "Expected no INFO 'Sent restart notification' line when send failed, "
        f"got: {[rec.getMessage() for rec in success_lines]}"
    )
    assert warning_lines, (
        "Expected a WARNING line mentioning the failure; "
        f"got records: {[(rec.levelname, rec.getMessage()) for rec in caplog.records]}"
    )
    # The marker file is removed even though delivery failed.
    assert not notify_path.exists()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_restart_notification_logs_info_on_sendresult_success(
    tmp_path, monkeypatch, caplog
):
    """A ``SendResult(success=True)`` still yields the INFO success log line.

    The delivered target is returned and the marker file is removed.
    """
    from gateway.platforms.base import SendResult

    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    notify_path = tmp_path / ".restart_notify.json"
    marker_payload = {
        "platform": "telegram",
        "chat_id": "42",
    }
    notify_path.write_text(json.dumps(marker_payload))

    runner, adapter = make_restart_runner()
    ok_result = SendResult(success=True, message_id="m-1")
    adapter.send = AsyncMock(return_value=ok_result)

    with caplog.at_level("DEBUG", logger="gateway.run"):
        delivered_target = await runner._send_restart_notification()

    success_lines = []
    for record in caplog.records:
        if record.levelname == "INFO" and "Sent restart notification" in record.getMessage():
            success_lines.append(record)

    assert delivered_target == ("telegram", "42", None)
    assert success_lines, (
        "Expected INFO 'Sent restart notification' when send succeeded; "
        f"got records: {[(rec.levelname, rec.getMessage()) for rec in caplog.records]}"
    )
    assert not notify_path.exists()
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.config import GatewayConfig, HomeChannel, Platform, PlatformConfig
|
||||
from gateway.platforms.base import SendResult
|
||||
from gateway.run import (
|
||||
_auto_continue_freshness_window,
|
||||
_coerce_gateway_timestamp,
|
||||
|
|
@ -376,8 +377,8 @@ class TestSuspendRecentlyActiveSkipsResumePending:
|
|||
assert e.suspended is False
|
||||
assert e.resume_pending is True
|
||||
|
||||
def test_non_resume_pending_still_suspended(self, tmp_path):
|
||||
"""Non-resume sessions still get the old crash-recovery suspension."""
|
||||
def test_non_resume_pending_gets_resume_pending(self, tmp_path):
|
||||
"""Non-resume sessions are now marked resume_pending (not suspended)."""
|
||||
store = _make_store(tmp_path)
|
||||
source_a = _make_source(chat_id="a")
|
||||
source_b = _make_source(chat_id="b")
|
||||
|
|
@ -386,9 +387,11 @@ class TestSuspendRecentlyActiveSkipsResumePending:
|
|||
store.mark_resume_pending(entry_a.session_key)
|
||||
|
||||
count = store.suspend_recently_active()
|
||||
# entry_a is already resume_pending → skipped. entry_b gets marked.
|
||||
assert count == 1
|
||||
assert store._entries[entry_a.session_key].suspended is False
|
||||
assert store._entries[entry_b.session_key].suspended is True
|
||||
assert store._entries[entry_b.session_key].resume_pending is True
|
||||
assert store._entries[entry_b.session_key].suspended is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -929,6 +932,84 @@ async def test_restart_banner_uses_try_to_resume_wording():
|
|||
assert "try to resume" in msg
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_restart_notifies_home_channel_even_without_active_sessions():
    """On restart, the home channel gets the shutdown banner even with no running agents."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home

    await runner._notify_active_sessions_of_shutdown()

    expected_banner = (
        "⚠️ Gateway restarting — Your current task will be interrupted. "
        "Send any message after restart and I'll try to resume where you left off."
    )
    assert adapter.sent == [expected_banner]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_restart_home_channel_notification_dedupes_active_chat():
    """When the home channel is also an active chat, only one message is sent."""
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    active_session_key = "agent:main:telegram:dm:999"
    runner._running_agents[active_session_key] = MagicMock()
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="999",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home

    await runner._notify_active_sessions_of_shutdown()

    sent_count = len(adapter.sent)
    assert sent_count == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_restart_home_channel_notification_not_deduped_across_threads():
    """An active session in a thread and a thread-less home channel both get notified.

    Same chat id, different thread: two sends are expected, the first carrying
    ``thread_id`` metadata and the second carrying none.
    """
    runner, adapter = make_restart_runner()
    runner._restart_requested = True

    session_key = "agent:main:telegram:group:999"
    threaded_origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="999",
        chat_type="group",
        user_id="u1",
        thread_id="topic-7",
    )
    runner.session_store._entries[session_key] = MagicMock(origin=threaded_origin)
    runner._running_agents[session_key] = MagicMock()

    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="999",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home

    await runner._notify_active_sessions_of_shutdown()

    assert len(adapter.sent) == 2
    first_call, second_call = adapter.sent_calls[0], adapter.sent_calls[1]
    assert first_call[2] == {"thread_id": "topic-7"}
    assert second_call[2] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_restart_home_channel_notification_ignores_false_send_result():
    """A failed ``SendResult`` from the home-channel send is tolerated.

    The shutdown notification completes without raising and ``send`` is
    attempted exactly once.
    """
    runner, adapter = make_restart_runner()
    runner._restart_requested = True
    telegram_home = HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="home-42",
        name="Ops Home",
    )
    runner.config.platforms[Platform.TELEGRAM].home_channel = telegram_home
    failed_result = SendResult(success=False, error="network down")
    adapter.send = AsyncMock(return_value=failed_result)

    await runner._notify_active_sessions_of_shutdown()

    adapter.send.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stuck-loop escalation integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -231,6 +231,55 @@ class TestSlackConnectCleanup:
|
|||
mock_release.assert_called_once_with("slack-app-token", "xapp-fake")
|
||||
assert adapter._platform_lock_identity is None
|
||||
|
||||
@pytest.mark.asyncio
async def test_reconnect_closes_previous_handler_to_prevent_zombie_socket(self):
    """Regression for #18980: ``connect()`` must close a handler left by a prior connect.

    If the adapter already holds a live ``AsyncSocketModeHandler`` (for
    example during a gateway restart), the old handler has to be closed
    before a new one is created. Otherwise the old Socket Mode websocket
    stays alive and both connections dispatch every Slack event, producing
    double responses — the same bug that affected DiscordAdapter (#18187).
    """
    adapter = SlackAdapter(PlatformConfig(enabled=True, token="xoxb-fake"))

    # State left behind by an earlier connect() call.
    stale_handler = AsyncMock()
    stale_handler.close_async = AsyncMock()
    adapter._handler = stale_handler

    def _noop_decorator(event_type):
        # Registration decorators hand the wrapped function back unchanged.
        def decorator(fn):
            return fn

        return decorator

    mock_app = MagicMock()
    mock_app.event = _noop_decorator
    mock_app.command = _noop_decorator
    mock_app.action = _noop_decorator
    mock_app.client = AsyncMock()

    auth_payload = {
        "user_id": "U_BOT",
        "user": "testbot",
        "team_id": "T_FAKE",
        "team": "FakeTeam",
    }
    mock_web_client = AsyncMock()
    mock_web_client.auth_test = AsyncMock(return_value=auth_payload)

    fresh_handler = MagicMock()

    with patch.object(_slack_mod, "AsyncApp", return_value=mock_app), \
         patch.object(_slack_mod, "AsyncWebClient", return_value=mock_web_client), \
         patch.object(_slack_mod, "AsyncSocketModeHandler", return_value=fresh_handler), \
         patch.dict(os.environ, {"SLACK_APP_TOKEN": "xapp-fake"}), \
         patch("gateway.status.acquire_scoped_lock", return_value=(True, None)), \
         patch("gateway.status.release_scoped_lock"), \
         patch("asyncio.create_task"):
        connected = await adapter.connect()

    assert connected is True
    stale_handler.close_async.assert_awaited_once_with()
    assert adapter._handler is fresh_handler
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSlackProxyBehavior
|
||||
|
|
|
|||
|
|
@ -132,6 +132,7 @@ async def test_reconnect_success_resets_error_count():
|
|||
|
||||
mock_app = MagicMock()
|
||||
mock_app.updater = mock_updater
|
||||
mock_app.bot.get_me = AsyncMock(return_value=MagicMock()) # heartbeat probe path
|
||||
adapter._app = mock_app
|
||||
|
||||
with patch("asyncio.sleep", new_callable=AsyncMock):
|
||||
|
|
@ -139,6 +140,15 @@ async def test_reconnect_success_resets_error_count():
|
|||
|
||||
assert adapter._polling_network_error_count == 0
|
||||
|
||||
# Clean up the heartbeat-probe task scheduled after a successful reconnect.
|
||||
pending = [t for t in adapter._background_tasks if not t.done()]
|
||||
for t in pending:
|
||||
t.cancel()
|
||||
try:
|
||||
await t
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reconnect_triggers_fatal_after_max_retries():
|
||||
|
|
@ -284,3 +294,182 @@ async def test_drain_helper_noop_without_app():
|
|||
adapter._app = None
|
||||
# Should not raise
|
||||
await adapter._drain_polling_connections()
|
||||
|
||||
|
||||
# ── Heartbeat probe ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_heartbeat_probe_no_op_when_polling_healthy():
    """
    Healthy case for the post-reconnect probe: the updater reports running
    and ``bot.get_me()`` answers, so the probe calls ``get_me`` once and
    does not re-enter the reconnect handler.
    """
    adapter = _make_adapter()

    healthy_updater = MagicMock(running=True)
    telegram_app = MagicMock(updater=healthy_updater)
    telegram_app.bot.get_me = AsyncMock(return_value=MagicMock())
    adapter._app = telegram_app

    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    telegram_app.bot.get_me.assert_awaited_once()
    adapter._handle_polling_network_error.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_heartbeat_probe_reenters_ladder_when_updater_not_running():
    """
    When ``Updater.running`` is False at probe time the adapter is treated
    as wedged: ``get_me`` is skipped and the reconnect handler is awaited
    once with a ``RuntimeError`` whose message mentions "not running".
    """
    adapter = _make_adapter()

    stopped_updater = MagicMock(running=False)
    telegram_app = MagicMock(updater=stopped_updater)
    telegram_app.bot.get_me = AsyncMock()
    adapter._app = telegram_app

    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    telegram_app.bot.get_me.assert_not_called()
    adapter._handle_polling_network_error.assert_awaited_once()
    reported = adapter._handle_polling_network_error.await_args.args[0]
    assert isinstance(reported, RuntimeError)
    assert "not running" in str(reported).lower()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_heartbeat_probe_reenters_ladder_when_get_me_times_out():
    """
    A ``bot.get_me()`` call that exceeds the probe timeout counts as wedged
    and sends the adapter back into the reconnect handler.
    Simulates the connection-pool wedge that motivated this fix.
    """
    adapter = _make_adapter()

    async def hang_forever(*args, **kwargs):
        await asyncio.sleep(3600)

    async def fast_wait_for(coro, timeout):
        # Stand-in for asyncio.wait_for: dispose of the pending coroutine,
        # then report the timeout immediately instead of actually waiting.
        if asyncio.iscoroutine(coro):
            coro.close()
        raise asyncio.TimeoutError()

    running_updater = MagicMock(running=True)
    telegram_app = MagicMock(updater=running_updater)
    telegram_app.bot.get_me = AsyncMock(side_effect=hang_forever)
    adapter._app = telegram_app

    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock), \
         patch("gateway.platforms.telegram.asyncio.wait_for", new=fast_wait_for):
        await adapter._verify_polling_after_reconnect()

    adapter._handle_polling_network_error.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_heartbeat_probe_reenters_ladder_on_get_me_network_error():
    """
    An exception raised by ``bot.get_me()`` (here a ``ConnectionError``) must
    be forwarded unchanged to the reconnect handler.
    """
    adapter = _make_adapter()

    running_updater = MagicMock(running=True)
    telegram_app = MagicMock(updater=running_updater)
    telegram_app.bot.get_me = AsyncMock(side_effect=ConnectionError("pool wedged"))
    adapter._app = telegram_app

    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    adapter._handle_polling_network_error.assert_awaited_once()
    forwarded = adapter._handle_polling_network_error.await_args.args[0]
    assert isinstance(forwarded, ConnectionError)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_heartbeat_probe_skips_when_already_fatal():
    """
    When the adapter has already been put into a fatal-error state, the probe
    must return without calling ``get_me`` or the reconnect handler.
    """
    adapter = _make_adapter()
    adapter._set_fatal_error("telegram_polling_conflict", "already fatal", retryable=False)

    telegram_app = MagicMock()
    telegram_app.bot.get_me = AsyncMock()
    adapter._app = telegram_app

    adapter._handle_polling_network_error = AsyncMock()

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._verify_polling_after_reconnect()

    telegram_app.bot.get_me.assert_not_called()
    adapter._handle_polling_network_error.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_reconnect_schedules_heartbeat_probe_on_success():
    """
    A successful ``start_polling()`` in the reconnect path must add a probe
    task to ``_background_tasks``. Without it, a wedged Updater would sit
    silent indefinitely, with no further error callback to advance the
    reconnect ladder.
    """
    adapter = _make_adapter()
    adapter._polling_network_error_count = 1

    # start_polling is a plain AsyncMock, i.e. the reconnect succeeds.
    polling_updater = MagicMock(
        running=True,
        stop=AsyncMock(),
        start_polling=AsyncMock(),
    )
    telegram_app = MagicMock(updater=polling_updater)
    telegram_app.bot.get_me = AsyncMock(return_value=MagicMock())
    adapter._app = telegram_app

    tasks_before = len(adapter._background_tasks)

    with patch("asyncio.sleep", new_callable=AsyncMock):
        await adapter._handle_polling_network_error(Exception("Bad Gateway"))

    assert len(adapter._background_tasks) > tasks_before, (
        "Expected a heartbeat probe task to be scheduled after a successful "
        "reconnect's start_polling()"
    )

    # Clean up: snapshot the unfinished tasks first, then cancel and drain
    # each one so nothing is left pending when the test ends.
    unfinished = [task for task in adapter._background_tasks if not task.done()]
    for task in unfinished:
        task.cancel()
        try:
            await task
        except (asyncio.CancelledError, Exception):
            pass
|
||||
|
|
|
|||
185
tests/gateway/test_unavailable_skill_hint.py
Normal file
185
tests/gateway/test_unavailable_skill_hint.py
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
"""Tests for gateway.run._check_unavailable_skill.
|
||||
|
||||
Regression coverage for the dir-name-vs-frontmatter-name drift bug.
|
||||
The hint function used to compare the skill's parent-directory name
|
||||
against the typed command and the disabled list. That silently missed
|
||||
every skill whose directory name differs from its declared frontmatter
|
||||
name (~19 skills on a standard install), so users typing a real slug
|
||||
like ``/stable-diffusion-image-generation`` got a generic "unknown
|
||||
command" response instead of the intended "disabled — enable with …"
|
||||
or "not installed — install with …" hint.
|
||||
|
||||
These tests pin the fixed behavior:
|
||||
|
||||
* Slug is derived from the frontmatter ``name:`` (exactly matching
|
||||
:func:`agent.skill_commands.scan_skill_commands`), so the slug differs
|
||||
from the directory name when the declared name is multi-word.
|
||||
* ``disabled`` membership is checked by the declared name, because that
|
||||
is what :func:`hermes_cli.skills_config.save_disabled_skills` stores.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_skills(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Provide an isolated skills directory so the real user config is untouched.

    Creates ``<tmp_path>/.hermes/skills``, points ``HERMES_HOME`` at
    ``<tmp_path>/.hermes`` and redirects ``Path.home()`` to ``tmp_path``.

    Returns:
        The freshly created ``skills`` directory.
    """
    home = tmp_path / ".hermes"
    skills_dir = home / "skills"
    home.mkdir()
    skills_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    # Wrap in staticmethod: a bare zero-arg lambda stored on the class turns
    # into a bound method on instance access (``Path("x").home()``) and would
    # raise TypeError. staticmethod keeps both call forms working.
    monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
    return skills_dir
|
||||
|
||||
|
||||
def _write_skill(skills_dir: Path, rel: str, frontmatter_name: str) -> Path:
|
||||
"""Create a SKILL.md at ``<skills_dir>/<rel>/SKILL.md``."""
|
||||
skill_dir = skills_dir / rel
|
||||
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
skill_md.write_text(
|
||||
f"---\nname: {frontmatter_name}\ndescription: test skill\n---\nBody.\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
return skill_md
|
||||
|
||||
|
||||
def test_frontmatter_slug_matched_even_when_dir_name_differs(
    tmp_skills: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A disabled skill is found by its frontmatter slug, not its directory name.

    The skill lives in ``mlops/stable-diffusion`` but declares the name
    ``Stable Diffusion Image Generation``. Typing the slug
    ``stable-diffusion-image-generation`` must produce the "disabled" hint
    even though it does not equal the directory name ``stable-diffusion``.
    """
    from gateway import run as gateway_run

    declared_name = "Stable Diffusion Image Generation"
    _write_skill(tmp_skills, "mlops/stable-diffusion", declared_name)

    # Config disables by declared name (matches what `hermes skills config` writes).
    monkeypatch.setattr(
        "gateway.run._get_disabled_skill_names",
        lambda: {declared_name},
        raising=False,
    )
    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names",
        return_value={declared_name},
    )
    dirs_patch = patch(
        "agent.skill_utils.get_all_skills_dirs",
        return_value=[tmp_skills],
    )
    with disabled_patch, dirs_patch:
        msg = gateway_run._check_unavailable_skill("stable-diffusion-image-generation")

    assert msg is not None, (
        "expected a 'disabled' hint for the frontmatter-derived slug; "
        "the old code compared the dir name 'stable-diffusion' and returned None"
    )
    assert "disabled" in msg.lower()
    assert "hermes skills config" in msg
|
||||
|
||||
|
||||
def test_unknown_command_still_returns_none(
    tmp_skills: Path,
) -> None:
    """No hint is produced for a command that matches no skill on disk."""
    from gateway import run as gateway_run

    _write_skill(tmp_skills, "creative/ascii-art", "ascii-art")

    disabled_patch = patch("tools.skills_tool._get_disabled_skill_names", return_value=set())
    dirs_patch = patch("agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills])
    with disabled_patch, dirs_patch:
        hint = gateway_run._check_unavailable_skill("no-such-skill")
        assert hint is None
|
||||
|
||||
|
||||
def test_matched_but_not_disabled_returns_none(
    tmp_skills: Path,
) -> None:
    """An installed skill that is not in the disabled set yields no hint."""
    from gateway import run as gateway_run

    _write_skill(tmp_skills, "creative/ascii-art", "ascii-art")

    disabled_patch = patch("tools.skills_tool._get_disabled_skill_names", return_value=set())
    dirs_patch = patch("agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills])
    with disabled_patch, dirs_patch:
        hint = gateway_run._check_unavailable_skill("ascii-art")
        assert hint is None
|
||||
|
||||
|
||||
def test_slug_normalization_strips_non_alnum(
    tmp_skills: Path,
) -> None:
    """Frontmatter ``C++ Code Review`` is reachable via the slug ``c-code-review``.

    The skill is disabled under its declared name; typing the slug (with the
    ``+`` characters gone) must still produce the "disabled" hint.
    """
    from gateway import run as gateway_run

    declared_name = "C++ Code Review"
    _write_skill(tmp_skills, "software-development/cpp-review", declared_name)

    disabled_patch = patch(
        "tools.skills_tool._get_disabled_skill_names",
        return_value={declared_name},
    )
    dirs_patch = patch("agent.skill_utils.get_all_skills_dirs", return_value=[tmp_skills])
    with disabled_patch, dirs_patch:
        msg = gateway_run._check_unavailable_skill("c-code-review")

    assert msg is not None
    assert "disabled" in msg.lower()
|
||||
|
||||
|
||||
def test_optional_skill_uses_frontmatter_slug(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The optional-skills branch also matches on the frontmatter-derived slug.

    An optional skill stored at ``optional-skills/mlops/stable-diffusion``
    declares the name ``Stable Diffusion Image Generation``. Typing the slug
    must yield a "not installed" hint that names the skill by its
    ``official/mlops/stable-diffusion`` path.
    """
    from gateway import run as gateway_run

    # Build an isolated optional-skills dir
    optional = tmp_path / "optional-skills"
    skill_md = optional / "mlops" / "stable-diffusion" / "SKILL.md"
    skill_md.parent.mkdir(parents=True)
    skill_md.write_text(
        "---\nname: Stable Diffusion Image Generation\ndescription: test\n---\n",
        encoding="utf-8",
    )

    # Point the optional lookup at our tmp dir. The source reads from
    # ``get_optional_skills_dir(repo_root / "optional-skills")`` — we
    # can't easily retarget ``repo_root``, so patch the resolver.
    monkeypatch.setattr(
        "hermes_constants.get_optional_skills_dir",
        lambda _default: optional,
        raising=False,
    )

    # Ensure the "disabled" branch doesn't match anything so we fall
    # through to the optional-skills branch.
    empty_skills = tmp_path / "empty-skills"
    empty_skills.mkdir()
    disabled_patch = patch("tools.skills_tool._get_disabled_skill_names", return_value=set())
    dirs_patch = patch("agent.skill_utils.get_all_skills_dirs", return_value=[empty_skills])
    with disabled_patch, dirs_patch:
        msg = gateway_run._check_unavailable_skill("stable-diffusion-image-generation")

    assert msg is not None, (
        "optional-skills branch should recognize the frontmatter-derived slug; "
        "the old dir-name-based check returned None here too"
    )
    assert "not installed" in msg.lower()
    assert "official/mlops/stable-diffusion" in msg
|
||||
|
|
@ -284,6 +284,66 @@ class TestBridgeRuntimeFailure:
|
|||
mock_fh.close.assert_called_once()
|
||||
assert adapter._bridge_log_fh is None
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("returncode", [0, -2, -15])
async def test_shutdown_suppresses_fatal_on_planned_bridge_exit(self, returncode):
    """Planned bridge exits during shutdown must not be reported as fatal.

    With ``_shutting_down`` set, a bridge return code of 0, -2 or -15 must
    make ``_check_managed_bridge_exit()`` return ``None``, leave
    ``fatal_error_code`` unset and never await the fatal-error handler.

    Regression guard: per the original report, every gateway
    shutdown/restart logged "Fatal whatsapp adapter error
    (whatsapp_bridge_exited)" because the adapter's own SIGTERM (-15) was
    classified as an unexpected crash.
    """
    adapter = _make_adapter()
    on_fatal = AsyncMock()
    adapter.set_fatal_error_handler(on_fatal)
    adapter._running = True
    adapter._http_session = MagicMock()
    adapter._bridge_log_fh = MagicMock()
    adapter._shutting_down = True  # disconnect() sets this before SIGTERM

    bridge = MagicMock()
    bridge.poll.return_value = returncode
    adapter._bridge_process = bridge

    outcome = await adapter._check_managed_bridge_exit()

    assert outcome is None, (
        f"returncode={returncode} during shutdown should be suppressed, "
        f"got fatal message: {outcome!r}"
    )
    assert adapter.fatal_error_code is None
    on_fatal.assert_not_awaited()
|
||||
|
||||
@pytest.mark.asyncio
async def test_shutdown_still_surfaces_nonzero_crash(self):
    """A bridge exit outside the suppressed codes stays fatal even during shutdown.

    The bridge reports return code 137 while ``_shutting_down`` is set. The
    suppression list is deliberately narrow (0, -2, -15), so this exit must
    still produce the "exited unexpectedly" message, set
    ``fatal_error_code`` to ``whatsapp_bridge_exited`` and await the
    fatal-error handler once.
    """
    adapter = _make_adapter()
    on_fatal = AsyncMock()
    adapter.set_fatal_error_handler(on_fatal)
    adapter._running = True
    adapter._http_session = MagicMock()
    adapter._bridge_log_fh = MagicMock()
    adapter._shutting_down = True

    bridge = MagicMock()
    bridge.poll.return_value = 137  # SIGKILL / OOM-kill
    adapter._bridge_process = bridge

    outcome = await adapter._check_managed_bridge_exit()

    assert outcome is not None
    assert "exited unexpectedly" in outcome
    assert adapter.fatal_error_code == "whatsapp_bridge_exited"
    on_fatal.assert_awaited_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_closed_when_http_not_ready(self):
|
||||
"""Health endpoint never returns 200 within 15 attempts."""
|
||||
|
|
|
|||
|
|
@ -203,6 +203,30 @@ class TestListAuthenticatedProvidersBedrock:
|
|||
bedrock = next((p for p in providers if p["slug"] == "bedrock"), None)
|
||||
assert bedrock is None, "bedrock should NOT appear when AWS credentials are absent"
|
||||
|
||||
def test_non_bedrock_picker_does_not_probe_full_aws_chain(self, monkeypatch):
    """Listing providers for a non-Bedrock provider must not call ``has_aws_credentials``.

    All AWS credential environment variables are cleared and
    ``has_aws_credentials`` is replaced with a counting stub. After listing
    providers with ``openrouter`` as the current provider, the stub must
    never have been called and no ``bedrock`` entry may be returned.
    """
    from hermes_cli.model_switch import list_authenticated_providers

    aws_env_vars = (
        "AWS_PROFILE",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_BEARER_TOKEN_BEDROCK",
        "AWS_WEB_IDENTITY_TOKEN_FILE",
        "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
        "AWS_CONTAINER_CREDENTIALS_FULL_URI",
    )
    for var in aws_env_vars:
        monkeypatch.delenv(var, raising=False)

    probe_calls = {"has_aws_credentials": 0}

    def _has_aws_credentials():
        # Count every probe so the test can prove none happened.
        probe_calls["has_aws_credentials"] += 1
        return False

    with patch("agent.bedrock_adapter.has_aws_credentials", side_effect=_has_aws_credentials):
        providers = list_authenticated_providers(current_provider="openrouter", max_models=0)

    assert probe_calls["has_aws_credentials"] == 0
    assert not any(p["slug"] == "bedrock" for p in providers)
|
||||
|
||||
def test_bedrock_falls_back_to_curated_when_discovery_fails(self, monkeypatch):
|
||||
"""When discover_bedrock_models() raises, fall back to curated list without crashing."""
|
||||
from hermes_cli.model_switch import list_authenticated_providers
|
||||
|
|
|
|||
|
|
@ -822,6 +822,103 @@ class TestClampTelegramNames:
|
|||
assert result[0] == ("foo", "d1")
|
||||
|
||||
|
||||
class TestClampCommandNamesTriples:
    """``_clamp_command_names`` must carry the third tuple element through unchanged.

    Skill entries are passed as ``(name, desc, cmd_key)`` triples so the
    original cmd_key survives when the name is truncated. According to the
    original note, before the fix in PR #18951 the cmd_key was kept in a
    side-dict keyed by the untruncated ``(name, desc)`` pair, so the lookup
    missed after truncation and the cmd_key was silently lost.
    """

    def test_short_triple_preserved(self):
        """A triple whose name is short comes back untouched."""
        triples = [("skill", "A skill", "/skill")]
        clamped = _clamp_command_names(triples, set())
        assert clamped == [("skill", "A skill", "/skill")]

    def test_long_name_preserves_cmd_key(self):
        """A 50-char name is cut to the limit while the cmd_key stays whole."""
        long = "a" * 50
        cmd_key = f"/{long}"
        clamped = _clamp_command_names([(long, "desc", cmd_key)], set())
        assert len(clamped) == 1
        name, desc, key = clamped[0]
        assert len(name) == _CMD_NAME_LIMIT
        assert key == cmd_key, "cmd_key must survive name clamping"

    def test_collision_preserves_cmd_key(self):
        """When the clamped name is reserved, the renamed entry keeps its cmd_key."""
        taken = "x" * _CMD_NAME_LIMIT
        long = "x" * 50
        clamped = _clamp_command_names(
            [(long, "desc", "/long-skill")],
            reserved={taken},
        )
        assert len(clamped) == 1
        name, _desc, key = clamped[0]
        assert name == "x" * (_CMD_NAME_LIMIT - 1) + "0"
        assert key == "/long-skill"

    def test_multiple_long_names_preserve_respective_keys(self):
        """Each of several long-named entries keeps its own cmd_key, in order."""
        stem = "y" * 40
        triples = [
            (stem + "_alpha", "d1", "/alpha-skill"),
            (stem + "_beta", "d2", "/beta-skill"),
        ]
        clamped = _clamp_command_names(triples, set())
        assert len(clamped) == 2
        first, second = clamped
        assert first[2] == "/alpha-skill"
        assert second[2] == "/beta-skill"

    def test_backward_compat_with_pairs(self):
        """Legacy 2-tuple callers (Telegram) must still work."""
        pairs = [("help", "Show help"), ("status", "Show status")]
        clamped = _clamp_command_names(pairs, set())
        assert clamped == pairs
|
||||
|
||||
|
||||
class TestDiscordSkillCmdKeyDispatch:
    """Integration: ``discord_skill_commands`` keeps the cmd_key of long-named skills.

    Exercises the pipeline from the skill command registry through
    ``discord_skill_commands`` and checks that a skill whose name exceeds
    Discord's 32-char limit comes back with a clamped name but its original
    cmd_key, which is what dispatch relies on.
    """

    def test_long_skill_name_retains_cmd_key(self, tmp_path, monkeypatch):
        """A 48-char skill name is clamped while its ``/full-name`` cmd_key survives."""
        from unittest.mock import patch

        long_name = "this-is-a-very-long-skill-name-that-exceeds-limit"
        cmd_key = f"/{long_name}"
        fake_skills_dir = tmp_path / "skills"
        fake_skills_dir.mkdir(exist_ok=True)
        # Use resolved path — macOS /var → /private/var symlink
        # causes SKILLS_DIR.resolve() to differ from tmp_path.
        resolved_dir = str(fake_skills_dir.resolve())

        skill_info = {
            "name": long_name,
            "description": "A skill with a long name",
            "skill_md_path": f"{resolved_dir}/{long_name}/SKILL.md",
            "skill_dir": f"{resolved_dir}/{long_name}",
        }
        fake_cmds = {cmd_key: skill_info}

        with (
            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
            patch("tools.skills_tool.SKILLS_DIR", fake_skills_dir),
            patch("agent.skill_utils.get_external_skills_dirs", return_value=[]),
        ):
            entries, hidden = discord_skill_commands(
                max_slots=100,
                reserved_names=set(),
            )

        assert len(entries) == 1
        name, desc, key = entries[0]
        assert len(name) <= _CMD_NAME_LIMIT, "Name should be clamped to 32 chars"
        assert key == cmd_key, (
            f"cmd_key must be the original /{long_name}, got {key!r}"
        )
|
||||
|
||||
|
||||
class TestTelegramMenuCommands:
|
||||
"""Integration: telegram_menu_commands enforces the 32-char limit."""
|
||||
|
||||
|
|
@ -899,6 +996,73 @@ class TestTelegramMenuCommands:
|
|||
assert "my_enabled_skill" in menu_names
|
||||
assert "my_disabled_skill" not in menu_names
|
||||
|
||||
def test_external_dir_skills_included_in_telegram_menu(self, tmp_path, monkeypatch):
    """Skills from ``skills.external_dirs`` must show up in the Telegram menu.

    Regression test for #8110: per the original report, external skills
    were visible to the agent and CLI but missing from gateway slash menus
    because only paths under ``SKILLS_DIR`` were accepted.

    Three skills are registered: one under the local skills dir, one under
    the configured external dir, and one under a sibling directory that
    merely shares the external dir's name as a prefix (``my-skills-extra``
    vs ``my-skills``). The first two must appear; the look-alike must not.
    """
    from unittest.mock import patch

    local_dir = tmp_path / "skills"
    external_dir = tmp_path / "my-skills"
    lookalike_dir = tmp_path / "my-skills-extra"
    for directory in (local_dir, external_dir, lookalike_dir):
        directory.mkdir()

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / "config.yaml").write_text(
        f"skills:\n external_dirs:\n - {external_dir}\n"
    )

    fake_cmds = {
        "/local-one": {
            "name": "local-one",
            "description": "Local",
            "skill_md_path": f"{local_dir}/local-one/SKILL.md",
            "skill_dir": f"{local_dir}/local-one",
        },
        "/morning-briefing": {
            "name": "morning-briefing",
            "description": "External skill",
            "skill_md_path": f"{external_dir}/morning-briefing/SKILL.md",
            "skill_dir": f"{external_dir}/morning-briefing",
        },
        "/lookalike-skill": {
            "name": "lookalike-skill",
            "description": "Lives in a sibling dir that shares a prefix",
            "skill_md_path": f"{lookalike_dir}/lookalike-skill/SKILL.md",
            "skill_dir": f"{lookalike_dir}/lookalike-skill",
        },
    }

    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", local_dir),
        patch(
            "agent.skill_utils.get_external_skills_dirs",
            return_value=[external_dir],
        ),
    ):
        menu, _ = telegram_menu_commands(max_commands=100)

    menu_names = {cmd for cmd, _desc in menu}
    assert "local_one" in menu_names, "local skill must appear"
    assert "morning_briefing" in menu_names, (
        "external skill from skills.external_dirs must appear (fixes #8110)"
    )
    assert "lookalike_skill" not in menu_names, (
        "prefix-match sibling directories must not be admitted"
    )
|
||||
|
||||
def test_special_chars_in_skill_names_sanitized(self, tmp_path, monkeypatch):
|
||||
"""Skills with +, /, or other special chars produce valid Telegram names."""
|
||||
from unittest.mock import patch
|
||||
|
|
@ -1353,6 +1517,119 @@ class TestDiscordSkillCommandsByCategory:
|
|||
assert "vllm" in names
|
||||
assert len(uncategorized) == 0
|
||||
|
||||
def test_no_legacy_25x25_cap(self, tmp_path, monkeypatch):
    """No 25-group or 25-skills-per-group cap may be applied any more.

    Builds 30 categories of 30 skills each, more than the old nested-layout
    limits on both axes, and checks that every category and every skill is
    returned and nothing is counted as hidden. Per the original note, the
    caps (#11321, #10259) existed for a per-command payload limit that no
    longer applies now that categories are flattened into one autocomplete
    list, and re-introducing them would silently drop skills in the 26th+
    category.
    """
    from unittest.mock import patch

    skills_root = tmp_path / "skills"
    fake_skills_dir = str(skills_root)

    # Build 30 categories (> old _MAX_GROUPS=25) each with 30 skills
    # (> old _MAX_PER_GROUP=25).
    fake_cmds = {}
    for cat_idx in range(30):
        cat = f"cat{cat_idx:02d}"  # cat00, cat01, ..., cat29 — 30 categories
        for skill_idx in range(30):
            name = f"skill-{cat_idx:02d}-{skill_idx:02d}"
            skill_subdir = skills_root / cat / name
            skill_subdir.mkdir(parents=True, exist_ok=True)
            (skill_subdir / "SKILL.md").write_text("---\nname: x\n---\n")
            fake_cmds[f"/{name}"] = {
                "name": name,
                "description": f"Category {cat} skill {skill_idx}",
                "skill_md_path": f"{fake_skills_dir}/{cat}/{name}/SKILL.md",
            }

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", skills_root),
    ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # Every category should be present — no 25-group cap
    assert len(categories) == 30, (
        f"expected all 30 categories, got {len(categories)} "
        f"(cap from old nested layout must be removed)"
    )
    # Every skill in every category must be present — no 25-per-group cap
    for cat_name, entries in categories.items():
        assert len(entries) == 30, (
            f"category {cat_name}: expected 30 skills, got {len(entries)} "
            f"(cap from old nested layout must be removed)"
        )
    # Nothing should be reported hidden for the cap reason (the only
    # legitimate hidden reason now is name clamp collisions, which
    # don't happen here since all names are unique).
    assert hidden == 0
|
||||
|
||||
def test_external_dirs_skills_included(self, tmp_path, monkeypatch):
    """External-dir skills must be returned by ``discord_skill_commands_by_category``.

    One skill lives under the local skills dir (category ``creative``) and
    one under a configured external dir (category ``mlops``). Both must be
    grouped under their own top-level directory, with nothing uncategorized
    or hidden. Per the original note, #18741 fixed the flat
    ``discord_skill_commands`` collector but left this one filtering by
    ``SKILLS_DIR`` prefix only.
    """
    from unittest.mock import patch

    local_skills_dir = tmp_path / "local-skills"
    external_dir = tmp_path / "external-skills"

    local_md = local_skills_dir / "creative" / "local-skill" / "SKILL.md"
    local_md.parent.mkdir(parents=True)
    local_md.write_text("")

    external_md = external_dir / "mlops" / "external-skill" / "SKILL.md"
    external_md.parent.mkdir(parents=True)
    external_md.write_text("")

    fake_cmds = {
        "/local-skill": {
            "name": "local-skill",
            "description": "Local",
            "skill_md_path": str(local_md),
        },
        "/external-skill": {
            "name": "external-skill",
            "description": "External",
            "skill_md_path": str(external_md),
        },
    }
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", local_skills_dir),
        patch(
            "agent.skill_utils.get_external_skills_dirs",
            return_value=[external_dir],
        ),
    ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # Local skill → grouped under "creative"
    assert "creative" in categories
    assert any(n == "local-skill" for n, _d, _k in categories["creative"])
    # External skill → grouped under its own top-level dir "mlops"
    assert "mlops" in categories, (
        "external-dir skills must be included — the old SKILLS_DIR-only "
        "prefix check was broken for by_category (completes #18741)"
    )
    assert any(n == "external-skill" for n, _d, _k in categories["mlops"])
    assert uncategorized == []
    assert hidden == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin slash command integration
|
||||
|
|
|
|||
246
tests/hermes_cli/test_discord_skill_clamp_warning.py
Normal file
246
tests/hermes_cli/test_discord_skill_clamp_warning.py
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
"""Tests for Discord /skill 32-char clamp collision warnings.
|
||||
|
||||
Discord's per-command name limit is 32 chars, so
|
||||
``discord_skill_commands_by_category`` clamps skill slugs to that width
|
||||
before deduping. When two skills share the same 32-char prefix, only
|
||||
the first (alphabetical) wins; the second is dropped. Previously the
|
||||
drop was silent — the ``hidden`` count incremented but nothing named
|
||||
which skills collided, so authors had no way to discover the drop
|
||||
short of noticing that their skill was missing from the autocomplete.
|
||||
|
||||
This module pins the upgraded behavior: a WARNING log with both full
|
||||
cmd_keys + the clamped name, so whoever named the skills sees the
|
||||
collision and can rename one.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def test_clamp_collision_emits_warning_naming_both_skills(
    tmp_path: Path, caplog
) -> None:
    """Two skills sharing their first 32 chars produce one warning naming both.

    Only one of the colliding skills may be returned (under the clamped
    name), the other must be counted as hidden, and exactly one WARNING
    containing "clamp" must mention both full cmd_keys and the clamped name.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    # Craft cmd_keys that share the first 32 chars: both names are the
    # 32-char prefix plus a distinguishing suffix.
    prefix = "skill-collision-prefix-identical"  # exactly 32 chars
    name_a = prefix + "-alpha"  # /skill-collision-prefix-identical-alpha
    name_b = prefix + "-bravo"  # /skill-collision-prefix-identical-bravo
    assert name_a[:32] == name_b[:32] == prefix

    skills_dir = tmp_path / "skills"
    for skill_name in (name_a, name_b):
        skill_home = skills_dir / "creative" / skill_name
        skill_home.mkdir(parents=True)
        (skill_home / "SKILL.md").write_text("---\nname: x\n---\n")

    fake_cmds = {
        f"/{skill_name}": {
            "name": skill_name,
            "description": description,
            "skill_md_path": str(skills_dir / "creative" / skill_name / "SKILL.md"),
        }
        for skill_name, description in ((name_a, "Alpha"), (name_b, "Bravo"))
    }

    log_capture = caplog.at_level(logging.WARNING, logger="hermes_cli.commands")
    cmds_patch = patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
    dir_patch = patch("tools.skills_tool.SKILLS_DIR", skills_dir)
    with log_capture, cmds_patch, dir_patch:
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # One skill made it through, one was dropped (hidden counted).
    assert hidden == 1
    kept_names = [n for n, _d, _k in categories.get("creative", [])]
    assert len(kept_names) == 1
    # Alphabetical iteration means the -alpha variant wins the slot.
    assert kept_names[0] == prefix  # clamped

    # Exactly one warning, naming BOTH full cmd_keys and the clamped name.
    warnings = [
        record
        for record in caplog.records
        if record.levelno == logging.WARNING and "clamp" in record.getMessage()
    ]
    assert len(warnings) == 1, (
        f"expected exactly one clamp-collision warning, got {len(warnings)}: "
        f"{[r.getMessage() for r in warnings]}"
    )
    msg = warnings[0].getMessage()
    assert f"/{name_a}" in msg, f"winner not named in warning: {msg!r}"
    assert f"/{name_b}" in msg, f"loser not named in warning: {msg!r}"
    assert prefix in msg, f"clamped name not in warning: {msg!r}"
|
||||
|
||||
|
||||
def test_clamp_collision_with_reserved_name_emits_distinct_warning(
    tmp_path: Path, caplog
) -> None:
    """A skill whose name equals a reserved command is dropped with its own warning.

    The skill ``help`` collides with the reserved name ``help``. It must be
    counted as hidden, appear in no category, and trigger exactly one
    WARNING that contains "reserved" and the skill's cmd_key. There is no
    second skill involved, so the message is expected to differ from the
    skill-vs-skill collision case.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    # Reserved name 'help' is 4 chars — make a skill whose slug
    # clamps to 'help' (so, exactly 'help').
    reserved = "help"
    skills_dir = tmp_path / "skills"
    skill_home = skills_dir / "creative" / reserved
    skill_home.mkdir(parents=True)
    skill_md = skill_home / "SKILL.md"
    skill_md.write_text("---\nname: x\n---\n")

    fake_cmds = {
        f"/{reserved}": {
            "name": reserved,
            "description": "desc",
            "skill_md_path": str(skill_md),
        },
    }

    log_capture = caplog.at_level(logging.WARNING, logger="hermes_cli.commands")
    cmds_patch = patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
    dir_patch = patch("tools.skills_tool.SKILLS_DIR", skills_dir)
    with log_capture, cmds_patch, dir_patch:
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names={"help"},
        )

    # Skill dropped in favor of the reserved command.
    assert hidden == 1
    assert categories == {}
    assert uncategorized == []

    warnings = [
        record
        for record in caplog.records
        if record.levelno == logging.WARNING and "reserved" in record.getMessage()
    ]
    assert len(warnings) == 1, (
        f"expected one reserved-name collision warning, got "
        f"{[r.getMessage() for r in warnings]}"
    )
    msg = warnings[0].getMessage()
    assert f"/{reserved}" in msg
    assert "reserved" in msg.lower()
|
||||
|
||||
|
||||
def test_no_collision_no_warning(tmp_path: Path, caplog) -> None:
    """Sanity check: skills with distinct names produce no collision warnings.

    Both skills must be returned under ``creative``, nothing may be hidden,
    and no WARNING mentioning "clamp" or "reserved" may be logged.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    skills_dir = tmp_path / "skills"
    skill_names = ("alpha", "bravo")
    for skill_name in skill_names:
        skill_home = skills_dir / "creative" / skill_name
        skill_home.mkdir(parents=True)
        (skill_home / "SKILL.md").write_text("---\nname: x\n---\n")

    fake_cmds = {
        f"/{skill_name}": {
            "name": skill_name,
            "description": "",
            "skill_md_path": str(skills_dir / "creative" / skill_name / "SKILL.md"),
        }
        for skill_name in skill_names
    }

    log_capture = caplog.at_level(logging.WARNING, logger="hermes_cli.commands")
    cmds_patch = patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds)
    dir_patch = patch("tools.skills_tool.SKILLS_DIR", skills_dir)
    with log_capture, cmds_patch, dir_patch:
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    assert hidden == 0
    assert {n for n, _d, _k in categories["creative"]} == {"alpha", "bravo"}
    clamp_warnings = [
        record
        for record in caplog.records
        if record.levelno == logging.WARNING
        and ("clamp" in record.getMessage() or "reserved" in record.getMessage())
    ]
    assert clamp_warnings == []
|
||||
|
||||
|
||||
def test_long_skill_name_preserves_cmd_key_through_by_category(
    tmp_path: Path,
) -> None:
    """A skill name longer than 32 chars keeps its full ``cmd_key``.

    ``discord_skill_commands_by_category`` returns ``(name, description,
    cmd_key)`` tuples.  The display name is expected to be at most 32
    characters, while the third element must remain the original
    ``/full-skill-name`` so dispatch uses the untruncated command.

    The flatten-and-lookup steps below mirror what the original test
    describes as the ``_refresh_skill_catalog_state`` path.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    skills_root = tmp_path / "skills"
    skills_root.mkdir()
    root = str(skills_root.resolve())

    long_name = "generate-ascii-art-from-text-description-detailed"
    cmd_key = f"/{long_name}"
    commands = {
        cmd_key: {
            "name": long_name,
            "description": "Generate ASCII art from a text description",
            "skill_md_path": f"{root}/creative/{long_name}/SKILL.md",
            "skill_dir": f"{root}/creative/{long_name}",
        },
        "/short-skill": {
            "name": "short-skill",
            "description": "A short skill",
            "skill_md_path": f"{root}/creative/short-skill/SKILL.md",
            "skill_dir": f"{root}/creative/short-skill",
        },
    }

    with patch("agent.skill_commands.get_skill_commands", return_value=commands):
        with patch("tools.skills_tool.SKILLS_DIR", skills_root):
            categories, uncategorized, hidden = discord_skill_commands_by_category(
                reserved_names=set(),
            )

    # Flatten: uncategorized entries first, then each category's entries.
    entries = [*uncategorized]
    for bucket in categories.values():
        entries += bucket

    # Display name -> (description, cmd_key) lookup.
    by_display_name = {}
    for name, description, key in entries:
        by_display_name[name] = (description, key)

    # Locate the long skill by its original command key.
    long_matches = [entry for entry in entries if entry[2] == cmd_key]
    assert len(long_matches) == 1, f"Long skill should appear once, got: {long_matches}"

    display_name, _description, key = long_matches[0]
    assert len(display_name) <= 32, (
        f"Display name should be clamped to 32 chars, got {len(display_name)}"
    )
    assert key == cmd_key, (
        f"cmd_key must be the original /{long_name}, got {key!r}"
    )

    # The clamped display name must resolve back to the original cmd_key.
    assert display_name in by_display_name
    looked_up_key = by_display_name[display_name][1]
    assert looked_up_key == cmd_key, (
        f"Lookup must map clamped name to original cmd_key, got {looked_up_key!r}"
    )

    # The short skill passes through unchanged.
    short_matches = [entry for entry in entries if entry[2] == "/short-skill"]
    assert len(short_matches) == 1
    assert short_matches[0][0] == "short-skill"
|
||||
|
|
@ -51,6 +51,57 @@ class TestProviderEnvDetection:
|
|||
assert not _has_provider_env_config(content)
|
||||
|
||||
|
||||
class TestDoctorEnvFileEncoding:
    """Regression for #18637 (bug 3): ``hermes doctor`` on a GBK locale.

    ``hermes doctor`` crashed on a Windows Chinese locale (GBK) because
    ``.env`` was read with ``Path.read_text()``, which defaults to the
    system locale encoding rather than UTF-8.
    """

    def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
        self, monkeypatch, tmp_path
    ):
        """Doctor must pass ``encoding="utf-8"`` when reading ``.env``."""
        import pathlib

        home = tmp_path / ".hermes"
        home.mkdir()
        # UTF-8 .env containing an em dash (U+2014 = e2 80 94).  The 0x94
        # byte is the one from the issue report: it is invalid as a GBK
        # trailing byte in this position, so a locale-default read raises
        # UnicodeDecodeError on Chinese Windows.
        env_path = home / ".env"
        env_path.write_text(
            "OPENAI_API_KEY=sk-test # em-dash here — should not crash\n",
            encoding="utf-8",
        )

        monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)

        real_read_text = pathlib.Path.read_text

        def gbk_like_read_text(self, encoding=None, errors=None, **kwargs):
            # Simulate a GBK locale: this particular .env only decodes when
            # the caller pins encoding="utf-8".
            is_env_file = self == env_path
            if is_env_file and encoding != "utf-8":
                raise UnicodeDecodeError(
                    "gbk", b"\x94", 0, 1, "illegal multibyte sequence"
                )
            return real_read_text(self, encoding=encoding, errors=errors, **kwargs)

        monkeypatch.setattr(pathlib.Path, "read_text", gbk_like_read_text)

        # Short-circuit the expensive tool-availability probe — doctor only
        # needs to get past the .env read without crashing.
        def stop_at_tool_probe(*a, **kw):
            raise SystemExit(0)

        stub_model_tools = types.SimpleNamespace(
            check_tool_availability=stop_at_tool_probe,
            TOOLSET_REQUIREMENTS={},
        )
        monkeypatch.setitem(sys.modules, "model_tools", stub_model_tools)

        # If the .env read still used the locale encoding, UnicodeDecodeError
        # would escape here instead of the expected SystemExit.
        with pytest.raises(SystemExit):
            doctor_mod.run_doctor(Namespace(fix=False))
|
||||
|
||||
|
||||
class TestDoctorToolAvailabilityOverrides:
|
||||
def test_marks_honcho_available_when_configured(self, monkeypatch):
|
||||
monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)
|
||||
|
|
|
|||
|
|
@ -4,11 +4,16 @@ from hermes_cli.setup import setup_agent_settings
|
|||
|
||||
|
||||
def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
|
||||
"""The helper text should match the value shown in the prompt."""
|
||||
"""The helper text should match the value shown in the prompt.
|
||||
|
||||
After PR#18413 max_turns is read exclusively from config.yaml — the
|
||||
.env `HERMES_MAX_ITERATIONS` fallback was removed because it was
|
||||
shadowing the user's current config (see the 60-vs-500 incident).
|
||||
"""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
config = {
|
||||
"agent": {"max_turns": 90},
|
||||
"agent": {"max_turns": 60},
|
||||
"display": {"tool_progress": "all"},
|
||||
"compression": {"threshold": 0.50},
|
||||
"session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
|
||||
|
|
@ -16,10 +21,10 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
|
|||
|
||||
prompt_answers = iter(["60", "all", "0.5"])
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
|
||||
monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
|
||||
monkeypatch.setattr("hermes_cli.setup.remove_env_value", lambda *args, **kwargs: None)
|
||||
monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
|
||||
|
||||
setup_agent_settings(config)
|
||||
|
|
@ -27,3 +32,47 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
|
|||
out = capsys.readouterr().out
|
||||
assert "Press Enter to keep 60." in out
|
||||
assert "Default is 90" not in out
|
||||
|
||||
|
||||
def test_setup_agent_settings_prefers_config_over_stale_env(tmp_path, monkeypatch, capsys):
    """The wizard shows ``agent.max_turns`` from config, not a stale env value.

    Regression guard for the bug where ``.env HERMES_MAX_ITERATIONS=60``
    left over from an old ``hermes setup`` run shadowed
    ``agent.max_turns: 500`` in config.yaml.  The prompt must display the
    config value, and the stale env key must be removed.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    config = {
        "agent": {"max_turns": 500},  # value the user set in config.yaml
        "display": {"tool_progress": "all"},
        "compression": {"threshold": 0.50},
        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
    }

    answers = iter(["500", "all", "0.5"])
    removed_keys: list[str] = []

    def stale_env_value(key):
        # Simulated leftover .env entry that the wizard must ignore.
        if key == "HERMES_MAX_ITERATIONS":
            return "60"
        return ""

    def record_removal(key):
        removed_keys.append(key)
        return True

    def do_nothing(*args, **kwargs):
        return None

    monkeypatch.setattr("hermes_cli.setup.get_env_value", stale_env_value)
    monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(answers))
    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
    monkeypatch.setattr("hermes_cli.setup.save_env_value", do_nothing)
    monkeypatch.setattr("hermes_cli.setup.remove_env_value", record_removal)
    monkeypatch.setattr("hermes_cli.setup.save_config", do_nothing)

    setup_agent_settings(config)

    printed = capsys.readouterr().out
    # The config value is what the prompt offers to keep.
    assert "Press Enter to keep 500." in printed
    assert "Press Enter to keep 60." not in printed
    # The stale .env entry is cleaned up.
    assert "HERMES_MAX_ITERATIONS" in removed_keys
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from hermes_cli.tools_config import (
|
|||
_configure_provider,
|
||||
_get_platform_tools,
|
||||
_platform_toolset_summary,
|
||||
_reconfigure_tool,
|
||||
_save_platform_tools,
|
||||
_toolset_has_keys,
|
||||
CONFIGURABLE_TOOLSETS,
|
||||
|
|
@ -468,6 +469,33 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch):
|
|||
assert config["browser"]["cloud_provider"] == "local"
|
||||
|
||||
|
||||
def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypatch):
    """An enabled ``web`` toolset is offered for reconfiguration even with no keys.

    ``_toolset_has_keys`` is forced to ``False``; the prompt must still list
    a "Web Search" choice, and picking the first choice must configure the
    ``web`` toolset.
    """
    config = {"platform_toolsets": {"cli": ["web"]}}
    captured = {}
    configured = []

    def no_keys(ts_key, config=None):
        return False

    def pick_first_choice(question, choices, default=0):
        captured["choices"] = choices
        return 0

    def record_configured(ts_key, cat, config):
        configured.append(ts_key)

    monkeypatch.setattr("hermes_cli.tools_config._toolset_has_keys", no_keys)
    monkeypatch.setattr("hermes_cli.tools_config._prompt_choice", pick_first_choice)
    monkeypatch.setattr(
        "hermes_cli.tools_config._configure_tool_category_for_reconfig",
        record_configured,
    )
    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)

    _reconfigure_tool(config)

    offered = captured["choices"]
    assert any("Web Search" in choice for choice in offered)
    assert configured == ["web"]
|
||||
|
||||
|
||||
def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
|
||||
monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
|
||||
monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
|
||||
|
|
|
|||
69
tests/run_agent/test_init_fallback_on_exhausted_pool.py
Normal file
69
tests/run_agent/test_init_fallback_on_exhausted_pool.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
"""Regression test for #17929: AIAgent.__init__ should try fallback_model
|
||||
when primary provider credentials are exhausted."""
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _make_tool_defs():
|
||||
return [{"type": "function", "function": {"name": "web_search",
|
||||
"description": "search", "parameters": {"type": "object", "properties": {}}}}]
|
||||
|
||||
|
||||
def _mock_client(api_key="fb-key-1234567890", base_url="https://fb.example.com/v1"):
|
||||
c = MagicMock()
|
||||
c.api_key = api_key
|
||||
c.base_url = base_url
|
||||
c._default_headers = None
|
||||
return c
|
||||
|
||||
|
||||
def test_init_tries_fallback_when_primary_returns_none():
    """``AIAgent.__init__`` switches to the fallback when the primary has no client.

    ``resolve_provider_client`` is patched to return ``(None, None)`` for
    every provider except the fallback entry.  Construction must succeed
    and the agent must report the fallback provider and model.
    """
    fallback_client = _mock_client()

    def fake_resolve(provider, model=None, raw_codex=False,
                     explicit_base_url=None, explicit_api_key=None):
        # Only the fallback provider resolves; the primary is exhausted.
        if provider != "tencent-token-plan":
            return None, None
        return fallback_client, "kimi2.5"

    resolve_patch = patch(
        "agent.auxiliary_client.resolve_provider_client", side_effect=fake_resolve
    )
    tools_patch = patch("run_agent.get_tool_definitions", return_value=_make_tool_defs())
    requirements_patch = patch("run_agent.check_toolset_requirements", return_value={})
    openai_patch = patch("run_agent.OpenAI", return_value=MagicMock())

    with resolve_patch, tools_patch, requirements_patch, openai_patch:
        agent = AIAgent(
            provider="alibaba-coding-plan",
            model="qwen3.6-plus",
            api_key=None,
            base_url=None,
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
            fallback_model=[{"provider": "tencent-token-plan", "model": "kimi2.5"}],
        )
        assert agent.provider == "tencent-token-plan"
        assert agent.model == "kimi2.5"
        assert agent._fallback_activated is True
|
||||
|
||||
|
||||
def test_init_raises_when_no_fallback_configured():
    """With no client for the primary and no fallback, construction raises RuntimeError."""
    resolve_patch = patch(
        "agent.auxiliary_client.resolve_provider_client", return_value=(None, None)
    )
    tools_patch = patch("run_agent.get_tool_definitions", return_value=_make_tool_defs())
    requirements_patch = patch("run_agent.check_toolset_requirements", return_value={})
    openai_patch = patch("run_agent.OpenAI", return_value=MagicMock())

    agent_kwargs = dict(
        provider="alibaba-coding-plan",
        model="qwen3.6-plus",
        api_key=None,
        base_url=None,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        fallback_model=None,
    )

    with resolve_patch, tools_patch, requirements_patch, openai_patch:
        with pytest.raises(RuntimeError, match="no API key was found"):
            AIAgent(**agent_kwargs)
|
||||
|
|
@ -81,3 +81,51 @@ def test_unknown_base_url_clears_default_headers(mock_openai):
|
|||
agent._apply_client_headers_for_base_url("https://api.example.com/v1")
|
||||
|
||||
assert "default_headers" not in agent._client_kwargs
|
||||
|
||||
|
||||
@patch("run_agent.OpenAI")
def test_openrouter_headers_include_response_cache_when_enabled(mock_openai):
    """With ``openrouter.response_cache`` true, cache headers are added.

    The TTL from config (600) must be sent as the string ``"600"``, and
    the ``HTTP-Referer`` header must be present alongside.
    """
    mock_openai.return_value = MagicMock()
    openrouter_url = "https://openrouter.ai/api/v1"
    agent = AIAgent(
        api_key="test-key",
        base_url=openrouter_url,
        model="test/model",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )

    cache_config = {
        "openrouter": {"response_cache": True, "response_cache_ttl": 600},
    }
    with patch("hermes_cli.config.load_config", return_value=cache_config):
        agent._apply_client_headers_for_base_url(openrouter_url)

    sent = agent._client_kwargs["default_headers"]
    assert sent["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
    assert sent["X-OpenRouter-Cache"] == "true"
    assert sent["X-OpenRouter-Cache-TTL"] == "600"
|
||||
|
||||
|
||||
@patch("run_agent.OpenAI")
def test_openrouter_headers_no_cache_when_disabled(mock_openai):
    """With ``openrouter.response_cache`` false, no cache headers are sent.

    The ``HTTP-Referer`` header must still be present.
    """
    mock_openai.return_value = MagicMock()
    openrouter_url = "https://openrouter.ai/api/v1"
    agent = AIAgent(
        api_key="test-key",
        base_url=openrouter_url,
        model="test/model",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )

    cache_config = {
        "openrouter": {"response_cache": False},
    }
    with patch("hermes_cli.config.load_config", return_value=cache_config):
        agent._apply_client_headers_for_base_url(openrouter_url)

    sent = agent._client_kwargs["default_headers"]
    assert sent["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
    for cache_header in ("X-OpenRouter-Cache", "X-OpenRouter-Cache-TTL"):
        assert cache_header not in sent
|
||||
|
|
|
|||
116
tests/test_hermes_home_profile_warning.py
Normal file
116
tests/test_hermes_home_profile_warning.py
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
"""Tests for get_hermes_home() profile-mode fallback warning.
|
||||
|
||||
Regression test for https://github.com/NousResearch/hermes-agent/issues/18594.
|
||||
|
||||
When HERMES_HOME is unset but an active_profile file indicates a non-default
|
||||
profile is active, get_hermes_home() should:
|
||||
1. STILL return ~/.hermes (raising would brick 30+ module-level callers)
|
||||
2. Emit a loud one-shot warning to stderr so operators can diagnose
|
||||
cross-profile data contamination after the fact.
|
||||
|
||||
The warning goes to stderr directly (not through logging) because this
|
||||
function is called at module-import time from 30+ sites, often before the
|
||||
logging subsystem has been configured.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def fresh_constants(monkeypatch, tmp_path):
    """Return a freshly reloaded ``hermes_constants`` with an isolated home.

    The module is reloaded first (the original fixture describes this as
    resetting the one-shot warn flag), then ``Path.home`` is pointed at
    ``tmp_path`` and ``HERMES_HOME`` is removed from the environment.
    """
    import importlib

    import hermes_constants

    module = importlib.reload(hermes_constants)
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.delenv("HERMES_HOME", raising=False)
    return module
|
||||
|
||||
|
||||
class TestGetHermesHomeProfileWarning:
    """``get_hermes_home()`` fallback behaviour with and without an active profile."""

    # Substring that identifies the fallback warning on stderr.
    _MARKER = "HERMES_HOME fallback"

    @staticmethod
    def _make_hermes_dir(tmp_path):
        """Create and return ``<tmp_path>/.hermes``."""
        hermes_dir = tmp_path / ".hermes"
        hermes_dir.mkdir()
        return hermes_dir

    def test_classic_mode_no_active_profile_no_warning(
        self, fresh_constants, tmp_path, capsys
    ):
        """Classic mode: no active_profile file → silent, returns ~/.hermes."""
        home = fresh_constants.get_hermes_home()
        assert home == tmp_path / ".hermes"
        assert self._MARKER not in capsys.readouterr().err

    def test_default_active_profile_no_warning(
        self, fresh_constants, tmp_path, capsys
    ):
        """active_profile=default → still no warning, returns ~/.hermes."""
        hermes_dir = self._make_hermes_dir(tmp_path)
        (hermes_dir / "active_profile").write_text("default\n")
        home = fresh_constants.get_hermes_home()
        assert home == tmp_path / ".hermes"
        assert self._MARKER not in capsys.readouterr().err

    def test_named_profile_unset_home_warns_once(
        self, fresh_constants, tmp_path, capsys
    ):
        """active_profile=coder + HERMES_HOME unset → one warning, fallback returned."""
        hermes_dir = self._make_hermes_dir(tmp_path)
        (hermes_dir / "active_profile").write_text("coder\n")

        home = fresh_constants.get_hermes_home()

        # The fallback path is still returned — no import-time crash.
        assert home == tmp_path / ".hermes"
        # Exactly one warning reached stderr, naming the profile and issue.
        first_err = capsys.readouterr().err
        assert first_err.count(self._MARKER) == 1
        assert "'coder'" in first_err
        assert "#18594" in first_err

        # One-shot: further calls stay silent.
        for _ in range(2):
            fresh_constants.get_hermes_home()
        later_err = capsys.readouterr().err
        assert self._MARKER not in later_err

    def test_hermes_home_set_suppresses_warning(
        self, fresh_constants, tmp_path, capsys, monkeypatch
    ):
        """Even if active_profile is 'coder', setting HERMES_HOME suppresses warning."""
        profile_dir = tmp_path / ".hermes" / "profiles" / "coder"
        profile_dir.mkdir(parents=True)
        (tmp_path / ".hermes" / "active_profile").write_text("coder\n")
        monkeypatch.setenv("HERMES_HOME", str(profile_dir))

        home = fresh_constants.get_hermes_home()

        assert home == profile_dir
        assert self._MARKER not in capsys.readouterr().err

    def test_unreadable_active_profile_no_crash(
        self, fresh_constants, tmp_path, capsys
    ):
        """active_profile that can't be decoded → fall through silently."""
        hermes_dir = self._make_hermes_dir(tmp_path)
        # These bytes are not valid UTF-8.
        (hermes_dir / "active_profile").write_bytes(b"\xff\xfe\x00\x00")

        home = fresh_constants.get_hermes_home()

        assert home == tmp_path / ".hermes"
        # No crash and no warning: the intended profile cannot be determined.
        assert self._MARKER not in capsys.readouterr().err

    def test_empty_active_profile_no_warning(
        self, fresh_constants, tmp_path, capsys
    ):
        """Empty active_profile file → treated as default, no warning."""
        hermes_dir = self._make_hermes_dir(tmp_path)
        (hermes_dir / "active_profile").write_text("")

        home = fresh_constants.get_hermes_home()

        assert home == tmp_path / ".hermes"
        assert self._MARKER not in capsys.readouterr().err
|
||||
|
|
@ -104,6 +104,44 @@ class TestWriteFileHandler:
|
|||
assert result["error"] == "boom"
|
||||
assert any("write_file error" in r.getMessage() for r in caplog.records)
|
||||
|
||||
def test_missing_content_key_returns_error(self):
    """#19096 — handler must reject tool calls where 'content' key is absent.

    The JSON result must carry an ``error`` entry whose text mentions
    ``content``.  A third assertion that ended in ``or True`` was removed:
    it could never fail and so verified nothing.
    """
    from tools.file_tools import _handle_write_file

    result = json.loads(_handle_write_file({"path": "/tmp/oops.md"}))
    assert "error" in result
    assert "content" in result["error"]
|
||||
|
||||
def test_missing_path_key_returns_error(self):
    """#19096 — a write_file call without a 'path' key must yield an error result."""
    from tools.file_tools import _handle_write_file

    raw = _handle_write_file({"content": "hello"})
    payload = json.loads(raw)
    assert "error" in payload
|
||||
|
||||
def test_explicit_empty_content_is_allowed(self):
    """#19096 — an explicit empty-string content (file truncation) is accepted."""
    from tools.file_tools import _handle_write_file

    # Stand-in file-ops object whose write_file result serialises to "ok".
    write_result = MagicMock()
    write_result.to_dict.return_value = {"status": "ok", "path": "/tmp/empty.txt", "bytes": 0}
    file_ops = MagicMock()
    file_ops.write_file.return_value = write_result

    with patch("tools.file_tools._get_file_ops") as get_file_ops:
        get_file_ops.return_value = file_ops
        raw = _handle_write_file({"path": "/tmp/empty.txt", "content": ""})

    payload = json.loads(raw)
    assert payload["status"] == "ok"
|
||||
|
||||
def test_non_string_content_returns_error(self):
    """#19096 — content must be a string; a dict is rejected with an error."""
    from tools.file_tools import _handle_write_file

    raw = _handle_write_file({"path": "/tmp/x.txt", "content": {"nested": "dict"}})
    payload = json.loads(raw)
    assert "error" in payload
    # The error text must mention either "string" or "content".
    error_text = payload["error"].lower()
    assert "string" in error_text or "content" in error_text
|
||||
|
||||
|
||||
class TestPatchHandler:
|
||||
@patch("tools.file_tools._get_file_ops")
|
||||
|
|
|
|||
|
|
@ -371,6 +371,57 @@ class TestDeleteSkill:
|
|||
_delete_skill("my-skill")
|
||||
assert not (tmp_path / "devops").exists()
|
||||
|
||||
def test_delete_with_absorbed_into_valid_target(self, tmp_path):
    """Deleting with an existing ``absorbed_into`` target succeeds and says so."""
    with _skill_dir(tmp_path):
        for skill_name in ("umbrella", "narrow"):
            _create_skill(skill_name, VALID_SKILL_CONTENT)
        outcome = _delete_skill("narrow", absorbed_into="umbrella")
    assert outcome["success"] is True
    assert "absorbed into 'umbrella'" in outcome["message"]
    # Only the narrow skill is removed; the umbrella stays on disk.
    assert not (tmp_path / "narrow").exists()
    assert (tmp_path / "umbrella").exists()
|
||||
|
||||
def test_delete_with_absorbed_into_empty_string_means_pruned(self, tmp_path):
    """An empty ``absorbed_into`` is an explicit prune: no "absorbed into" suffix."""
    with _skill_dir(tmp_path):
        _create_skill("stale-skill", VALID_SKILL_CONTENT)
        outcome = _delete_skill("stale-skill", absorbed_into="")
    assert outcome["success"] is True
    assert "absorbed into" not in outcome["message"]
|
||||
|
||||
def test_delete_with_absorbed_into_nonexistent_target_rejected(self, tmp_path):
    """A missing ``absorbed_into`` target is rejected and nothing is deleted."""
    with _skill_dir(tmp_path):
        _create_skill("narrow", VALID_SKILL_CONTENT)
        outcome = _delete_skill("narrow", absorbed_into="ghost-umbrella")
    assert outcome["success"] is False
    assert "does not exist" in outcome["error"]
    # Validation failure must leave the skill on disk.
    assert (tmp_path / "narrow").exists()
|
||||
|
||||
def test_delete_with_absorbed_into_equals_self_rejected(self, tmp_path):
    """A skill cannot be absorbed into itself; the delete is refused."""
    with _skill_dir(tmp_path):
        _create_skill("narrow", VALID_SKILL_CONTENT)
        outcome = _delete_skill("narrow", absorbed_into="narrow")
    assert outcome["success"] is False
    assert "cannot equal" in outcome["error"]
    assert (tmp_path / "narrow").exists()
|
||||
|
||||
def test_delete_with_absorbed_into_whitespace_only_treated_as_prune(self, tmp_path):
    """A whitespace-only ``absorbed_into`` behaves like the prune path."""
    # Per the original comment: .strip() reduces it to "" → pruned path.
    with _skill_dir(tmp_path):
        _create_skill("narrow", VALID_SKILL_CONTENT)
        outcome = _delete_skill("narrow", absorbed_into=" ")
    assert outcome["success"] is True
    assert "absorbed into" not in outcome["message"]
|
||||
|
||||
def test_delete_without_absorbed_into_backward_compat(self, tmp_path):
    """Callers that omit ``absorbed_into`` can still delete a skill."""
    # Per the original comment, the curator reconciler falls back to its
    # heuristic+YAML logic for deletes made without the argument.
    with _skill_dir(tmp_path):
        _create_skill("my-skill", VALID_SKILL_CONTENT)
        outcome = _delete_skill("my-skill")
    assert outcome["success"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# write_file / remove_file
|
||||
|
|
@ -485,6 +536,25 @@ class TestSkillManageDispatcher:
|
|||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
|
||||
def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path):
    """``skill_manage(action="delete")`` forwards ``absorbed_into`` to the delete path."""
    # Going through the dispatcher exercises the validation and the message
    # suffix end-to-end.
    with _skill_dir(tmp_path):
        for skill_name in ("umbrella", "narrow"):
            skill_manage(action="create", name=skill_name, content=VALID_SKILL_CONTENT)
        raw = skill_manage(action="delete", name="narrow", absorbed_into="umbrella")
    payload = json.loads(raw)
    assert payload["success"] is True
    assert "absorbed into 'umbrella'" in payload["message"]
|
||||
|
||||
def test_delete_via_dispatcher_rejects_missing_absorbed_target(self, tmp_path):
    """Deleting through the dispatcher with an unknown ``absorbed_into`` target fails."""
    with _skill_dir(tmp_path):
        skill_manage(action="create", name="narrow", content=VALID_SKILL_CONTENT)
        raw = skill_manage(action="delete", name="narrow", absorbed_into="ghost")
    payload = json.loads(raw)
    assert payload["success"] is False
    assert "does not exist" in payload["error"]
|
||||
|
||||
|
||||
class TestSecurityScanGate:
|
||||
"""_security_scan_skill is gated by skills.guard_agent_created config flag."""
|
||||
|
|
|
|||
196
tests/tui_gateway/test_goal_command.py
Normal file
196
tests/tui_gateway/test_goal_command.py
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
"""Tests for /goal handling in tui_gateway.
|
||||
|
||||
The TUI routes ``/goal`` through ``command.dispatch`` (not ``slash.exec``)
|
||||
because the CLI's ``_handle_goal_command`` queues the kickoff message onto
|
||||
``_pending_input``, which the slash-worker subprocess has no reader for.
|
||||
Instead we handle ``/goal`` directly in the server and return a
|
||||
``{"type": "send", "notice": ..., "message": ...}`` payload the TUI client
|
||||
uses to render a system line and fire the kickoff prompt.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture()
def hermes_home(tmp_path, monkeypatch):
    """Yield an isolated ``.hermes`` directory used as ``HERMES_HOME``.

    ``Path.home`` is redirected to ``tmp_path`` and ``HERMES_HOME`` points at
    the new directory.  The goals module's DB cache is cleared before and
    after the test so it re-resolves ``HERMES_HOME``.
    """
    from hermes_cli import goals

    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    goals._DB_CACHE.clear()
    yield home_dir
    goals._DB_CACHE.clear()
|
||||
|
||||
|
||||
@pytest.fixture()
def server(hermes_home):
    """Yield the ``tui_gateway.server`` module imported with stubbed CLI modules.

    ``hermes_cli.env_loader`` and ``hermes_cli.banner`` are replaced with
    ``MagicMock`` objects in ``sys.modules`` for the import.  On teardown
    the module-level registries are emptied and the module is reloaded.
    """
    stubbed_modules = {
        "hermes_cli.env_loader": MagicMock(),
        "hermes_cli.banner": MagicMock(),
    }
    # NOTE(review): indentation was reconstructed; teardown is assumed to run
    # while the sys.modules patch is still active — confirm against the file.
    with patch.dict("sys.modules", stubbed_modules):
        gateway = importlib.import_module("tui_gateway.server")
        yield gateway
        for registry in (
            gateway._sessions,
            gateway._pending,
            gateway._answers,
            gateway._methods,
        ):
            registry.clear()
        importlib.reload(gateway)
|
||||
|
||||
|
||||
@pytest.fixture()
def session(server):
    """Register a minimal session on *server*; return ``(sid, session_key, state)``."""
    sid, session_key = "sid-test", "tui-goal-session-1"
    state = dict(
        session_key=session_key,
        history=[],
        history_lock=threading.Lock(),
        history_version=0,
        running=False,
        attached_images=[],
        cols=120,
    )
    server._sessions[sid] = state
    return sid, session_key, state
|
||||
|
||||
|
||||
def _call(server, method, **params):
|
||||
handler = server._methods[method]
|
||||
return handler(1, params)
|
||||
|
||||
|
||||
# ── command.dispatch /goal ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_goal_bare_shows_status_when_none_set(server, session):
    """A bare ``/goal`` with no goal set returns an exec result saying so."""
    sid = session[0]
    reply = _call(server, "command.dispatch", name="goal", arg="", session_id=sid)
    result = reply["result"]
    assert result["type"] == "exec"
    assert "No active goal" in result["output"]
|
||||
|
||||
|
||||
def test_goal_whitespace_only_shows_status(server, session):
    """A whitespace-only ``/goal`` argument is treated like a status request."""
    sid = session[0]
    reply = _call(server, "command.dispatch", name="goal", arg=" ", session_id=sid)
    result = reply["result"]
    assert result["type"] == "exec"
    assert "No active goal" in result["output"]
|
||||
|
||||
|
||||
def test_goal_status_alias_shows_status(server, session):
    """``/goal status`` reports the status rather than setting a goal."""
    sid = session[0]
    reply = _call(server, "command.dispatch", name="goal", arg="status", session_id=sid)
    result = reply["result"]
    assert result["type"] == "exec"
    assert "No active goal" in result["output"]
|
||||
|
||||
|
||||
def test_goal_set_returns_send_with_notice(server, session):
    """Setting a goal yields a ``send`` result with a notice and persists it.

    The result must carry the goal text as ``message`` (the kickoff prompt)
    and a ``notice`` mentioning the turn budget; the goal must then be
    readable back through a fresh ``GoalManager`` for the same session key.
    """
    sid, session_key, _ = session
    r = _call(server, "command.dispatch", name="goal", arg="build a rocket", session_id=sid)
    result = r["result"]
    assert result["type"] == "send"
    assert result["message"] == "build a rocket"
    assert "notice" in result
    assert "Goal set" in result["notice"]
    assert "20-turn budget" in result["notice"]

    # Persisted in SessionDB
    from hermes_cli.goals import GoalManager

    mgr = GoalManager(session_key)
    assert mgr.state is not None
    assert mgr.state.goal == "build a rocket"
    assert mgr.state.status == "active"
|
||||
|
||||
|
||||
def test_goal_pause_after_set(server, session):
    """``/goal pause`` after setting a goal reports and persists ``paused``."""
    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    r = _call(server, "command.dispatch", name="goal", arg="pause", session_id=sid)
    assert r["result"]["type"] == "exec"
    assert "paused" in r["result"]["output"].lower()

    from hermes_cli.goals import GoalManager

    assert GoalManager(session_key).state.status == "paused"
|
||||
|
||||
|
||||
def test_goal_resume_reactivates(server, session):
    """``/goal resume`` on a paused goal reports it and restores ``active``."""
    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    _call(server, "command.dispatch", name="goal", arg="pause", session_id=sid)
    r = _call(server, "command.dispatch", name="goal", arg="resume", session_id=sid)
    assert r["result"]["type"] == "exec"
    assert "resumed" in r["result"]["output"].lower()

    from hermes_cli.goals import GoalManager

    assert GoalManager(session_key).state.status == "active"
|
||||
|
||||
|
||||
def test_goal_clear_removes_active_goal(server, session):
    """``/goal clear`` reports the clear and leaves no active goal behind."""
    sid, session_key, _ = session
    _call(server, "command.dispatch", name="goal", arg="write a story", session_id=sid)
    r = _call(server, "command.dispatch", name="goal", arg="clear", session_id=sid)
    assert r["result"]["type"] == "exec"
    assert "cleared" in r["result"]["output"].lower()

    from hermes_cli.goals import GoalManager

    # After clear the row is marked status=cleared (kept for audit);
    # ``has_goal()`` / ``is_active()`` return False so the goal loop
    # stays off and ``status`` reports "No active goal".
    mgr = GoalManager(session_key)
    assert not mgr.has_goal()
    assert not mgr.is_active()
    assert "No active goal" in mgr.status_line()
|
||||
|
||||
|
||||
def test_goal_stop_and_done_are_clear_aliases(server, session):
    """``/goal stop`` and ``/goal done`` both clear the current goal."""
    sid, _, _ = session
    _call(server, "command.dispatch", name="goal", arg="first goal", session_id=sid)
    r = _call(server, "command.dispatch", name="goal", arg="stop", session_id=sid)
    assert "cleared" in r["result"]["output"].lower()

    # Set a second goal so that "done" has something to clear as well.
    _call(server, "command.dispatch", name="goal", arg="second goal", session_id=sid)
    r = _call(server, "command.dispatch", name="goal", arg="done", session_id=sid)
    assert "cleared" in r["result"]["output"].lower()
|
||||
|
||||
|
||||
def test_goal_requires_session(server):
    """``/goal`` for an unregistered session id returns error code 4001."""
    r = _call(server, "command.dispatch", name="goal", arg="nope", session_id="unknown")
    assert "error" in r
    assert r["error"]["code"] == 4001
|
||||
|
||||
|
||||
# ── slash.exec /goal routing ──────────────────────────────────────────
|
||||
|
||||
|
||||
def test_slash_exec_rejects_goal_routes_to_command_dispatch(server, session):
    """slash.exec must reject /goal with 4018 so the TUI client falls through
    to command.dispatch. Without this, the HermesCLI slash-worker subprocess
    would set the goal but silently drop the kickoff — the queue is in-proc."""
    sid, _, _ = session
    r = _call(server, "slash.exec", command="goal status", session_id=sid)
    assert "error" in r
    assert r["error"]["code"] == 4018
    # The error text must point the client at the correct RPC.
    assert "command.dispatch" in r["error"]["message"]
|
||||
|
||||
|
||||
def test_pending_input_commands_includes_goal(server):
    """Guard: _PENDING_INPUT_COMMANDS must list 'goal' — removing it would
    silently re-break the TUI."""
    assert "goal" in server._PENDING_INPUT_COMMANDS
|
||||
|
|
@ -94,10 +94,20 @@ _HERMES_ENV_PATH = (
|
|||
)
|
||||
_PROJECT_ENV_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*\.env(?:\.[^/\s"\'`]+)*)'
|
||||
_PROJECT_CONFIG_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*config\.yaml)'
|
||||
_SHELL_RC_FILES = (
|
||||
r'(?:~|\$home|\$\{home\})/\.'
|
||||
r'(?:bashrc|zshrc|profile|bash_profile|zprofile)\b'
|
||||
)
|
||||
_CREDENTIAL_FILES = (
|
||||
r'(?:~|\$home|\$\{home\})/\.'
|
||||
r'(?:netrc|pgpass|npmrc|pypirc)\b'
|
||||
)
|
||||
# Regex fragment: write targets treated as sensitive — /etc/, /dev/sd*,
# SSH paths, the Hermes env file, shell rc files and credential dotfiles.
# All alternatives sit in ONE non-capturing group; the group is closed only
# after the last alternative so the parentheses stay balanced.
_SENSITIVE_WRITE_TARGET = (
    r'(?:/etc/|/dev/sd|'
    rf'{_SSH_SENSITIVE_PATH}|'
    rf'{_HERMES_ENV_PATH}|'
    rf'{_SHELL_RC_FILES}|'
    rf'{_CREDENTIAL_FILES})'
)
|
||||
_PROJECT_SENSITIVE_WRITE_TARGET = rf'(?:{_PROJECT_ENV_PATH}|{_PROJECT_CONFIG_PATH})'
|
||||
_COMMAND_TAIL = r'(?:\s*(?:&&|\|\||;).*)?$'
|
||||
|
|
|
|||
|
|
@ -1097,7 +1097,25 @@ def _handle_read_file(args, **kw):
|
|||
|
||||
def _handle_write_file(args, **kw):
    """Validate a ``write_file`` tool call and forward it to ``write_file_tool``.

    Args:
        args: Tool-call arguments. ``path`` must be a non-empty string and
            ``content`` must be present and a string (an empty string is
            accepted, since only the key's presence and type are checked).
        **kw: Dispatcher keyword arguments; only ``task_id`` is read, falling
            back to ``"default"`` when missing or falsy.

    Returns:
        The value of ``tool_error(...)`` when validation fails, otherwise the
        value of ``write_file_tool(...)``.
    """
    tid = kw.get("task_id") or "default"

    # Validate BEFORE writing: a missing argument must not silently become
    # an empty path or an empty file.
    path = args.get("path")
    if not path or not isinstance(path, str):
        return tool_error(
            "write_file: missing required field 'path'. Re-emit the tool call with "
            "both 'path' and 'content' set."
        )
    if "content" not in args:
        return tool_error(
            "write_file: missing required field 'content'. The tool call included a "
            "path but no content argument — this is almost always a dropped-arg bug "
            "under context pressure. Re-emit the tool call with the full content "
            "payload, or use execute_code with hermes_tools.write_file() for very "
            "large files."
        )
    content = args["content"]
    if not isinstance(content, str):
        return tool_error(
            "write_file: 'content' must be a string, got "
            f"{type(content).__name__}."
        )
    return write_file_tool(path=path, content=content, task_id=tid)
|
||||
|
||||
|
||||
def _handle_patch(args, **kw):
|
||||
|
|
|
|||
|
|
@ -560,8 +560,18 @@ def _patch_skill(
|
|||
}
|
||||
|
||||
|
||||
def _delete_skill(name: str) -> Dict[str, Any]:
|
||||
"""Delete a skill."""
|
||||
def _delete_skill(name: str, absorbed_into: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Delete a skill.
|
||||
|
||||
``absorbed_into`` declares intent:
|
||||
- ``None`` / missing → caller didn't declare (legacy / non-curator path);
|
||||
accepted for backward compat but logs a warning because the curator
|
||||
classification pipeline can't tell consolidation from pruning without it.
|
||||
- ``""`` (empty) → explicit "truly pruned, no forwarding target".
|
||||
- ``"<skill-name>"`` → content was absorbed into that umbrella; the
|
||||
target must exist on disk. Validated here so the model can't claim an
|
||||
umbrella that doesn't exist.
|
||||
"""
|
||||
existing = _find_skill(name)
|
||||
if not existing:
|
||||
return {"success": False, "error": f"Skill '{name}' not found."}
|
||||
|
|
@ -570,6 +580,24 @@ def _delete_skill(name: str) -> Dict[str, Any]:
|
|||
if pinned_err:
|
||||
return {"success": False, "error": pinned_err}
|
||||
|
||||
# Validate absorbed_into target when declared non-empty
|
||||
if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
|
||||
target_name = absorbed_into.strip()
|
||||
if target_name == name:
|
||||
return {
|
||||
"success": False,
|
||||
"error": f"absorbed_into='{target_name}' cannot equal the skill being deleted.",
|
||||
}
|
||||
target = _find_skill(target_name)
|
||||
if not target:
|
||||
return {
|
||||
"success": False,
|
||||
"error": (
|
||||
f"absorbed_into='{target_name}' does not exist. "
|
||||
f"Create or patch the umbrella skill first, then retry the delete."
|
||||
),
|
||||
}
|
||||
|
||||
skill_dir = existing["path"]
|
||||
skills_root = _containing_skills_root(skill_dir)
|
||||
shutil.rmtree(skill_dir)
|
||||
|
|
@ -579,9 +607,13 @@ def _delete_skill(name: str) -> Dict[str, Any]:
|
|||
if parent != skills_root and parent.exists() and not any(parent.iterdir()):
|
||||
parent.rmdir()
|
||||
|
||||
message = f"Skill '{name}' deleted."
|
||||
if absorbed_into is not None and isinstance(absorbed_into, str) and absorbed_into.strip():
|
||||
message += f" Content absorbed into '{absorbed_into.strip()}'."
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": f"Skill '{name}' deleted.",
|
||||
"message": message,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -702,6 +734,7 @@ def skill_manage(
|
|||
old_string: str = None,
|
||||
new_string: str = None,
|
||||
replace_all: bool = False,
|
||||
absorbed_into: str = None,
|
||||
) -> str:
|
||||
"""
|
||||
Manage user-created skills. Dispatches to the appropriate action handler.
|
||||
|
|
@ -726,7 +759,7 @@ def skill_manage(
|
|||
result = _patch_skill(name, old_string, new_string, file_path, replace_all)
|
||||
|
||||
elif action == "delete":
|
||||
result = _delete_skill(name)
|
||||
result = _delete_skill(name, absorbed_into=absorbed_into)
|
||||
|
||||
elif action == "write_file":
|
||||
if not file_path:
|
||||
|
|
@ -778,6 +811,13 @@ SKILL_MANAGE_SCHEMA = {
|
|||
"patch (old_string/new_string — preferred for fixes), "
|
||||
"edit (full SKILL.md rewrite — major overhauls only), "
|
||||
"delete, write_file, remove_file.\n\n"
|
||||
"On delete, pass `absorbed_into=<umbrella>` when you're merging this "
|
||||
"skill's content into another one, or `absorbed_into=\"\"` when you're "
|
||||
"pruning it with no forwarding target. This lets the curator tell "
|
||||
"consolidation from pruning without guessing, so downstream consumers "
|
||||
"(cron jobs that reference the old skill name, etc.) get updated "
|
||||
"correctly. The target you name in `absorbed_into` must already "
|
||||
"exist — create/patch the umbrella first, then delete.\n\n"
|
||||
"Create when: complex task succeeded (5+ calls), errors overcome, "
|
||||
"user-corrected approach worked, non-trivial workflow discovered, "
|
||||
"or user asks you to remember a procedure.\n"
|
||||
|
|
@ -855,6 +895,20 @@ SKILL_MANAGE_SCHEMA = {
|
|||
"type": "string",
|
||||
"description": "Content for the file. Required for 'write_file'."
|
||||
},
|
||||
"absorbed_into": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"For 'delete' only — declares intent so the curator can "
|
||||
"tell consolidation from pruning without guessing. "
|
||||
"Pass the umbrella skill name when this skill's content "
|
||||
"was merged into another (the target must already exist). "
|
||||
"Pass an empty string when the skill is truly stale and "
|
||||
"being pruned with no forwarding target. Omitting the arg "
|
||||
"on delete is supported for backward compatibility but "
|
||||
"downstream tooling (e.g. cron-job skill reference "
|
||||
"rewriting) will have to guess at intent."
|
||||
)
|
||||
},
|
||||
},
|
||||
"required": ["action", "name"],
|
||||
},
|
||||
|
|
@ -877,6 +931,7 @@ registry.register(
|
|||
file_content=args.get("file_content"),
|
||||
old_string=args.get("old_string"),
|
||||
new_string=args.get("new_string"),
|
||||
replace_all=args.get("replace_all", False)),
|
||||
replace_all=args.get("replace_all", False),
|
||||
absorbed_into=args.get("absorbed_into")),
|
||||
emoji="📝",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -3128,6 +3128,7 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
|||
def run():
|
||||
approval_token = None
|
||||
session_tokens = []
|
||||
goal_followup = None # set by the post-turn goal hook below
|
||||
try:
|
||||
from tools.approval import (
|
||||
reset_current_session_key,
|
||||
|
|
@ -3294,6 +3295,55 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
|||
payload["rendered"] = rendered
|
||||
_emit("message.complete", sid, payload)
|
||||
|
||||
# ── /goal continuation (Ralph-style loop) ─────────────────
|
||||
# After every TUI turn, if a /goal is active, ask the judge
|
||||
# whether the goal is done and — if not and we're still under
|
||||
# budget — queue a continuation prompt to run after this
|
||||
# thread releases session["running"]. The verdict message
|
||||
# ("✓ Goal achieved" / "⏸ budget exhausted") is surfaced as
|
||||
# a system line so the user sees progress regardless of
|
||||
# outcome. Mirrors gateway/run._post_turn_goal_continuation.
|
||||
if (
|
||||
status == "complete"
|
||||
and isinstance(raw, str)
|
||||
and raw.strip()
|
||||
):
|
||||
try:
|
||||
from hermes_cli.goals import GoalManager
|
||||
|
||||
sid_key = session.get("session_key") or ""
|
||||
if sid_key:
|
||||
try:
|
||||
goals_cfg = (_load_cfg().get("goals") or {})
|
||||
goal_max_turns = int(goals_cfg.get("max_turns", 20) or 20)
|
||||
except Exception:
|
||||
goal_max_turns = 20
|
||||
goal_mgr = GoalManager(
|
||||
session_id=sid_key,
|
||||
default_max_turns=goal_max_turns,
|
||||
)
|
||||
if goal_mgr.is_active():
|
||||
decision = goal_mgr.evaluate_after_turn(
|
||||
raw, user_initiated=True,
|
||||
)
|
||||
verdict_msg = decision.get("message") or ""
|
||||
if verdict_msg:
|
||||
_emit(
|
||||
"status.update",
|
||||
sid,
|
||||
{"kind": "goal", "text": verdict_msg},
|
||||
)
|
||||
if decision.get("should_continue"):
|
||||
cont_prompt = decision.get("continuation_prompt") or ""
|
||||
if cont_prompt:
|
||||
goal_followup = cont_prompt
|
||||
except Exception as _goal_exc:
|
||||
print(
|
||||
f"[tui_gateway] goal continuation hook failed: "
|
||||
f"{type(_goal_exc).__name__}: {_goal_exc}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
# Apply pending_title now that the DB row exists.
|
||||
_pending = session.get("pending_title")
|
||||
if _pending and status == "complete":
|
||||
|
|
@ -3375,6 +3425,31 @@ def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
|||
session["running"] = False
|
||||
_emit("session.info", sid, _session_info(agent, session))
|
||||
|
||||
# Chain a goal-continuation turn if the judge said so. We do
|
||||
# this AFTER the finally releases session["running"], so the
|
||||
# nested _run_prompt_submit doesn't deadlock on the busy
|
||||
# guard. A real user prompt that races us wins because
|
||||
# prompt.submit sets running=True under the history_lock and
|
||||
# we check that guard before re-firing.
|
||||
if goal_followup:
|
||||
with session["history_lock"]:
|
||||
if session.get("running"):
|
||||
# User already sent something — their turn wins,
|
||||
# the judge will re-run on the next turn anyway.
|
||||
return
|
||||
session["running"] = True
|
||||
try:
|
||||
_emit("message.start", sid)
|
||||
_run_prompt_submit(rid, sid, session, goal_followup)
|
||||
except Exception as _cont_exc:
|
||||
print(
|
||||
f"[tui_gateway] goal continuation dispatch failed: "
|
||||
f"{type(_cont_exc).__name__}: {_cont_exc}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
with session["history_lock"]:
|
||||
session["running"] = False
|
||||
|
||||
threading.Thread(target=run, daemon=True).start()
|
||||
|
||||
|
||||
|
|
@ -4366,6 +4441,7 @@ _PENDING_INPUT_COMMANDS: frozenset[str] = frozenset(
|
|||
"q",
|
||||
"steer",
|
||||
"plan",
|
||||
"goal",
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -4678,6 +4754,77 @@ def _(rid, params: dict) -> dict:
|
|||
# Fallback: no active run, treat as next-turn message
|
||||
return _ok(rid, {"type": "send", "message": arg})
|
||||
|
||||
if name == "goal":
|
||||
if not session:
|
||||
return _err(rid, 4001, "no active session")
|
||||
try:
|
||||
from hermes_cli.goals import GoalManager
|
||||
except Exception as exc:
|
||||
return _err(rid, 5030, f"goals unavailable: {exc}")
|
||||
|
||||
sid_key = session.get("session_key") or ""
|
||||
if not sid_key:
|
||||
return _err(rid, 4001, "no session key")
|
||||
|
||||
try:
|
||||
goals_cfg = (_load_cfg().get("goals") or {})
|
||||
max_turns = int(goals_cfg.get("max_turns", 20) or 20)
|
||||
except Exception:
|
||||
max_turns = 20
|
||||
mgr = GoalManager(session_id=sid_key, default_max_turns=max_turns)
|
||||
|
||||
lower = arg.strip().lower()
|
||||
if not arg.strip() or lower == "status":
|
||||
return _ok(rid, {"type": "exec", "output": mgr.status_line()})
|
||||
if lower == "pause":
|
||||
state = mgr.pause(reason="user-paused")
|
||||
out = "No goal set." if state is None else f"⏸ Goal paused: {state.goal}"
|
||||
return _ok(rid, {"type": "exec", "output": out})
|
||||
if lower == "resume":
|
||||
state = mgr.resume()
|
||||
if state is None:
|
||||
return _ok(rid, {"type": "exec", "output": "No goal to resume."})
|
||||
return _ok(
|
||||
rid,
|
||||
{
|
||||
"type": "exec",
|
||||
"output": (
|
||||
f"▶ Goal resumed: {state.goal}\n"
|
||||
"Send any message to continue, or wait — I'll take the next step on the next turn."
|
||||
),
|
||||
},
|
||||
)
|
||||
if lower in ("clear", "stop", "done"):
|
||||
had = mgr.has_goal()
|
||||
mgr.clear()
|
||||
return _ok(
|
||||
rid,
|
||||
{
|
||||
"type": "exec",
|
||||
"output": "✓ Goal cleared." if had else "No active goal.",
|
||||
},
|
||||
)
|
||||
|
||||
# Otherwise — treat the remaining text as the new goal.
|
||||
try:
|
||||
state = mgr.set(arg)
|
||||
except ValueError as exc:
|
||||
return _err(rid, 4004, f"invalid goal: {exc}")
|
||||
|
||||
notice = (
|
||||
f"⊙ Goal set ({state.max_turns}-turn budget): {state.goal}\n"
|
||||
"I'll keep working until the goal is done, you pause/clear it, or the budget is exhausted.\n"
|
||||
"Controls: /goal status · /goal pause · /goal resume · /goal clear"
|
||||
)
|
||||
# Send the goal text as the kickoff prompt. The TUI client sees
|
||||
# {type: send, notice, message} → renders `notice` as a sys line,
|
||||
# then submits `message` as a user turn. The post-turn judge
|
||||
# wired in _run_prompt_submit takes over from there.
|
||||
return _ok(
|
||||
rid,
|
||||
{"type": "send", "notice": notice, "message": state.goal},
|
||||
)
|
||||
|
||||
return _err(rid, 4018, f"not a quick/plugin/skill command: {name}")
|
||||
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue