Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-16 08:23:20 -05:00
commit f81dba0da2
128 changed files with 8357 additions and 842 deletions

View file

@ -24,6 +24,15 @@
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
# =============================================================================
# LLM PROVIDER (Ollama Cloud)
# =============================================================================
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
# Get your key at: https://ollama.com/settings
# OLLAMA_API_KEY=your_ollama_key_here
# Optional base URL override (default: https://ollama.com/v1)
# OLLAMA_BASE_URL=https://ollama.com/v1
# =============================================================================
# LLM PROVIDER (z.ai / GLM)
# =============================================================================

View file

@ -58,6 +58,9 @@ _PROVIDER_ALIASES = {
"google": "gemini",
"google-gemini": "gemini",
"google-ai-studio": "gemini",
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"glm": "zai",
"z-ai": "zai",
"z.ai": "zai",
@ -104,6 +107,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"opencode-zen": "gemini-3-flash",
"opencode-go": "glm-5",
"kilocode": "google/gemini-3-flash-preview",
"ollama-cloud": "nemotron-3-nano:30b",
}
# Vision-specific model overrides for direct providers.

View file

@ -600,6 +600,45 @@ class KawaiiSpinner:
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
]
@classmethod
def get_waiting_faces(cls) -> list:
    """Waiting-face frames for the spinner.

    Prefers the ``waiting_faces`` list from the active skin; any lookup
    failure or an empty list falls back to the built-in KAWAII_WAITING.
    """
    try:
        active = _get_skin()
        skin_faces = active.spinner.get("waiting_faces", []) if active else []
    except Exception:
        skin_faces = []
    return skin_faces or cls.KAWAII_WAITING
@classmethod
def get_thinking_faces(cls) -> list:
    """Thinking-face frames for the spinner.

    Prefers the ``thinking_faces`` list from the active skin; any lookup
    failure or an empty list falls back to the built-in KAWAII_THINKING.
    """
    try:
        active = _get_skin()
        skin_faces = active.spinner.get("thinking_faces", []) if active else []
    except Exception:
        skin_faces = []
    return skin_faces or cls.KAWAII_THINKING
@classmethod
def get_thinking_verbs(cls) -> list:
    """Verbs shown while the agent is thinking.

    Prefers the ``thinking_verbs`` list from the active skin; any lookup
    failure or an empty list falls back to the built-in THINKING_VERBS.
    """
    try:
        active = _get_skin()
        skin_verbs = active.spinner.get("thinking_verbs", []) if active else []
    except Exception:
        skin_verbs = []
    return skin_verbs or cls.THINKING_VERBS
def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])

View file

@ -28,6 +28,7 @@ Usage in run_agent.py:
from __future__ import annotations
import json
import logging
import re
from typing import Any, Dict, List, Optional
@ -43,11 +44,22 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
_FENCE_TAG_RE = re.compile(r'</?\s*memory-context\s*>', re.IGNORECASE)
_INTERNAL_CONTEXT_RE = re.compile(
r'<\s*memory-context\s*>[\s\S]*?</\s*memory-context\s*>',
re.IGNORECASE,
)
_INTERNAL_NOTE_RE = re.compile(
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
re.IGNORECASE,
)
def sanitize_context(text: str) -> str:
"""Strip fence-escape sequences from provider output."""
return _FENCE_TAG_RE.sub('', text)
"""Strip fence tags, injected context blocks, and system notes from provider output."""
text = _INTERNAL_CONTEXT_RE.sub('', text)
text = _INTERNAL_NOTE_RE.sub('', text)
text = _FENCE_TAG_RE.sub('', text)
return text
def build_memory_context_block(raw_context: str) -> str:

View file

@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@ -33,6 +33,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"google", "google-gemini", "google-ai-studio",
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
@ -239,6 +240,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.x.ai": "xai",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
}

View file

@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"togetherai": "togetherai",
"perplexity": "perplexity",
"cohere": "cohere",
"ollama-cloud": "ollama-cloud",
}
# Reverse mapping: models.dev → Hermes (built lazily)

View file

@ -295,7 +295,9 @@ PLATFORM_HINTS = {
),
"telegram": (
"You are on a text messaging communication platform, Telegram. "
"Please do not use markdown as it does not render. "
"Standard markdown is automatically converted to Telegram format. "
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
"`inline code`, ```code blocks```, [links](url), and ## headers. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "

View file

@ -72,7 +72,14 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
skill_name = str(loaded_skill.get("name") or normalized)
skill_path = str(loaded_skill.get("path") or "")
skill_dir = None
if skill_path:
# Prefer the absolute skill_dir returned by skill_view() — this is
# correct for both local and external skills. Fall back to the old
# SKILLS_DIR-relative reconstruction only when skill_dir is absent
# (e.g. legacy skill_view responses).
abs_skill_dir = loaded_skill.get("skill_dir")
if abs_skill_dir:
skill_dir = Path(abs_skill_dir)
elif skill_path:
try:
skill_dir = SKILLS_DIR / Path(skill_path).parent
except Exception:

View file

@ -16,7 +16,7 @@ model:
# "nous" - Nous Portal OAuth (requires: hermes login)
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
# "openai-codex" - OpenAI Codex (requires: hermes auth)
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
# "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
@ -26,6 +26,7 @@ model:
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
#
@ -37,12 +38,6 @@ model:
# base_url: "http://localhost:1234/v1"
# No API key needed — local servers typically ignore auth.
#
# For Ollama Cloud (https://ollama.com/pricing):
# provider: "custom"
# base_url: "https://ollama.com/v1"
# Set OLLAMA_API_KEY in .env — automatically picked up when base_url
# points to ollama.com.
#
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
@ -337,6 +332,7 @@ compression:
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
# "nous" - Force Nous Portal (requires: hermes login)
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
# Uses gpt-5.3-codex which supports vision.
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
@ -564,6 +560,18 @@ platform_toolsets:
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
# =============================================================================
# Gateway Platform Settings
# =============================================================================
# Optional per-platform messaging settings.
# Platform-specific knobs live under `extra`.
#
# platforms:
# telegram:
# reply_to_mode: "first" # off | first | all
# extra:
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#

69
cli.py
View file

@ -2057,7 +2057,17 @@ class HermesCLI:
"""Return the visible height for the spinner/status text line above the status bar."""
if not getattr(self, "_spinner_text", ""):
return 0
return 0 if self._use_minimal_tui_chrome(width=width) else 1
if self._use_minimal_tui_chrome(width=width):
return 0
# Compute how many lines the spinner text needs when wrapped.
# The rendered text is " {emoji} {label} ({elapsed})" — about
# len(_spinner_text) + 16 chars for indent + timer suffix.
width = width or self._get_tui_terminal_width()
if width and width > 10:
import math
text_len = len(self._spinner_text) + 16 # indent + timer
return max(1, math.ceil(text_len / width))
return 1
def _get_voice_status_fragments(self, width: Optional[int] = None):
"""Return the voice status bar fragments for the interactive TUI."""
@ -4001,23 +4011,14 @@ class HermesCLI:
def _handle_profile_command(self):
"""Display active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_constants import display_hermes_home
from hermes_cli.profiles import get_active_profile_name
home = get_hermes_home()
display = display_hermes_home()
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
profile_name = get_active_profile_name()
print()
if profile_name:
print(f" Profile: {profile_name}")
else:
print(" Profile: default")
print(f" Profile: {profile_name}")
print(f" Home: {display}")
print()
@ -5599,7 +5600,8 @@ class HermesCLI:
version = f" v{p['version']}" if p["version"] else ""
tools = f"{p['tools']} tools" if p["tools"] else ""
hooks = f"{p['hooks']} hooks" if p["hooks"] else ""
parts = [x for x in [tools, hooks] if x]
commands = f"{p['commands']} commands" if p.get("commands") else ""
parts = [x for x in [tools, hooks, commands] if x]
detail = f" ({', '.join(parts)})" if parts else ""
error = f"{p['error']}" if p["error"] else ""
print(f" {status} {p['name']}{version}{detail}{error}")
@ -7866,7 +7868,33 @@ class HermesCLI:
# Fallback for non-interactive mode (e.g., single-query)
agent_thread.join(0.1)
agent_thread.join() # Ensure agent thread completes
# Wait for the agent thread to finish. After an interrupt the
# agent may take a few seconds to clean up (kill subprocess, persist
# session). Poll instead of a blocking join so the process_loop
# stays responsive — if the user sent another interrupt or the
# agent gets stuck, we can break out instead of freezing forever.
if interrupt_msg is not None:
# Interrupt path: poll briefly, then move on. The agent
# thread is daemon — it dies on process exit regardless.
for _wait_tick in range(50): # 50 * 0.2s = 10s max
agent_thread.join(timeout=0.2)
if not agent_thread.is_alive():
break
# Check if user fired ANOTHER interrupt (Ctrl+C sets
# _should_exit which process_loop checks on next pass).
if getattr(self, '_should_exit', False):
break
if agent_thread.is_alive():
logger.warning(
"Agent thread still alive after interrupt "
"(thread %s). Daemon thread will be cleaned up "
"on exit.",
agent_thread.ident,
)
else:
# Normal completion: agent thread should be done already,
# but guard against edge cases.
agent_thread.join(timeout=30)
# Proactively clean up async clients whose event loop is dead.
# The agent thread may have created AsyncOpenAI clients bound
@ -9159,6 +9187,7 @@ class HermesCLI:
spinner_widget = Window(
content=FormattedTextControl(get_spinner_text),
height=get_spinner_height,
wrap_lines=True,
)
spacer = Window(
@ -10118,6 +10147,11 @@ def main(
):
cli.agent.quiet_mode = True
cli.agent.suppress_status_output = True
# Suppress streaming display callbacks so stdout stays
# machine-readable (no styled "Hermes" box, no tool-gen
# status lines). The response is printed once below.
cli.agent.stream_delta_callback = None
cli.agent.tool_gen_callback = None
result = cli.agent.run_conversation(
user_message=effective_query,
conversation_history=cli.conversation_history,
@ -10125,7 +10159,8 @@ def main(
response = result.get("final_response", "") if isinstance(result, dict) else str(result)
if response:
print(response)
print(f"\nsession_id: {cli.session_id}")
# Session ID goes to stderr so piped stdout is clean.
print(f"\nsession_id: {cli.session_id}", file=sys.stderr)
# Ensure proper exit code for automation wrappers
sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)

View file

@ -837,6 +837,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
final_response = ""
# Use a separate variable for log display; keep final_response clean
# for delivery logic (empty response = no delivery).
logged_response = final_response if final_response else "(No response generated)"

View file

@ -638,6 +638,18 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads)
if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
if isinstance(whatsapp_cfg, dict):

View file

@ -902,7 +902,7 @@ class APIServerAdapter(BasePlatformAdapter):
return time.monotonic()
# Stream content chunks as they arrive from the agent
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
while True:
try:
delta = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@ -1241,7 +1241,7 @@ class APIServerAdapter(BasePlatformAdapter):
await _emit_text_delta(it)
# Other types (non-string, non-tuple) are silently dropped.
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
while True:
try:
item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@ -2004,7 +2004,7 @@ class APIServerAdapter(BasePlatformAdapter):
callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
another thread to stop in-progress LLM calls.
"""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
def _run():
agent = self._create_agent(

View file

@ -734,25 +734,56 @@ def merge_pending_message_event(
pending_messages: Dict[str, MessageEvent],
session_key: str,
event: MessageEvent,
*,
merge_text: bool = False,
) -> None:
"""Store or merge a pending event for a session.
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
events. Merge those into the existing queued event so the next turn sees
the whole burst, while non-photo follow-ups still replace the pending
event normally.
the whole burst.
When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
instead of replacing the pending turn. This is used for Telegram bursty
follow-ups so a multi-part user thought is not silently truncated to only
the last queued fragment.
"""
existing = pending_messages.get(session_key)
if (
existing
and getattr(existing, "message_type", None) == MessageType.PHOTO
and event.message_type == MessageType.PHOTO
):
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing:
existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO
incoming_is_photo = event.message_type == MessageType.PHOTO
existing_has_media = bool(existing.media_urls)
incoming_has_media = bool(event.media_urls)
if existing_is_photo and incoming_is_photo:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing_has_media or incoming_has_media:
if incoming_has_media:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if existing.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
else:
existing.text = event.text
if existing_is_photo or incoming_is_photo:
existing.message_type = MessageType.PHOTO
return
if (
merge_text
and getattr(existing, "message_type", None) == MessageType.TEXT
and event.message_type == MessageType.TEXT
):
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
return
pending_messages[session_key] = event
@ -839,6 +870,11 @@ class BasePlatformAdapter(ABC):
# Gateway shutdown cancels these so an old gateway instance doesn't keep
# working on a task after --replace or manual restarts.
self._background_tasks: set[asyncio.Task] = set()
# One-shot callbacks to fire after the main response is delivered.
# Keyed by session_key. GatewayRunner uses this to defer
# background-review notifications ("💾 Skill created") until the
# primary reply has been sent.
self._post_delivery_callbacks: Dict[str, Callable] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
@ -1905,6 +1941,14 @@ class BasePlatformAdapter(ABC):
except Exception:
pass # Last resort — don't let error reporting crash the handler
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
if callable(_post_cb):
try:
_post_cb()
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:

View file

@ -366,6 +366,20 @@ class SlackAdapter(BasePlatformAdapter):
# in an assistant-enabled context. Falls back to reactions.
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
def _dm_top_level_threads_as_sessions(self) -> bool:
    """Whether top-level Slack DMs get per-message session threads.

    Defaults to ``True``: each visible DM reply thread is isolated as its
    own Hermes session, mirroring the per-thread behavior channels have.
    Setting ``platforms.slack.extra.dm_top_level_threads_as_sessions`` to
    ``false`` in config.yaml restores the legacy behavior where all
    top-level DMs share one continuous session.
    """
    setting = self.config.extra.get("dm_top_level_threads_as_sessions")
    if setting is None:
        # Unset -> per-thread sessions by default.
        return True
    normalized = str(setting).strip().lower()
    return normalized in {"1", "true", "yes", "on"}
def _resolve_thread_ts(
self,
reply_to: Optional[str] = None,
@ -996,10 +1010,14 @@ class SlackAdapter(BasePlatformAdapter):
# Build thread_ts for session keying.
# In channels: fall back to ts so each top-level @mention starts a
# new thread/session (the bot always replies in a thread).
# In DMs: only use the real thread_ts — top-level DMs should share
# one continuous session, threaded DMs get their own session.
# In DMs: fall back to ts so each top-level DM reply thread gets
# its own session key (matching channel behavior). Set
# dm_top_level_threads_as_sessions: false in config to revert to
# legacy single-session-per-DM-channel behavior.
if is_dm:
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")
if not thread_ts and self._dm_top_level_threads_as_sessions():
thread_ts = ts
else:
thread_ts = event.get("thread_ts") or ts # ts fallback for channels

View file

@ -18,6 +18,10 @@ logger = logging.getLogger(__name__)
try:
from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
try:
from telegram import LinkPreviewOptions
except ImportError:
LinkPreviewOptions = None
from telegram.ext import (
Application,
CommandHandler,
@ -36,6 +40,7 @@ except ImportError:
Message = Any
InlineKeyboardButton = Any
InlineKeyboardMarkup = Any
LinkPreviewOptions = None
Application = Any
CommandHandler = Any
CallbackQueryHandler = Any
@ -129,6 +134,7 @@ class TelegramAdapter(BasePlatformAdapter):
# When a chunk is near this limit, a continuation is almost certain.
_SPLIT_THRESHOLD = 4000
MEDIA_GROUP_WAIT_SECONDS = 0.8
_GENERAL_TOPIC_THREAD_ID = "1"
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
@ -137,6 +143,7 @@ class TelegramAdapter(BasePlatformAdapter):
self._webhook_mode: bool = False
self._mention_patterns = self._compile_mention_patterns()
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False)
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@ -172,6 +179,29 @@ class TelegramAdapter(BasePlatformAdapter):
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
return "*" in allowed_ids or user_id in allowed_ids
@classmethod
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
    """Pull a Telegram topic/thread id out of event metadata.

    Looks at ``thread_id`` first, then ``message_thread_id``. Returns the
    id normalized to ``str``, or ``None`` when metadata is falsy or no id
    is present.
    """
    if metadata:
        raw = metadata.get("thread_id") or metadata.get("message_thread_id")
        if raw is not None:
            return str(raw)
    return None
@classmethod
def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]:
    """Map a stored thread id to the ``message_thread_id`` send kwarg.

    The "General" topic (``_GENERAL_TOPIC_THREAD_ID``) must be addressed
    WITHOUT a message_thread_id, so it maps to ``None`` along with empty
    or missing ids.
    """
    if thread_id and str(thread_id) != cls._GENERAL_TOPIC_THREAD_ID:
        return int(thread_id)
    return None
@classmethod
def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
    """Map a stored thread id to the thread kwarg for typing indicators.

    Unlike ``_message_thread_id_for_send``, the General topic id is kept
    as-is here; only empty/missing ids become ``None``.
    """
    return int(thread_id) if thread_id else None
@staticmethod
def _is_thread_not_found_error(error: Exception) -> bool:
    """True when a Telegram error means the target topic/thread is gone."""
    message = str(error).lower()
    return "thread not found" in message
def _fallback_ips(self) -> list[str]:
"""Return validated fallback IPs from config (populated by _apply_env_overrides)."""
configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else []
@ -202,6 +232,26 @@ class TelegramAdapter(BasePlatformAdapter):
pass
return isinstance(error, OSError)
def _coerce_bool_extra(self, key: str, default: bool = False) -> bool:
    """Read a boolean-ish value from ``config.extra``.

    Accepts real booleans/numbers (truthiness applies) and the usual
    string spellings ("true"/"1"/"yes"/"on" and their negatives, any
    case). A missing ``extra`` mapping, a missing key, or an
    unrecognized string yields *default*.
    """
    extra = getattr(self.config, "extra", None)
    value = extra.get(key) if extra else None
    if value is None:
        return default
    if not isinstance(value, str):
        return bool(value)
    token = value.strip().lower()
    if token in {"true", "1", "yes", "on"}:
        return True
    if token in {"false", "0", "no", "off"}:
        return False
    return default
def _link_preview_kwargs(self) -> Dict[str, Any]:
    """Extra ``send_message`` kwargs that suppress Telegram link previews.

    Returns an empty dict when previews are allowed. Otherwise prefers
    the modern ``LinkPreviewOptions`` API when the installed
    python-telegram-bot exposes it, falling back to the legacy
    ``disable_web_page_preview`` flag.
    """
    if not getattr(self, "_disable_link_previews", False):
        return {}
    if LinkPreviewOptions is None:
        # Older python-telegram-bot without LinkPreviewOptions.
        return {"disable_web_page_preview": True}
    return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
async def _handle_polling_network_error(self, error: Exception) -> None:
"""Reconnect polling after a transient network interruption.
@ -549,7 +599,7 @@ class TelegramAdapter(BasePlatformAdapter):
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
}
proxy_url = resolve_proxy_url()
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
fallback_ips = self._fallback_ips()
if not fallback_ips:
@ -615,14 +665,14 @@ class TelegramAdapter(BasePlatformAdapter):
from telegram.error import NetworkError, TimedOut
except ImportError:
NetworkError = TimedOut = OSError # type: ignore[misc,assignment]
_max_connect = 3
_max_connect = 8
for _attempt in range(_max_connect):
try:
await self._app.initialize()
break
except (NetworkError, TimedOut, OSError) as init_err:
if _attempt < _max_connect - 1:
wait = 2 ** _attempt
wait = min(2 ** _attempt, 15)
logger.warning(
"[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
self.name, _attempt + 1, _max_connect, init_err, wait,
@ -823,7 +873,7 @@ class TelegramAdapter(BasePlatformAdapter):
]
message_ids = []
thread_id = metadata.get("thread_id") if metadata else None
thread_id = self._metadata_thread_id(metadata)
try:
from telegram.error import NetworkError as _NetErr
@ -843,7 +893,7 @@ class TelegramAdapter(BasePlatformAdapter):
for i, chunk in enumerate(chunks):
should_thread = self._should_thread_reply(reply_to, i)
reply_to_id = int(reply_to) if should_thread else None
effective_thread_id = int(thread_id) if thread_id else None
effective_thread_id = self._message_thread_id_for_send(thread_id)
msg = None
for _send_attempt in range(3):
@ -856,6 +906,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
except Exception as md_error:
# Markdown parsing failed, try plain text
@ -868,6 +919,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=None,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
else:
raise
@ -878,8 +930,7 @@ class TelegramAdapter(BasePlatformAdapter):
# (not transient network issues). Detect and handle
# specific cases instead of blindly retrying.
if _BadReq and isinstance(send_err, _BadReq):
err_lower = str(send_err).lower()
if "thread not found" in err_lower and effective_thread_id is not None:
if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
# Thread doesn't exist — retry without
# message_thread_id so the message still
# reaches the chat.
@ -889,6 +940,7 @@ class TelegramAdapter(BasePlatformAdapter):
)
effective_thread_id = None
continue
err_lower = str(send_err).lower()
if "message to be replied not found" in err_lower and reply_to_id is not None:
# Original message was deleted before we
# could reply — clear reply target and retry
@ -1055,6 +1107,7 @@ class TelegramAdapter(BasePlatformAdapter):
text=text,
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
**self._link_preview_kwargs(),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1076,16 +1129,17 @@ class TelegramAdapter(BasePlatformAdapter):
try:
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
# Escape backticks that would break Markdown v1 inline code parsing
safe_cmd = cmd_preview.replace("`", "'")
safe_desc = description.replace("`", "'").replace("*", "")
text = (
f"⚠️ *Command Approval Required*\n\n"
f"`{cmd_preview}`\n\n"
f"Reason: {description}"
f"`{safe_cmd}`\n\n"
f"Reason: {safe_desc}"
)
# Resolve thread context for thread replies
thread_id = None
if metadata:
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
thread_id = self._metadata_thread_id(metadata)
# We'll use the message_id as part of callback_data to look up session_key
# Send a placeholder first, then update — or use a counter.
@ -1111,9 +1165,11 @@ class TelegramAdapter(BasePlatformAdapter):
"text": text,
"parse_mode": ParseMode.MARKDOWN,
"reply_markup": keyboard,
**self._link_preview_kwargs(),
}
if thread_id:
kwargs["message_thread_id"] = int(thread_id)
message_thread_id = self._message_thread_id_for_send(thread_id)
if message_thread_id is not None:
kwargs["message_thread_id"] = message_thread_id
msg = await self._bot.send_message(**kwargs)
@ -1181,6 +1237,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
message_thread_id=int(thread_id) if thread_id else None,
**self._link_preview_kwargs(),
)
# Store picker state keyed by chat_id
@ -1545,23 +1602,23 @@ class TelegramAdapter(BasePlatformAdapter):
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
if audio_path.endswith((".ogg", ".opus")):
_voice_thread = metadata.get("thread_id") if metadata else None
_voice_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_voice(
chat_id=int(chat_id),
voice=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_voice_thread) if _voice_thread else None,
message_thread_id=self._message_thread_id_for_send(_voice_thread),
)
else:
# .mp3 and others -> send as audio file
_audio_thread = metadata.get("thread_id") if metadata else None
_audio_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_audio(
chat_id=int(chat_id),
audio=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_audio_thread) if _audio_thread else None,
message_thread_id=self._message_thread_id_for_send(_audio_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1591,14 +1648,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(image_path):
return SendResult(success=False, error=f"Image file not found: {image_path}")
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(image_path, "rb") as image_file:
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1629,7 +1686,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
@ -1638,7 +1695,7 @@ class TelegramAdapter(BasePlatformAdapter):
filename=display_name,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1662,14 +1719,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(video_path):
return SendResult(success=False, error=f"Video file not found: {video_path}")
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(video_path, "rb") as f:
msg = await self._bot.send_video(
chat_id=int(chat_id),
video=f,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1699,13 +1756,13 @@ class TelegramAdapter(BasePlatformAdapter):
try:
# Telegram can send photos directly from URLs (up to ~5MB)
_photo_thread = metadata.get("thread_id") if metadata else None
_photo_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_url,
caption=caption[:1024] if caption else None, # Telegram caption limit
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_photo_thread) if _photo_thread else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1728,6 +1785,7 @@ class TelegramAdapter(BasePlatformAdapter):
photo=image_data,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e2:
@ -1753,13 +1811,13 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
_anim_thread = metadata.get("thread_id") if metadata else None
_anim_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_anim_thread) if _anim_thread else None,
message_thread_id=self._message_thread_id_for_send(_anim_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1776,12 +1834,23 @@ class TelegramAdapter(BasePlatformAdapter):
"""Send typing indicator."""
if self._bot:
try:
_typing_thread = metadata.get("thread_id") if metadata else None
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=int(_typing_thread) if _typing_thread else None,
)
_typing_thread = self._metadata_thread_id(metadata)
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
try:
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=message_thread_id,
)
except Exception as e:
if message_thread_id is not None and self._is_thread_not_found_error(e):
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=None,
)
else:
raise
except Exception as e:
# Typing failures are non-fatal; log at debug level only.
logger.debug(
@ -2726,7 +2795,9 @@ class TelegramAdapter(BasePlatformAdapter):
# Resolve DM topic name and skill binding
thread_id_raw = message.message_thread_id
thread_id_str = str(thread_id_raw) if thread_id_raw else None
thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None
if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False):
thread_id_str = self._GENERAL_TOPIC_THREAD_ID
chat_topic = None
topic_skill = None

View file

@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
def _resolve_proxy_url() -> str | None:
    """Resolve the proxy URL to use for Telegram connections.

    Delegates to the shared implementation (environment variables + macOS
    system proxy detection), preferring the TELEGRAM_PROXY variable so
    Telegram traffic can be proxied independently of HTTPS_PROXY.

    Returns:
        The proxy URL string, or None when no proxy is configured.
    """
    # Imported lazily to avoid a module-level import cycle with gateway.platforms.
    from gateway.platforms.base import resolve_proxy_url
    return resolve_proxy_url("TELEGRAM_PROXY")
class TelegramFallbackTransport(httpx.AsyncBaseTransport):

View file

@ -258,6 +258,20 @@ class WecomCallbackAdapter(BasePlatformAdapter):
)
event = self._build_event(app, decrypted)
if event is not None:
# Deduplicate: WeCom retries callbacks on timeout,
# producing duplicate inbound messages (#10305).
if event.message_id:
now = time.time()
if event.message_id in self._seen_messages:
if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS:
logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id)
return web.Response(text="success", content_type="text/plain")
del self._seen_messages[event.message_id]
self._seen_messages[event.message_id] = now
# Prune expired entries when cache grows large
if len(self._seen_messages) > 2000:
cutoff = now - MESSAGE_DEDUP_TTL_SECONDS
self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
# Record which app this user belongs to.
if event.source and event.source.user_id:
map_key = self._user_app_key(

View file

@ -24,6 +24,7 @@ import signal
import tempfile
import threading
import time
from contextvars import copy_context
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
@ -834,7 +835,7 @@ class GatewayRunner:
session_key: Optional[str] = None,
):
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
await loop.run_in_executor(
None,
self._flush_memories_for_session,
@ -2925,6 +2926,32 @@ class GatewayRunner:
merge_pending_message_event(adapter._pending_messages, _quick_key, event)
return None
_telegram_followup_grace = float(
os.getenv("HERMES_TELEGRAM_FOLLOWUP_GRACE_SECONDS", "3.0")
)
_started_at = self._running_agents_ts.get(_quick_key, 0)
if (
source.platform == Platform.TELEGRAM
and event.message_type == MessageType.TEXT
and _telegram_followup_grace > 0
and _started_at
and (time.time() - _started_at) <= _telegram_followup_grace
):
logger.debug(
"Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
time.time() - _started_at,
_quick_key[:20],
)
adapter = self.adapters.get(source.platform)
if adapter:
merge_pending_message_event(
adapter._pending_messages,
_quick_key,
event,
merge_text=True,
)
return None
running_agent = self._running_agents.get(_quick_key)
if running_agent is _AGENT_PENDING_SENTINEL:
# Agent is being set up but not ready yet.
@ -2938,7 +2965,12 @@ class GatewayRunner:
# agent starts.
adapter = self.adapters.get(source.platform)
if adapter:
adapter._pending_messages[_quick_key] = event
merge_pending_message_event(
adapter._pending_messages,
_quick_key,
event,
merge_text=True,
)
return None
if self._draining:
if self._queue_during_drain_enabled():
@ -3746,12 +3778,13 @@ class GatewayRunner:
model=_hyg_model,
max_iterations=4,
quiet_mode=True,
skip_memory=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
_hyg_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
_compressed, _ = await loop.run_in_executor(
None,
lambda: _hyg_agent._compress_context(
@ -4400,31 +4433,16 @@ class GatewayRunner:
async def _handle_profile_command(self, event: MessageEvent) -> str:
"""Handle /profile — show active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
from pathlib import Path
from hermes_constants import display_hermes_home
from hermes_cli.profiles import get_active_profile_name
home = get_hermes_home()
display = display_hermes_home()
profile_name = get_active_profile_name()
# Detect profile name from HERMES_HOME path
# Profile paths look like: ~/.hermes/profiles/<name>
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
if profile_name:
lines = [
f"👤 **Profile:** `{profile_name}`",
f"📂 **Home:** `{display}`",
]
else:
lines = [
"👤 **Profile:** default",
f"📂 **Home:** `{display}`",
]
lines = [
f"👤 **Profile:** `{profile_name}`",
f"📂 **Home:** `{display}`",
]
return "\n".join(lines)
@ -5087,6 +5105,7 @@ class GatewayRunner:
async def _handle_personality_command(self, event: MessageEvent) -> str:
"""Handle /personality command - list or set a personality."""
import yaml
from hermes_constants import display_hermes_home
args = event.get_command_args().strip().lower()
config_path = _hermes_home / 'config.yaml'
@ -5104,7 +5123,7 @@ class GatewayRunner:
personalities = {}
if not personalities:
return "No personalities configured in `~/.hermes/config.yaml`"
return f"No personalities configured in `{display_hermes_home()}/config.yaml`"
if not args:
lines = ["🎭 **Available Personalities**\n"]
@ -5825,8 +5844,7 @@ class GatewayRunner:
task_id=task_id,
)
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(None, run_sync)
result = await self._run_in_executor_with_context(run_sync)
response = result.get("final_response", "") if result else ""
if not response and result and result.get("error"):
@ -6008,8 +6026,7 @@ class GatewayRunner:
task_id=task_id,
)
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(None, run_sync)
result = await self._run_in_executor_with_context(run_sync)
response = (result.get("final_response") or "") if result else ""
if not response and result and result.get("error"):
@ -6332,6 +6349,7 @@ class GatewayRunner:
model=model,
max_iterations=4,
quiet_mode=True,
skip_memory=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
@ -6344,7 +6362,7 @@ class GatewayRunner:
if compress_start >= compress_end:
return "Nothing to compress yet (the transcript is still all protected context)."
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
compressed, _ = await loop.run_in_executor(
None,
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
@ -6697,6 +6715,11 @@ class GatewayRunner:
import asyncio as _asyncio
args = event.get_command_args().strip()
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
import re as _re
args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
days = 30
source = None
@ -6724,7 +6747,7 @@ class GatewayRunner:
from hermes_state import SessionDB
from agent.insights import InsightsEngine
loop = _asyncio.get_event_loop()
loop = _asyncio.get_running_loop()
def _run_insights():
db = SessionDB()
@ -6741,7 +6764,7 @@ class GatewayRunner:
async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
"""Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock
@ -7422,7 +7445,13 @@ class GatewayRunner:
"""Restore session context variables to their pre-handler values."""
from gateway.session_context import clear_session_vars
clear_session_vars(tokens)
async def _run_in_executor_with_context(self, func, *args):
"""Run blocking work in the thread pool while preserving session contextvars."""
loop = asyncio.get_running_loop()
ctx = copy_context()
return await loop.run_in_executor(None, ctx.run, func, *args)
async def _enrich_message_with_vision(
self,
user_text: str,
@ -8456,7 +8485,7 @@ class GatewayRunner:
stream_consumer_holder = [None] # Mutable container for stream consumer
# Bridge sync step_callback → async hooks.emit for agent:step events
_loop_for_step = asyncio.get_event_loop()
_loop_for_step = asyncio.get_running_loop()
_hooks_ref = self.hooks
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
@ -8694,6 +8723,7 @@ class GatewayRunner:
session_id=session_id,
platform=platform_key,
user_id=source.user_id,
gateway_session_key=session_key,
session_db=self._session_db,
fallback_model=self._fallback_model,
)
@ -8713,8 +8743,11 @@ class GatewayRunner:
agent.service_tier = self._service_tier
agent.request_overrides = turn_route.get("request_overrides")
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send(message: str) -> None:
_bg_review_release = threading.Event()
_bg_review_pending: list[str] = []
_bg_review_pending_lock = threading.Lock()
def _deliver_bg_review_message(message: str) -> None:
if not _status_adapter:
return
try:
@ -8729,7 +8762,32 @@ class GatewayRunner:
except Exception as _e:
logger.debug("background_review_callback error: %s", _e)
def _release_bg_review_messages() -> None:
_bg_review_release.set()
with _bg_review_pending_lock:
pending = list(_bg_review_pending)
_bg_review_pending.clear()
for queued in pending:
_deliver_bg_review_message(queued)
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send(message: str) -> None:
if not _status_adapter:
return
if not _bg_review_release.is_set():
with _bg_review_pending_lock:
if not _bg_review_release.is_set():
_bg_review_pending.append(message)
return
_deliver_bg_review_message(message)
agent.background_review_callback = _bg_review_send
# Register the release hook on the adapter so base.py's finally
# block can fire it after delivering the main response.
if _status_adapter and session_key:
_pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
if _pdc is not None:
_pdc[session_key] = _release_bg_review_messages
# Store agent reference for interrupt support
agent_holder[0] = agent
@ -8925,7 +8983,7 @@ class GatewayRunner:
_resolved_model = getattr(_agent, "model", None) if _agent else None
if not final_response:
error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
error_msg = f"⚠️ {result['error']}" if result.get("error") else ""
return {
"final_response": error_msg,
"messages": result.get("messages", []),
@ -9169,9 +9227,8 @@ class GatewayRunner:
_agent_warning_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900))
_agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None
_warning_fired = False
loop = asyncio.get_event_loop()
_executor_task = asyncio.ensure_future(
loop.run_in_executor(None, run_sync)
self._run_in_executor_with_context(run_sync)
)
_inactivity_timeout = False
@ -9436,16 +9493,18 @@ class GatewayRunner:
pass
except Exception as e:
logger.debug("Stream consumer wait before queued message failed: %s", e)
_previewed = bool(result.get("response_previewed"))
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
(_sc and getattr(_sc, "final_response_sent", False))
or _previewed
)
first_response = result.get("final_response", "")
if first_response and not _already_streamed:
try:
logger.info(
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
session_key[:20] if session_key else "?",
)
await adapter.send(
source.chat_id,
first_response,
@ -9453,6 +9512,22 @@ class GatewayRunner:
)
except Exception as e:
logger.warning("Failed to send first response before queued message: %s", e)
elif first_response:
logger.info(
"Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
session_key[:20] if session_key else "?",
)
# Release deferred bg-review notifications now that the
# first response has been delivered. Pop from the
# adapter's callback dict (prevents double-fire in
# base.py's finally block) and call it.
if adapter and hasattr(adapter, "_post_delivery_callbacks"):
_bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
if callable(_bg_cb):
try:
_bg_cb()
except Exception:
pass
# else: interrupted — discard the interrupted response ("Operation
# interrupted." is just noise; the user already knows they sent a
# new message).
@ -9472,6 +9547,19 @@ class GatewayRunner:
return result
next_message_id = getattr(pending_event, "message_id", None)
# Restart typing indicator so the user sees activity while
# the follow-up turn runs. The outer _process_message_background
# typing task is still alive but may be stale.
_followup_adapter = self.adapters.get(source.platform)
if _followup_adapter:
try:
await _followup_adapter.send_typing(
source.chat_id,
metadata=_status_thread_metadata,
)
except Exception:
pass
return await self._run_agent(
message=next_message,
context_prompt=context_prompt,
@ -9532,13 +9620,22 @@ class GatewayRunner:
# final answer. Suppressing delivery here leaves the user staring
# at silence. (#10xxx — "agent stops after web search")
_sc = stream_consumer_holder[0]
if _sc and isinstance(response, dict) and not response.get("failed"):
if isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
if not _is_empty_sentinel and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
):
_streamed = bool(
_sc and getattr(_sc, "final_response_sent", False)
)
# response_previewed means the interim_assistant_callback already
# sent the final text via the adapter (non-streaming path).
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
logger.info(
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
session_key[:20] if session_key else "?",
_streamed,
_previewed,
)
response["already_sent"] = True
return response
@ -9752,7 +9849,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
def restart_signal_handler():
runner.request_restart(detached=False, via_service=True)
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
if threading.current_thread() is threading.main_thread():
for sig in (signal.SIGINT, signal.SIGTERM):
try:

View file

@ -301,6 +301,8 @@ def build_session_context_prompt(
lines.append("")
lines.append("**Delivery options for scheduled tasks:**")
from hermes_constants import display_hermes_home
# Origin delivery
if context.source.platform == Platform.LOCAL:
lines.append("- `\"origin\"` → Local output (saved to files)")
@ -309,9 +311,11 @@ def build_session_context_prompt(
_hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
)
lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
# Local always available
lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
lines.append(
f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
)
# Platform home channels
for platform, home in context.home_channels.items():

View file

@ -403,18 +403,20 @@ class GatewayStreamConsumer:
except asyncio.CancelledError:
# Best-effort final edit on cancellation
_best_effort_ok = False
if self._accumulated and self._message_id:
try:
await self._send_or_edit(self._accumulated)
_best_effort_ok = bool(await self._send_or_edit(self._accumulated))
except Exception:
pass
# If we delivered any content before being cancelled, mark the
# final response as sent so the gateway's already_sent check
# doesn't trigger a duplicate message. The 5-second
# stream_task timeout (gateway/run.py) can cancel us while
# waiting on a slow Telegram API call — without this flag the
# gateway falls through to the normal send path.
if self._already_sent:
# Only confirm final delivery if the best-effort send above
# actually succeeded OR if the final response was already
# confirmed before we were cancelled. Previously this
# promoted any partial send (already_sent=True) to
# final_response_sent — which suppressed the gateway's
# fallback send even when only intermediate text (e.g.
# "Let me search…") had been delivered, not the real answer.
if _best_effort_ok and not self._final_response_sent:
self._final_response_sent = True
except Exception as e:
logger.error("Stream consumer error: %s", e)

View file

@ -70,6 +70,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@ -274,6 +275,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL",
),
"ollama-cloud": ProviderConfig(
id="ollama-cloud",
name="Ollama Cloud",
auth_type="api_key",
inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL,
api_key_env_vars=("OLLAMA_API_KEY",),
base_url_env_var="OLLAMA_BASE_URL",
),
"bedrock": ProviderConfig(
id="bedrock",
name="AWS Bedrock",
@ -919,6 +928,7 @@ def resolve_provider(
_PROVIDER_ALIASES = {
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"arcee-ai": "arcee", "arceeai": "arcee",
@ -937,7 +947,8 @@ def resolve_provider(
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
"ollama": "custom", "ollama_cloud": "ollama-cloud",
"vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
normalized = _PROVIDER_ALIASES.get(normalized, normalized)

View file

@ -4,6 +4,7 @@ from __future__ import annotations
from getpass import getpass
import math
import sys
import time
from types import SimpleNamespace
import uuid
@ -160,7 +161,10 @@ def auth_add_command(args) -> None:
default_label = _api_key_default_label(len(pool.entries()) + 1)
label = (getattr(args, "label", None) or "").strip()
if not label:
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
if sys.stdin.isatty():
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
else:
label = default_label
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],

View file

@ -454,7 +454,7 @@ def _collect_gateway_skill_entries(
name = sanitize_name(cmd_name) if sanitize_name else cmd_name
if not name:
continue
desc = "Plugin command"
desc = plugin_cmds[cmd_name].get("description", "Plugin command")
if len(desc) > desc_limit:
desc = desc[:desc_limit - 3] + "..."
plugin_pairs.append((name, desc))
@ -1195,6 +1195,22 @@ class SlashCommandCompleter(Completer):
display_meta=f"{short_desc}",
)
# Plugin-registered slash commands
try:
from hermes_cli.plugins import get_plugin_commands
for cmd_name, cmd_info in get_plugin_commands().items():
if cmd_name.startswith(word):
desc = str(cmd_info.get("description", "Plugin command"))
short_desc = desc[:50] + ("..." if len(desc) > 50 else "")
yield Completion(
self._completion_text(cmd_name, word),
start_position=-len(word),
display=f"/{cmd_name}",
display_meta=f"🔌 {short_desc}",
)
except Exception:
pass
# ---------------------------------------------------------------------------
# Inline auto-suggest (ghost text) for slash commands

View file

@ -241,13 +241,41 @@ def _secure_dir(path):
pass
def _is_container() -> bool:
"""Detect if we're running inside a Docker/Podman/LXC container.
When Hermes runs in a container with volume-mounted config files, forcing
0o600 permissions breaks multi-process setups where the gateway and
dashboard run as different UIDs or the volume mount requires broader
permissions.
"""
# Explicit opt-out
if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
return True
# Docker / Podman marker file
if os.path.exists("/.dockerenv"):
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
except (OSError, IOError):
pass
return False
def _secure_file(path):
"""Set file to owner-only read/write (0600). No-op on Windows.
Skipped in managed mode — the NixOS activation script sets
group-readable permissions (0640) on config files.
Skipped in containers — Docker/Podman volume mounts often need broader
permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
"""
if is_managed():
if is_managed() or _is_container():
return
try:
if os.path.exists(str(path)):
@ -392,8 +420,7 @@ DEFAULT_CONFIG = {
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
"camofox": {
# When true, Hermes sends a stable profile-scoped userId to Camofox
# so the server can map it to a persistent browser profile directory.
# Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
# so the server maps it to a persistent Firefox profile automatically.
# When false (default), each session gets a random userId (ephemeral).
"managed_persistence": False,
},
@ -531,6 +558,11 @@ DEFAULT_CONFIG = {
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
},
# Web dashboard settings
"dashboard": {
"theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
},
# Privacy settings
"privacy": {
"redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context
@ -538,7 +570,7 @@ DEFAULT_CONFIG = {
# Text-to-speech configuration
"tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"edge": {
"voice": "en-US-AriaNeural",
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@ -552,6 +584,12 @@ DEFAULT_CONFIG = {
"voice": "alloy",
# Voices: alloy, echo, fable, onyx, nova, shimmer
},
"xai": {
"voice_id": "eve",
"language": "en",
"sample_rate": 24000,
"bit_rate": 128000,
},
"mistral": {
"model": "voxtral-mini-tts-2603",
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
@ -808,6 +846,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"XAI_API_KEY": {
"description": "xAI API key",
"prompt": "xAI API key",
"url": "https://console.x.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"XAI_BASE_URL": {
"description": "xAI base URL override",
"prompt": "xAI base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"GLM_API_KEY": {
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
"prompt": "Z.AI / GLM API key",
@ -996,6 +1050,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"OLLAMA_API_KEY": {
"description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)",
"prompt": "Ollama Cloud API key",
"url": "https://ollama.com/settings",
"password": True,
"category": "provider",
"advanced": True,
},
"OLLAMA_BASE_URL": {
"description": "Ollama Cloud base URL override (default: https://ollama.com/v1)",
"prompt": "Ollama base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"XIAOMI_API_KEY": {
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
"prompt": "Xiaomi MiMo API Key",
@ -1224,6 +1294,12 @@ OPTIONAL_ENV_VARS = {
"password": False,
"category": "messaging",
},
"TELEGRAM_PROXY": {
"description": "Proxy URL for Telegram connections (overrides HTTPS_PROXY). Supports http://, https://, socks5://",
"prompt": "Telegram proxy URL (optional)",
"password": False,
"category": "messaging",
},
"DISCORD_BOT_TOKEN": {
"description": "Discord bot token from Developer Portal",
"prompt": "Discord bot token",
@ -2900,12 +2976,25 @@ def save_env_value(key: str, value: str):
lines.append(f"{key}={value}\n")
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
if env_path.exists():
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)
@ -2916,13 +3005,6 @@ def save_env_value(key: str, value: str):
os.environ[key] = value
# Restrict .env permissions to owner-only (contains API keys)
if not _IS_WINDOWS:
try:
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
def remove_env_value(key: str) -> bool:
"""Remove a key from ~/.hermes/.env and os.environ.
@ -2951,12 +3033,23 @@ def remove_env_value(key: str) -> bool:
if found:
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(new_lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)

View file

@ -1372,7 +1372,7 @@ def select_provider_and_model(args=None):
_model_flow_kimi(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"):
_model_flow_api_key_provider(config, selected_provider, current_model)
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@ -1799,6 +1799,27 @@ def _model_flow_custom(config):
effective_key = api_key or current_key
# Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
# in the base URL for OpenAI-compatible chat completions. Prompt the
# user if the URL looks like a local server without /v1.
_url_lower = effective_url.rstrip("/").lower()
_looks_local = any(h in _url_lower for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000"))
if _looks_local and not _url_lower.endswith("/v1"):
print()
print(f" Hint: Did you mean to add /v1 at the end?")
print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
print(f" e.g. {effective_url.rstrip('/')}/v1")
try:
_add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
_add_v1 = "n"
if _add_v1 in ("", "y", "yes"):
effective_url = effective_url.rstrip("/") + "/v1"
if base_url:
base_url = effective_url
print(f" Updated URL: {effective_url}")
print()
from hermes_cli.models import probe_api_models
probe = probe_api_models(effective_key, effective_url)
@ -2965,34 +2986,43 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
# 2. Curated static fallback list (offline insurance)
# 3. Live /models endpoint probe (small providers without models.dev data)
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
else:
#
# Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
if provider_id == "ollama-cloud":
from hermes_cli.models import fetch_ollama_cloud_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = fetch_ollama_cloud_models(api_key=api_key_for_probe, base_url=effective_base)
if model_list:
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
else:
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
if model_list:
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
else:
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = curated
if model_list:
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
if provider_id in {"opencode-zen", "opencode-go"}:
model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
@ -5130,7 +5160,7 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
default=None,
help="Inference provider (default: auto)"
)
@ -6608,8 +6638,13 @@ Examples:
sys.stderr = _io.StringIO()
args = parser.parse_args(_processed_argv)
sys.stderr = _saved_stderr
except SystemExit:
except SystemExit as exc:
sys.stderr = _saved_stderr
# Help/version flags (exit code 0) already printed output —
# re-raise immediately to avoid a second parse_args printing
# the same help text again (#10230).
if exc.code == 0:
raise
# Subcommand name was consumed as a flag value (e.g. -c model).
# Fall back to optional subparsers so argparse handles it normally.
subparsers.required = False

View file

@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"ollama-cloud",
"custom",
})

View file

@ -274,6 +274,11 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
is_global = False
explicit_provider = ""
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
# A single Unicode dash before a flag keyword becomes "--"
import re as _re
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
# Extract --global
if "--global" in raw_args:
is_global = True
@ -452,6 +457,7 @@ def switch_model(
ModelSwitchResult with all information the caller needs.
"""
from hermes_cli.models import (
copilot_model_api_mode,
detect_provider_for_model,
validate_requested_model,
opencode_model_api_mode,
@ -709,8 +715,12 @@ def switch_model(
if validation.get("corrected_model"):
new_model = validation["corrected_model"]
# --- Copilot api_mode override ---
if target_provider in {"copilot", "github-copilot"}:
api_mode = copilot_model_api_mode(new_model, api_key=api_key)
# --- OpenCode api_mode override ---
if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}:
if target_provider in {"opencode-zen", "opencode-go", "opencode"}:
api_mode = opencode_model_api_mode(target_provider, new_model)
# --- Determine api_mode if not already set ---
@ -786,7 +796,8 @@ def list_authenticated_providers(
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
results: List[dict] = []
seen_slugs: set = set()
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
seen_mdev_ids: set = set() # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
data = fetch_models_dev()
@ -799,6 +810,11 @@ def list_authenticated_providers(
# --- 1. Check Hermes-mapped providers ---
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
# Skip aliases that map to the same models.dev provider (e.g.
# kimi-coding and kimi-coding-cn both → kimi-for-coding).
# The first one with valid credentials wins (#10526).
if mdev_id in seen_mdev_ids:
continue
pdata = data.get(mdev_id)
if not isinstance(pdata, dict):
continue
@ -837,7 +853,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "built-in",
})
seen_slugs.add(slug)
seen_slugs.add(slug.lower())
seen_mdev_ids.add(mdev_id)
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
from hermes_cli.providers import HERMES_OVERLAYS
@ -849,12 +866,12 @@ def list_authenticated_providers(
_mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
for pid, overlay in HERMES_OVERLAYS.items():
if pid in seen_slugs:
if pid.lower() in seen_slugs:
continue
# Resolve Hermes slug — e.g. "github-copilot" → "copilot"
hermes_slug = _mdev_to_hermes.get(pid, pid)
if hermes_slug in seen_slugs:
if hermes_slug.lower() in seen_slugs:
continue
# Check if credentials exist
@ -935,8 +952,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "hermes",
})
seen_slugs.add(pid)
seen_slugs.add(hermes_slug)
seen_slugs.add(pid.lower())
seen_slugs.add(hermes_slug.lower())
# --- 2b. Cross-check canonical provider list ---
# Catches providers that are in CANONICAL_PROVIDERS but weren't found
@ -948,7 +965,7 @@ def list_authenticated_providers(
_canon_provs = []
for _cp in _canon_provs:
if _cp.slug in seen_slugs:
if _cp.slug.lower() in seen_slugs:
continue
# Check credentials via PROVIDER_REGISTRY (auth.py)
@ -995,7 +1012,7 @@ def list_authenticated_providers(
"total_models": _cp_total,
"source": "canonical",
})
seen_slugs.add(_cp.slug)
seen_slugs.add(_cp.slug.lower())
# --- 3. User-defined endpoints from config ---
if user_providers and isinstance(user_providers, dict):
@ -1068,7 +1085,7 @@ def list_authenticated_providers(
groups[slug]["models"].append(default_model)
for slug, grp in groups.items():
if slug in seen_slugs:
if slug.lower() in seen_slugs:
continue
results.append({
"slug": slug,
@ -1080,11 +1097,9 @@ def list_authenticated_providers(
"source": "user-config",
"api_url": grp["api_url"],
})
seen_slugs.add(slug)
seen_slugs.add(slug.lower())
# Sort: current provider first, then by model count descending
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
return results

View file

@ -11,7 +11,9 @@ import json
import os
import urllib.request
import urllib.error
import time
from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional
COPILOT_BASE_URL = "https://api.githubcopilot.com"
@ -143,17 +145,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-4.5-flash",
],
"xai": [
"grok-4.20-0309-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
"grok-4.20-reasoning",
"grok-4-1-fast-reasoning",
"grok-4-1-fast-non-reasoning",
"grok-4-fast-reasoning",
"grok-4-fast-non-reasoning",
"grok-4-0709",
"grok-code-fast-1",
"grok-3",
"grok-3-mini",
],
"kimi-coding": [
"kimi-for-coding",
@ -547,6 +540,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
@ -559,6 +553,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
_PROVIDER_ALIASES = {
"glm": "zai",
"z-ai": "zai",
@ -611,6 +606,8 @@ _PROVIDER_ALIASES = {
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
}
@ -1064,7 +1061,8 @@ def detect_provider_for_model(
break
if direct_match:
# Check if we have credentials for this provider
# Check if we have credentials for this provider — env vars,
# credential pool, or auth store entries.
has_creds = False
try:
from hermes_cli.auth import PROVIDER_REGISTRY
@ -1077,16 +1075,28 @@ def detect_provider_for_model(
break
except Exception:
pass
# Also check credential pool and auth store — covers OAuth,
# Claude Code tokens, and other non-env-var credentials (#10300).
if not has_creds:
try:
from agent.credential_pool import load_pool
pool = load_pool(direct_match)
if pool.has_credentials():
has_creds = True
except Exception:
pass
if not has_creds:
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
has_creds = True
except Exception:
pass
if has_creds:
return (direct_match, name)
# No direct creds — try to find this model on OpenRouter instead
or_slug = _find_openrouter_slug(name)
if or_slug:
return ("openrouter", or_slug)
# Still return the direct provider — credential resolution will
# give a clear error rather than silently using the wrong provider
# Always return the direct provider match. If credentials are
# missing, the client init will give a clear error rather than
# silently routing through the wrong provider (#10300).
return (direct_match, name)
# --- Step 2: check OpenRouter catalog ---
@ -1560,6 +1570,11 @@ def copilot_model_api_mode(
primary signal. Falls back to the catalog's ``supported_endpoints``
only for models not covered by the pattern check.
"""
# Fetch the catalog once so normalize + endpoint check share it
# (avoids two redundant network calls for non-GPT-5 models).
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
if not normalized:
return "chat_completions"
@ -1569,9 +1584,6 @@ def copilot_model_api_mode(
return "codex_responses"
# Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
if catalog:
catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
if isinstance(catalog_entry, dict):
@ -1786,6 +1798,125 @@ def fetch_api_models(
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
# ---------------------------------------------------------------------------
# Ollama Cloud — merged model discovery with disk cache
# ---------------------------------------------------------------------------
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
def _ollama_cloud_cache_path() -> Path:
    """Location of the on-disk Ollama Cloud model-list cache.

    Returns:
        Path to ``ollama_cloud_models_cache.json`` inside the Hermes home dir.
    """
    from hermes_constants import get_hermes_home

    home = get_hermes_home()
    return home / "ollama_cloud_models_cache.json"
def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
"""Load cached Ollama Cloud models from disk.
Args:
ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
"""
try:
cache_path = _ollama_cloud_cache_path()
if not cache_path.exists():
return None
with open(cache_path, encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
return None
models = data.get("models")
if not (isinstance(models, list) and models):
return None
if not ignore_ttl:
cached_at = data.get("cached_at", 0)
if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
return None # stale
return data
except Exception:
pass
return None
def _save_ollama_cloud_cache(models: list[str]) -> None:
"""Persist the merged Ollama Cloud model list to disk."""
try:
from utils import atomic_json_write
cache_path = _ollama_cloud_cache_path()
cache_path.parent.mkdir(parents=True, exist_ok=True)
atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
except Exception:
pass
def fetch_ollama_cloud_models(
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    *,
    force_refresh: bool = False,
) -> list[str]:
    """Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.

    Resolution order:
      1. Disk cache (if fresh, < 1 hour, and not force_refresh)
      2. Live ``/v1/models`` endpoint (primary — freshest source)
      3. models.dev registry (secondary — fills gaps for unlisted models)
      4. Merge: live models first, then models.dev additions (deduped)

    On total failure the stale disk cache (TTL ignored) is returned, and an
    empty list only when no data source is available at all — never ``None``.

    Args:
        api_key: Ollama API key; falls back to the ``OLLAMA_API_KEY`` env var.
        base_url: OpenAI-compatible endpoint; falls back to ``OLLAMA_BASE_URL``,
            then ``https://ollama.com/v1``.
        force_refresh: Skip the fresh disk cache and re-probe the sources.

    Returns:
        Ordered, deduplicated list of model IDs (live models first).
    """
    # 1. Fresh disk cache wins unless the caller explicitly refreshes.
    if not force_refresh:
        cached = _load_ollama_cloud_cache()
        if cached is not None:
            return cached["models"]

    # 2. Live API probe — freshest source, but needs a key.
    if not api_key:
        api_key = os.getenv("OLLAMA_API_KEY", "")
    if not base_url:
        base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"
    live_models: list[str] = []
    if api_key:
        result = fetch_api_models(api_key, base_url, timeout=8.0)
        if result:
            live_models = result

    # 3. models.dev registry — fills gaps for models the live endpoint omits.
    mdev_models: list[str] = []
    try:
        from agent.models_dev import list_agentic_models
        mdev_models = list_agentic_models("ollama-cloud")
    except Exception:
        pass

    # 4. Merge, order-preserving: live first, then models.dev additions.
    seen: set[str] = set()
    merged: list[str] = []
    for m in live_models + mdev_models:
        if m and m not in seen:
            seen.add(m)
            merged.append(m)
    if merged:
        _save_ollama_cloud_cache(merged)
        return merged

    # Total failure (both sources empty or only falsy entries) — previously an
    # all-falsy merge returned [] here without trying the stale cache, breaking
    # the documented fallback. Serve stale cache if available.
    stale = _load_ollama_cloud_cache(ignore_ttl=True)
    if stale is not None:
        return stale["models"]
    return []
def validate_requested_model(
model_name: str,
provider: Optional[str],

View file

@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
"openai": "OpenAI TTS",
"elevenlabs": "ElevenLabs",
"edge": "Edge TTS",
"xai": "xAI TTS",
"mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS",
}

View file

@ -112,6 +112,7 @@ class LoadedPlugin:
module: Optional[types.ModuleType] = None
tools_registered: List[str] = field(default_factory=list)
hooks_registered: List[str] = field(default_factory=list)
commands_registered: List[str] = field(default_factory=list)
enabled: bool = False
error: Optional[str] = None
@ -211,6 +212,84 @@ class PluginContext:
}
logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)
# -- slash command registration -------------------------------------------
def register_command(
    self,
    name: str,
    handler: Callable,
    description: str = "",
) -> None:
    """Register an in-session slash command (e.g. ``/lcm``).

    The handler signature is ``fn(raw_args: str) -> str | None``; it may
    also be an async callable — the gateway dispatch handles both.

    Unlike ``register_cli_command()`` (which creates ``hermes <subcommand>``
    terminal commands), this registers slash commands that users invoke
    during a conversation, in both CLI and gateway sessions.

    Names conflicting with built-in commands are rejected with a warning.
    """
    normalized = name.lower().strip().lstrip("/").replace(" ", "-")
    if not normalized:
        logger.warning(
            "Plugin '%s' tried to register a command with an empty name.",
            self.manifest.name,
        )
        return
    # Built-in commands always win — refuse to shadow them.
    try:
        from hermes_cli.commands import resolve_command
        if resolve_command(normalized) is not None:
            logger.warning(
                "Plugin '%s' tried to register command '/%s' which conflicts "
                "with a built-in command. Skipping.",
                self.manifest.name, normalized,
            )
            return
    except Exception:
        pass  # Commands module unavailable — register optimistically.
    self._manager._plugin_commands[normalized] = {
        "handler": handler,
        "description": description or "Plugin command",
        "plugin": self.manifest.name,
    }
    logger.debug("Plugin %s registered command: /%s", self.manifest.name, normalized)
# -- tool dispatch -------------------------------------------------------
def dispatch_tool(self, tool_name: str, args: dict, **kwargs) -> str:
    """Dispatch a tool call through the registry, with parent agent context.

    Public entry point for plugin slash commands that need to invoke tools
    such as ``delegate_task`` without reaching into framework internals:
    the parent agent (when one exists) is resolved automatically, so
    plugins never access the agent directly.

    Args:
        tool_name: Registry name of the tool (e.g. ``"delegate_task"``).
        args: Tool arguments dict (same as what the model would pass).
        **kwargs: Extra keyword args forwarded to the registry dispatch.

    Returns:
        JSON string from the tool handler (same format as model tool calls).
    """
    from tools.registry import registry

    # Wire up parent agent context when available (CLI mode). In gateway
    # mode _cli_ref is None — tools degrade gracefully (workspace hints
    # fall back to TERMINAL_CWD, no spinner).
    if "parent_agent" not in kwargs:
        cli_ref = self._manager._cli_ref
        parent = getattr(cli_ref, "agent", None) if cli_ref else None
        if parent is not None:
            kwargs["parent_agent"] = parent
    return registry.dispatch(tool_name, args, **kwargs)
# -- context engine registration -----------------------------------------
def register_context_engine(self, engine) -> None:
@ -323,6 +402,7 @@ class PluginManager:
self._plugin_tool_names: Set[str] = set()
self._cli_commands: Dict[str, dict] = {}
self._context_engine = None # Set by a plugin via register_context_engine()
self._plugin_commands: Dict[str, dict] = {} # Slash commands registered by plugins
self._discovered: bool = False
self._cli_ref = None # Set by CLI after plugin discovery
# Plugin skill registry: qualified name → metadata dict.
@ -485,6 +565,10 @@ class PluginManager:
for h in p.hooks_registered
}
)
loaded.commands_registered = [
c for c in self._plugin_commands
if self._plugin_commands[c].get("plugin") == manifest.name
]
loaded.enabled = True
except Exception as exc:
@ -598,6 +682,7 @@ class PluginManager:
"enabled": loaded.enabled,
"tools": len(loaded.tools_registered),
"hooks": len(loaded.hooks_registered),
"commands": len(loaded.commands_registered),
"error": loaded.error,
}
)
@ -699,6 +784,20 @@ def get_plugin_context_engine():
return get_plugin_manager()._context_engine
def get_plugin_command_handler(name: str) -> Optional[Callable]:
    """Return the handler for a plugin-registered slash command, or ``None``."""
    commands = get_plugin_manager()._plugin_commands
    if name in commands:
        return commands[name]["handler"]
    return None
def get_plugin_commands() -> Dict[str, dict]:
    """Return the full plugin commands dict (name → {handler, description, plugin}).

    Safe to call before discovery — returns an empty dict if no plugins loaded.
    """
    manager = get_plugin_manager()
    return manager._plugin_commands
def get_plugin_toolsets() -> List[tuple]:
"""Return plugin toolsets as ``(key, label, description)`` tuples.

View file

@ -128,7 +128,7 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_env_var="HF_BASE_URL",
),
"xai": HermesOverlay(
transport="openai_chat",
transport="codex_responses",
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
@ -141,6 +141,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
}
@ -180,6 +184,7 @@ ALIASES: Dict[str, str] = {
# xai
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
# kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding",
@ -250,7 +255,7 @@ ALIASES: Dict[str, str] = {
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
"lm_studio": "lmstudio",
"ollama": "ollama-cloud",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"vllm": "local",
"llamacpp": "local",
"llama.cpp": "local",
@ -269,6 +274,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"xiaomi": "Xiaomi MiMo",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
}

View file

@ -41,6 +41,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
tool calls with reasoning (chat/completions returns 400).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if "api.x.ai" in normalized:
return "codex_responses"
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
return None
@ -163,10 +165,13 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL
elif provider == "xai":
api_mode = "codex_responses"
elif provider == "nous":
api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
@ -627,6 +632,8 @@ def _resolve_explicit_runtime(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
@ -923,6 +930,8 @@ def resolve_runtime_provider(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Only honor persisted api_mode when it belongs to the same provider family.

View file

@ -920,6 +920,7 @@ def _setup_tts_provider(config: dict):
"edge": "Edge TTS",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI TTS",
"xai": "xAI TTS",
"minimax": "MiniMax TTS",
"mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS",
@ -941,12 +942,13 @@ def _setup_tts_provider(config: dict):
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"xAI TTS (Grok voices, needs API key)",
"MiniMax TTS (high quality with voice cloning, needs API key)",
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@ -1012,6 +1014,23 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "xai":
existing = get_env_value("XAI_API_KEY")
if not existing:
print()
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
selected = "edge"
elif selected == "minimax":
existing = get_env_value("MINIMAX_API_KEY")
if not existing:
@ -1611,9 +1630,19 @@ def _setup_telegram():
return
print_info("Create a bot via @BotFather on Telegram")
token = prompt("Telegram bot token", password=True)
if not token:
return
import re
while True:
token = prompt("Telegram bot token", password=True)
if not token:
return
if not re.match(r"^\d+:[A-Za-z0-9_-]{30,}$", token):
print_error(
"Invalid token format. Expected: <numeric_id>:<alphanumeric_hash> "
"(e.g., 123456789:ABCdefGHI-jklMNOpqrSTUvwxYZ)"
)
continue
break
save_env_value("TELEGRAM_BOT_TOKEN", token)
print_success("Telegram token saved")

View file

@ -146,6 +146,14 @@ TOOL_CATEGORIES = {
],
"tts_provider": "openai",
},
{
"name": "xAI TTS",
"tag": "Grok voices - requires xAI API key",
"env_vars": [
{"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
],
"tts_provider": "xai",
},
{
"name": "ElevenLabs",
"badge": "paid",

View file

@ -11,6 +11,7 @@ Usage:
import asyncio
import hmac
import importlib.util
import json
import logging
import os
@ -96,6 +97,9 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
"/api/config/defaults",
"/api/config/schema",
"/api/model/info",
"/api/dashboard/themes",
"/api/dashboard/plugins",
"/api/dashboard/plugins/rescan",
})
@ -114,7 +118,7 @@ def _require_token(request: Request) -> None:
async def auth_middleware(request: Request, call_next):
"""Require the session token on all /api/ routes except the public list."""
path = request.url.path
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS:
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
auth = request.headers.get("authorization", "")
expected = f"Bearer {_SESSION_TOKEN}"
if not hmac.compare_digest(auth.encode(), expected.encode()):
@ -166,6 +170,11 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
"description": "CLI visual theme",
"options": ["default", "ares", "mono", "slate"],
},
"dashboard.theme": {
"type": "select",
"description": "Web dashboard visual theme",
"options": ["default", "midnight", "ember", "mono", "cyberpunk", "rose"],
},
"display.resume_display": {
"type": "select",
"description": "How resumed sessions display history",
@ -224,6 +233,7 @@ _CATEGORY_MERGE: Dict[str, str] = {
"approvals": "security",
"human_delay": "display",
"smart_model_routing": "agent",
"dashboard": "display",
}
# Display order for tabs — unlisted categories sort alphabetically after these.
@ -2068,6 +2078,237 @@ def mount_spa(application: FastAPI):
return _serve_index()
# ---------------------------------------------------------------------------
# Dashboard theme endpoints
# ---------------------------------------------------------------------------
# Built-in dashboard themes — label + description only. The actual color
# definitions live in the frontend (web/src/themes/presets.ts).
_BUILTIN_DASHBOARD_THEMES = [
{"name": "default", "label": "Hermes Teal", "description": "Classic dark teal — the canonical Hermes look"},
{"name": "midnight", "label": "Midnight", "description": "Deep blue-violet with cool accents"},
{"name": "ember", "label": "Ember", "description": "Warm crimson and bronze — forge vibes"},
{"name": "mono", "label": "Mono", "description": "Clean grayscale — minimal and focused"},
{"name": "cyberpunk", "label": "Cyberpunk", "description": "Neon green on black — matrix terminal"},
{"name": "rose", "label": "Rosé", "description": "Soft pink and warm ivory — easy on the eyes"},
]
def _discover_user_themes() -> list:
    """Scan ``~/.hermes/dashboard-themes/*.yaml`` for user-created themes.

    Each YAML file must be a mapping with at least a ``name`` key; ``label``
    and ``description`` are optional. Files that fail to parse or lack a
    ``name`` are skipped with a warning (previously they were dropped
    silently, making broken themes hard to diagnose).

    Returns:
        List of ``{"name", "label", "description"}`` dicts, ordered by filename.
    """
    themes_dir = get_hermes_home() / "dashboard-themes"
    if not themes_dir.is_dir():
        return []
    result = []
    for f in sorted(themes_dir.glob("*.yaml")):
        try:
            data = yaml.safe_load(f.read_text(encoding="utf-8"))
            if isinstance(data, dict) and data.get("name"):
                result.append({
                    "name": data["name"],
                    "label": data.get("label", data["name"]),
                    "description": data.get("description", ""),
                })
            else:
                _log.warning("Ignoring dashboard theme %s: not a mapping with a 'name' key", f)
        except Exception as exc:
            # A broken theme file must not take down the endpoint — log and move on.
            _log.warning("Bad dashboard theme file %s: %s", f, exc)
            continue
    return result
@app.get("/api/dashboard/themes")
async def get_dashboard_themes():
    """Return available dashboard themes and the currently active one.

    Merges built-in themes with user themes from ``~/.hermes/dashboard-themes``.
    A user theme with the same ``name`` as a built-in one overrides it, keeping
    the built-in's position in the list.
    """
    config = load_config()
    active = config.get("dashboard", {}).get("theme", "default")
    # Merge built-in + user; user themes override built-in by name.
    # (The previous implementation added builtins first and skipped any user
    # theme whose name was already seen, so the documented override never
    # actually happened.) dict preserves insertion order, and re-assigning an
    # existing key keeps its original position.
    by_name = {t["name"]: t for t in _BUILTIN_DASHBOARD_THEMES}
    for t in _discover_user_themes():
        by_name[t["name"]] = t
    return {"themes": list(by_name.values()), "active": active}
class ThemeSetBody(BaseModel):
    """Request body for ``PUT /api/dashboard/theme``."""
    # Theme name to activate (built-in or user-defined).
    name: str
@app.put("/api/dashboard/theme")
async def set_dashboard_theme(body: ThemeSetBody):
    """Set the active dashboard theme (persists to config.yaml)."""
    config = load_config()
    # Create the "dashboard" section on first use, then record the theme.
    config.setdefault("dashboard", {})["theme"] = body.name
    save_config(config)
    return {"ok": True, "theme": body.name}
# ---------------------------------------------------------------------------
# Dashboard plugin system
# ---------------------------------------------------------------------------
def _discover_dashboard_plugins() -> list:
    """Scan plugins/*/dashboard/manifest.json for dashboard extensions.

    Checks three plugin sources (same as hermes_cli.plugins):
      1. User plugins: ~/.hermes/plugins/<name>/dashboard/manifest.json
      2. Bundled plugins: <repo>/plugins/<name>/dashboard/manifest.json (memory/, etc.)
      3. Project plugins: ./.hermes/plugins/ (only if HERMES_ENABLE_PROJECT_PLUGINS)

    Returns:
        List of manifest dicts. Keys prefixed with ``_`` (``_dir``,
        ``_api_file``) are server-internal and stripped before the frontend
        sees them.
    """
    plugins = []
    seen_names: set = set()
    # Search order doubles as precedence: the first manifest seen for a given
    # plugin name wins (user dir is scanned before the bundled dirs).
    search_dirs = [
        (get_hermes_home() / "plugins", "user"),
        # NOTE(review): this entry scans *children of* plugins/memory
        # (plugins/memory/<child>/dashboard/manifest.json). If the intent was
        # the bundled memory plugin itself, the next entry already covers it
        # via plugins/memory/dashboard/manifest.json — confirm.
        (PROJECT_ROOT / "plugins" / "memory", "bundled"),
        (PROJECT_ROOT / "plugins", "bundled"),
    ]
    if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"):
        search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project"))
    for plugins_root, source in search_dirs:
        if not plugins_root.is_dir():
            continue
        for child in sorted(plugins_root.iterdir()):
            if not child.is_dir():
                continue
            manifest_file = child / "dashboard" / "manifest.json"
            if not manifest_file.exists():
                continue
            try:
                data = json.loads(manifest_file.read_text(encoding="utf-8"))
                name = data.get("name", child.name)
                # First manifest for a name wins — later sources are skipped.
                if name in seen_names:
                    continue
                seen_names.add(name)
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
                    "description": data.get("description", ""),
                    "icon": data.get("icon", "Puzzle"),
                    "version": data.get("version", "0.0.0"),
                    # Default tab: routed at "/<name>", appended after built-ins.
                    "tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
                    "has_api": bool(data.get("api")),
                    "source": source,
                    # Server-internal: absolute dashboard dir + backend API file.
                    "_dir": str(child / "dashboard"),
                    "_api_file": data.get("api"),
                })
            except Exception as exc:
                # A malformed manifest must not break discovery of the rest.
                _log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc)
                continue
    return plugins
# Cache discovered plugins per-process (refresh on explicit re-scan).
_dashboard_plugins_cache: Optional[list] = None


def _get_dashboard_plugins(force_rescan: bool = False) -> list:
    """Return the (cached) dashboard plugin list, re-scanning on demand."""
    global _dashboard_plugins_cache
    needs_scan = force_rescan or _dashboard_plugins_cache is None
    if needs_scan:
        _dashboard_plugins_cache = _discover_dashboard_plugins()
    return _dashboard_plugins_cache
@app.get("/api/dashboard/plugins")
async def get_dashboard_plugins():
"""Return discovered dashboard plugins."""
plugins = _get_dashboard_plugins()
# Strip internal fields before sending to frontend.
return [
{k: v for k, v in p.items() if not k.startswith("_")}
for p in plugins
]
@app.get("/api/dashboard/plugins/rescan")
async def rescan_dashboard_plugins():
"""Force re-scan of dashboard plugins."""
plugins = _get_dashboard_plugins(force_rescan=True)
return {"ok": True, "count": len(plugins)}
@app.get("/dashboard-plugins/{plugin_name}/{file_path:path}")
async def serve_plugin_asset(plugin_name: str, file_path: str):
"""Serve static assets from a dashboard plugin directory.
Only serves files from the plugin's ``dashboard/`` subdirectory.
Path traversal is blocked by checking ``resolve().is_relative_to()``.
"""
plugins = _get_dashboard_plugins()
plugin = next((p for p in plugins if p["name"] == plugin_name), None)
if not plugin:
raise HTTPException(status_code=404, detail="Plugin not found")
base = Path(plugin["_dir"])
target = (base / file_path).resolve()
if not target.is_relative_to(base.resolve()):
raise HTTPException(status_code=403, detail="Path traversal blocked")
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="File not found")
# Guess content type
suffix = target.suffix.lower()
content_types = {
".js": "application/javascript",
".mjs": "application/javascript",
".css": "text/css",
".json": "application/json",
".html": "text/html",
".svg": "image/svg+xml",
".png": "image/png",
".jpg": "image/jpeg",
".woff2": "font/woff2",
".woff": "font/woff",
}
media_type = content_types.get(suffix, "application/octet-stream")
return FileResponse(target, media_type=media_type)
def _mount_plugin_api_routes():
    """Import and mount backend API routes from plugins that declare them.

    Each plugin's ``api`` field points to a Python file that must expose
    a ``router`` (FastAPI APIRouter). Routes are mounted under
    ``/api/plugins/<name>/``.

    Failures are logged and skipped so one broken plugin cannot prevent
    the others (or the dashboard itself) from loading.
    """
    for plugin in _get_dashboard_plugins():
        api_file = plugin.get("_api_file")
        if not api_file:
            continue
        module_path = Path(plugin["_dir"]) / api_file
        if not module_path.exists():
            _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file)
            continue
        try:
            # Load the plugin module under a synthetic, collision-free name.
            spec = importlib.util.spec_from_file_location(
                f"hermes_dashboard_plugin_{plugin['name']}", module_path,
            )
            if spec is None or spec.loader is None:
                continue
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            router = getattr(module, "router", None)
            if router is None:
                _log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
                continue
            app.include_router(router, prefix=f"/api/plugins/{plugin['name']}")
            _log.info("Mounted plugin API routes: /api/plugins/%s/", plugin["name"])
        except Exception as exc:
            _log.warning("Failed to load plugin %s API routes: %s", plugin["name"], exc)
# Mount plugin API routes before the SPA catch-all.
_mount_plugin_api_routes()
mount_spa(app)  # SPA catch-all; registered last so API routes take precedence

View file

@ -1,12 +1,12 @@
---
name: honcho
description: Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, and dialectic reasoning. Use when setting up Honcho, troubleshooting memory, managing profiles with Honcho peers, or tuning observation and recall settings.
version: 1.0.0
description: Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, dialectic reasoning, session summaries, and context budget enforcement. Use when setting up Honcho, troubleshooting memory, managing profiles with Honcho peers, or tuning observation, recall, and dialectic settings.
version: 2.0.0
author: Hermes Agent
license: MIT
metadata:
hermes:
tags: [Honcho, Memory, Profiles, Observation, Dialectic, User-Modeling]
tags: [Honcho, Memory, Profiles, Observation, Dialectic, User-Modeling, Session-Summary]
homepage: https://docs.honcho.dev
related_skills: [hermes-agent]
prerequisites:
@ -22,8 +22,9 @@ Honcho provides AI-native cross-session user modeling. It learns who the user is
- Setting up Honcho (cloud or self-hosted)
- Troubleshooting memory not working / peers not syncing
- Creating multi-profile setups where each agent has its own Honcho peer
- Tuning observation, recall, or write frequency settings
- Understanding what the 4 Honcho tools do and when to use them
- Tuning observation, recall, dialectic depth, or write frequency settings
- Understanding what the 5 Honcho tools do and when to use them
- Configuring context budgets and session summary injection
## Setup
@ -51,6 +52,27 @@ hermes honcho status # shows resolved config, connection test, peer info
## Architecture
### Base Context Injection
When Honcho injects context into the system prompt (in `hybrid` or `context` recall modes), it assembles the base context block in this order:
1. **Session summary** -- a short digest of the current session so far (placed first so the model has immediate conversational continuity)
2. **User representation** -- Honcho's accumulated model of the user (preferences, facts, patterns)
3. **AI peer card** -- the identity card for this Hermes profile's AI peer
The session summary is generated automatically by Honcho at the start of each turn (when a prior session exists). It gives the model a warm start without replaying full history.
### Cold / Warm Prompt Selection
Honcho automatically selects between two prompt strategies:
| Condition | Strategy | What happens |
|-----------|----------|--------------|
| No prior session or empty representation | **Cold start** | Lightweight intro prompt; skips summary injection; encourages the model to learn about the user |
| Existing representation and/or session history | **Warm start** | Full base context injection (summary → representation → card); richer system prompt |
You do not need to configure this -- it is automatic based on session state.
### Peers
Honcho models conversations as interactions between **peers**. Hermes creates two peers per session:
@ -112,6 +134,63 @@ How the agent accesses Honcho memory:
| `context` | Yes | No (hidden) | Minimal token cost, no tool calls |
| `tools` | No | Yes | Agent controls all memory access explicitly |
## Three Orthogonal Knobs
Honcho's dialectic behavior is controlled by three independent dimensions. Each can be tuned without affecting the others:
### Cadence (when)
Controls **how often** dialectic and context calls happen.
| Key | Default | Description |
|-----|---------|-------------|
| `contextCadence` | `1` | Min turns between context API calls |
| `dialecticCadence` | `3` | Min turns between dialectic API calls |
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
### Depth (how many)
Controls **how many rounds** of dialectic reasoning Honcho performs per query.
| Key | Default | Range | Description |
|-----|---------|-------|-------------|
| `dialecticDepth` | `1` | 1-3 | Number of dialectic reasoning rounds per query |
| `dialecticDepthLevels` | -- | array | Optional per-depth-round level overrides (see below) |
`dialecticDepth: 2` means Honcho runs two rounds of dialectic synthesis. The first round produces an initial answer; the second refines it.
`dialecticDepthLevels` lets you set the reasoning level for each round independently:
```json
{
"dialecticDepth": 3,
"dialecticDepthLevels": ["low", "medium", "high"]
}
```
If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived from `dialecticReasoningLevel` (the base):
| Depth | Pass levels |
|-------|-------------|
| 1 | [base] |
| 2 | [minimal, base] |
| 3 | [minimal, base, low] |
This keeps earlier passes cheap while using full depth on the final synthesis.
### Level (how hard)
Controls the **intensity** of each dialectic reasoning round.
| Key | Default | Description |
|-----|---------|-------------|
| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` |
| `dialecticDynamic` | `true` | When `true`, the model can pass `reasoning_level` to `honcho_reasoning` to override the default per-call. `false` = always use `dialecticReasoningLevel`, model overrides ignored |
Higher levels produce richer synthesis but cost more tokens on Honcho's backend.
## Multi-Profile Setup
Each Hermes profile gets its own Honcho AI peer while sharing the same workspace (user context). This means:
@ -149,6 +228,7 @@ Override any setting in the host block:
"hermes.coder": {
"aiPeer": "coder",
"recallMode": "tools",
"dialecticDepth": 2,
"observation": {
"user": { "observeMe": true, "observeOthers": false },
"ai": { "observeMe": true, "observeOthers": true }
@ -160,19 +240,97 @@ Override any setting in the host block:
## Tools
The agent has 4 Honcho tools (hidden in `context` recall mode):
The agent has 5 bidirectional Honcho tools (hidden in `context` recall mode):
| Tool | LLM call? | Cost | Use when |
|------|-----------|------|----------|
| `honcho_profile` | No | minimal | Quick factual snapshot at conversation start or for fast name/role/pref lookups |
| `honcho_search` | No | low | Fetch specific past facts to reason over yourself — raw excerpts, no synthesis |
| `honcho_context` | No | low | Full session context snapshot: summary, representation, card, recent messages |
| `honcho_reasoning` | Yes | medium–high | Natural language question synthesized by Honcho's dialectic engine |
| `honcho_conclude` | No | minimal | Write or delete a persistent fact; pass `peer: "ai"` for AI self-knowledge |
### `honcho_profile`
Quick factual snapshot of the user -- name, role, preferences, patterns. No LLM call, minimal cost. Use at conversation start or for fast lookups.
Read or update a peer card — curated key facts (name, role, preferences, communication style). Pass `card: [...]` to update; omit to read. No LLM call.
### `honcho_search`
Semantic search over stored context. Returns raw excerpts ranked by relevance, no LLM synthesis. Default 800 tokens, max 2000. Use when you want specific past facts to reason over yourself.
Semantic search over stored context for a specific peer. Returns raw excerpts ranked by relevance, no synthesis. Default 800 tokens, max 2000. Good when you need specific past facts to reason over yourself rather than a synthesized answer.
### `honcho_context`
Natural language question answered by Honcho's dialectic reasoning (LLM call on Honcho's backend). Higher cost, higher quality. Can query about user (default) or the AI peer.
Full session context snapshot from Honcho — session summary, peer representation, peer card, and recent messages. No LLM call. Use when you want to see everything Honcho knows about the current session and peer in one shot.
### `honcho_reasoning`
Natural language question answered by Honcho's dialectic reasoning engine (LLM call on Honcho's backend). Higher cost, higher quality. Pass `reasoning_level` to control depth: `minimal` (fast/cheap) → `low` → `medium` → `high` → `max` (thorough). Omit to use the configured default (`low`). Use for synthesized understanding of the user's patterns, goals, or current state.
### `honcho_conclude`
Write a persistent fact about the user. Conclusions build the user's profile over time. Use when the user states a preference, corrects you, or shares something to remember.
Write or delete a persistent conclusion about a peer. Pass `conclusion: "..."` to create. Pass `delete_id: "..."` to remove a conclusion (for PII removal — Honcho self-heals incorrect conclusions over time, so deletion is only needed for PII). You MUST pass exactly one of the two.
### Bidirectional peer targeting
All 5 tools accept an optional `peer` parameter:
- `peer: "user"` (default) — operates on the user peer
- `peer: "ai"` — operates on this profile's AI peer
- `peer: "<explicit-id>"` — any peer ID in the workspace
Examples:
```
honcho_profile # read user's card
honcho_profile peer="ai" # read AI peer's card
honcho_reasoning query="What does this user care about most?"
honcho_reasoning query="What are my interaction patterns?" peer="ai" reasoning_level="medium"
honcho_conclude conclusion="Prefers terse answers"
honcho_conclude conclusion="I tend to over-explain code" peer="ai"
honcho_conclude delete_id="abc123" # PII removal
```
## Agent Usage Patterns
Guidelines for Hermes when Honcho memory is active.
### On conversation start
```
1. honcho_profile → fast warmup, no LLM cost
2. If context looks thin → honcho_context (full snapshot, still no LLM)
3. If deep synthesis needed → honcho_reasoning (LLM call, use sparingly)
```
Do NOT call `honcho_reasoning` on every turn. Auto-injection already handles ongoing context refresh. Use the reasoning tool only when you genuinely need synthesized insight the base context doesn't provide.
### When the user shares something to remember
```
honcho_conclude conclusion="<specific, actionable fact>"
```
Good conclusions: "Prefers code examples over prose explanations", "Working on a Rust async project through April 2026"
Bad conclusions: "User said something about Rust" (too vague), "User seems technical" (already in representation)
### When the user asks about past context / you need to recall specifics
```
honcho_search query="<topic>" → fast, no LLM, good for specific facts
honcho_context → full snapshot with summary + messages
honcho_reasoning query="<question>" → synthesized answer, use when search isn't enough
```
### When to use `peer: "ai"`
Use AI peer targeting to build and query the agent's own self-knowledge:
- `honcho_conclude conclusion="I tend to be verbose when explaining architecture" peer="ai"` — self-correction
- `honcho_reasoning query="How do I typically handle ambiguous requests?" peer="ai"` — self-audit
- `honcho_profile peer="ai"` — review own identity card
### When NOT to call tools
In `hybrid` and `context` modes, base context (user representation + card + session summary) is auto-injected before every turn. Do not re-fetch what was already injected. Call tools only when:
- You need something the injected context doesn't have
- The user explicitly asks you to recall or check memory
- You're writing a conclusion about something new
### Cadence awareness
`honcho_reasoning` on the tool side shares the same cost as auto-injection dialectic. After an explicit tool call, the auto-injection cadence resets — avoiding double-charging the same turn.
## Config Reference
@ -191,18 +349,39 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
| `observation` | all on | Per-peer `observeMe`/`observeOthers` booleans |
| `writeFrequency` | `async` | `async`, `turn`, `session`, or integer N |
| `sessionStrategy` | `per-directory` | `per-directory`, `per-repo`, `per-session`, `global` |
| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` |
| `dialecticDynamic` | `true` | Auto-bump reasoning by query length. `false` = fixed level |
| `messageMaxChars` | `25000` | Max chars per message (chunked if exceeded) |
| `dialecticMaxInputChars` | `10000` | Max chars for dialectic query input |
### Cost-awareness (advanced, root config only)
### Dialectic settings
| Key | Default | Description |
|-----|---------|-------------|
| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` |
| `dialecticDynamic` | `true` | Auto-bump reasoning by query complexity. `false` = fixed level |
| `dialecticDepth` | `1` | Number of dialectic rounds per query (1-3) |
| `dialecticDepthLevels` | -- | Optional array of per-round levels, e.g. `["low", "high"]` |
| `dialecticMaxInputChars` | `10000` | Max chars for dialectic query input |
### Context budget and injection
| Key | Default | Description |
|-----|---------|-------------|
| `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
| `contextCadence` | `1` | Min turns between context API calls |
| `dialecticCadence` | `1` | Min turns between dialectic API calls |
| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
### Memory-context sanitization
Honcho sanitizes the `memory-context` block before injection to prevent prompt injection and malformed content:
- Strips XML/HTML tags from user-authored conclusions
- Normalizes whitespace and control characters
- Truncates individual conclusions that exceed `messageMaxChars`
- Escapes delimiter sequences that could break the system prompt structure
This fix addresses edge cases where raw user conclusions containing markup or special characters could corrupt the injected context block.
## Troubleshooting
@ -221,6 +400,12 @@ Observation config is synced from the server on each session init. Start a new s
### Messages truncated
Messages over `messageMaxChars` (default 25k) are automatically chunked with `[continued]` markers. If you're hitting this often, check if tool results or skill content is inflating message size.
### Context injection too large
If you see warnings about context budget exceeded, lower `contextTokens` or reduce `dialecticDepth`. The session summary is trimmed first when the budget is tight.
### Session summary missing
Session summary requires at least one prior turn in the current Honcho session. On cold start (new session, no history), the summary is omitted and Honcho uses the cold-start prompt strategy instead.
## CLI Commands
| Command | Description |

View file

@ -0,0 +1,94 @@
/**
 * Example Dashboard Plugin
 *
 * Demonstrates how to build a dashboard plugin using the Hermes Plugin SDK.
 * No build step needed — this is a plain IIFE that uses globals from the SDK.
 */
(function () {
  "use strict";

  // The SDK global is injected by the dashboard before plugin scripts run;
  // plugins reuse its React instance instead of bundling their own.
  const SDK = window.__HERMES_PLUGIN_SDK__;
  const { React } = SDK;
  const { Card, CardHeader, CardTitle, CardContent, Badge, Button } = SDK.components;
  const { useState, useEffect } = SDK.hooks;
  const { cn } = SDK.utils;

  // Root component rendered in this plugin's dashboard tab.
  function ExamplePage() {
    const [greeting, setGreeting] = useState(null);  // backend message, or null before first fetch
    const [loading, setLoading] = useState(false);   // true while the request is in flight

    // Calls this plugin's own backend route (mounted by the plugin system).
    // Falls back to a placeholder string if the backend is unreachable.
    function fetchGreeting() {
      setLoading(true);
      SDK.fetchJSON("/api/plugins/example/hello")
        .then(function (data) { setGreeting(data.message); })
        .catch(function () { setGreeting("(backend not available)"); })
        .finally(function () { setLoading(false); });
    }

    return React.createElement("div", { className: "flex flex-col gap-6" },
      // Header card
      React.createElement(Card, null,
        React.createElement(CardHeader, null,
          React.createElement("div", { className: "flex items-center gap-3" },
            React.createElement(CardTitle, { className: "text-lg" }, "Example Plugin"),
            React.createElement(Badge, { variant: "outline" }, "v1.0.0"),
          ),
        ),
        React.createElement(CardContent, { className: "flex flex-col gap-4" },
          React.createElement("p", { className: "text-sm text-muted-foreground" },
            "This is an example dashboard plugin. It demonstrates using the Plugin SDK to build ",
            "custom tabs with React components, connect to backend API routes, and integrate with ",
            "the existing Hermes UI system.",
          ),
          React.createElement("div", { className: "flex items-center gap-3" },
            React.createElement(Button, {
              onClick: fetchGreeting,
              disabled: loading,
              className: cn(
                "inline-flex items-center gap-2 border border-border bg-background/40 px-4 py-2",
                "text-sm font-courier transition-colors hover:bg-foreground/10 cursor-pointer",
              ),
            }, loading ? "Loading..." : "Call Backend API"),
            // Only rendered once a greeting (or fallback) has been set.
            greeting && React.createElement("span", {
              className: "text-sm font-courier text-muted-foreground",
            }, greeting),
          ),
        ),
      ),

      // Info card about the SDK
      React.createElement(Card, null,
        React.createElement(CardHeader, null,
          React.createElement(CardTitle, { className: "text-base" }, "Plugin SDK Reference"),
        ),
        React.createElement(CardContent, null,
          React.createElement("div", { className: "grid gap-3 text-sm" },
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.React"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "React instance — use instead of importing react"),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.hooks"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "useState, useEffect, useCallback, useMemo, useRef, useContext, createContext"),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.components"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "Card, Badge, Button, Input, Label, Select, Separator, Tabs, etc."),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.api"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "Hermes API client — getStatus(), getSessions(), etc."),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.utils"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "cn(), timeAgo(), isoTimeAgo()"),
            ),
          ),
        ),
      ),
    );
  }

  // Register this plugin — the dashboard picks it up automatically.
  window.__HERMES_PLUGINS__.register("example", ExamplePage);
})();

View file

@ -0,0 +1,13 @@
{
"name": "example",
"label": "Example",
"description": "Example dashboard plugin — demonstrates the plugin SDK",
"icon": "Sparkles",
"version": "1.0.0",
"tab": {
"path": "/example",
"position": "after:skills"
},
"entry": "dist/index.js",
"api": "plugin_api.py"
}

View file

@ -0,0 +1,14 @@
"""Example dashboard plugin — backend API routes.
Mounted at /api/plugins/example/ by the dashboard plugin system.
"""
from fastapi import APIRouter
router = APIRouter()
@router.get("/hello")
async def hello():
"""Simple greeting endpoint to demonstrate plugin API routes."""
return {"message": "Hello from the example plugin!", "plugin": "example", "version": "1.0.0"}

View file

@ -1,6 +1,6 @@
# Honcho Memory Provider
AI-native cross-session user modeling with dialectic Q&A, semantic search, peer cards, and persistent conclusions.
AI-native cross-session user modeling with multi-pass dialectic reasoning, session summaries, bidirectional peer tools, and persistent conclusions.
> **Honcho docs:** <https://docs.honcho.dev/v3/guides/integrations/hermes>
@ -19,9 +19,86 @@ hermes memory setup # generic picker, also works
Or manually:
```bash
hermes config set memory.provider honcho
echo "HONCHO_API_KEY=your-key" >> ~/.hermes/.env
echo "HONCHO_API_KEY=***" >> ~/.hermes/.env
```
## Architecture Overview
### Two-Layer Context Injection
Context is injected into the **user message** at API-call time (not the system prompt) to preserve prompt caching. Only a static mode header goes in the system prompt. The injected block is wrapped in `<memory-context>` fences with a system note clarifying it's background data, not new user input.
Two independent layers, each on its own cadence:
**Layer 1 — Base context** (refreshed every `contextCadence` turns):
1. **SESSION SUMMARY** — from `session.context(summary=True)`, placed first
2. **User Representation** — Honcho's evolving model of the user
3. **User Peer Card** — key facts snapshot
4. **AI Self-Representation** — Honcho's model of the AI peer
5. **AI Identity Card** — AI peer facts
**Layer 2 — Dialectic supplement** (fired every `dialecticCadence` turns):
Multi-pass `.chat()` reasoning about the user, appended after base context.
Both layers are joined, then truncated to fit `contextTokens` budget via `_truncate_to_budget` (tokens × 4 chars, word-boundary safe).
### Cold Start vs Warm Session Prompts
Dialectic pass 0 automatically selects its prompt based on session state:
- **Cold** (no base context cached): "Who is this person? What are their preferences, goals, and working style? Focus on facts that would help an AI assistant be immediately useful."
- **Warm** (base context exists): "Given what's been discussed in this session so far, what context about this user is most relevant to the current conversation? Prioritize active context over biographical facts."
Not configurable — determined automatically.
### Dialectic Depth (Multi-Pass Reasoning)
`dialecticDepth` (1–3, clamped) controls how many `.chat()` calls fire per dialectic cycle:
| Depth | Passes | Behavior |
|-------|--------|----------|
| 1 | single `.chat()` | Base query only (cold or warm prompt) |
| 2 | audit + synthesis | Pass 0 result is self-audited; pass 1 does targeted synthesis. Conditional bail-out if pass 0 returns strong signal (>300 chars or structured with bullets/sections >100 chars) |
| 3 | audit + synthesis + reconciliation | Pass 2 reconciles contradictions across prior passes into a final synthesis |
### Proportional Reasoning Levels
When `dialecticDepthLevels` is not set, each pass uses a proportional level relative to `dialecticReasoningLevel` (the "base"):
| Depth | Pass levels |
|-------|-------------|
| 1 | [base] |
| 2 | [minimal, base] |
| 3 | [minimal, base, low] |
Override with `dialecticDepthLevels`: an explicit array of reasoning level strings per pass.
### Three Orthogonal Dialectic Knobs
| Knob | Controls | Type |
|------|----------|------|
| `dialecticCadence` | How often — minimum turns between dialectic firings | int |
| `dialecticDepth` | How many — passes per firing (1–3) | int |
| `dialecticReasoningLevel` | How hard — reasoning ceiling per `.chat()` call | string |
### Input Sanitization
`run_conversation` strips leaked `<memory-context>` blocks from user input before processing. When `saveMessages` persists a turn that included injected context, the block can reappear in subsequent turns via message history. The sanitizer removes `<memory-context>` blocks plus associated system notes.
## Tools
Five bidirectional tools. All accept an optional `peer` parameter (`"user"` or `"ai"`, default `"user"`).
| Tool | LLM call? | Description |
|------|-----------|-------------|
| `honcho_profile` | No | Peer card — key facts snapshot |
| `honcho_search` | No | Semantic search over stored context (800 tok default, 2000 max) |
| `honcho_context` | No | Full session context: summary, representation, card, messages |
| `honcho_reasoning` | Yes | LLM-synthesized answer via dialectic `.chat()` |
| `honcho_conclude` | No | Write a persistent fact/conclusion about the user |
Tool visibility depends on `recallMode`: hidden in `context` mode, always present in `tools` and `hybrid`.
## Config Resolution
Config is read from the first file that exists:
@ -34,42 +111,128 @@ Config is read from the first file that exists:
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>`.
## Tools
| Tool | LLM call? | Description |
|------|-----------|-------------|
| `honcho_profile` | No | User's peer card -- key facts snapshot |
| `honcho_search` | No | Semantic search over stored context (800 tok default, 2000 max) |
| `honcho_context` | Yes | LLM-synthesized answer via dialectic reasoning |
| `honcho_conclude` | No | Write a persistent fact about the user |
Tool availability depends on `recallMode`: hidden in `context` mode, always present in `tools` and `hybrid`.
For every key, resolution order is: **host block > root > env var > default**.
## Full Configuration Reference
### Identity & Connection
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `apiKey` | string | -- | root / host | API key. Falls back to `HONCHO_API_KEY` env var |
| `baseUrl` | string | -- | root | Base URL for self-hosted Honcho. Local URLs (`localhost`, `127.0.0.1`, `::1`) auto-skip API key auth |
| `environment` | string | `"production"` | root / host | SDK environment mapping |
| `enabled` | bool | auto | root / host | Master toggle. Auto-enables when `apiKey` or `baseUrl` present |
| `workspace` | string | host key | root / host | Honcho workspace ID |
| `peerName` | string | -- | root / host | User peer identity |
| `aiPeer` | string | host key | root / host | AI peer identity |
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `apiKey` | string | — | API key. Falls back to `HONCHO_API_KEY` env var |
| `baseUrl` | string | — | Base URL for self-hosted Honcho. Local URLs auto-skip API key auth |
| `environment` | string | `"production"` | SDK environment mapping |
| `enabled` | bool | auto | Master toggle. Auto-enables when `apiKey` or `baseUrl` present |
| `workspace` | string | host key | Honcho workspace ID. Shared environment — all profiles in the same workspace can see the same user identity and related memories |
| `peerName` | string | — | User peer identity |
| `aiPeer` | string | host key | AI peer identity |
### Memory & Recall
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `recallMode` | string | `"hybrid"` | root / host | `"hybrid"` (auto-inject + tools), `"context"` (auto-inject only, tools hidden), `"tools"` (tools only, no injection). Legacy `"auto"` normalizes to `"hybrid"` |
| `observationMode` | string | `"directional"` | root / host | Shorthand preset: `"directional"` (all on) or `"unified"` (shared pool). Use `observation` object for granular control |
| `observation` | object | -- | root / host | Per-peer observation config (see below) |
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `recallMode` | string | `"hybrid"` | `"hybrid"` (auto-inject + tools), `"context"` (auto-inject only, tools hidden), `"tools"` (tools only, no injection). Legacy `"auto"` → `"hybrid"` |
| `observationMode` | string | `"directional"` | Preset: `"directional"` (all on) or `"unified"` (shared pool). Use `observation` object for granular control |
| `observation` | object | — | Per-peer observation config (see Observation section) |
#### Observation (granular)
### Write Behavior
Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. Set at root or per host block -- each profile can have different observation settings. When present, overrides `observationMode` preset.
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `writeFrequency` | string/int | `"async"` | `"async"` (background), `"turn"` (sync per turn), `"session"` (batch on end), or integer N (every N turns) |
| `saveMessages` | bool | `true` | Persist messages to Honcho API |
### Session Resolution
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `sessionStrategy` | string | `"per-directory"` | `"per-directory"`, `"per-session"`, `"per-repo"` (git root), `"global"` |
| `sessionPeerPrefix` | bool | `false` | Prepend peer name to session keys |
| `sessions` | object | `{}` | Manual directory-to-session-name mappings |
#### Session Name Resolution
The Honcho session name determines which conversation bucket memory lands in. Resolution follows a priority chain — first match wins:
| Priority | Source | Example session name |
|----------|--------|---------------------|
| 1 | Manual map (`sessions` config) | `"myproject-main"` |
| 2 | `/title` command (mid-session rename) | `"refactor-auth"` |
| 3 | Gateway session key (Telegram, Discord, etc.) | `"agent-main-telegram-dm-8439114563"` |
| 4 | `per-session` strategy | Hermes session ID (`20260415_a3f2b1`) |
| 5 | `per-repo` strategy | Git root directory name (`hermes-agent`) |
| 6 | `per-directory` strategy | Current directory basename (`src`) |
| 7 | `global` strategy | Workspace name (`hermes`) |
Gateway platforms always resolve via priority 3 (per-chat isolation) regardless of `sessionStrategy`. The strategy setting only affects CLI sessions.
If `sessionPeerPrefix` is `true`, the peer name is prepended: `eri-hermes-agent`.
#### What each strategy produces
- **`per-directory`** — basename of `$PWD`. Opening hermes in `~/code/myapp` and `~/code/other` gives two separate sessions. Same directory = same session across runs.
- **`per-repo`** — git root directory name. All subdirectories within a repo share one session. Falls back to `per-directory` if not inside a git repo.
- **`per-session`** — Hermes session ID (timestamp + hex). Every `hermes` invocation starts a fresh Honcho session. Falls back to `per-directory` if no session ID is available.
- **`global`** — workspace name. One session for everything. Memory accumulates across all directories and runs.
### Multi-Profile Pattern
Multiple Hermes profiles can share one workspace while maintaining separate AI identities. Config resolution is **host block > root > env var > default** — host blocks inherit from root, so shared settings only need to be declared once:
```json
{
"apiKey": "***",
"workspace": "hermes",
"peerName": "yourname",
"hosts": {
"hermes": {
"aiPeer": "hermes",
"recallMode": "hybrid",
"sessionStrategy": "per-directory"
},
"hermes.coder": {
"aiPeer": "coder",
"recallMode": "tools",
"sessionStrategy": "per-repo"
}
}
}
```
Both profiles see the same user (`yourname`) in the same shared environment (`hermes`), but each AI peer builds its own observations, conclusions, and behavior patterns. The coder's memory stays code-oriented; the main agent's stays broad.
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>` (e.g. `hermes -p coder` → host key `hermes.coder`).
### Dialectic & Reasoning
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `dialecticDepth` | int | `1` | Passes per dialectic cycle (1–3, clamped). 1=single query, 2=audit+synthesis, 3=audit+synthesis+reconciliation |
| `dialecticDepthLevels` | array | — | Optional array of reasoning level strings per pass. Overrides proportional defaults. Example: `["minimal", "low", "medium"]` |
| `dialecticReasoningLevel` | string | `"low"` | Base reasoning level for `.chat()`: `"minimal"`, `"low"`, `"medium"`, `"high"`, `"max"` |
| `dialecticDynamic` | bool | `true` | When `true`, model can override reasoning level per-call via `honcho_reasoning` tool. When `false`, always uses `dialecticReasoningLevel` |
| `dialecticMaxChars` | int | `600` | Max chars of dialectic result injected into system prompt |
| `dialecticMaxInputChars` | int | `10000` | Max chars for dialectic query input to `.chat()`. Honcho cloud limit: 10k |
### Token Budgets
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `contextTokens` | int | SDK default | Token budget for `context()` API calls. Also gates prefetch truncation (tokens × 4 chars) |
| `messageMaxChars` | int | `25000` | Max chars per message sent via `add_messages()`. Exceeding this triggers chunking with `[continued]` markers. Honcho cloud limit: 25k |
### Cadence (Cost Control)
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `contextCadence` | int | `1` | Minimum turns between base context refreshes (session summary + representation + card) |
| `dialecticCadence` | int | `1` | Minimum turns between dialectic `.chat()` firings |
| `injectionFrequency` | string | `"every-turn"` | `"every-turn"` or `"first-turn"` (inject context on the first user message only, skip from turn 2 onward) |
| `reasoningLevelCap` | string | — | Hard cap on reasoning level: `"minimal"`, `"low"`, `"medium"`, `"high"` |
### Observation (Granular)
Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. When present, overrides `observationMode` preset.
```json
"observation": {
@ -85,74 +248,16 @@ Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. Set at root or per host block
| `ai.observeMe` | `true` | AI peer self-observation (Honcho builds AI representation) |
| `ai.observeOthers` | `true` | AI peer observes user messages (enables cross-peer dialectic) |
Presets for `observationMode`:
- `"directional"` (default): all four booleans `true`
Presets:
- `"directional"` (default): all four `true`
- `"unified"`: user `observeMe=true`, AI `observeOthers=true`, rest `false`
Per-profile example -- coder profile observes the user but user doesn't observe coder:
### Hardcoded Limits
```json
"hosts": {
"hermes.coder": {
"observation": {
"user": { "observeMe": true, "observeOthers": false },
"ai": { "observeMe": true, "observeOthers": true }
}
}
}
```
Settings changed in the [Honcho dashboard](https://app.honcho.dev) are synced back on session init.
### Write Behavior
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `writeFrequency` | string or int | `"async"` | root / host | `"async"` (background thread), `"turn"` (sync per turn), `"session"` (batch on end), or integer N (every N turns) |
| `saveMessages` | bool | `true` | root / host | Whether to persist messages to Honcho API |
### Session Resolution
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `sessionStrategy` | string | `"per-directory"` | root / host | `"per-directory"`, `"per-session"` (new each run), `"per-repo"` (git root name), `"global"` (single session) |
| `sessionPeerPrefix` | bool | `false` | root / host | Prepend peer name to session keys |
| `sessions` | object | `{}` | root | Manual directory-to-session-name mappings: `{"/path/to/project": "my-session"}` |
### Token Budgets & Dialectic
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `contextTokens` | int | SDK default | root / host | Token budget for `context()` API calls. Also gates prefetch truncation (tokens x 4 chars) |
| `dialecticReasoningLevel` | string | `"low"` | root / host | Base reasoning level for `peer.chat()`: `"minimal"`, `"low"`, `"medium"`, `"high"`, `"max"` |
| `dialecticDynamic` | bool | `true` | root / host | Auto-bump reasoning based on query length: `<120` chars = base level, `120-400` = +1, `>400` = +2 (capped at `"high"`). Set `false` to always use `dialecticReasoningLevel` as-is |
| `dialecticMaxChars` | int | `600` | root / host | Max chars of dialectic result injected into system prompt |
| `dialecticMaxInputChars` | int | `10000` | root / host | Max chars for dialectic query input to `peer.chat()`. Honcho cloud limit: 10k |
| `messageMaxChars` | int | `25000` | root / host | Max chars per message sent via `add_messages()`. Messages exceeding this are chunked with `[continued]` markers. Honcho cloud limit: 25k |
### Cost Awareness (Advanced)
These are read from the root config object, not the host block. Must be set manually in `honcho.json`.
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `injectionFrequency` | string | `"every-turn"` | `"every-turn"` or `"first-turn"` (inject context only on turn 0) |
| `contextCadence` | int | `1` | Minimum turns between `context()` API calls |
| `dialecticCadence` | int | `1` | Minimum turns between `peer.chat()` API calls |
| `reasoningLevelCap` | string | -- | Hard cap on auto-bumped reasoning: `"minimal"`, `"low"`, `"mid"`, `"high"` |
### Hardcoded Limits (Not Configurable)
| Limit | Value | Location |
|-------|-------|----------|
| Search tool max tokens | 2000 (hard cap), 800 (default) | `__init__.py` handle_tool_call |
| Peer card fetch tokens | 200 | `session.py` get_peer_card |
## Config Precedence
For every key, resolution order is: **host block > root > env var > default**.
Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.<profile>`) > `"hermes"`.
| Limit | Value |
|-------|-------|
| Search tool max tokens | 2000 (hard cap), 800 (default) |
| Peer card fetch tokens | 200 |
## Environment Variables
@ -182,15 +287,16 @@ Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.<profile
```json
{
"apiKey": "your-key",
"apiKey": "***",
"workspace": "hermes",
"peerName": "eri",
"peerName": "username",
"contextCadence": 2,
"dialecticCadence": 3,
"dialecticDepth": 2,
"hosts": {
"hermes": {
"enabled": true,
"aiPeer": "hermes",
"workspace": "hermes",
"peerName": "eri",
"recallMode": "hybrid",
"observation": {
"user": { "observeMe": true, "observeOthers": true },
@ -199,14 +305,16 @@ Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.<profile
"writeFrequency": "async",
"sessionStrategy": "per-directory",
"dialecticReasoningLevel": "low",
"dialecticDepth": 2,
"dialecticMaxChars": 600,
"saveMessages": true
},
"hermes.coder": {
"enabled": true,
"aiPeer": "coder",
"workspace": "hermes",
"peerName": "eri",
"sessionStrategy": "per-repo",
"dialecticDepth": 1,
"dialecticDepthLevels": ["low"],
"observation": {
"user": { "observeMe": true, "observeOthers": false },
"ai": { "observeMe": true, "observeOthers": true }

View file

@ -17,6 +17,7 @@ from __future__ import annotations
import json
import logging
import re
import threading
from typing import Any, Dict, List, Optional
@ -33,20 +34,33 @@ logger = logging.getLogger(__name__)
PROFILE_SCHEMA = {
"name": "honcho_profile",
"description": (
"Retrieve the user's peer card from Honcho — a curated list of key facts "
"about them (name, role, preferences, communication style, patterns). "
"Fast, no LLM reasoning, minimal cost. "
"Use this at conversation start or when you need a quick factual snapshot."
"Retrieve or update a peer card from Honcho — a curated list of key facts "
"about that peer (name, role, preferences, communication style, patterns). "
"Pass `card` to update; omit `card` to read."
),
"parameters": {"type": "object", "properties": {}, "required": []},
"parameters": {
"type": "object",
"properties": {
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
"card": {
"type": "array",
"items": {"type": "string"},
"description": "New peer card as a list of fact strings. Omit to read the current card.",
},
},
"required": [],
},
}
SEARCH_SCHEMA = {
"name": "honcho_search",
"description": (
"Semantic search over Honcho's stored context about the user. "
"Semantic search over Honcho's stored context about a peer. "
"Returns raw excerpts ranked by relevance — no LLM synthesis. "
"Cheaper and faster than honcho_context. "
"Cheaper and faster than honcho_reasoning. "
"Good when you want to find specific past facts and reason over them yourself."
),
"parameters": {
@ -60,17 +74,23 @@ SEARCH_SCHEMA = {
"type": "integer",
"description": "Token budget for returned context (default 800, max 2000).",
},
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": ["query"],
},
}
CONTEXT_SCHEMA = {
"name": "honcho_context",
REASONING_SCHEMA = {
"name": "honcho_reasoning",
"description": (
"Ask Honcho a natural language question and get a synthesized answer. "
"Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. "
"Can query about any peer: the user (default) or the AI assistant."
"Can query about any peer via alias or explicit peer ID. "
"Pass reasoning_level to control depth: minimal (fast/cheap), low (default), "
"medium, high, max (deep/expensive). Omit for configured default."
),
"parameters": {
"type": "object",
@ -79,37 +99,84 @@ CONTEXT_SCHEMA = {
"type": "string",
"description": "A natural language question.",
},
"reasoning_level": {
"type": "string",
"description": (
"Override the default reasoning depth. "
"Omit to use the configured default (typically low). "
"Guide:\n"
"- minimal: quick factual lookups (name, role, simple preference)\n"
"- low: straightforward questions with clear answers\n"
"- medium: multi-aspect questions requiring synthesis across observations\n"
"- high: complex behavioral patterns, contradictions, deep analysis\n"
"- max: thorough audit-level analysis, leave no stone unturned"
),
"enum": ["minimal", "low", "medium", "high", "max"],
},
"peer": {
"type": "string",
"description": "Which peer to query about: 'user' (default) or 'ai'.",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": ["query"],
},
}
CONTEXT_SCHEMA = {
"name": "honcho_context",
"description": (
"Retrieve full session context from Honcho — summary, peer representation, "
"peer card, and recent messages. No LLM synthesis. "
"Cheaper than honcho_reasoning. Use this to see what Honcho knows about "
"the current conversation and the specified peer."
),
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Optional focus query to filter context. Omit for full session context snapshot.",
},
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": [],
},
}
CONCLUDE_SCHEMA = {
"name": "honcho_conclude",
"description": (
"Write a conclusion about the user back to Honcho's memory. "
"Conclusions are persistent facts that build the user's profile. "
"Use when the user states a preference, corrects you, or shares "
"something to remember across sessions."
"Write or delete a conclusion about a peer in Honcho's memory. "
"Conclusions are persistent facts that build a peer's profile. "
"You MUST pass exactly one of: `conclusion` (to create) or `delete_id` (to delete). "
"Passing neither is an error. "
"Deletion is only for PII removal — Honcho self-heals incorrect conclusions over time."
),
"parameters": {
"type": "object",
"properties": {
"conclusion": {
"type": "string",
"description": "A factual statement about the user to persist.",
}
"description": "A factual statement to persist. Required when not using delete_id.",
},
"delete_id": {
"type": "string",
"description": "Conclusion ID to delete (for PII removal). Required when not using conclusion.",
},
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": ["conclusion"],
"required": [],
},
}
ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, REASONING_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
# ---------------------------------------------------------------------------
@ -131,16 +198,18 @@ class HonchoMemoryProvider(MemoryProvider):
# B1: recall_mode — set during initialize from config
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
# B4: First-turn context baking
self._first_turn_context: Optional[str] = None
self._first_turn_lock = threading.Lock()
# Base context cache — refreshed on context_cadence, not frozen
self._base_context_cache: Optional[str] = None
self._base_context_lock = threading.Lock()
# B5: Cost-awareness turn counting and cadence
self._turn_count = 0
self._injection_frequency = "every-turn" # or "first-turn"
self._context_cadence = 1 # minimum turns between context API calls
self._dialectic_cadence = 1 # minimum turns between dialectic API calls
self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "mid", "high"
self._dialectic_cadence = 3 # minimum turns between dialectic API calls
self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3)
self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels
self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high"
self._last_context_turn = -999
self._last_dialectic_turn = -999
@ -236,9 +305,11 @@ class HonchoMemoryProvider(MemoryProvider):
raw = cfg.raw or {}
self._injection_frequency = raw.get("injectionFrequency", "every-turn")
self._context_cadence = int(raw.get("contextCadence", 1))
self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
self._dialectic_cadence = int(raw.get("dialecticCadence", 3))
self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
self._dialectic_depth_levels = cfg.dialectic_depth_levels
cap = raw.get("reasoningLevelCap")
if cap and cap in ("minimal", "low", "mid", "high"):
if cap and cap in ("minimal", "low", "medium", "high"):
self._reasoning_level_cap = cap
except Exception as e:
logger.debug("Honcho cost-awareness config parse error: %s", e)
@ -251,9 +322,7 @@ class HonchoMemoryProvider(MemoryProvider):
# ----- Port #1957: lazy session init for tools-only mode -----
if self._recall_mode == "tools":
if cfg.init_on_session_start:
# Eager init: create session now so sync_turn() works from turn 1.
# Does NOT enable auto-injection — prefetch() still returns empty.
logger.debug("Honcho tools-only mode — eager session init (initOnSessionStart=true)")
# Eager init even in tools mode (opt-in)
self._do_session_init(cfg, session_id, **kwargs)
return
# Defer actual session creation until first tool call
@ -287,8 +356,13 @@ class HonchoMemoryProvider(MemoryProvider):
# ----- B3: resolve_session_name -----
session_title = kwargs.get("session_title")
gateway_session_key = kwargs.get("gateway_session_key")
self._session_key = (
cfg.resolve_session_name(session_title=session_title, session_id=session_id)
cfg.resolve_session_name(
session_title=session_title,
session_id=session_id,
gateway_session_key=gateway_session_key,
)
or session_id
or "hermes-default"
)
@ -299,12 +373,21 @@ class HonchoMemoryProvider(MemoryProvider):
self._session_initialized = True
# ----- B6: Memory file migration (one-time, for new sessions) -----
# Skip under per-session strategy: every Hermes run creates a fresh
# Honcho session by design, so uploading MEMORY.md/USER.md/SOUL.md to
# each one would flood the backend with short-lived duplicates instead
# of performing a one-time migration.
try:
if not session.messages:
if not session.messages and cfg.session_strategy != "per-session":
from hermes_constants import get_hermes_home
mem_dir = str(get_hermes_home() / "memories")
self._manager.migrate_memory_files(self._session_key, mem_dir)
logger.debug("Honcho memory file migration attempted for new session: %s", self._session_key)
elif cfg.session_strategy == "per-session":
logger.debug(
"Honcho memory file migration skipped: per-session strategy creates a fresh session per run (%s)",
self._session_key,
)
except Exception as e:
logger.debug("Honcho memory file migration skipped: %s", e)
@ -347,6 +430,11 @@ class HonchoMemoryProvider(MemoryProvider):
"""Format the prefetch context dict into a readable system prompt block."""
parts = []
# Session summary — session-scoped context, placed first for relevance
summary = ctx.get("summary", "")
if summary:
parts.append(f"## Session Summary\n{summary}")
rep = ctx.get("representation", "")
if rep:
parts.append(f"## User Representation\n{rep}")
@ -370,9 +458,9 @@ class HonchoMemoryProvider(MemoryProvider):
def system_prompt_block(self) -> str:
"""Return system prompt text, adapted by recall_mode.
B4: On the FIRST call, fetch and bake the full Honcho context
(user representation, peer card, AI representation, continuity synthesis).
Subsequent calls return the cached block for prompt caching stability.
Returns only the mode header and tool instructions static text
that doesn't change between turns (prompt-cache friendly).
Live context (representation, card) is injected via prefetch().
"""
if self._cron_skipped:
return ""
@ -382,24 +470,10 @@ class HonchoMemoryProvider(MemoryProvider):
return (
"# Honcho Memory\n"
"Active (tools-only mode). Use honcho_profile, honcho_search, "
"honcho_context, and honcho_conclude tools to access user memory."
"honcho_reasoning, honcho_context, and honcho_conclude tools to access user memory."
)
return ""
# ----- B4: First-turn context baking -----
first_turn_block = ""
if self._recall_mode in ("context", "hybrid"):
with self._first_turn_lock:
if self._first_turn_context is None:
# First call — fetch and cache
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._first_turn_context = self._format_first_turn_context(ctx) if ctx else ""
except Exception as e:
logger.debug("Honcho first-turn context fetch failed: %s", e)
self._first_turn_context = ""
first_turn_block = self._first_turn_context
# ----- B1: adapt text based on recall_mode -----
if self._recall_mode == "context":
header = (
@ -412,7 +486,8 @@ class HonchoMemoryProvider(MemoryProvider):
header = (
"# Honcho Memory\n"
"Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for synthesized answers, "
"honcho_search for raw excerpts, honcho_context for raw peer context, "
"honcho_reasoning for synthesized answers, "
"honcho_conclude to save facts about the user. "
"No automatic context injection — you must use tools to access memory."
)
@ -421,16 +496,19 @@ class HonchoMemoryProvider(MemoryProvider):
"# Honcho Memory\n"
"Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
"Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for synthesized answers, "
"honcho_search for raw excerpts, honcho_context for raw peer context, "
"honcho_reasoning for synthesized answers, "
"honcho_conclude to save facts about the user."
)
if first_turn_block:
return f"{header}\n\n{first_turn_block}"
return header
def prefetch(self, query: str, *, session_id: str = "") -> str:
"""Return prefetched dialectic context from background thread.
"""Return base context (representation + card) plus dialectic supplement.
Assembles two layers:
1. Base context from peer.context() cached, refreshed on context_cadence
2. Dialectic supplement cached, refreshed on dialectic_cadence
B1: Returns empty when recall_mode is "tools" (no injection).
B5: Respects injection_frequency "first-turn" returns cached/empty after turn 0.
@ -443,22 +521,95 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return ""
# B5: injection_frequency — if "first-turn" and past first turn, return empty
if self._injection_frequency == "first-turn" and self._turn_count > 0:
# B5: injection_frequency — if "first-turn" and past first turn, return empty.
# _turn_count is 1-indexed (first user message = 1), so > 1 means "past first".
if self._injection_frequency == "first-turn" and self._turn_count > 1:
return ""
parts = []
# ----- Layer 1: Base context (representation + card) -----
# On first call, fetch synchronously so turn 1 isn't empty.
# After that, serve from cache and refresh in background on cadence.
with self._base_context_lock:
if self._base_context_cache is None:
# First call — synchronous fetch
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._base_context_cache = self._format_first_turn_context(ctx) if ctx else ""
self._last_context_turn = self._turn_count
except Exception as e:
logger.debug("Honcho base context fetch failed: %s", e)
self._base_context_cache = ""
base_context = self._base_context_cache
# Check if background context prefetch has a fresher result
if self._manager:
fresh_ctx = self._manager.pop_context_result(self._session_key)
if fresh_ctx:
formatted = self._format_first_turn_context(fresh_ctx)
if formatted:
with self._base_context_lock:
self._base_context_cache = formatted
base_context = formatted
if base_context:
parts.append(base_context)
# ----- Layer 2: Dialectic supplement -----
# On the very first turn, no queue_prefetch() has run yet so the
# dialectic result is empty. Run with a bounded timeout so a slow
# Honcho connection doesn't block the first response indefinitely.
# On timeout the result is skipped and queue_prefetch() will pick it
# up at the next cadence-allowed turn.
if self._last_dialectic_turn == -999 and query:
_first_turn_timeout = (
self._config.timeout if self._config and self._config.timeout else 8.0
)
_result_holder: list[str] = []
def _run_first_turn() -> None:
try:
_result_holder.append(self._run_dialectic_depth(query))
except Exception as exc:
logger.debug("Honcho first-turn dialectic failed: %s", exc)
_t = threading.Thread(target=_run_first_turn, daemon=True)
_t.start()
_t.join(timeout=_first_turn_timeout)
if not _t.is_alive():
first_turn_dialectic = _result_holder[0] if _result_holder else ""
if first_turn_dialectic and first_turn_dialectic.strip():
with self._prefetch_lock:
self._prefetch_result = first_turn_dialectic
self._last_dialectic_turn = self._turn_count
else:
logger.debug(
"Honcho first-turn dialectic timed out (%.1fs) — "
"will inject at next cadence-allowed turn",
_first_turn_timeout,
)
# Don't update _last_dialectic_turn: queue_prefetch() will
# retry at the next cadence-allowed turn via the async path.
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock:
result = self._prefetch_result
dialectic_result = self._prefetch_result
self._prefetch_result = ""
if not result:
if dialectic_result and dialectic_result.strip():
parts.append(dialectic_result)
if not parts:
return ""
result = "\n\n".join(parts)
# ----- Port #3265: token budget enforcement -----
result = self._truncate_to_budget(result)
return f"## Honcho Context\n{result}"
return result
def _truncate_to_budget(self, text: str) -> str:
"""Truncate text to fit within context_tokens budget if set."""
@ -475,9 +626,11 @@ class HonchoMemoryProvider(MemoryProvider):
return truncated + ""
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
"""Fire a background dialectic query for the upcoming turn.
"""Fire background prefetch threads for the upcoming turn.
B5: Checks cadence before firing background threads.
B5: Checks cadence independently for dialectic and context refresh.
Context refresh updates the base layer (representation + card).
Dialectic fires the LLM reasoning supplement.
"""
if self._cron_skipped:
return
@ -488,6 +641,15 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return
# ----- Context refresh (base layer) — independent cadence -----
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic prefetch (supplement layer) -----
# B5: cadence check — skip if too soon since last dialectic call
if self._dialectic_cadence > 1:
if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
@ -499,9 +661,7 @@ class HonchoMemoryProvider(MemoryProvider):
def _run():
try:
result = self._manager.dialectic_query(
self._session_key, query, peer="user"
)
result = self._run_dialectic_depth(query)
if result and result.strip():
with self._prefetch_lock:
self._prefetch_result = result
@ -513,13 +673,140 @@ class HonchoMemoryProvider(MemoryProvider):
)
self._prefetch_thread.start()
# Also fire context prefetch if cadence allows
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic depth: multi-pass .chat() with cold/warm prompts -----
# Proportional reasoning levels per depth/pass when dialecticDepthLevels
# is not configured. The base level is dialecticReasoningLevel.
# Index: (depth, pass) → level relative to base; the sentinel value
# "base" is translated to the configured dialecticReasoningLevel by
# _resolve_pass_level at lookup time.
_PROPORTIONAL_LEVELS: dict[tuple[int, int], str] = {
    # depth 1: single pass at base level
    (1, 0): "base",
    # depth 2: pass 0 lighter, pass 1 at base
    (2, 0): "minimal",
    (2, 1): "base",
    # depth 3: pass 0 lighter, pass 1 at base, pass 2 one above minimal
    (3, 0): "minimal",
    (3, 1): "base",
    (3, 2): "low",
}
# Canonical ordering of reasoning levels, cheapest to most expensive.
# NOTE(review): not referenced in the code visible here — presumably used
# by level-cap/bump comparisons elsewhere; confirm before removing.
_LEVEL_ORDER: tuple[str, ...] = ("minimal", "low", "medium", "high", "max")
def _resolve_pass_level(self, pass_idx: int) -> str:
"""Resolve reasoning level for a given pass index.
Uses dialecticDepthLevels if configured, otherwise proportional
defaults relative to dialecticReasoningLevel.
"""
if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels):
return self._dialectic_depth_levels[pass_idx]
base = (self._config.dialectic_reasoning_level if self._config else "low")
mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx))
if mapping is None or mapping == "base":
return base
return mapping
def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str:
"""Build the prompt for a given dialectic pass.
Pass 0: cold start (general user query) or warm (session-scoped).
Pass 1: self-audit / targeted synthesis against gaps from pass 0.
Pass 2: reconciliation / contradiction check across prior passes.
"""
if pass_idx == 0:
if is_cold:
return (
"Who is this person? What are their preferences, goals, "
"and working style? Focus on facts that would help an AI "
"assistant be immediately useful."
)
return (
"Given what's been discussed in this session so far, what "
"context about this user is most relevant to the current "
"conversation? Prioritize active context over biographical facts."
)
elif pass_idx == 1:
prior = prior_results[-1] if prior_results else ""
return (
f"Given this initial assessment:\n\n{prior}\n\n"
"What gaps remain in your understanding that would help "
"going forward? Synthesize what you actually know about "
"the user's current state and immediate needs, grounded "
"in evidence from recent sessions."
)
else:
# pass 2: reconciliation
return (
f"Prior passes produced:\n\n"
f"Pass 1:\n{prior_results[0] if len(prior_results) > 0 else '(empty)'}\n\n"
f"Pass 2:\n{prior_results[1] if len(prior_results) > 1 else '(empty)'}\n\n"
"Do these assessments cohere? Reconcile any contradictions "
"and produce a final, concise synthesis of what matters most "
"for the current conversation."
)
@staticmethod
def _signal_sufficient(result: str) -> bool:
"""Check if a dialectic pass returned enough signal to skip further passes.
Heuristic: a response longer than 100 chars with some structure
(section headers, bullets, or an ordered list) is considered sufficient.
"""
if not result or len(result.strip()) < 100:
return False
# Structured output with sections/bullets is strong signal
if "\n" in result and (
"##" in result
or "" in result
or re.search(r"^[*-] ", result, re.MULTILINE)
or re.search(r"^\s*\d+\. ", result, re.MULTILINE)
):
return True
# Long enough even without structure
return len(result.strip()) > 300
def _run_dialectic_depth(self, query: str) -> str:
    """Run up to ``dialecticDepth`` chained dialectic .chat() calls and
    return the deepest non-empty answer.

    A cold start (no cached base context) opens with a broad
    user-oriented prompt; a warm session opens with a session-scoped one.
    Passes after the first are conditional — they are skipped when the
    previous pass already produced strong signal.

    NOTE(review): *query* is currently unused — prompts are built solely
    from prior pass results and the cold/warm flag; confirm intentional.
    """
    if not self._manager or not self._session_key:
        return ""
    cold_start = not self._base_context_cache
    answers: list[str] = []
    for pass_idx in range(self._dialectic_depth):
        if pass_idx > 0 and answers and self._signal_sufficient(answers[-1]):
            # Prior pass was rich enough; stop burning dialectic calls.
            logger.debug("Honcho dialectic depth %d: pass %d skipped, prior signal sufficient",
                         self._dialectic_depth, pass_idx)
            break
        prompt = self._build_dialectic_prompt(pass_idx, answers, cold_start)
        level = self._resolve_pass_level(pass_idx)
        logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s",
                     self._dialectic_depth, pass_idx, level, cold_start)
        reply = self._manager.dialectic_query(
            self._session_key, prompt,
            reasoning_level=level,
            peer="user",
        )
        answers.append(reply or "")
    # Prefer the deepest pass that actually produced output.
    return next((a for a in reversed(answers) if a and a.strip()), "")
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
"""Track turn count for cadence and injection_frequency logic."""
@ -659,7 +946,14 @@ class HonchoMemoryProvider(MemoryProvider):
try:
if tool_name == "honcho_profile":
card = self._manager.get_peer_card(self._session_key)
peer = args.get("peer", "user")
card_update = args.get("card")
if card_update:
result = self._manager.set_peer_card(self._session_key, card_update, peer=peer)
if result is None:
return tool_error("Failed to update peer card.")
return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result})
card = self._manager.get_peer_card(self._session_key, peer=peer)
if not card:
return json.dumps({"result": "No profile facts available yet."})
return json.dumps({"result": card})
@ -669,30 +963,64 @@ class HonchoMemoryProvider(MemoryProvider):
if not query:
return tool_error("Missing required parameter: query")
max_tokens = min(int(args.get("max_tokens", 800)), 2000)
peer = args.get("peer", "user")
result = self._manager.search_context(
self._session_key, query, max_tokens=max_tokens
self._session_key, query, max_tokens=max_tokens, peer=peer
)
if not result:
return json.dumps({"result": "No relevant context found."})
return json.dumps({"result": result})
elif tool_name == "honcho_context":
elif tool_name == "honcho_reasoning":
query = args.get("query", "")
if not query:
return tool_error("Missing required parameter: query")
peer = args.get("peer", "user")
reasoning_level = args.get("reasoning_level")
result = self._manager.dialectic_query(
self._session_key, query, peer=peer
self._session_key, query,
reasoning_level=reasoning_level,
peer=peer,
)
# Update cadence tracker so auto-injection respects the gap after an explicit call
self._last_dialectic_turn = self._turn_count
return json.dumps({"result": result or "No result from Honcho."})
elif tool_name == "honcho_context":
peer = args.get("peer", "user")
ctx = self._manager.get_session_context(self._session_key, peer=peer)
if not ctx:
return json.dumps({"result": "No context available yet."})
parts = []
if ctx.get("summary"):
parts.append(f"## Summary\n{ctx['summary']}")
if ctx.get("representation"):
parts.append(f"## Representation\n{ctx['representation']}")
if ctx.get("card"):
parts.append(f"## Card\n{ctx['card']}")
if ctx.get("recent_messages"):
msgs = ctx["recent_messages"]
msg_str = "\n".join(
f" [{m['role']}] {m['content'][:200]}"
for m in msgs[-5:] # last 5 for brevity
)
parts.append(f"## Recent messages\n{msg_str}")
return json.dumps({"result": "\n\n".join(parts) or "No context available."})
elif tool_name == "honcho_conclude":
delete_id = args.get("delete_id")
peer = args.get("peer", "user")
if delete_id:
ok = self._manager.delete_conclusion(self._session_key, delete_id, peer=peer)
if ok:
return json.dumps({"result": f"Conclusion {delete_id} deleted."})
return tool_error(f"Failed to delete conclusion {delete_id}.")
conclusion = args.get("conclusion", "")
if not conclusion:
return tool_error("Missing required parameter: conclusion")
ok = self._manager.create_conclusion(self._session_key, conclusion)
return tool_error("Missing required parameter: conclusion or delete_id")
ok = self._manager.create_conclusion(self._session_key, conclusion, peer=peer)
if ok:
return json.dumps({"result": f"Conclusion saved: {conclusion}"})
return json.dumps({"result": f"Conclusion saved for {peer}: {conclusion}"})
return tool_error("Failed to save conclusion.")
return tool_error(f"Unknown tool: {tool_name}")

View file

@ -440,11 +440,43 @@ def cmd_setup(args) -> None:
if new_recall in ("hybrid", "context", "tools"):
hermes_host["recallMode"] = new_recall
# --- 7. Session strategy ---
current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-directory")
# --- 7. Context token budget ---
current_ctx_tokens = hermes_host.get("contextTokens") or cfg.get("contextTokens")
current_display = str(current_ctx_tokens) if current_ctx_tokens else "uncapped"
print("\n Context injection per turn (hybrid/context recall modes only):")
print(" uncapped -- no limit (default)")
print(" N -- token limit per turn (e.g. 1200)")
new_ctx_tokens = _prompt("Context tokens", default=current_display)
if new_ctx_tokens.strip().lower() in ("none", "uncapped", "no limit"):
hermes_host.pop("contextTokens", None)
elif new_ctx_tokens.strip() == "":
pass # keep current
else:
try:
val = int(new_ctx_tokens)
if val >= 0:
hermes_host["contextTokens"] = val
except (ValueError, TypeError):
pass # keep current
# --- 7b. Dialectic cadence ---
current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3")
print("\n Dialectic cadence:")
print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).")
print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.")
new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
try:
val = int(new_dialectic)
if val >= 1:
hermes_host["dialecticCadence"] = val
except (ValueError, TypeError):
hermes_host["dialecticCadence"] = 3
# --- 8. Session strategy ---
current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
print("\n Session strategy:")
print(" per-directory -- one session per working directory (default)")
print(" per-session -- new Honcho session each run")
print(" per-session -- each run starts clean, Honcho injects context automatically")
print(" per-directory -- reuses session per dir, prior context auto-injected each run")
print(" per-repo -- one session per git repository")
print(" global -- single session across all directories")
new_strat = _prompt("Session strategy", default=current_strat)
@ -490,10 +522,11 @@ def cmd_setup(args) -> None:
print(f" Recall: {hcfg.recall_mode}")
print(f" Sessions: {hcfg.session_strategy}")
print("\n Honcho tools available in chat:")
print(" honcho_context -- ask Honcho about the user (LLM-synthesized)")
print(" honcho_search -- semantic search over history (no LLM)")
print(" honcho_profile -- peer card, key facts (no LLM)")
print(" honcho_conclude -- persist a user fact to memory (no LLM)")
print(" honcho_context -- session context: summary, representation, card, messages")
print(" honcho_search -- semantic search over history")
print(" honcho_profile -- peer card, key facts")
print(" honcho_reasoning -- ask Honcho a question, synthesized answer")
print(" honcho_conclude -- persist a user fact to memory")
print("\n Other commands:")
print(" hermes honcho status -- show full config")
print(" hermes honcho mode -- change recall/observation mode")
@ -585,13 +618,26 @@ def cmd_status(args) -> None:
print(f" Enabled: {hcfg.enabled}")
print(f" API key: {masked}")
print(f" Workspace: {hcfg.workspace_id}")
print(f" Config path: {active_path}")
# Config paths — show where config was read from and where writes go
global_path = Path.home() / ".honcho" / "config.json"
print(f" Config: {active_path}")
if write_path != active_path:
print(f" Write path: {write_path} (instance-local)")
print(f" Write to: {write_path} (profile-local)")
if active_path == global_path:
print(f" Fallback: (none — using global ~/.honcho/config.json)")
elif global_path.exists():
print(f" Fallback: {global_path} (exists, cross-app interop)")
print(f" AI peer: {hcfg.ai_peer}")
print(f" User peer: {hcfg.peer_name or 'not set'}")
print(f" Session key: {hcfg.resolve_session_name()}")
print(f" Session strat: {hcfg.session_strategy}")
print(f" Recall mode: {hcfg.recall_mode}")
print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens")
raw = getattr(hcfg, "raw", None) or {}
dialectic_cadence = raw.get("dialecticCadence") or 3
print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
print(f" Write freq: {hcfg.write_frequency}")
@ -599,8 +645,8 @@ def cmd_status(args) -> None:
print("\n Connection... ", end="", flush=True)
try:
client = get_honcho_client(hcfg)
print("OK")
_show_peer_cards(hcfg, client)
print("OK")
except Exception as e:
print(f"FAILED ({e})\n")
else:
@ -824,6 +870,41 @@ def cmd_mode(args) -> None:
print(f" {label}Recall mode -> {mode_arg} ({MODES[mode_arg]})\n")
def cmd_strategy(args) -> None:
    """Show or set the session strategy.

    With no argument, lists the available strategies and marks the one
    currently in effect (host block wins over root config, defaulting to
    "per-session"). With an argument, validates it against STRATEGIES and
    writes it into the active host block of the config.
    """
    STRATEGIES = {
        "per-session": "each run starts clean, Honcho injects context automatically",
        "per-directory": "reuses session per dir, prior context auto-injected each run",
        "per-repo": "one session per git repository",
        "global": "single session across all directories",
    }
    cfg = _read_config()
    strat_arg = getattr(args, "strategy", None)
    if strat_arg is None:
        # Show mode: resolve the effective strategy (host-level overrides root).
        current = (
            (cfg.get("hosts") or {}).get(_host_key(), {}).get("sessionStrategy")
            or cfg.get("sessionStrategy")
            or "per-session"
        )
        # NOTE(review): `"" * 40` produces an empty divider — looks like a
        # divider character was lost in transit; confirm the intended output.
        print("\nHoncho session strategy\n" + "" * 40)
        for s, desc in STRATEGIES.items():
            marker = " <-" if s == current else ""
            print(f" {s:<15} {desc}{marker}")
        print(f"\n Set with: hermes honcho strategy [per-session|per-directory|per-repo|global]\n")
        return
    if strat_arg not in STRATEGIES:
        print(f" Invalid strategy '{strat_arg}'. Options: {', '.join(STRATEGIES)}\n")
        return
    # Write mode: persist under the active host block, creating it if absent.
    host = _host_key()
    label = f"[{host}] " if host != "hermes" else ""
    cfg.setdefault("hosts", {}).setdefault(host, {})["sessionStrategy"] = strat_arg
    _write_config(cfg)
    print(f" {label}Session strategy -> {strat_arg} ({STRATEGIES[strat_arg]})\n")
def cmd_tokens(args) -> None:
"""Show or set token budget settings."""
cfg = _read_config()
@ -1143,10 +1224,11 @@ def cmd_migrate(args) -> None:
print(" automatically. Files become the seed, not the live store.")
print()
print(" Honcho tools (available to the agent during conversation)")
print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)")
print(" honcho_search — semantic search over stored context (no LLM)")
print(" honcho_profile — fast peer card snapshot (no LLM)")
print(" honcho_conclude — write a conclusion/fact back to memory (no LLM)")
print(" honcho_context — session context: summary, representation, card, messages")
print(" honcho_search — semantic search over stored context")
print(" honcho_profile — fast peer card snapshot")
print(" honcho_reasoning — ask Honcho a question, synthesized answer")
print(" honcho_conclude — write a conclusion/fact back to memory")
print()
print(" Session naming")
print(" OpenClaw: no persistent session concept — files are global.")
@ -1197,6 +1279,8 @@ def honcho_command(args) -> None:
cmd_peer(args)
elif sub == "mode":
cmd_mode(args)
elif sub == "strategy":
cmd_strategy(args)
elif sub == "tokens":
cmd_tokens(args)
elif sub == "identity":
@ -1211,7 +1295,7 @@ def honcho_command(args) -> None:
cmd_sync(args)
else:
print(f" Unknown honcho command: {sub}")
print(" Available: status, sessions, map, peer, mode, tokens, identity, migrate, enable, disable, sync\n")
print(" Available: status, sessions, map, peer, mode, strategy, tokens, identity, migrate, enable, disable, sync\n")
def register_cli(subparser) -> None:
@ -1270,6 +1354,15 @@ def register_cli(subparser) -> None:
help="Recall mode to set (hybrid/context/tools). Omit to show current.",
)
strategy_parser = subs.add_parser(
"strategy", help="Show or set session strategy (per-session/per-directory/per-repo/global)",
)
strategy_parser.add_argument(
"strategy", nargs="?", metavar="STRATEGY",
choices=("per-session", "per-directory", "per-repo", "global"),
help="Session strategy to set. Omit to show current.",
)
tokens_parser = subs.add_parser(
"tokens", help="Show or set token budget for context and dialectic",
)

View file

@ -58,7 +58,8 @@ def resolve_config_path() -> Path:
Resolution order:
1. $HERMES_HOME/honcho.json (profile-local, if it exists)
2. ~/.honcho/config.json (global, cross-app interop)
2. ~/.hermes/honcho.json (default profile shared host blocks live here)
3. ~/.honcho/config.json (global, cross-app interop)
Returns the global path if none exist (for first-time setup writes).
"""
@ -66,6 +67,11 @@ def resolve_config_path() -> Path:
if local_path.exists():
return local_path
# Default profile's config — host blocks accumulate here via setup/clone
default_path = Path.home() / ".hermes" / "honcho.json"
if default_path != local_path and default_path.exists():
return default_path
return GLOBAL_CONFIG_PATH
@ -88,6 +94,68 @@ def _resolve_bool(host_val, root_val, *, default: bool) -> bool:
return default
def _parse_context_tokens(host_val, root_val) -> int | None:
"""Parse contextTokens: host wins, then root, then None (uncapped)."""
for val in (host_val, root_val):
if val is not None:
try:
return int(val)
except (ValueError, TypeError):
pass
return None
def _parse_dialectic_depth(host_val, root_val) -> int:
"""Parse dialecticDepth: host wins, then root, then 1. Clamped to 1-3."""
for val in (host_val, root_val):
if val is not None:
try:
return max(1, min(int(val), 3))
except (ValueError, TypeError):
pass
return 1
_VALID_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] | None:
"""Parse dialecticDepthLevels: optional array of reasoning levels per pass.
Returns None when not configured (use proportional defaults).
When configured, validates each level and truncates/pads to match depth.
"""
for val in (host_val, root_val):
if val is not None and isinstance(val, list):
levels = [
lvl if lvl in _VALID_REASONING_LEVELS else "low"
for lvl in val[:depth]
]
# Pad with "low" if array is shorter than depth
while len(levels) < depth:
levels.append("low")
return levels
return None
def _resolve_optional_float(*values: Any) -> float | None:
"""Return the first non-empty value coerced to a positive float."""
for value in values:
if value is None:
continue
if isinstance(value, str):
value = value.strip()
if not value:
continue
try:
parsed = float(value)
except (TypeError, ValueError):
continue
if parsed > 0:
return parsed
return None
_VALID_OBSERVATION_MODES = {"unified", "directional"}
_OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}
@ -153,6 +221,8 @@ class HonchoClientConfig:
environment: str = "production"
# Optional base URL for self-hosted Honcho (overrides environment mapping)
base_url: str | None = None
# Optional request timeout in seconds for Honcho SDK HTTP calls
timeout: float | None = None
# Identity
peer_name: str | None = None
ai_peer: str = "hermes"
@ -162,17 +232,25 @@ class HonchoClientConfig:
# Write frequency: "async" (background thread), "turn" (sync per turn),
# "session" (flush on session end), or int (every N turns)
write_frequency: str | int = "async"
# Prefetch budget
# Prefetch budget (None = no cap; set to an integer to bound auto-injected context)
context_tokens: int | None = None
# Dialectic (peer.chat) settings
# reasoning_level: "minimal" | "low" | "medium" | "high" | "max"
dialectic_reasoning_level: str = "low"
# dynamic: auto-bump reasoning level based on query length
# true — low->medium (120+ chars), low->high (400+ chars), capped at "high"
# false — always use dialecticReasoningLevel as-is
# When true, the model can override reasoning_level per-call via the
# honcho_reasoning tool param (agentic). When false, always uses
# dialecticReasoningLevel and ignores model-provided overrides.
dialectic_dynamic: bool = True
# Max chars of dialectic result to inject into Hermes system prompt
dialectic_max_chars: int = 600
# Dialectic depth: how many .chat() calls per dialectic cycle (1-3).
# Depth 1: single call. Depth 2: self-audit + targeted synthesis.
# Depth 3: self-audit + synthesis + reconciliation.
dialectic_depth: int = 1
# Optional per-pass reasoning level override. Array of reasoning levels
# matching dialectic_depth length. When None, uses proportional defaults
# derived from dialectic_reasoning_level.
dialectic_depth_levels: list[str] | None = None
# Honcho API limits — configurable for self-hosted instances
# Max chars per message sent via add_messages() (Honcho cloud: 25000)
message_max_chars: int = 25000
@ -183,10 +261,8 @@ class HonchoClientConfig:
# "context" — auto-injected context only, Honcho tools removed
# "tools" — Honcho tools only, no auto-injected context
recall_mode: str = "hybrid"
# When True and recallMode is "tools", create the Honcho session eagerly
# during initialize() instead of deferring to the first tool call.
# This ensures sync_turn() can write from the very first turn.
# Does NOT enable automatic context injection — only changes init timing.
# Eager init in tools mode — when true, initializes session during
# initialize() instead of deferring to first tool call
init_on_session_start: bool = False
# Observation mode: legacy string shorthand ("directional" or "unified").
# Kept for backward compat; granular per-peer booleans below are preferred.
@ -218,12 +294,14 @@ class HonchoClientConfig:
resolved_host = host or resolve_active_host()
api_key = os.environ.get("HONCHO_API_KEY")
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
timeout = _resolve_optional_float(os.environ.get("HONCHO_TIMEOUT"))
return cls(
host=resolved_host,
workspace_id=workspace_id,
api_key=api_key,
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
base_url=base_url,
timeout=timeout,
ai_peer=resolved_host,
enabled=bool(api_key or base_url),
)
@ -284,6 +362,11 @@ class HonchoClientConfig:
or os.environ.get("HONCHO_BASE_URL", "").strip()
or None
)
timeout = _resolve_optional_float(
raw.get("timeout"),
raw.get("requestTimeout"),
os.environ.get("HONCHO_TIMEOUT"),
)
# Auto-enable when API key or base_url is present (unless explicitly disabled)
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
@ -329,12 +412,16 @@ class HonchoClientConfig:
api_key=api_key,
environment=environment,
base_url=base_url,
timeout=timeout,
peer_name=host_block.get("peerName") or raw.get("peerName"),
ai_peer=ai_peer,
enabled=enabled,
save_messages=save_messages,
write_frequency=write_frequency,
context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"),
context_tokens=_parse_context_tokens(
host_block.get("contextTokens"),
raw.get("contextTokens"),
),
dialectic_reasoning_level=(
host_block.get("dialecticReasoningLevel")
or raw.get("dialecticReasoningLevel")
@ -350,6 +437,15 @@ class HonchoClientConfig:
or raw.get("dialecticMaxChars")
or 600
),
dialectic_depth=_parse_dialectic_depth(
host_block.get("dialecticDepth"),
raw.get("dialecticDepth"),
),
dialectic_depth_levels=_parse_dialectic_depth_levels(
host_block.get("dialecticDepthLevels"),
raw.get("dialecticDepthLevels"),
depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
),
message_max_chars=int(
host_block.get("messageMaxChars")
or raw.get("messageMaxChars")
@ -416,16 +512,18 @@ class HonchoClientConfig:
cwd: str | None = None,
session_title: str | None = None,
session_id: str | None = None,
gateway_session_key: str | None = None,
) -> str | None:
"""Resolve Honcho session name.
Resolution order:
1. Manual directory override from sessions map
2. Hermes session title (from /title command)
3. per-session strategy Hermes session_id ({timestamp}_{hex})
4. per-repo strategy git repo root directory name
5. per-directory strategy directory basename
6. global strategy workspace name
3. Gateway session key (stable per-chat identifier from gateway platforms)
4. per-session strategy Hermes session_id ({timestamp}_{hex})
5. per-repo strategy git repo root directory name
6. per-directory strategy directory basename
7. global strategy workspace name
"""
import re
@ -439,12 +537,22 @@ class HonchoClientConfig:
# /title mid-session remap
if session_title:
sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_title).strip('-')
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', session_title).strip('-')
if sanitized:
if self.session_peer_prefix and self.peer_name:
return f"{self.peer_name}-{sanitized}"
return sanitized
# Gateway session key: stable per-chat identifier passed by the gateway
# (e.g. "agent:main:telegram:dm:8439114563"). Sanitize colons to hyphens
# for Honcho session ID compatibility. This takes priority over strategy-
# based resolution because gateway platforms need per-chat isolation that
# cwd-based strategies cannot provide.
if gateway_session_key:
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
if sanitized:
return sanitized
# per-session: inherit Hermes session_id (new Honcho session each run)
if self.session_strategy == "per-session" and session_id:
if self.session_peer_prefix and self.peer_name:
@ -506,13 +614,20 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
# mapping, enabling remote self-hosted Honcho deployments without
# requiring the server to live on localhost.
resolved_base_url = config.base_url
if not resolved_base_url:
resolved_timeout = config.timeout
if not resolved_base_url or resolved_timeout is None:
try:
from hermes_cli.config import load_config
hermes_cfg = load_config()
honcho_cfg = hermes_cfg.get("honcho", {})
if isinstance(honcho_cfg, dict):
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
if not resolved_base_url:
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
if resolved_timeout is None:
resolved_timeout = _resolve_optional_float(
honcho_cfg.get("timeout"),
honcho_cfg.get("request_timeout"),
)
except Exception:
pass
@ -547,6 +662,8 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
}
if resolved_base_url:
kwargs["base_url"] = resolved_base_url
if resolved_timeout is not None:
kwargs["timeout"] = resolved_timeout
_honcho_client = Honcho(**kwargs)

View file

@ -486,36 +486,9 @@ class HonchoSessionManager:
_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
def _dynamic_reasoning_level(self, query: str) -> str:
"""
Pick a reasoning level for a dialectic query.
When dialecticDynamic is true (default), auto-bumps based on query
length so Honcho applies more inference where it matters:
< 120 chars -> configured default (typically "low")
120-400 chars -> +1 level above default (cap at "high")
> 400 chars -> +2 levels above default (cap at "high")
"max" is never selected automatically -- reserve it for explicit config.
When dialecticDynamic is false, always returns the configured level.
"""
if not self._dialectic_dynamic:
return self._dialectic_reasoning_level
levels = self._REASONING_LEVELS
default_idx = levels.index(self._dialectic_reasoning_level) if self._dialectic_reasoning_level in levels else 1
n = len(query)
if n < 120:
bump = 0
elif n < 400:
bump = 1
else:
bump = 2
# Cap at "high" (index 3) for auto-selection
idx = min(default_idx + bump, 3)
return levels[idx]
def _default_reasoning_level(self) -> str:
"""Return the configured default reasoning level."""
return self._dialectic_reasoning_level
def dialectic_query(
self, session_key: str, query: str,
@ -532,8 +505,9 @@ class HonchoSessionManager:
Args:
session_key: The session key to query against.
query: Natural language question.
reasoning_level: Override the config default. If None, uses
_dynamic_reasoning_level(query).
reasoning_level: Override the configured default (dialecticReasoningLevel).
Only honored when dialecticDynamic is true.
If None or dialecticDynamic is false, uses the configured default.
peer: Which peer to query "user" (default) or "ai".
Returns:
@ -543,29 +517,34 @@ class HonchoSessionManager:
if not session:
return ""
target_peer_id = self._resolve_peer_id(session, peer)
if target_peer_id is None:
return ""
# Guard: truncate query to Honcho's dialectic input limit
if len(query) > self._dialectic_max_input_chars:
query = query[:self._dialectic_max_input_chars].rsplit(" ", 1)[0]
level = reasoning_level or self._dynamic_reasoning_level(query)
if self._dialectic_dynamic and reasoning_level:
level = reasoning_level
else:
level = self._default_reasoning_level()
try:
if self._ai_observe_others:
# AI peer can observe user — use cross-observation routing
if peer == "ai":
ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
# AI peer can observe other peers — use assistant as observer.
ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
if target_peer_id == session.assistant_peer_id:
result = ai_peer_obj.chat(query, reasoning_level=level) or ""
else:
ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
result = ai_peer_obj.chat(
query,
target=session.user_peer_id,
target=target_peer_id,
reasoning_level=level,
) or ""
else:
# AI can't observe others — each peer queries self
peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id
target_peer = self._get_or_create_peer(peer_id)
# Without cross-observation, each peer queries its own context.
target_peer = self._get_or_create_peer(target_peer_id)
result = target_peer.chat(query, reasoning_level=level) or ""
# Apply Hermes-side char cap before caching
@ -647,10 +626,11 @@ class HonchoSessionManager:
"""
Pre-fetch user and AI peer context from Honcho.
Fetches peer_representation and peer_card for both peers. search_query
is intentionally omitted it would only affect additional excerpts
that this code does not consume, and passing the raw message exposes
conversation content in server access logs.
Fetches peer_representation and peer_card for both peers, plus the
session summary when available. search_query is intentionally omitted
it would only affect additional excerpts that this code does not
consume, and passing the raw message exposes conversation content in
server access logs.
Args:
session_key: The session key to get context for.
@ -658,15 +638,29 @@ class HonchoSessionManager:
Returns:
Dictionary with 'representation', 'card', 'ai_representation',
and 'ai_card' keys.
'ai_card', and optionally 'summary' keys.
"""
session = self._cache.get(session_key)
if not session:
return {}
result: dict[str, str] = {}
# Session summary — provides session-scoped context.
# Fresh sessions (per-session cold start, or first-ever per-directory)
# return null summary — the guard below handles that gracefully.
# Per-directory returning sessions get their accumulated summary.
try:
user_ctx = self._fetch_peer_context(session.user_peer_id)
honcho_session = self._sessions_cache.get(session.honcho_session_id)
if honcho_session:
ctx = honcho_session.context(summary=True)
if ctx.summary and getattr(ctx.summary, "content", None):
result["summary"] = ctx.summary.content
except Exception as e:
logger.debug("Failed to fetch session summary from Honcho: %s", e)
try:
user_ctx = self._fetch_peer_context(session.user_peer_id, target=session.user_peer_id)
result["representation"] = user_ctx["representation"]
result["card"] = "\n".join(user_ctx["card"])
except Exception as e:
@ -674,7 +668,7 @@ class HonchoSessionManager:
# Also fetch AI peer's own representation so Hermes knows itself.
try:
ai_ctx = self._fetch_peer_context(session.assistant_peer_id)
ai_ctx = self._fetch_peer_context(session.assistant_peer_id, target=session.assistant_peer_id)
result["ai_representation"] = ai_ctx["representation"]
result["ai_card"] = "\n".join(ai_ctx["card"])
except Exception as e:
@ -862,7 +856,7 @@ class HonchoSessionManager:
return [str(item) for item in card if item]
return [str(card)]
def _fetch_peer_card(self, peer_id: str) -> list[str]:
def _fetch_peer_card(self, peer_id: str, *, target: str | None = None) -> list[str]:
"""Fetch a peer card directly from the peer object.
This avoids relying on session.context(), which can return an empty
@ -872,22 +866,33 @@ class HonchoSessionManager:
peer = self._get_or_create_peer(peer_id)
getter = getattr(peer, "get_card", None)
if callable(getter):
return self._normalize_card(getter())
return self._normalize_card(getter(target=target) if target is not None else getter())
legacy_getter = getattr(peer, "card", None)
if callable(legacy_getter):
return self._normalize_card(legacy_getter())
return self._normalize_card(legacy_getter(target=target) if target is not None else legacy_getter())
return []
def _fetch_peer_context(self, peer_id: str, search_query: str | None = None) -> dict[str, Any]:
def _fetch_peer_context(
self,
peer_id: str,
search_query: str | None = None,
*,
target: str | None = None,
) -> dict[str, Any]:
"""Fetch representation + peer card directly from a peer object."""
peer = self._get_or_create_peer(peer_id)
representation = ""
card: list[str] = []
try:
ctx = peer.context(search_query=search_query) if search_query else peer.context()
context_kwargs: dict[str, Any] = {}
if target is not None:
context_kwargs["target"] = target
if search_query is not None:
context_kwargs["search_query"] = search_query
ctx = peer.context(**context_kwargs) if context_kwargs else peer.context()
representation = (
getattr(ctx, "representation", None)
or getattr(ctx, "peer_representation", None)
@ -899,24 +904,111 @@ class HonchoSessionManager:
if not representation:
try:
representation = peer.representation() or ""
representation = (
peer.representation(target=target) if target is not None else peer.representation()
) or ""
except Exception as e:
logger.debug("Direct peer.representation() failed for '%s': %s", peer_id, e)
if not card:
try:
card = self._fetch_peer_card(peer_id)
card = self._fetch_peer_card(peer_id, target=target)
except Exception as e:
logger.debug("Direct peer card fetch failed for '%s': %s", peer_id, e)
return {"representation": representation, "card": card}
def get_peer_card(self, session_key: str) -> list[str]:
def get_session_context(self, session_key: str, peer: str = "user") -> dict[str, Any]:
"""Fetch full session context from Honcho including summary.
Uses the session-level context() API which returns summary,
peer_representation, peer_card, and messages.
"""
Fetch the user peer's card — a curated list of key facts.
session = self._cache.get(session_key)
if not session:
return {}
honcho_session = self._sessions_cache.get(session.honcho_session_id)
if not honcho_session:
# Fall back to peer-level context, respecting the requested peer
peer_id = self._resolve_peer_id(session, peer)
if peer_id is None:
peer_id = session.user_peer_id
return self._fetch_peer_context(peer_id, target=peer_id)
try:
peer_id = self._resolve_peer_id(session, peer)
ctx = honcho_session.context(
summary=True,
peer_target=peer_id,
peer_perspective=session.user_peer_id if peer == "user" else session.assistant_peer_id,
)
result: dict[str, Any] = {}
# Summary
if ctx.summary:
result["summary"] = ctx.summary.content
# Peer representation and card
if ctx.peer_representation:
result["representation"] = ctx.peer_representation
if ctx.peer_card:
result["card"] = "\n".join(ctx.peer_card)
# Messages (last N for context)
if ctx.messages:
recent = ctx.messages[-10:] # last 10 messages
result["recent_messages"] = [
{"role": getattr(m, "peer_id", "unknown"), "content": (m.content or "")[:500]}
for m in recent
]
return result
except Exception as e:
logger.debug("Session context fetch failed: %s", e)
return {}
def _resolve_peer_id(self, session: HonchoSession, peer: str | None) -> str:
    """Resolve a peer alias or explicit peer ID to a concrete Honcho peer ID.

    Always returns a non-empty string: either a known peer ID or a
    sanitized version of the caller-supplied alias/ID.
    """
    alias = (peer or "user").strip()
    if not alias:
        # Whitespace-only input falls back to the session's user peer.
        return session.user_peer_id
    normalized = self._sanitize_id(alias)
    # Map the well-known aliases onto the session's concrete peer IDs;
    # anything else is treated as an explicit (sanitized) peer ID.
    known = {
        self._sanitize_id("user"): session.user_peer_id,
        self._sanitize_id("ai"): session.assistant_peer_id,
    }
    return known.get(normalized, normalized)
def _resolve_observer_target(
    self,
    session: HonchoSession,
    peer: str | None,
) -> tuple[str, str | None]:
    """Resolve observer and target peer IDs for context/search/profile queries."""
    resolved = self._resolve_peer_id(session, peer)
    # Querying the assistant itself: the assistant observes itself.
    if resolved == session.assistant_peer_id:
        return session.assistant_peer_id, session.assistant_peer_id
    # Cross-observation enabled: the assistant observes the target peer.
    if self._ai_observe_others:
        return session.assistant_peer_id, resolved
    # Otherwise the target peer can only query its own context.
    return resolved, None
def get_peer_card(self, session_key: str, peer: str = "user") -> list[str]:
"""
Fetch a peer card a curated list of key facts.
Fast, no LLM reasoning. Returns raw structured facts Honcho has
inferred about the user (name, role, preferences, patterns).
inferred about the target peer (name, role, preferences, patterns).
Empty list if unavailable.
"""
session = self._cache.get(session_key)
@ -924,12 +1016,19 @@ class HonchoSessionManager:
return []
try:
return self._fetch_peer_card(session.user_peer_id)
observer_peer_id, target_peer_id = self._resolve_observer_target(session, peer)
return self._fetch_peer_card(observer_peer_id, target=target_peer_id)
except Exception as e:
logger.debug("Failed to fetch peer card from Honcho: %s", e)
return []
def search_context(self, session_key: str, query: str, max_tokens: int = 800) -> str:
def search_context(
self,
session_key: str,
query: str,
max_tokens: int = 800,
peer: str = "user",
) -> str:
"""
Semantic search over Honcho session context.
@ -941,6 +1040,7 @@ class HonchoSessionManager:
session_key: Session to search against.
query: Search query for semantic matching.
max_tokens: Token budget for returned content.
peer: Peer alias or explicit peer ID to search about.
Returns:
Relevant context excerpts as a string, or empty string if none.
@ -950,7 +1050,13 @@ class HonchoSessionManager:
return ""
try:
ctx = self._fetch_peer_context(session.user_peer_id, search_query=query)
observer_peer_id, target = self._resolve_observer_target(session, peer)
ctx = self._fetch_peer_context(
observer_peer_id,
search_query=query,
target=target,
)
parts = []
if ctx["representation"]:
parts.append(ctx["representation"])
@ -962,16 +1068,17 @@ class HonchoSessionManager:
logger.debug("Honcho search_context failed: %s", e)
return ""
def create_conclusion(self, session_key: str, content: str) -> bool:
"""Write a conclusion about the user back to Honcho.
def create_conclusion(self, session_key: str, content: str, peer: str = "user") -> bool:
"""Write a conclusion about a target peer back to Honcho.
Conclusions are facts the AI peer observes about the user
preferences, corrections, clarifications, project context.
They feed into the user's peer card and representation.
Conclusions are facts a peer observes about another peer or itself
preferences, corrections, clarifications, and project context.
They feed into the target peer's card and representation.
Args:
session_key: Session to associate the conclusion with.
content: The conclusion text (e.g. "User prefers dark mode").
content: The conclusion text.
peer: Peer alias or explicit peer ID. "user" is the default alias.
Returns:
True on success, False on failure.
@ -985,25 +1092,90 @@ class HonchoSessionManager:
return False
try:
if self._ai_observe_others:
# AI peer creates conclusion about user (cross-observation)
target_peer_id = self._resolve_peer_id(session, peer)
if target_peer_id is None:
logger.warning("Could not resolve conclusion peer '%s' for session '%s'", peer, session_key)
return False
if target_peer_id == session.assistant_peer_id:
assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id)
conclusions_scope = assistant_peer.conclusions_of(session.assistant_peer_id)
elif self._ai_observe_others:
assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
conclusions_scope = assistant_peer.conclusions_of(target_peer_id)
else:
# AI can't observe others — user peer creates self-conclusion
user_peer = self._get_or_create_peer(session.user_peer_id)
conclusions_scope = user_peer.conclusions_of(session.user_peer_id)
target_peer = self._get_or_create_peer(target_peer_id)
conclusions_scope = target_peer.conclusions_of(target_peer_id)
conclusions_scope.create([{
"content": content.strip(),
"session_id": session.honcho_session_id,
}])
logger.info("Created conclusion for %s: %s", session_key, content[:80])
logger.info("Created conclusion about %s for %s: %s", target_peer_id, session_key, content[:80])
return True
except Exception as e:
logger.error("Failed to create conclusion: %s", e)
return False
def delete_conclusion(self, session_key: str, conclusion_id: str, peer: str = "user") -> bool:
    """Delete a conclusion by ID. Use only for PII removal.

    Args:
        session_key: Session key for peer resolution.
        conclusion_id: The conclusion ID to delete.
        peer: Peer alias or explicit peer ID.

    Returns:
        True on success, False on failure.
    """
    session = self._cache.get(session_key)
    if not session:
        return False
    try:
        target_peer_id = self._resolve_peer_id(session, peer)
        # Mirror the observer/subject selection used when conclusions are
        # created: assistant self-observes, the AI observes others when
        # cross-observation is enabled, otherwise peers self-observe.
        if target_peer_id == session.assistant_peer_id:
            observer_id = session.assistant_peer_id
            subject_id = session.assistant_peer_id
        elif self._ai_observe_others:
            observer_id = session.assistant_peer_id
            subject_id = target_peer_id
        else:
            observer_id = target_peer_id
            subject_id = target_peer_id
        scope = self._get_or_create_peer(observer_id).conclusions_of(subject_id)
        scope.delete(conclusion_id)
        logger.info("Deleted conclusion %s for %s", conclusion_id, session_key)
        return True
    except Exception as e:
        # Best-effort: deletion failures are logged, never raised to callers.
        logger.error("Failed to delete conclusion %s: %s", conclusion_id, e)
        return False
def set_peer_card(self, session_key: str, card: list[str], peer: str = "user") -> list[str] | None:
    """Update a peer's card.

    Args:
        session_key: Session key for peer resolution.
        card: New peer card as list of fact strings.
        peer: Peer alias or explicit peer ID.

    Returns:
        Updated card on success, None on failure.
    """
    session = self._cache.get(session_key)
    if not session:
        return None
    try:
        peer_id = self._resolve_peer_id(session, peer)
        # Defensive: _resolve_peer_id is documented to always return a
        # string, but a None here must not reach the Honcho client.
        if peer_id is None:
            logger.warning("Could not resolve peer '%s' for set_peer_card in session '%s'", peer, session_key)
            return None
        updated = self._get_or_create_peer(peer_id).set_card(card)
        logger.info("Updated peer card for %s (%d facts)", peer_id, len(card))
        return updated
    except Exception as e:
        # Best-effort: card updates fail soft with a log line.
        logger.error("Failed to set peer card: %s", e)
        return None
def seed_ai_identity(self, session_key: str, content: str, source: str = "manual") -> bool:
"""
Seed the AI peer's Honcho representation from text content.
@ -1061,7 +1233,7 @@ class HonchoSessionManager:
return {"representation": "", "card": ""}
try:
ctx = self._fetch_peer_context(session.assistant_peer_id)
ctx = self._fetch_peer_context(session.assistant_peer_id, target=session.assistant_peer_id)
return {
"representation": ctx["representation"] or "",
"card": "\n".join(ctx["card"]),

View file

@ -68,6 +68,7 @@ termux = [
# Tested Android / Termux path: keeps the core CLI feature-rich while
# avoiding extras that currently depend on non-Android wheels (notably
# faster-whisper -> ctranslate2 via the voice extra).
"python-telegram-bot[webhooks]>=22.6,<23",
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[pty]",

View file

@ -75,7 +75,7 @@ from tools.browser_tool import cleanup_browser
from hermes_constants import OPENROUTER_BASE_URL
# Agent internals extracted to agent/ package for modularity
from agent.memory_manager import build_memory_context_block
from agent.memory_manager import build_memory_context_block, sanitize_context
from agent.retry_utils import jittered_backoff
from agent.error_classifier import classify_api_error, FailoverReason
from agent.prompt_builder import (
@ -602,6 +602,7 @@ class AIAgent:
prefill_messages: List[Dict[str, Any]] = None,
platform: str = None,
user_id: str = None,
gateway_session_key: str = None,
skip_context_files: bool = False,
skip_memory: bool = False,
session_db=None,
@ -667,6 +668,7 @@ class AIAgent:
self.ephemeral_system_prompt = ephemeral_system_prompt
self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc.
self._user_id = user_id # Platform user identifier (gateway sessions)
self._gateway_session_key = gateway_session_key # Stable per-chat key (e.g. agent:main:telegram:dm:123)
# Pluggable print function — CLI replaces this with _cprint so that
# raw ANSI status lines are routed through prompt_toolkit's renderer
# instead of going directly to stdout where patch_stdout's StdoutProxy
@ -689,9 +691,14 @@ class AIAgent:
self.api_mode = api_mode
elif self.provider == "openai-codex":
self.api_mode = "codex_responses"
elif self.provider == "xai":
self.api_mode = "codex_responses"
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
self.api_mode = "codex_responses"
self.provider = "openai-codex"
elif (provider_name is None) and "api.x.ai" in self._base_url_lower:
self.api_mode = "codex_responses"
self.provider = "xai"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
self.api_mode = "anthropic_messages"
self.provider = "anthropic"
@ -1019,16 +1026,12 @@ class AIAgent:
f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`."
)
# Final fallback: try raw OpenRouter key
client_kwargs = {
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
"base_url": OPENROUTER_BASE_URL,
"default_headers": {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
}
# No provider configured — reject with a clear message.
raise RuntimeError(
"No LLM provider configured. Run `hermes model` to "
"select a provider, or run `hermes setup` for first-time "
"configuration."
)
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
@ -1292,6 +1295,9 @@ class AIAgent:
# Thread gateway user identity for per-user memory scoping
if self._user_id:
_init_kwargs["user_id"] = self._user_id
# Thread gateway session key for stable per-chat Honcho session isolation
if self._gateway_session_key:
_init_kwargs["gateway_session_key"] = self._gateway_session_key
# Profile identity for per-profile provider scoping
try:
from hermes_cli.profiles import get_active_profile_name
@ -2102,6 +2108,59 @@ class AIAgent:
content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
return content
@staticmethod
def _has_natural_response_ending(content: str) -> bool:
"""Heuristic: does visible assistant text look intentionally finished?"""
if not content:
return False
stripped = content.rstrip()
if not stripped:
return False
if stripped.endswith("```"):
return True
return stripped[-1] in '.!?:)"\']}。!?:)】」』》'
def _is_ollama_glm_backend(self) -> bool:
    """Detect the narrow backend family affected by Ollama/GLM stop misreports."""
    looks_like_glm = "glm" in (self.model or "").lower()
    is_zai_provider = (self.provider or "").lower() == "zai"
    # Only GLM-family models (or the zai provider) are affected.
    if not (looks_like_glm or is_zai_provider):
        return False
    base = self._base_url_lower
    # Explicit Ollama endpoint (hostname mention or default port 11434).
    if "ollama" in base or ":11434" in base:
        return True
    # Otherwise, treat any local endpoint as potentially Ollama-hosted.
    return bool(self.base_url and is_local_endpoint(self.base_url))
def _should_treat_stop_as_truncated(
    self,
    finish_reason: str,
    assistant_message,
    messages: Optional[list] = None,
) -> bool:
    """Detect conservative stop->length misreports for Ollama-hosted GLM models.

    Returns True only when every guard passes: a chat-completions "stop"
    from an Ollama/GLM backend, mid tool loop, with plain visible text
    that does not look intentionally finished.
    """
    # Only plain chat-completions "stop" responses are suspect.
    if finish_reason != "stop" or self.api_mode != "chat_completions":
        return False
    if not self._is_ollama_glm_backend():
        return False
    # Require at least one tool result in history — we only second-guess
    # stops that occur mid tool loop.
    prior = messages or []
    has_tool_result = any(
        isinstance(entry, dict) and entry.get("role") == "tool"
        for entry in prior
    )
    if not has_tool_result:
        return False
    # A message with tool calls (or no message at all) is not a truncation.
    if assistant_message is None:
        return False
    if getattr(assistant_message, "tool_calls", None):
        return False
    content = getattr(assistant_message, "content", None)
    if not isinstance(content, str):
        return False
    visible = self._strip_think_blocks(content).strip()
    if not visible:
        return False
    # Very short or single-token text gives the heuristic nothing to judge.
    if len(visible) < 20 or not re.search(r"\s", visible):
        return False
    # Suspicious only when the text does not read as deliberately finished.
    return not self._has_natural_response_ending(visible)
def _looks_like_codex_intermediate_ack(
self,
user_message: str,
@ -3978,6 +4037,7 @@ class AIAgent:
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
}
normalized: Dict[str, Any] = {
"model": model,
@ -4013,6 +4073,20 @@ class AIAgent:
if val is not None:
normalized[passthrough_key] = val
extra_headers = api_kwargs.get("extra_headers")
if extra_headers is not None:
if not isinstance(extra_headers, dict):
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
normalized_headers: Dict[str, str] = {}
for key, value in extra_headers.items():
if not isinstance(key, str) or not key.strip():
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
if value is None:
continue
normalized_headers[key.strip()] = str(value)
if normalized_headers:
normalized["extra_headers"] = normalized_headers
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:
@ -6451,7 +6525,12 @@ class AIAgent:
if not is_github_responses:
kwargs["prompt_cache_key"] = self.session_id
if reasoning_enabled:
is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower()
if reasoning_enabled and is_xai_responses:
# xAI reasons automatically — no effort param, just include encrypted content
kwargs["include"] = ["reasoning.encrypted_content"]
elif reasoning_enabled:
if is_github_responses:
# Copilot's Responses route advertises reasoning-effort support,
# but not OpenAI-specific prompt cache or encrypted reasoning
@ -6462,7 +6541,7 @@ class AIAgent:
else:
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"]
elif not is_github_responses:
elif not is_github_responses and not is_xai_responses:
kwargs["include"] = []
if self.request_overrides:
@ -6471,6 +6550,9 @@ class AIAgent:
if self.max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = self.max_tokens
if is_xai_responses and getattr(self, "session_id", None):
kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
return kwargs
sanitized_messages = api_messages
@ -6635,18 +6717,24 @@ class AIAgent:
options["num_ctx"] = self._ollama_num_ctx
extra_body["options"] = options
# Ollama / custom provider: pass think=false when reasoning is disabled.
# Ollama does not recognise the OpenRouter-style `reasoning` extra_body
# field, so we use its native `think` parameter instead.
# This prevents thinking-capable models (Qwen3, etc.) from generating
# <think> blocks and producing empty-response errors when the user has
# set reasoning_effort: none.
if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict):
_effort = (self.reasoning_config.get("effort") or "").strip().lower()
_enabled = self.reasoning_config.get("enabled", True)
if _effort == "none" or _enabled is False:
extra_body["think"] = False
if self._is_qwen_portal():
extra_body["vl_high_resolution_images"] = True
if extra_body:
api_kwargs["extra_body"] = extra_body
# xAI prompt caching: send x-grok-conv-id header to route requests
# to the same server, maximizing automatic cache hits.
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
# Priority Processing / generic request overrides (e.g. service_tier).
# Applied last so overrides win over any defaults set above.
if self.request_overrides:
@ -6757,9 +6845,16 @@ class AIAgent:
except Exception:
pass
# Sanitize surrogates from API response — some models (e.g. Kimi/GLM via Ollama)
# can return invalid surrogate code points that crash json.dumps() on persist.
_raw_content = assistant_message.content or ""
_san_content = _sanitize_surrogates(_raw_content)
if reasoning_text:
reasoning_text = _sanitize_surrogates(reasoning_text)
msg = {
"role": "assistant",
"content": assistant_message.content or "",
"content": _san_content,
"reasoning": reasoning_text,
"finish_reason": finish_reason,
}
@ -7418,7 +7513,7 @@ class AIAgent:
# Start spinner for CLI mode (skip when TUI handles tool progress)
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn)
spinner.start()
@ -7432,24 +7527,50 @@ class AIAgent:
# Wait for all to complete with periodic heartbeats so the
# gateway's inactivity monitor doesn't kill us during long
# concurrent tool batches.
# concurrent tool batches. Also check for user interrupts
# so we don't block indefinitely when the user sends /stop
# or a new message during concurrent tool execution.
_conc_start = time.time()
_interrupt_logged = False
while True:
done, not_done = concurrent.futures.wait(
futures, timeout=30.0,
futures, timeout=5.0,
)
if not not_done:
break
# Check for interrupt — the per-thread interrupt signal
# already causes individual tools (terminal, execute_code)
# to abort, but tools without interrupt checks (web_search,
# read_file) will run to completion. Cancel any futures
# that haven't started yet so we don't block on them.
if self._interrupt_requested:
if not _interrupt_logged:
_interrupt_logged = True
self._vprint(
f"{self.log_prefix}⚡ Interrupt: cancelling "
f"{len(not_done)} pending concurrent tool(s)",
force=True,
)
for f in not_done:
f.cancel()
# Give already-running tools a moment to notice the
# per-thread interrupt signal and exit gracefully.
concurrent.futures.wait(not_done, timeout=3.0)
break
_conc_elapsed = int(time.time() - _conc_start)
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
# Heartbeat every ~30s (6 × 5s poll intervals)
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
finally:
if spinner:
# Build a summary message for the spinner stop
@ -7461,8 +7582,11 @@ class AIAgent:
for i, (tc, name, args) in enumerate(parsed_calls):
r = results[i]
if r is None:
# Shouldn't happen, but safety fallback
function_result = f"Error executing tool '{name}': thread did not return a result"
# Tool was cancelled (interrupt) or thread didn't return
if self._interrupt_requested:
function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
else:
function_result = f"Error executing tool '{name}': thread did not return a result"
tool_duration = 0.0
else:
function_name, function_args, function_result, tool_duration, is_error = r
@ -7714,7 +7838,7 @@ class AIAgent:
spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn)
spinner.start()
self._delegate_spinner = spinner
@ -7741,7 +7865,7 @@ class AIAgent:
# Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
spinner = None
if self.quiet_mode and not self.tool_progress_callback:
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
@ -7765,7 +7889,7 @@ class AIAgent:
# These are not in the tool registry — route through MemoryManager.
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
@ -7787,7 +7911,7 @@ class AIAgent:
elif self.quiet_mode:
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
@ -8150,6 +8274,16 @@ class AIAgent:
if isinstance(persist_user_message, str):
persist_user_message = _sanitize_surrogates(persist_user_message)
# Strip leaked <memory-context> blocks from user input. When Honcho's
# saveMessages persists a turn that included injected context, the block
# can reappear in the next turn's user message via message history.
# Stripping here prevents stale memory tags from leaking into the
# conversation and being visible to the user or the model as user text.
if isinstance(user_message, str):
user_message = sanitize_context(user_message)
if isinstance(persist_user_message, str):
persist_user_message = sanitize_context(persist_user_message)
# Store stream callback for _interruptible_api_call to pick up
self._stream_callback = stream_callback
self._persist_user_message_idx = None
@ -8429,6 +8563,16 @@ class AIAgent:
self._interrupt_message = None
self._interrupt_thread_signal_pending = False
# Notify memory providers of the new turn so cadence tracking works.
# Must happen BEFORE prefetch_all() so providers know which turn it is
# and can gate context/dialectic refresh via contextCadence/dialecticCadence.
if self._memory_manager:
try:
_turn_msg = original_user_message if isinstance(original_user_message, str) else ""
self._memory_manager.on_turn_start(self._user_turn_count, _turn_msg)
except Exception:
pass
# External memory provider: prefetch once before the tool loop.
# Reuse the cached result on every iteration to avoid re-calling
# prefetch_all() on each tool call (10 tool calls = 10x latency + cost).
@ -8620,6 +8764,12 @@ class AIAgent:
new_tcs.append(tc)
am["tool_calls"] = new_tcs
# Proactively strip any surrogate characters before the API call.
# Models served via Ollama (Kimi K2.5, GLM-5, Qwen) can return
# lone surrogates (U+D800-U+DFFF) that crash json.dumps() inside
# the OpenAI SDK. Sanitizing here prevents the 3-retry cycle.
_sanitize_messages_surrogates(api_messages)
# Calculate approximate request size for logging
total_chars = sum(len(str(msg)) for msg in api_messages)
approx_tokens = estimate_messages_tokens_rough(api_messages)
@ -8633,8 +8783,8 @@ class AIAgent:
self._vprint(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
else:
# Animated thinking spinner in quiet mode
face = random.choice(KawaiiSpinner.KAWAII_THINKING)
verb = random.choice(KawaiiSpinner.THINKING_VERBS)
face = random.choice(KawaiiSpinner.get_thinking_faces())
verb = random.choice(KawaiiSpinner.get_thinking_verbs())
if self.thinking_callback:
# CLI TUI mode: use prompt_toolkit widget instead of raw spinner
# (works in both streaming and non-streaming modes)
@ -9018,6 +9168,17 @@ class AIAgent:
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
else:
finish_reason = response.choices[0].finish_reason
assistant_message = response.choices[0].message
if self._should_treat_stop_as_truncated(
finish_reason,
assistant_message,
messages,
):
self._vprint(
f"{self.log_prefix}⚠️ Treating suspicious Ollama/GLM stop response as truncated",
force=True,
)
finish_reason = "length"
if finish_reason == "length":
self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
@ -10792,8 +10953,9 @@ class AIAgent:
# tool(result) → assistant("(empty)") → user(nudge)
# Without this, we'd have tool → user which most
# APIs reject as an invalid sequence.
assistant_msg["content"] = "(empty)"
messages.append(assistant_msg)
_nudge_msg = self._build_assistant_message(assistant_message, finish_reason)
_nudge_msg["content"] = "(empty)"
messages.append(_nudge_msg)
messages.append({
"role": "user",
"content": (

View file

@ -64,7 +64,9 @@ AUTHOR_MAP = {
"259807879+Bartok9@users.noreply.github.com": "Bartok9",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
"268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
"27917469+nosleepcassette@users.noreply.github.com": "nosleepcassette",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
"109555139+davetist@users.noreply.github.com": "davetist",
# contributors (manual mapping from git names)
"dmayhem93@gmail.com": "dmahan93",
"samherring99@gmail.com": "samherring99",
@ -83,6 +85,7 @@ AUTHOR_MAP = {
"4317663+helix4u@users.noreply.github.com": "helix4u",
"331214+counterposition@users.noreply.github.com": "counterposition",
"blspear@gmail.com": "BrennerSpear",
"239876380+handsdiff@users.noreply.github.com": "handsdiff",
"gpickett00@gmail.com": "gpickett00",
"mcosma@gmail.com": "wakamex",
"clawdia.nash@proton.me": "clawdia-nash",
@ -124,6 +127,7 @@ AUTHOR_MAP = {
"balyan.sid@gmail.com": "balyansid",
"oluwadareab12@gmail.com": "bennytimz",
"simon@simonmarcus.org": "simon-marcus",
"xowiekk@gmail.com": "Xowiek",
"1243352777@qq.com": "zons-zhaozhy",
# ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
# crossref, and GH contributor list matching (April 2026 audit) ──
@ -175,6 +179,7 @@ AUTHOR_MAP = {
"limars874@gmail.com": "limars874",
"lisicheng168@gmail.com": "lesterli",
"mingjwan@microsoft.com": "MagicRay1217",
"orangeko@gmail.com": "GenKoKo",
"niyant@spicefi.xyz": "spniyant",
"olafthiele@gmail.com": "olafthiele",
"oncuevtv@gmail.com": "sprmn24",

View file

@ -351,8 +351,8 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con
|----------|------|-------------|
| OpenRouter | API key | `OPENROUTER_API_KEY` |
| Anthropic | API key | `ANTHROPIC_API_KEY` |
| Nous Portal | OAuth | `hermes login --provider nous` |
| OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
| Nous Portal | OAuth | `hermes auth` |
| OpenAI Codex | OAuth | `hermes auth` |
| GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
| Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
| DeepSeek | API key | `DEEPSEEK_API_KEY` |

View file

@ -47,6 +47,13 @@ SCOPES = [
]
def _normalize_authorized_user_payload(payload: dict) -> dict:
normalized = dict(payload)
if not normalized.get("type"):
normalized["type"] = "authorized_user"
return normalized
def _ensure_authenticated():
if not TOKEN_PATH.exists():
print("Not authenticated. Run the setup script first:", file=sys.stderr)
@ -170,7 +177,12 @@ def get_credentials():
creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), _stored_token_scopes())
if creds.expired and creds.refresh_token:
creds.refresh(Request())
TOKEN_PATH.write_text(creds.to_json())
TOKEN_PATH.write_text(
json.dumps(
_normalize_authorized_user_payload(json.loads(creds.to_json())),
indent=2,
)
)
if not creds.valid:
print("Token is invalid. Re-run setup.", file=sys.stderr)
sys.exit(1)

View file

@ -19,6 +19,13 @@ def get_token_path() -> Path:
return get_hermes_home() / "google_token.json"
def _normalize_authorized_user_payload(payload: dict) -> dict:
normalized = dict(payload)
if not normalized.get("type"):
normalized["type"] = "authorized_user"
return normalized
def refresh_token(token_data: dict) -> dict:
"""Refresh the access token using the refresh token."""
import urllib.error
@ -55,7 +62,9 @@ def refresh_token(token_data: dict) -> dict:
tz=timezone.utc,
).isoformat()
get_token_path().write_text(json.dumps(token_data, indent=2))
get_token_path().write_text(
json.dumps(_normalize_authorized_user_payload(token_data), indent=2)
)
return token_data

View file

@ -60,6 +60,13 @@ REQUIRED_PACKAGES = ["google-api-python-client", "google-auth-oauthlib", "google
REDIRECT_URI = "http://localhost:1"
def _normalize_authorized_user_payload(payload: dict) -> dict:
normalized = dict(payload)
if not normalized.get("type"):
normalized["type"] = "authorized_user"
return normalized
def _load_token_payload(path: Path = TOKEN_PATH) -> dict:
try:
return json.loads(path.read_text())
@ -151,7 +158,12 @@ def check_auth():
if creds.expired and creds.refresh_token:
try:
creds.refresh(Request())
TOKEN_PATH.write_text(creds.to_json())
TOKEN_PATH.write_text(
json.dumps(
_normalize_authorized_user_payload(json.loads(creds.to_json())),
indent=2,
)
)
missing_scopes = _missing_scopes_from_payload(_load_token_payload(TOKEN_PATH))
if missing_scopes:
print(f"AUTHENTICATED (partial): Token refreshed but missing {len(missing_scopes)} scopes:")
@ -313,7 +325,7 @@ def exchange_auth_code(code: str):
sys.exit(1)
creds = flow.credentials
token_payload = json.loads(creds.to_json())
token_payload = _normalize_authorized_user_payload(json.loads(creds.to_json()))
# Store only the scopes actually granted by the user, not what was requested.
# creds.to_json() writes the requested scopes, which causes refresh to fail

View file

@ -89,7 +89,8 @@ class TestReadCodexAccessToken:
hermes_home.mkdir(parents=True, exist_ok=True)
(hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
result = _read_codex_access_token()
with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
result = _read_codex_access_token()
assert result is None
def test_empty_token_returns_none(self, tmp_path, monkeypatch):
@ -146,7 +147,8 @@ class TestReadCodexAccessToken:
},
}))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
result = _read_codex_access_token()
with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
result = _read_codex_access_token()
assert result is None, "Expired JWT should return None"
def test_valid_jwt_returns_token(self, tmp_path, monkeypatch):
@ -585,7 +587,10 @@ class TestGetTextAuxiliaryClient:
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client()
assert model == "gpt-5.2-codex"
@ -623,17 +628,21 @@ class TestGetTextAuxiliaryClient:
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
with patch("agent.auxiliary_client._resolve_auto", return_value=(None, None)):
client, model = get_text_auxiliary_client()
assert client is None
assert model is None
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self, monkeypatch):
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
with patch("agent.auxiliary_client._resolve_custom_runtime",
return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client()

View file

@ -232,7 +232,7 @@ class TestResolveVisionProviderClientModelNormalization:
assert provider == "zai"
assert client is not None
assert model == "glm-5.1"
assert model == "glm-5v-turbo" # zai has dedicated vision model in _PROVIDER_VISION_MODELS
class TestVisionPathApiMode:

View file

@ -252,6 +252,11 @@ def test_exhausted_402_entry_resets_after_one_hour(tmp_path, monkeypatch):
def test_explicit_reset_timestamp_overrides_default_429_ttl(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
# Prevent auto-seeding from Codex CLI tokens on the host
monkeypatch.setattr(
"hermes_cli.auth._import_codex_cli_tokens",
lambda: None,
)
_write_auth_store(
tmp_path,
{

View file

@ -939,3 +939,74 @@ class TestOnMemoryWriteBridge:
mgr.on_memory_write("add", "user", "test")
# Good provider still received the call despite bad provider crashing
assert good.memory_writes == [("add", "user", "test")]
class TestHonchoCadenceTracking:
"""Verify Honcho provider cadence gating depends on on_turn_start().
Bug: _turn_count was never updated because on_turn_start() was not called
from run_conversation(). This meant cadence checks always passed (every
turn fired both context refresh and dialectic). Fixed by calling
on_turn_start(self._user_turn_count, msg) before prefetch_all().
"""
def test_turn_count_updates_on_turn_start(self):
"""on_turn_start sets _turn_count, enabling cadence math."""
from plugins.memory.honcho import HonchoMemoryProvider
p = HonchoMemoryProvider()
assert p._turn_count == 0
p.on_turn_start(1, "hello")
assert p._turn_count == 1
p.on_turn_start(5, "world")
assert p._turn_count == 5
def test_queue_prefetch_respects_dialectic_cadence(self):
"""With dialecticCadence=3, dialectic should skip turns 2 and 3."""
from plugins.memory.honcho import HonchoMemoryProvider
p = HonchoMemoryProvider()
p._dialectic_cadence = 3
p._recall_mode = "context"
p._session_key = "test-session"
# Simulate a manager that records prefetch calls
class FakeManager:
def prefetch_context(self, key, query=None):
pass
def prefetch_dialectic(self, key, query):
pass
p._manager = FakeManager()
# Simulate turn 1: last_dialectic_turn = -999, so (1 - (-999)) >= 3 -> fires
p.on_turn_start(1, "turn 1")
p._last_dialectic_turn = 1 # simulate it fired
p._last_context_turn = 1
# Simulate turn 2: (2 - 1) = 1 < 3 -> should NOT fire dialectic
p.on_turn_start(2, "turn 2")
assert (p._turn_count - p._last_dialectic_turn) < p._dialectic_cadence
# Simulate turn 3: (3 - 1) = 2 < 3 -> should NOT fire dialectic
p.on_turn_start(3, "turn 3")
assert (p._turn_count - p._last_dialectic_turn) < p._dialectic_cadence
# Simulate turn 4: (4 - 1) = 3 >= 3 -> should fire dialectic
p.on_turn_start(4, "turn 4")
assert (p._turn_count - p._last_dialectic_turn) >= p._dialectic_cadence
def test_injection_frequency_first_turn_with_1indexed(self):
"""injection_frequency='first-turn' must inject on turn 1 (1-indexed)."""
from plugins.memory.honcho import HonchoMemoryProvider
p = HonchoMemoryProvider()
p._injection_frequency = "first-turn"
# Turn 1 should inject (not skip)
p.on_turn_start(1, "first message")
assert p._turn_count == 1
# The guard is `_turn_count > 1`, so turn 1 passes through
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
assert not should_skip, "First turn (turn 1) should NOT be skipped"
# Turn 2 should skip
p.on_turn_start(2, "second message")
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
assert should_skip, "Second turn (turn 2) SHOULD be skipped"

View file

@ -34,6 +34,7 @@ class _FakeAgent:
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
)
self.flush_memories = MagicMock()
self.commit_memory_session = MagicMock()
self._invalidate_system_prompt = MagicMock()
# Token counters (non-zero to verify reset)

View file

@ -1,5 +1,6 @@
"""Tests for CLI /status command behavior."""
from datetime import datetime
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
@ -83,3 +84,18 @@ def test_show_session_status_prints_gateway_style_summary():
_, kwargs = cli_obj.console.print.call_args
assert kwargs.get("highlight") is False
assert kwargs.get("markup") is False
def test_profile_command_reports_custom_root_profile(monkeypatch, tmp_path, capsys):
"""Profile detection works for custom-root deployments (not under ~/.hermes)."""
cli_obj = _make_cli()
profile_home = tmp_path / "profiles" / "coder"
monkeypatch.setenv("HERMES_HOME", str(profile_home))
monkeypatch.setattr(Path, "home", lambda: tmp_path / "unrelated-home")
cli_obj._handle_profile_command()
out = capsys.readouterr().out
assert "Profile: coder" in out
assert f"Home: {profile_home}" in out

View file

@ -144,6 +144,18 @@ class TestGatewayPersonalityNone:
assert "none" in result.lower()
@pytest.mark.asyncio
async def test_empty_personality_list_uses_profile_display_path(self, tmp_path):
runner = self._make_runner(personalities={})
(tmp_path / "config.yaml").write_text(yaml.dump({"agent": {"personalities": {}}}))
with patch("gateway.run._hermes_home", tmp_path), \
patch("hermes_constants.display_hermes_home", return_value="~/.hermes/profiles/coder"):
event = self._make_event("")
result = await runner._handle_personality_command(event)
assert result == "No personalities configured in `~/.hermes/profiles/coder/config.yaml`"
class TestPersonalityDictFormat:
"""Test dict-format custom personalities with description, tone, style."""

66
tests/gateway/conftest.py Normal file
View file

@ -0,0 +1,66 @@
"""Shared fixtures for gateway tests.
The ``_ensure_telegram_mock`` helper guarantees that a minimal mock of
the ``telegram`` package is registered in :data:`sys.modules` **before**
any test file triggers ``from gateway.platforms.telegram import ...``.
Without this, ``pytest-xdist`` workers that happen to collect
``test_telegram_caption_merge.py`` (bare top-level import, no per-file
mock) first will cache ``ChatType = None`` from the production
ImportError fallback, causing 30+ downstream test failures wherever
``ChatType.GROUP`` / ``ChatType.SUPERGROUP`` is accessed.
Individual test files may still call their own ``_ensure_telegram_mock``
it short-circuits when the mock is already present.
"""
import sys
from unittest.mock import MagicMock
def _ensure_telegram_mock() -> None:
"""Install a comprehensive telegram mock in sys.modules.
Idempotent skips when the real library is already imported.
Uses ``sys.modules[name] = mod`` (overwrite) instead of
``setdefault`` so it wins even if a partial/broken import
already cached a module with ``ChatType = None``.
"""
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
return # Real library is installed — nothing to mock
mod = MagicMock()
mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
mod.constants.ParseMode.MARKDOWN = "Markdown"
mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
mod.constants.ParseMode.HTML = "HTML"
mod.constants.ChatType.PRIVATE = "private"
mod.constants.ChatType.GROUP = "group"
mod.constants.ChatType.SUPERGROUP = "supergroup"
mod.constants.ChatType.CHANNEL = "channel"
# Real exception classes so ``except (NetworkError, ...)`` clauses
# in production code don't blow up with TypeError.
mod.error.NetworkError = type("NetworkError", (OSError,), {})
mod.error.TimedOut = type("TimedOut", (OSError,), {})
mod.error.BadRequest = type("BadRequest", (Exception,), {})
mod.error.Forbidden = type("Forbidden", (Exception,), {})
mod.error.InvalidToken = type("InvalidToken", (Exception,), {})
mod.error.RetryAfter = type("RetryAfter", (Exception,), {"retry_after": 1})
mod.error.Conflict = type("Conflict", (Exception,), {})
# Update.ALL_TYPES used in start_polling()
mod.Update.ALL_TYPES = []
for name in (
"telegram",
"telegram.ext",
"telegram.constants",
"telegram.request",
):
sys.modules[name] = mod
sys.modules["telegram.error"] = mod.error
# Run at collection time — before any test file's module-level imports.
_ensure_telegram_mock()

View file

@ -284,6 +284,58 @@ class TestLoadGatewayConfig:
assert config.unauthorized_dm_behavior == "ignore"
assert config.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
def test_bridges_telegram_disable_link_previews_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"telegram:\n"
" disable_link_previews: true\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
config = load_gateway_config()
assert config.platforms[Platform.TELEGRAM].extra["disable_link_previews"] is True
def test_bridges_telegram_proxy_url_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"telegram:\n"
" proxy_url: socks5://127.0.0.1:1080\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.delenv("TELEGRAM_PROXY", raising=False)
load_gateway_config()
import os
assert os.environ.get("TELEGRAM_PROXY") == "socks5://127.0.0.1:1080"
def test_telegram_proxy_env_takes_precedence_over_config(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"telegram:\n"
" proxy_url: http://from-config:8080\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("TELEGRAM_PROXY", "socks5://from-env:1080")
load_gateway_config()
import os
assert os.environ.get("TELEGRAM_PROXY") == "socks5://from-env:1080"
class TestHomeChannelEnvOverrides:
"""Home channel env vars should apply even when the platform was already

View file

@ -1,12 +1,15 @@
"""Tests for duplicate reply suppression across the gateway stack.
Covers three fix paths:
Covers four fix paths:
1. base.py: stale response suppressed when interrupt_event is set and a
pending message exists (#8221 / #2483)
2. run.py return path: already_sent propagated from stream consumer's
already_sent flag without requiring response_previewed (#8375)
3. run.py queued-message path: first response correctly detected as
already-streamed when already_sent is True without response_previewed
2. run.py return path: only confirmed final streamed delivery suppresses
the fallback final send; partial streamed output must not
3. run.py queued-message path: first response is skipped only when the
final response was actually streamed, not merely when partial output existed
4. stream_consumer.py cancellation handler: only confirms final delivery
when the best-effort send actually succeeds, not merely because partial
content was sent earlier
"""
import asyncio
@ -153,15 +156,16 @@ class TestBaseInterruptSuppression:
assert any(s["content"] == "Valid response" for s in adapter.sent)
# ===================================================================
# Test 2: run.py — already_sent without response_previewed (#8375)
# Test 2: run.py — partial streamed output must not suppress final send
# ===================================================================
class TestAlreadySentWithoutResponsePreviewed:
"""The already_sent flag on the response dict should be set when the
stream consumer's already_sent is True, even if response_previewed is
False. This prevents duplicate sends when streaming was interrupted
by flood control."""
class TestOnlyFinalStreamDeliverySuppressesFinalSend:
"""The gateway should suppress the fallback final send only when the
stream consumer confirmed the final assistant reply was delivered.
Partial streamed output is not enough. If only already_sent=True,
the fallback final send must still happen so Telegram users don't lose
the real answer."""
def _make_mock_stream_consumer(self, already_sent=False, final_response_sent=False):
sc = SimpleNamespace(
@ -170,21 +174,20 @@ class TestAlreadySentWithoutResponsePreviewed:
)
return sc
def test_already_sent_set_without_response_previewed(self):
"""Stream consumer already_sent=True should propagate to response
dict even when response_previewed is False."""
def test_partial_stream_output_does_not_set_already_sent(self):
"""already_sent=True alone must NOT suppress final delivery."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
response = {"final_response": "text", "response_previewed": False}
# Reproduce the logic from run.py return path (post-fix)
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert response.get("already_sent") is True
assert "already_sent" not in response
def test_already_sent_not_set_when_nothing_sent(self):
"""When stream consumer hasn't sent anything, already_sent should
@ -193,24 +196,26 @@ class TestAlreadySentWithoutResponsePreviewed:
response = {"final_response": "text", "response_previewed": False}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert "already_sent" not in response
def test_already_sent_set_on_final_response_sent(self):
"""final_response_sent=True should still work as before."""
"""final_response_sent=True should suppress duplicate final sends."""
sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=True)
response = {"final_response": "text"}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert response.get("already_sent") is True
@ -222,10 +227,11 @@ class TestAlreadySentWithoutResponsePreviewed:
response = {"final_response": "Error: something broke", "failed": True}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert "already_sent" not in response
@ -255,10 +261,9 @@ class TestEmptyResponseNotSuppressed:
if sc and isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
if not _is_empty_sentinel and (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
def test_empty_sentinel_not_suppressed_with_already_sent(self):
@ -283,10 +288,10 @@ class TestEmptyResponseNotSuppressed:
self._apply_suppression_logic(response, sc)
assert "already_sent" not in response
def test_real_response_still_suppressed_with_already_sent(self):
"""Normal non-empty response should still be suppressed when
streaming delivered content."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
def test_real_response_still_suppressed_only_when_final_delivery_confirmed(self):
"""Normal non-empty response should be suppressed only when the final
response was actually streamed."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=True)
response = {"final_response": "Here are the search results..."}
self._apply_suppression_logic(response, sc)
assert response.get("already_sent") is True
@ -299,8 +304,8 @@ class TestEmptyResponseNotSuppressed:
assert "already_sent" not in response
class TestQueuedMessageAlreadyStreamed:
"""The queued-message path should detect that the first response was
already streamed (already_sent=True) even without response_previewed."""
"""The queued-message path should skip the first response only when the
final response was actually streamed."""
def _make_mock_sc(self, already_sent=False, final_response_sent=False):
return SimpleNamespace(
@ -308,18 +313,38 @@ class TestQueuedMessageAlreadyStreamed:
final_response_sent=final_response_sent,
)
def test_queued_path_detects_already_streamed(self):
"""already_sent=True on stream consumer means first response was
streamed skip re-sending before processing queued message."""
_sc = self._make_mock_sc(already_sent=True)
def test_queued_path_only_skips_send_when_final_response_was_streamed(self):
"""Partial streamed output alone must not suppress the first response
before the queued follow-up is processed."""
_sc = self._make_mock_sc(already_sent=True, final_response_sent=False)
# Reproduce the queued-message logic from run.py (post-fix)
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
_sc and getattr(_sc, "final_response_sent", False)
)
assert _already_streamed is False
def test_queued_path_detects_confirmed_final_stream_delivery(self):
"""Confirmed final streamed delivery should skip the resend."""
_sc = self._make_mock_sc(already_sent=True, final_response_sent=True)
response = {"response_previewed": False}
_already_streamed = bool(
(_sc and getattr(_sc, "final_response_sent", False))
or bool(response.get("response_previewed"))
)
assert _already_streamed is True
def test_queued_path_detects_previewed_response_delivery(self):
"""A response already previewed via the adapter should not be resent
before processing the queued follow-up."""
_sc = self._make_mock_sc(already_sent=False, final_response_sent=False)
response = {"response_previewed": True}
_already_streamed = bool(
(_sc and getattr(_sc, "final_response_sent", False))
or bool(response.get("response_previewed"))
)
assert _already_streamed is True
@ -327,14 +352,10 @@ class TestQueuedMessageAlreadyStreamed:
def test_queued_path_sends_when_not_streamed(self):
"""Nothing was streamed — first response should be sent before
processing the queued message."""
_sc = self._make_mock_sc(already_sent=False)
_sc = self._make_mock_sc(already_sent=False, final_response_sent=False)
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
_sc and getattr(_sc, "final_response_sent", False)
)
assert _already_streamed is False
@ -344,11 +365,96 @@ class TestQueuedMessageAlreadyStreamed:
_sc = None
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
_sc and getattr(_sc, "final_response_sent", False)
)
assert _already_streamed is False
# ===================================================================
# Test 4: stream_consumer.py — cancellation handler delivery confirmation
# ===================================================================
class TestCancellationHandlerDeliveryConfirmation:
"""The stream consumer's cancellation handler should only set
final_response_sent when the best-effort send actually succeeds.
Partial content (already_sent=True) alone must not promote to
final_response_sent that would suppress the gateway's fallback
send even when the user never received the real answer."""
def test_partial_only_no_accumulated_stays_false(self):
"""Cancelled after sending intermediate text, nothing accumulated.
final_response_sent must stay False so the gateway fallback fires."""
already_sent = True
final_response_sent = False
accumulated = ""
message_id = None
_best_effort_ok = False
if accumulated and message_id:
_best_effort_ok = True # wouldn't enter
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is False
def test_best_effort_succeeds_sets_true(self):
"""When accumulated content exists and best-effort send succeeds,
final_response_sent should become True."""
already_sent = True
final_response_sent = False
accumulated = "Here are the search results..."
message_id = "msg_123"
_best_effort_ok = False
if accumulated and message_id:
_best_effort_ok = True # simulating successful _send_or_edit
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is True
def test_best_effort_fails_stays_false(self):
"""When best-effort send fails (flood control, network), the
gateway fallback must deliver the response."""
already_sent = True
final_response_sent = False
accumulated = "Here are the search results..."
message_id = "msg_123"
_best_effort_ok = False
if accumulated and message_id:
_best_effort_ok = False # simulating failed _send_or_edit
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is False
def test_preserves_existing_true(self):
"""If final_response_sent was already True before cancellation,
it must remain True regardless."""
already_sent = True
final_response_sent = True
accumulated = ""
message_id = None
_best_effort_ok = False
if accumulated and message_id:
pass
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is True
def test_old_behavior_would_have_promoted_partial(self):
"""Verify the old code would have incorrectly promoted
already_sent to final_response_sent even with no accumulated
content proving the bug existed."""
already_sent = True
final_response_sent = False
# OLD cancellation handler logic:
if already_sent:
final_response_sent = True
assert final_response_sent is True # the bug: partial promoted to final

View file

@ -0,0 +1,54 @@
"""Tests for Unicode dash normalization in /insights command flag parsing.
Telegram on iOS auto-converts -- to em/en dashes. The /insights handler
normalizes these before parsing --days and --source flags.
"""
import re
import pytest
# The regex from gateway/run.py insights handler
_UNICODE_DASH_RE = re.compile(r'[\u2012\u2013\u2014\u2015](days|source)')
def _normalize_insights_args(raw: str) -> str:
"""Apply the same normalization as the /insights handler."""
return _UNICODE_DASH_RE.sub(r'--\1', raw)
class TestInsightsUnicodeDashFlags:
"""--days and --source must survive iOS Unicode dash conversion."""
@pytest.mark.parametrize("input_str,expected", [
# Standard double hyphen (baseline)
("--days 7", "--days 7"),
("--source telegram", "--source telegram"),
# Em dash (U+2014)
("\u2014days 7", "--days 7"),
("\u2014source telegram", "--source telegram"),
# En dash (U+2013)
("\u2013days 7", "--days 7"),
("\u2013source telegram", "--source telegram"),
# Figure dash (U+2012)
("\u2012days 7", "--days 7"),
# Horizontal bar (U+2015)
("\u2015days 7", "--days 7"),
# Combined flags with em dashes
("\u2014days 30 \u2014source cli", "--days 30 --source cli"),
])
def test_unicode_dash_normalized(self, input_str, expected):
result = _normalize_insights_args(input_str)
assert result == expected
def test_regular_hyphens_unaffected(self):
"""Normal --days/--source must pass through unchanged."""
assert _normalize_insights_args("--days 7 --source discord") == "--days 7 --source discord"
def test_bare_number_still_works(self):
"""Shorthand /insights 7 (no flag) must not be mangled."""
assert _normalize_insights_args("7") == "7"
def test_no_flags_unchanged(self):
"""Input with no flags passes through as-is."""
assert _normalize_insights_args("") == ""
assert _normalize_insights_args("30") == "30"

View file

@ -1,5 +1,6 @@
"""Tests for topic-aware gateway progress updates."""
import asyncio
import importlib
import sys
import time
@ -415,6 +416,21 @@ class QueuedCommentaryAgent:
}
class BackgroundReviewAgent:
def __init__(self, **kwargs):
self.background_review_callback = kwargs.get("background_review_callback")
self.tools = []
def run_conversation(self, message, conversation_history=None, task_id=None):
if self.background_review_callback:
self.background_review_callback("💾 Skill 'prospect-scanner' created.")
return {
"final_response": "done",
"messages": [],
"api_calls": 1,
}
class VerboseAgent:
"""Agent that emits a tool call with args whose JSON exceeds 200 chars."""
LONG_CODE = "x" * 300
@ -668,6 +684,66 @@ async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monke
assert "final response 1" in sent_texts
@pytest.mark.asyncio
async def test_run_agent_defers_background_review_notification_until_release(monkeypatch, tmp_path):
adapter, result = await _run_with_agent(
monkeypatch,
tmp_path,
BackgroundReviewAgent,
session_id="sess-bg-review-order",
config_data={"display": {"interim_assistant_messages": True}},
)
assert result["final_response"] == "done"
assert adapter.sent == []
@pytest.mark.asyncio
async def test_base_processing_releases_post_delivery_callback_after_main_send():
"""Post-delivery callbacks on the adapter fire after the main response."""
adapter = ProgressCaptureAdapter()
async def _handler(event):
return "done"
adapter.set_message_handler(_handler)
released = []
def _post_delivery_cb():
released.append(True)
adapter.sent.append(
{
"chat_id": "bg-review",
"content": "💾 Skill 'prospect-scanner' created.",
"reply_to": None,
"metadata": None,
}
)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1001",
chat_type="group",
thread_id="17585",
)
event = MessageEvent(
text="hello",
message_type=MessageType.TEXT,
source=source,
message_id="msg-1",
)
session_key = "agent:main:telegram:group:-1001:17585"
adapter._active_sessions[session_key] = asyncio.Event()
adapter._post_delivery_callbacks[session_key] = _post_delivery_cb
await adapter._process_message_background(event, session_key)
sent_texts = [call["content"] for call in adapter.sent]
assert sent_texts == ["done", "💾 Skill 'prospect-scanner' created."]
assert released == [True]
@pytest.mark.asyncio
async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
"""Verbose mode with default tool_preview_length (0) should NOT truncate args.

View file

@ -283,6 +283,19 @@ class TestBuildSessionContextPrompt:
assert "Local" in prompt
assert "machine running this agent" in prompt
def test_local_delivery_path_uses_display_hermes_home(self):
config = GatewayConfig()
source = SessionSource(
platform=Platform.LOCAL, chat_id="cli",
chat_name="CLI terminal", chat_type="dm",
)
ctx = build_session_context(source, config)
with patch("hermes_constants.display_hermes_home", return_value="~/.hermes/profiles/coder"):
prompt = build_session_context_prompt(ctx)
assert "~/.hermes/profiles/coder/cron/output/" in prompt
def test_whatsapp_prompt(self):
config = GatewayConfig(
platforms={

View file

@ -209,11 +209,13 @@ def test_set_session_env_includes_session_key():
# Capture baseline value before setting (may be non-empty from another
# test in the same pytest-xdist worker sharing the context).
baseline = get_session_env("HERMES_SESSION_KEY")
tokens = runner._set_session_env(context)
assert get_session_env("HERMES_SESSION_KEY") == "tg:-1001:17585"
runner._clear_session_env(tokens)
assert get_session_env("HERMES_SESSION_KEY") == baseline
# After clearing, the session key must not retain the value we just set.
# The exact post-clear value depends on context propagation from other
# tests, so only check that our value was removed, not what replaced it.
assert get_session_env("HERMES_SESSION_KEY") != "tg:-1001:17585"
def test_session_key_no_race_condition_with_contextvars(monkeypatch):
@ -251,3 +253,72 @@ def test_session_key_no_race_condition_with_contextvars(monkeypatch):
assert results["session-B"] == "session-B", (
f"Session B got '{results['session-B']}' instead of 'session-B' — race condition!"
)
@pytest.mark.asyncio
async def test_run_in_executor_with_context_preserves_session_env(monkeypatch):
"""Gateway executor work should inherit session contextvars for tool routing."""
runner = object.__new__(GatewayRunner)
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_USER_ID", raising=False)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="2144471399",
chat_type="dm",
user_id="123456",
user_name="alice",
thread_id=None,
)
context = SessionContext(
source=source,
connected_platforms=[],
home_channels={},
session_key="agent:main:telegram:dm:2144471399",
)
tokens = runner._set_session_env(context)
try:
result = await runner._run_in_executor_with_context(
lambda: {
"platform": get_session_env("HERMES_SESSION_PLATFORM"),
"chat_id": get_session_env("HERMES_SESSION_CHAT_ID"),
"user_id": get_session_env("HERMES_SESSION_USER_ID"),
"session_key": get_session_env("HERMES_SESSION_KEY"),
}
)
finally:
runner._clear_session_env(tokens)
assert result == {
"platform": "telegram",
"chat_id": "2144471399",
"user_id": "123456",
"session_key": "agent:main:telegram:dm:2144471399",
}
@pytest.mark.asyncio
async def test_run_in_executor_with_context_forwards_args():
"""_run_in_executor_with_context should forward *args to the callable."""
runner = object.__new__(GatewayRunner)
def add(a, b):
return a + b
result = await runner._run_in_executor_with_context(add, 3, 7)
assert result == 10
@pytest.mark.asyncio
async def test_run_in_executor_with_context_propagates_exceptions():
"""Exceptions inside the executor should propagate to the caller."""
runner = object.__new__(GatewayRunner)
def blow_up():
raise ValueError("boom")
with pytest.raises(ValueError, match="boom"):
await runner._run_in_executor_with_context(blow_up)

View file

@ -14,7 +14,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent, MessageType
from gateway.platforms.base import MessageEvent, MessageType, merge_pending_message_event
from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
from gateway.session import SessionSource, build_session_key
@ -184,6 +184,80 @@ async def test_second_message_during_sentinel_queued_not_duplicate():
await task1
def test_merge_pending_message_event_merges_text_and_photo_followups():
    """Two queued follow-ups (text, then photo) collapse into one merged event."""
    origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="12345",
        chat_type="dm",
        user_id="u1",
    )
    key = build_session_key(origin)
    queue = {}

    first = MessageEvent(
        text="first follow-up",
        message_type=MessageType.TEXT,
        source=origin,
    )
    second = MessageEvent(
        text="see screenshot",
        message_type=MessageType.PHOTO,
        source=origin,
        media_urls=["/tmp/test.png"],
        media_types=["image/png"],
    )

    for followup in (first, second):
        merge_pending_message_event(queue, key, followup, merge_text=True)

    combined = queue[key]
    # The later PHOTO event wins the type; texts are joined with a blank line.
    assert combined.message_type == MessageType.PHOTO
    assert combined.text == "first follow-up\n\nsee screenshot"
    assert combined.media_urls == ["/tmp/test.png"]
    assert combined.media_types == ["image/png"]
@pytest.mark.asyncio
async def test_recent_telegram_text_followup_is_queued_without_interrupt():
    """A text follow-up while the agent is busy is queued, not an interrupt."""
    runner = _make_runner()
    incoming = _make_event(text="follow-up")
    key = build_session_key(incoming.source)

    busy_agent = MagicMock()
    busy_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
    runner._running_agents[key] = busy_agent
    import time as _time
    runner._running_agents_ts[key] = _time.time()

    outcome = await runner._handle_message(incoming)

    assert outcome is None
    busy_agent.interrupt.assert_not_called()
    telegram_adapter = runner.adapters[Platform.TELEGRAM]
    assert telegram_adapter._pending_messages[key].text == "follow-up"
@pytest.mark.asyncio
async def test_recent_telegram_followups_append_in_pending_queue():
    """Consecutive follow-ups accumulate in the pending queue, newline-joined."""
    runner = _make_runner()
    part_one = _make_event(text="part one")
    part_two = _make_event(text="part two")
    key = build_session_key(part_one.source)

    busy_agent = MagicMock()
    busy_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
    runner._running_agents[key] = busy_agent
    import time as _time
    runner._running_agents_ts[key] = _time.time()

    for event in (part_one, part_two):
        await runner._handle_message(event)

    busy_agent.interrupt.assert_not_called()
    telegram_adapter = runner.adapters[Platform.TELEGRAM]
    assert telegram_adapter._pending_messages[key].text == "part one\npart two"
# ------------------------------------------------------------------
# Test 5: Sentinel not placed for command messages
# ------------------------------------------------------------------
@ -273,6 +347,7 @@ async def test_stop_hard_kills_running_agent():
# Simulate a running (possibly hung) agent
fake_agent = MagicMock()
fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
runner._running_agents[session_key] = fake_agent
# Send /stop
@ -305,6 +380,7 @@ async def test_stop_clears_pending_messages():
)
fake_agent = MagicMock()
fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
runner._running_agents[session_key] = fake_agent
runner._pending_messages[session_key] = "some queued text"

View file

@ -1678,11 +1678,11 @@ class TestProgressMessageThread:
msg_event = captured_events[0]
source = msg_event.source
# For a top-level DM: source.thread_id should remain None
# (session keying must not be affected)
assert source.thread_id is None, (
"source.thread_id must stay None for top-level DMs "
"so they share one continuous session"
# With default dm_top_level_threads_as_sessions=True, source.thread_id
# should equal the message ts so each DM thread gets its own session.
assert source.thread_id == "1234567890.000001", (
"source.thread_id must equal the message ts for top-level DMs "
"so each reply thread gets its own session"
)
# The message_id should be the event's ts — this is what the gateway
@ -1707,6 +1707,34 @@ class TestProgressMessageThread:
"ensuring progress messages land in the thread"
)
@pytest.mark.asyncio
async def test_dm_toplevel_shares_session_when_disabled(self, adapter):
    """Opting out restores legacy single-session-per-DM-channel behavior."""
    adapter.config.extra["dm_top_level_threads_as_sessions"] = False
    slack_event = {
        "channel": "D_DM",
        "channel_type": "im",
        "user": "U_USER",
        "text": "Hello bot",
        "ts": "1234567890.000001",
    }
    seen = []
    adapter.handle_message = AsyncMock(side_effect=lambda e: seen.append(e))
    with patch.object(adapter, "_resolve_user_name", new=AsyncMock(return_value="testuser")):
        await adapter._handle_slack_message(slack_event)
    assert len(seen) == 1
    origin = seen[0].source
    assert origin.thread_id is None, (
        "source.thread_id must stay None when "
        "dm_top_level_threads_as_sessions is disabled"
    )
@pytest.mark.asyncio
async def test_channel_mention_progress_uses_thread_ts(self, adapter):
"""Progress messages for a channel @mention should go into the reply thread."""

View file

@ -279,3 +279,28 @@ async def test_status_command_bypasses_active_session_guard():
assert "Agent Running" in sent[0]
assert not interrupt_event.is_set(), "/status incorrectly triggered an agent interrupt"
assert session_key not in adapter._pending_messages, "/status was incorrectly queued"
@pytest.mark.asyncio
async def test_profile_command_reports_custom_root_profile(monkeypatch, tmp_path):
    """Gateway /profile detects custom-root profiles (not under ~/.hermes)."""
    from pathlib import Path

    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
    runner = _make_runner(entry)

    # Point HERMES_HOME somewhere outside the (patched) user home directory.
    custom_home = tmp_path / "profiles" / "coder"
    monkeypatch.setenv("HERMES_HOME", str(custom_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path / "unrelated-home")

    reply = await runner._handle_profile_command(_make_event("/profile"))
    assert "**Profile:** `coder`" in reply
    assert f"**Home:** `{custom_home}`" in reply

View file

@ -50,9 +50,9 @@ from gateway.platforms.telegram import TelegramAdapter
from gateway.config import Platform, PlatformConfig
def _make_adapter():
def _make_adapter(extra=None):
"""Create a TelegramAdapter with mocked internals."""
config = PlatformConfig(enabled=True, token="test-token")
config = PlatformConfig(enabled=True, token="test-token", extra=extra or {})
adapter = TelegramAdapter(config)
adapter._bot = AsyncMock()
adapter._app = MagicMock()
@ -134,6 +134,23 @@ class TestTelegramExecApproval:
)
assert result.success is False
@pytest.mark.asyncio
async def test_disable_link_previews_sets_preview_kwargs(self):
    """With disable_link_previews set, the send carries a preview-suppressing kwarg."""
    adapter = _make_adapter(extra={"disable_link_previews": True})
    sent = MagicMock()
    sent.message_id = 42
    adapter._bot.send_message = AsyncMock(return_value=sent)

    await adapter.send_exec_approval(
        chat_id="12345", command="ls", session_key="s"
    )

    call_kwargs = adapter._bot.send_message.call_args[1]
    # Either the legacy flag or the newer link_preview_options must be present.
    suppressed = (
        call_kwargs.get("disable_web_page_preview") is True
        or call_kwargs.get("link_preview_options") is not None
    )
    assert suppressed
@pytest.mark.asyncio
async def test_truncates_long_command(self):
adapter = _make_adapter()

View file

@ -45,6 +45,11 @@ class FakeRetryAfter(Exception):
# Build a fake telegram module tree so the adapter's internal imports work
_fake_telegram = types.ModuleType("telegram")
_fake_telegram.Update = object
_fake_telegram.Bot = object
_fake_telegram.Message = object
_fake_telegram.InlineKeyboardButton = object
_fake_telegram.InlineKeyboardMarkup = object
_fake_telegram_error = types.ModuleType("telegram.error")
_fake_telegram_error.NetworkError = FakeNetworkError
_fake_telegram_error.BadRequest = FakeBadRequest
@ -52,7 +57,21 @@ _fake_telegram_error.TimedOut = FakeTimedOut
_fake_telegram.error = _fake_telegram_error
_fake_telegram_constants = types.ModuleType("telegram.constants")
_fake_telegram_constants.ParseMode = SimpleNamespace(MARKDOWN_V2="MarkdownV2")
_fake_telegram_constants.ChatType = SimpleNamespace(
GROUP="group",
SUPERGROUP="supergroup",
CHANNEL="channel",
)
_fake_telegram.constants = _fake_telegram_constants
_fake_telegram_ext = types.ModuleType("telegram.ext")
_fake_telegram_ext.Application = object
_fake_telegram_ext.CommandHandler = object
_fake_telegram_ext.CallbackQueryHandler = object
_fake_telegram_ext.MessageHandler = object
_fake_telegram_ext.ContextTypes = SimpleNamespace(DEFAULT_TYPE=object)
_fake_telegram_ext.filters = object
_fake_telegram_request = types.ModuleType("telegram.request")
_fake_telegram_request.HTTPXRequest = object
@pytest.fixture(autouse=True)
@ -61,6 +80,8 @@ def _inject_fake_telegram(monkeypatch):
monkeypatch.setitem(sys.modules, "telegram", _fake_telegram)
monkeypatch.setitem(sys.modules, "telegram.error", _fake_telegram_error)
monkeypatch.setitem(sys.modules, "telegram.constants", _fake_telegram_constants)
monkeypatch.setitem(sys.modules, "telegram.ext", _fake_telegram_ext)
monkeypatch.setitem(sys.modules, "telegram.request", _fake_telegram_request)
def _make_adapter():
@ -68,6 +89,7 @@ def _make_adapter():
config = PlatformConfig(enabled=True, token="fake-token")
adapter = object.__new__(TelegramAdapter)
adapter.config = config
adapter._config = config
adapter._platform = Platform.TELEGRAM
adapter._connected = True
@ -82,6 +104,81 @@ def _make_adapter():
return adapter
def test_forum_general_topic_without_message_thread_id_keeps_thread_context():
    """Forum General-topic messages should keep synthetic thread context."""
    from gateway.platforms import telegram as telegram_mod

    adapter = _make_adapter()
    incoming = SimpleNamespace(
        text="hello from General",
        caption=None,
        chat=SimpleNamespace(
            id=-100123,
            type=telegram_mod.ChatType.SUPERGROUP,
            is_forum=True,
            title="Forum group",
        ),
        from_user=SimpleNamespace(id=456, full_name="Alice"),
        message_thread_id=None,
        reply_to_message=None,
        message_id=10,
        date=None,
    )

    built = adapter._build_message_event(incoming, msg_type=SimpleNamespace(value="text"))

    assert built.source.chat_id == "-100123"
    assert built.source.chat_type == "group"
    # General topic is synthesized as thread "1" even without message_thread_id.
    assert built.source.thread_id == "1"
@pytest.mark.asyncio
async def test_send_omits_general_topic_thread_id():
    """Telegram sends to forum General should omit message_thread_id=1."""
    adapter = _make_adapter()
    recorded = []

    async def fake_send_message(**kwargs):
        recorded.append(dict(kwargs))
        return SimpleNamespace(message_id=42)

    adapter._bot = SimpleNamespace(send_message=fake_send_message)

    outcome = await adapter.send(
        chat_id="-100123",
        content="test message",
        metadata={"thread_id": "1"},
    )

    assert outcome.success is True
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["chat_id"] == -100123
    assert only_call["text"] == "test message"
    assert only_call["reply_to_message_id"] is None
    assert only_call["message_thread_id"] is None
@pytest.mark.asyncio
async def test_send_typing_retries_without_general_thread_when_not_found():
    """Typing for forum General should fall back if Telegram rejects thread 1."""
    adapter = _make_adapter()
    attempts = []

    async def fake_send_chat_action(**kwargs):
        attempts.append(dict(kwargs))
        if kwargs.get("message_thread_id") == 1:
            raise FakeBadRequest("Message thread not found")

    adapter._bot = SimpleNamespace(send_chat_action=fake_send_chat_action)

    await adapter.send_typing("-100123", metadata={"thread_id": "1"})

    # First attempt targets thread 1, then a retry with no thread at all.
    assert attempts == [
        {"chat_id": -100123, "action": "typing", "message_thread_id": 1},
        {"chat_id": -100123, "action": "typing", "message_thread_id": None},
    ]
@pytest.mark.asyncio
async def test_send_retries_without_thread_on_thread_not_found():
"""When message_thread_id causes 'thread not found', retry without it."""

View file

@ -613,6 +613,7 @@ class TestDetectVenvDir:
# Not inside a virtualenv
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
dot_venv = tmp_path / ".venv"
@ -624,6 +625,7 @@ class TestDetectVenvDir:
def test_falls_back_to_venv_directory(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
venv = tmp_path / "venv"
@ -635,6 +637,7 @@ class TestDetectVenvDir:
def test_prefers_dot_venv_over_venv(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
(tmp_path / ".venv").mkdir()
@ -646,6 +649,7 @@ class TestDetectVenvDir:
def test_returns_none_when_no_virtualenv(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
result = gateway_cli._detect_venv_dir()

View file

@ -0,0 +1,101 @@
"""Regression tests for Copilot api_mode recomputation during /model switch.
When switching models within the Copilot provider (e.g. GPT-5 Claude),
the stale api_mode from resolve_runtime_provider must be overridden with
a fresh value computed from the *new* model. Without the fix, Claude
requests went through the Responses API and failed with
``unsupported_api_for_model``.
"""
from unittest.mock import patch
from hermes_cli.model_switch import switch_model
# Canned validate_requested_model() response: the requested model is accepted,
# recognized, and should be persisted, with no user-facing message to show.
_MOCK_VALIDATION = {
    "accepted": True,
    "persist": True,
    "recognized": True,
    "message": None,
}
def _run_copilot_switch(
    raw_input: str,
    current_provider: str = "copilot",
    current_model: str = "gpt-5.4",
    explicit_provider: str = "",
    runtime_api_mode: str = "codex_responses",
):
    """Run switch_model with Copilot mocks and return the result.

    ``runtime_api_mode`` is the (possibly stale) api_mode that
    resolve_runtime_provider reports; the regression under test is that
    switch_model must recompute it from the newly selected model.
    """
    runtime_stub = {
        "api_key": "ghu_test_token",
        "base_url": "https://api.githubcopilot.com",
        "api_mode": runtime_api_mode,
    }
    with (
        patch("hermes_cli.model_switch.resolve_alias", return_value=None),
        patch("hermes_cli.model_switch.list_provider_models", return_value=[]),
        patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            return_value=runtime_stub,
        ),
        patch(
            "hermes_cli.models.validate_requested_model",
            return_value=_MOCK_VALIDATION,
        ),
        patch("hermes_cli.model_switch.get_model_info", return_value=None),
        patch("hermes_cli.model_switch.get_model_capabilities", return_value=None),
        patch("hermes_cli.models.detect_provider_for_model", return_value=None),
    ):
        return switch_model(
            raw_input=raw_input,
            current_provider=current_provider,
            current_model=current_model,
            explicit_provider=explicit_provider,
        )
def test_same_provider_copilot_switch_recomputes_api_mode():
    """GPT-5 → Claude on copilot: api_mode must flip to chat_completions."""
    outcome = _run_copilot_switch(
        raw_input="claude-opus-4.6",
        current_provider="copilot",
        current_model="gpt-5.4",
    )
    assert outcome.success, f"switch_model failed: {outcome.error_message}"
    assert outcome.new_model == "claude-opus-4.6"
    assert outcome.target_provider == "copilot"
    assert outcome.api_mode == "chat_completions"
def test_explicit_copilot_switch_uses_selected_model_api_mode():
    """Cross-provider switch to copilot: api_mode from new model, not stale runtime."""
    outcome = _run_copilot_switch(
        raw_input="claude-opus-4.6",
        current_provider="openrouter",
        current_model="anthropic/claude-sonnet-4.6",
        explicit_provider="copilot",
    )
    assert outcome.success, f"switch_model failed: {outcome.error_message}"
    assert outcome.new_model == "claude-opus-4.6"
    assert outcome.target_provider == "github-copilot"
    assert outcome.api_mode == "chat_completions"
def test_copilot_gpt5_keeps_codex_responses():
    """GPT-5 → GPT-5 on copilot: api_mode must stay codex_responses."""
    outcome = _run_copilot_switch(
        raw_input="gpt-5.4-mini",
        current_provider="copilot",
        current_model="gpt-5.4",
        runtime_api_mode="codex_responses",
    )
    assert outcome.success, f"switch_model failed: {outcome.error_message}"
    assert outcome.new_model == "gpt-5.4-mini"
    assert outcome.target_provider == "copilot"
    # gpt-5.4-mini is a GPT-5 variant — should use codex_responses
    # (gpt-5-mini is the special case that uses chat_completions)
    assert outcome.api_mode == "codex_responses"

View file

@ -163,7 +163,7 @@ class TestNormalizeProvider:
class TestProviderLabel:
def test_known_labels_and_auto(self):
assert provider_label("anthropic") == "Anthropic"
assert provider_label("kimi") == "Kimi / Moonshot"
assert provider_label("kimi") == "Kimi / Kimi Coding Plan"
assert provider_label("copilot") == "GitHub Copilot"
assert provider_label("copilot-acp") == "GitHub Copilot ACP"
assert provider_label("auto") == "Auto"

View file

@ -0,0 +1,351 @@
"""Tests for Ollama Cloud provider integration."""
import os
import pytest
from unittest.mock import patch, MagicMock
from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key_provider_credentials
from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider
from hermes_cli.model_normalize import normalize_model_for_provider
from agent.model_metadata import _URL_TO_PROVIDER, _PROVIDER_PREFIXES
from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models
# ── Provider Registry ──
class TestOllamaCloudProviderRegistry:
    """ollama-cloud is registered with the expected static provider config."""

    def test_ollama_cloud_in_registry(self):
        assert "ollama-cloud" in PROVIDER_REGISTRY

    def test_ollama_cloud_config(self):
        entry = PROVIDER_REGISTRY["ollama-cloud"]
        assert entry.id == "ollama-cloud"
        assert entry.name == "Ollama Cloud"
        assert entry.auth_type == "api_key"
        assert entry.inference_base_url == "https://ollama.com/v1"

    def test_ollama_cloud_env_vars(self):
        entry = PROVIDER_REGISTRY["ollama-cloud"]
        assert entry.api_key_env_vars == ("OLLAMA_API_KEY",)
        assert entry.base_url_env_var == "OLLAMA_BASE_URL"

    def test_ollama_cloud_base_url(self):
        assert "ollama.com" in PROVIDER_REGISTRY["ollama-cloud"].inference_base_url
# ── Provider Aliases ──
# Every provider API-key variable that could influence auto-detection; the
# autouse fixture clears them all so each test starts from a clean environment.
PROVIDER_ENV_VARS = (
    "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
    "GOOGLE_API_KEY", "GEMINI_API_KEY", "OLLAMA_API_KEY",
    "GLM_API_KEY", "ZAI_API_KEY", "KIMI_API_KEY",
    "MINIMAX_API_KEY", "DEEPSEEK_API_KEY",
)
@pytest.fixture(autouse=True)
def _clean_provider_env(monkeypatch):
    """Strip every provider API-key variable so detection tests start clean."""
    for env_name in PROVIDER_ENV_VARS:
        monkeypatch.delenv(env_name, raising=False)
class TestOllamaCloudAliases:
    """Alias handling: only unambiguous names route to ollama-cloud."""

    def test_explicit_ollama_cloud(self):
        # Canonical id resolves to itself.
        assert resolve_provider("ollama-cloud") == "ollama-cloud"

    def test_alias_ollama_underscore(self):
        """ollama_cloud (underscore) is the unambiguous cloud alias."""
        assert resolve_provider("ollama_cloud") == "ollama-cloud"

    def test_bare_ollama_stays_local(self):
        """Bare 'ollama' alias routes to 'custom' (local) — not cloud."""
        assert resolve_provider("ollama") == "custom"

    def test_models_py_aliases(self):
        # The models.py alias table must agree with resolve_provider.
        assert _PROVIDER_ALIASES.get("ollama_cloud") == "ollama-cloud"
        # bare "ollama" stays local
        assert _PROVIDER_ALIASES.get("ollama") == "custom"

    def test_normalize_provider(self):
        assert normalize_provider("ollama-cloud") == "ollama-cloud"
# ── Auto-detection ──
class TestOllamaCloudAutoDetection:
    """provider='auto' picks ollama-cloud when OLLAMA_API_KEY is the only key set."""

    def test_auto_detects_ollama_api_key(self, monkeypatch):
        # The autouse fixture cleared all provider keys, so OLLAMA_API_KEY
        # is the only credential visible to auto-detection here.
        monkeypatch.setenv("OLLAMA_API_KEY", "test-ollama-key")
        assert resolve_provider("auto") == "ollama-cloud"
# ── Credential Resolution ──
class TestOllamaCloudCredentials:
    """API-key and base-URL resolution for the ollama-cloud provider."""

    def test_resolve_with_ollama_api_key(self, monkeypatch):
        monkeypatch.setenv("OLLAMA_API_KEY", "ollama-secret")
        resolved = resolve_api_key_provider_credentials("ollama-cloud")
        assert resolved["provider"] == "ollama-cloud"
        assert resolved["api_key"] == "ollama-secret"
        assert resolved["base_url"] == "https://ollama.com/v1"

    def test_resolve_with_custom_base_url(self, monkeypatch):
        monkeypatch.setenv("OLLAMA_API_KEY", "key")
        monkeypatch.setenv("OLLAMA_BASE_URL", "https://custom.ollama/v1")
        resolved = resolve_api_key_provider_credentials("ollama-cloud")
        assert resolved["base_url"] == "https://custom.ollama/v1"

    def test_runtime_ollama_cloud(self, monkeypatch):
        monkeypatch.setenv("OLLAMA_API_KEY", "ollama-key")
        from hermes_cli.runtime_provider import resolve_runtime_provider
        runtime = resolve_runtime_provider(requested="ollama-cloud")
        assert runtime["provider"] == "ollama-cloud"
        assert runtime["api_mode"] == "chat_completions"
        assert runtime["api_key"] == "ollama-key"
        assert runtime["base_url"] == "https://ollama.com/v1"
# ── Model Catalog (dynamic — no static list) ──
class TestOllamaCloudModelCatalog:
    """Catalog wiring: dynamic model discovery, static label only."""

    def test_no_static_model_list(self):
        """Ollama Cloud models are fetched dynamically — no static list to maintain."""
        assert "ollama-cloud" not in _PROVIDER_MODELS

    def test_provider_label(self):
        # Human-readable label used in provider pickers.
        assert "ollama-cloud" in _PROVIDER_LABELS
        assert _PROVIDER_LABELS["ollama-cloud"] == "Ollama Cloud"
# ── Merged Model Discovery ──
class TestOllamaCloudMergedDiscovery:
    """fetch_ollama_cloud_models: live-API + models.dev merge, caching, fallbacks."""

    def test_merges_live_and_models_dev(self, tmp_path, monkeypatch):
        """Live API models appear first, models.dev additions fill gaps."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        mdev_payload = {
            "ollama-cloud": {
                "models": {
                    "glm-5": {"tool_call": True},
                    "kimi-k2.5": {"tool_call": True},
                    "nemotron-3-super": {"tool_call": True},
                }
            }
        }
        with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b", "glm-5"]), \
             patch("agent.models_dev.fetch_models_dev", return_value=mdev_payload):
            merged = fetch_ollama_cloud_models(force_refresh=True)
        # Live-API entries lead the list; models.dev entries follow, deduplicated.
        assert merged[0] == "qwen3.5:397b"
        assert merged[1] == "glm-5"
        assert "kimi-k2.5" in merged
        assert "nemotron-3-super" in merged
        assert merged.count("glm-5") == 1

    def test_falls_back_to_models_dev_without_api_key(self, tmp_path, monkeypatch):
        """Without API key, only models.dev results are returned."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
        mdev_payload = {"ollama-cloud": {"models": {"glm-5": {"tool_call": True}}}}
        with patch("agent.models_dev.fetch_models_dev", return_value=mdev_payload):
            assert fetch_ollama_cloud_models(force_refresh=True) == ["glm-5"]

    def test_uses_disk_cache(self, tmp_path, monkeypatch):
        """Second call returns cached results without hitting APIs."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as api_mock, \
             patch("agent.models_dev.fetch_models_dev", return_value={}):
            assert fetch_ollama_cloud_models(force_refresh=True) == ["model-a"]
            assert api_mock.call_count == 1
            # Second call — should use disk cache, not call API
            assert fetch_ollama_cloud_models() == ["model-a"]
            assert api_mock.call_count == 1  # no extra API call

    def test_force_refresh_bypasses_cache(self, tmp_path, monkeypatch):
        """force_refresh=True always hits the API even with fresh cache."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as api_mock, \
             patch("agent.models_dev.fetch_models_dev", return_value={}):
            fetch_ollama_cloud_models(force_refresh=True)
            fetch_ollama_cloud_models(force_refresh=True)
        assert api_mock.call_count == 2

    def test_stale_cache_used_on_total_failure(self, tmp_path, monkeypatch):
        """If both API and models.dev fail, stale cache is returned."""
        import json
        from hermes_cli.models import fetch_ollama_cloud_models, _save_ollama_cloud_cache
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        # Pre-populate a cache entry, then backdate it so it reads as stale.
        _save_ollama_cloud_cache(["stale-model"])
        cache_file = tmp_path / "ollama_cloud_models_cache.json"
        payload = json.loads(cache_file.read_text())
        payload["cached_at"] = 0  # epoch = very stale
        cache_file.write_text(json.dumps(payload))
        with patch("hermes_cli.models.fetch_api_models", return_value=None), \
             patch("agent.models_dev.fetch_models_dev", return_value={}):
            assert fetch_ollama_cloud_models(force_refresh=True) == ["stale-model"]

    def test_empty_on_total_failure_no_cache(self, tmp_path, monkeypatch):
        """Returns empty list when everything fails and no cache exists."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
        with patch("agent.models_dev.fetch_models_dev", return_value={}):
            assert fetch_ollama_cloud_models(force_refresh=True) == []
# ── Model Normalization ──
class TestOllamaCloudModelNormalization:
    """normalize_model_for_provider leaves Ollama Cloud model names untouched."""

    def test_passthrough_bare_name(self):
        """Ollama Cloud is a passthrough provider — model names used as-is."""
        assert normalize_model_for_provider("qwen3.5:397b", "ollama-cloud") == "qwen3.5:397b"

    def test_passthrough_with_tag(self):
        # ":<size>" tags survive normalization unchanged.
        assert normalize_model_for_provider("cogito-2.1:671b", "ollama-cloud") == "cogito-2.1:671b"

    def test_passthrough_no_tag(self):
        assert normalize_model_for_provider("glm-5", "ollama-cloud") == "glm-5"
# ── URL-to-Provider Mapping ──
class TestOllamaCloudUrlMapping:
    """Base-URL and model-prefix lookup tables know about ollama-cloud."""

    def test_url_to_provider(self):
        assert _URL_TO_PROVIDER.get("ollama.com") == "ollama-cloud"

    def test_provider_prefix_canonical(self):
        assert "ollama-cloud" in _PROVIDER_PREFIXES

    def test_provider_prefix_alias(self):
        # The short "ollama" prefix is also recognized in model strings.
        assert "ollama" in _PROVIDER_PREFIXES
# ── models.dev Integration ──
class TestOllamaCloudModelsDev:
    """models.dev provider mapping and agentic-model filtering."""

    def test_ollama_cloud_mapped(self):
        assert PROVIDER_TO_MODELS_DEV.get("ollama-cloud") == "ollama-cloud"

    def test_list_agentic_models_with_mock_data(self):
        """list_agentic_models filters correctly from mock models.dev data."""
        catalog = {
            "ollama-cloud": {
                "models": {
                    "qwen3.5:397b": {"tool_call": True},
                    "glm-5": {"tool_call": True},
                    "nemotron-3-nano:30b": {"tool_call": True},
                    "some-embedding:latest": {"tool_call": False},
                }
            }
        }
        with patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            agentic = list_agentic_models("ollama-cloud")
        for expected in ("qwen3.5:397b", "glm-5", "nemotron-3-nano:30b"):
            assert expected in agentic
        assert "some-embedding:latest" not in agentic  # no tool_call
# ── Agent Init (no SyntaxError) ──
class TestOllamaCloudAgentInit:
    """run_agent imports cleanly and routes ollama-cloud through chat_completions."""

    def test_agent_imports_without_error(self):
        """Verify run_agent.py has no SyntaxError."""
        import importlib
        import run_agent
        importlib.reload(run_agent)

    def test_ollama_cloud_agent_uses_chat_completions(self, monkeypatch):
        """Ollama Cloud falls through to chat_completions — no special elif needed."""
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("run_agent.OpenAI") as fake_openai:
            fake_openai.return_value = MagicMock()
            from run_agent import AIAgent
            agent = AIAgent(
                model="qwen3.5:397b",
                provider="ollama-cloud",
                api_key="test-key",
                base_url="https://ollama.com/v1",
            )
            assert agent.api_mode == "chat_completions"
            assert agent.provider == "ollama-cloud"
# ── providers.py New System ──
class TestOllamaCloudProvidersNew:
    """ollama-cloud wiring in the newer hermes_cli.providers system."""

    def test_overlay_exists(self):
        from hermes_cli.providers import HERMES_OVERLAYS
        assert "ollama-cloud" in HERMES_OVERLAYS
        cloud_overlay = HERMES_OVERLAYS["ollama-cloud"]
        assert cloud_overlay.transport == "openai_chat"
        assert cloud_overlay.base_url_env_var == "OLLAMA_BASE_URL"

    def test_alias_resolves(self):
        from hermes_cli.providers import normalize_provider as np
        assert np("ollama") == "custom"  # bare "ollama" = local
        assert np("ollama-cloud") == "ollama-cloud"

    def test_label_override(self):
        from hermes_cli.providers import _LABEL_OVERRIDES
        assert _LABEL_OVERRIDES.get("ollama-cloud") == "Ollama Cloud"

    def test_get_label(self):
        from hermes_cli.providers import get_label
        assert get_label("ollama-cloud") == "Ollama Cloud"

    def test_get_provider(self):
        from hermes_cli.providers import get_provider
        provider_def = get_provider("ollama-cloud")
        assert provider_def is not None
        assert provider_def.id == "ollama-cloud"
        assert provider_def.transport == "openai_chat"
# ── Auxiliary Model ──
class TestOllamaCloudAuxiliary:
    """A small default auxiliary model is registered for ollama-cloud."""

    def test_aux_model_defined(self):
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
        assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS
        assert _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"] == "nemotron-3-nano:30b"

View file

@ -18,6 +18,8 @@ from hermes_cli.plugins import (
PluginManager,
PluginManifest,
get_plugin_manager,
get_plugin_command_handler,
get_plugin_commands,
get_pre_tool_call_block_message,
discover_plugins,
invoke_hook,
@ -605,7 +607,292 @@ class TestPreLlmCallTargetRouting:
assert "plain text C" in _plugin_user_context
# NOTE: TestPluginCommands removed register_command() was never implemented
# in PluginContext (hermes_cli/plugins.py). The tests referenced _plugin_commands,
# commands_registered, get_plugin_command_handler, and GATEWAY_KNOWN_COMMANDS
# integration — all of which are unimplemented features.
# ── TestPluginCommands ────────────────────────────────────────────────────
class TestPluginCommands:
"""Tests for plugin slash command registration via register_command()."""
def test_register_command_basic(self):
    """register_command() stores handler, description, and plugin name."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    handler = lambda args: f"echo {args}"
    ctx.register_command("mycmd", handler, description="My custom command")
    assert "mycmd" in mgr._plugin_commands
    stored = mgr._plugin_commands["mycmd"]
    assert stored["handler"] is handler
    assert stored["description"] == "My custom command"
    assert stored["plugin"] == "test-plugin"
def test_register_command_normalizes_name(self):
    """Names are lowercased, stripped, and leading slashes removed."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    ctx.register_command("/MyCmd ", lambda a: a, description="test")
    assert "mycmd" in mgr._plugin_commands
    assert "/MyCmd " not in mgr._plugin_commands
def test_register_command_empty_name_rejected(self, caplog):
    """Empty name after normalization is rejected with a warning."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    with caplog.at_level(logging.WARNING):
        ctx.register_command("", lambda a: a)
    assert len(mgr._plugin_commands) == 0
    assert "empty name" in caplog.text
def test_register_command_builtin_conflict_rejected(self, caplog):
    """Commands that conflict with built-in names are rejected."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    with caplog.at_level(logging.WARNING):
        ctx.register_command("help", lambda a: a)
    assert "help" not in mgr._plugin_commands
    assert "conflicts" in caplog.text.lower()
def test_register_command_default_description(self):
    """Missing description defaults to 'Plugin command'."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    ctx.register_command("status-cmd", lambda a: a)
    assert mgr._plugin_commands["status-cmd"]["description"] == "Plugin command"
def test_get_plugin_command_handler_found(self):
    """get_plugin_command_handler() returns the handler for a registered command."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    handler = lambda args: f"result: {args}"
    ctx.register_command("mycmd", handler, description="test")
    with patch("hermes_cli.plugins._plugin_manager", mgr):
        assert get_plugin_command_handler("mycmd") is handler
def test_get_plugin_command_handler_not_found(self):
    """get_plugin_command_handler() returns None for unregistered commands."""
    mgr = PluginManager()
    with patch("hermes_cli.plugins._plugin_manager", mgr):
        assert get_plugin_command_handler("nonexistent") is None
def test_get_plugin_commands_returns_dict(self):
    """get_plugin_commands() returns the full commands dict."""
    manager = PluginManager()
    context = PluginContext(PluginManifest(name="test-plugin", source="user"), manager)
    for cmd_name, description in (("cmd-a", "A"), ("cmd-b", "B")):
        context.register_command(cmd_name, lambda a: a, description=description)
    with patch("hermes_cli.plugins._plugin_manager", manager):
        commands = get_plugin_commands()
    assert "cmd-a" in commands
    assert "cmd-b" in commands
    assert commands["cmd-a"]["description"] == "A"
def test_commands_tracked_on_loaded_plugin(self, tmp_path, monkeypatch):
    """Commands registered during discover_and_load() are tracked on LoadedPlugin."""
    home = tmp_path / "hermes_test"
    _make_plugin_dir(
        home / "plugins", "cmd-plugin",
        register_body=(
            'ctx.register_command("mycmd", lambda a: "ok", description="Test")'
        ),
    )
    monkeypatch.setenv("HERMES_HOME", str(home))
    manager = PluginManager()
    manager.discover_and_load()
    plugin = manager._plugins["cmd-plugin"]
    assert plugin.enabled
    assert "mycmd" in plugin.commands_registered
def test_commands_in_list_plugins_output(self, tmp_path, monkeypatch):
    """list_plugins() includes command count."""
    home = tmp_path / "hermes_test"
    _make_plugin_dir(
        home / "plugins", "cmd-plugin",
        register_body=(
            'ctx.register_command("mycmd", lambda a: "ok", description="Test")'
        ),
    )
    monkeypatch.setenv("HERMES_HOME", str(home))
    manager = PluginManager()
    manager.discover_and_load()
    listing = manager.list_plugins()
    assert len(listing) == 1
    assert listing[0]["commands"] == 1
def test_handler_receives_raw_args(self):
    """The handler is called with the raw argument string."""
    manager = PluginManager()
    context = PluginContext(PluginManifest(name="test-plugin", source="user"), manager)
    received = []
    context.register_command("echo", lambda args: received.append(args) or "ok")
    # Invoke the stored handler directly, as dispatch would.
    manager._plugin_commands["echo"]["handler"]("hello world")
    assert received == ["hello world"]
def test_multiple_plugins_register_different_commands(self):
    """Multiple plugins can each register their own commands."""
    manager = PluginManager()
    for plugin_name, cmd_name in [("plugin-a", "cmd-a"), ("plugin-b", "cmd-b")]:
        context = PluginContext(PluginManifest(name=plugin_name, source="user"), manager)
        context.register_command(cmd_name, lambda a: a, description=f"From {plugin_name}")
    commands = manager._plugin_commands
    assert "cmd-a" in commands
    assert "cmd-b" in commands
    # Each entry records which plugin owns it.
    assert commands["cmd-a"]["plugin"] == "plugin-a"
    assert commands["cmd-b"]["plugin"] == "plugin-b"
# ── TestPluginDispatchTool ────────────────────────────────────────────────
class TestPluginDispatchTool:
    """Tests for PluginContext.dispatch_tool() — tool dispatch with agent context."""

    @staticmethod
    def _make_ctx():
        """Return a (manager, context) pair for a bare user-sourced plugin."""
        manager = PluginManager()
        manifest = PluginManifest(name="test-plugin", source="user")
        return manager, PluginContext(manifest, manager)

    @staticmethod
    def _registry_returning(payload):
        """Return a mock tool registry whose dispatch() yields *payload*."""
        registry = MagicMock()
        registry.dispatch.return_value = payload
        return registry

    def test_dispatch_tool_calls_registry(self):
        """dispatch_tool() delegates to registry.dispatch()."""
        _, ctx = self._make_ctx()
        registry = self._registry_returning('{"result": "ok"}')
        with patch("hermes_cli.plugins.PluginContext.dispatch_tool.__module__", "hermes_cli.plugins"):
            with patch.dict("sys.modules", {}):
                with patch("tools.registry.registry", registry):
                    result = ctx.dispatch_tool("web_search", {"query": "test"})
        assert result == '{"result": "ok"}'

    def test_dispatch_tool_injects_parent_agent_from_cli_ref(self):
        """When _cli_ref has an agent, it's passed as parent_agent."""
        manager, ctx = self._make_ctx()
        agent = MagicMock()
        cli = MagicMock()
        cli.agent = agent
        manager._cli_ref = cli
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"})
        registry.dispatch.assert_called_once()
        call = registry.dispatch.call_args
        assert call[1].get("parent_agent") is agent

    def test_dispatch_tool_no_parent_agent_when_no_cli_ref(self):
        """When _cli_ref is None (gateway mode), no parent_agent is injected."""
        manager, ctx = self._make_ctx()
        manager._cli_ref = None
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"})
        assert "parent_agent" not in registry.dispatch.call_args[1]

    def test_dispatch_tool_no_parent_agent_when_agent_is_none(self):
        """When cli_ref exists but agent is None (not yet initialized), skip parent_agent."""
        manager, ctx = self._make_ctx()
        cli = MagicMock()
        cli.agent = None
        manager._cli_ref = cli
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"})
        assert "parent_agent" not in registry.dispatch.call_args[1]

    def test_dispatch_tool_respects_explicit_parent_agent(self):
        """Explicit parent_agent kwarg is not overwritten by _cli_ref.agent."""
        manager, ctx = self._make_ctx()
        cli_agent = MagicMock(name="cli_agent")
        cli = MagicMock()
        cli.agent = cli_agent
        manager._cli_ref = cli
        explicit_agent = MagicMock(name="explicit_agent")
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"}, parent_agent=explicit_agent)
        assert registry.dispatch.call_args[1]["parent_agent"] is explicit_agent

    def test_dispatch_tool_forwards_extra_kwargs(self):
        """Extra kwargs are forwarded to registry.dispatch()."""
        manager, ctx = self._make_ctx()
        manager._cli_ref = None
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("some_tool", {"x": 1}, task_id="test-123")
        assert registry.dispatch.call_args[1]["task_id"] == "test-123"

    def test_dispatch_tool_returns_json_string(self):
        """dispatch_tool() returns the raw JSON string from the registry."""
        manager, ctx = self._make_ctx()
        manager._cli_ref = None
        registry = self._registry_returning('{"error": "Unknown tool: fake"}')
        with patch("tools.registry.registry", registry):
            result = ctx.dispatch_tool("fake", {})
        assert '"error"' in result

View file

@ -0,0 +1,56 @@
"""Tests for plugins/memory/honcho/cli.py."""
from types import SimpleNamespace
class TestCmdStatus:
    def test_reports_connection_failure_when_session_setup_fails(self, monkeypatch, capsys, tmp_path):
        import plugins.memory.honcho.cli as honcho_cli

        cfg_path = tmp_path / "honcho.json"
        cfg_path.write_text("{}")

        class FakeConfig:
            # Minimal stand-in exposing every attribute cmd_status reads.
            enabled = True
            api_key = "root-key"
            workspace_id = "hermes"
            host = "hermes"
            base_url = None
            ai_peer = "hermes"
            peer_name = "eri"
            recall_mode = "hybrid"
            user_observe_me = True
            user_observe_others = False
            ai_observe_me = False
            ai_observe_others = True
            write_frequency = "async"
            session_strategy = "per-session"
            context_tokens = 800

            def resolve_session_name(self):
                return "hermes"

        # Route all config lookups at the isolated temp file / fake config.
        monkeypatch.setattr(honcho_cli, "_read_config", lambda: {"apiKey": "***"})
        monkeypatch.setattr(honcho_cli, "_config_path", lambda: cfg_path)
        monkeypatch.setattr(honcho_cli, "_local_config_path", lambda: cfg_path)
        monkeypatch.setattr(honcho_cli, "_active_profile_name", lambda: "default")
        monkeypatch.setattr(
            "plugins.memory.honcho.client.HonchoClientConfig.from_global_config",
            lambda host=None: FakeConfig(),
        )
        monkeypatch.setattr(
            "plugins.memory.honcho.client.get_honcho_client",
            lambda cfg: object(),
        )

        # Make the session-setup step blow up with an auth-style error.
        def _raise_invalid_key(hcfg, client):
            raise RuntimeError("Invalid API key")

        monkeypatch.setattr(honcho_cli, "_show_peer_cards", _raise_invalid_key)
        # Pretend the honcho SDK is importable without installing it.
        monkeypatch.setitem(__import__("sys").modules, "honcho", SimpleNamespace())

        honcho_cli.cmd_status(SimpleNamespace(all=False))

        out = capsys.readouterr().out
        assert "FAILED (Invalid API key)" in out
        assert "Connection... OK" not in out

View file

@ -1,5 +1,6 @@
"""Tests for plugins/memory/honcho/client.py — Honcho client configuration."""
import importlib.util
import json
import os
from pathlib import Path
@ -25,6 +26,7 @@ class TestHonchoClientConfigDefaults:
assert config.workspace_id == "hermes"
assert config.api_key is None
assert config.environment == "production"
assert config.timeout is None
assert config.enabled is False
assert config.save_messages is True
assert config.session_strategy == "per-directory"
@ -76,6 +78,11 @@ class TestFromEnv:
assert config.base_url == "http://localhost:8000"
assert config.enabled is True
def test_reads_timeout_from_env(self):
with patch.dict(os.environ, {"HONCHO_TIMEOUT": "90"}, clear=True):
config = HonchoClientConfig.from_env()
assert config.timeout == 90.0
class TestFromGlobalConfig:
def test_missing_config_falls_back_to_env(self, tmp_path):
@ -87,10 +94,10 @@ class TestFromGlobalConfig:
assert config.enabled is False
assert config.api_key is None
def test_reads_full_config(self, tmp_path):
def test_reads_full_config(self, tmp_path, monkeypatch):
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({
"apiKey": "my-honcho-key",
"apiKey": "***",
"workspace": "my-workspace",
"environment": "staging",
"peerName": "alice",
@ -108,9 +115,11 @@ class TestFromGlobalConfig:
}
}
}))
# Isolate from real ~/.hermes/honcho.json
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.api_key == "my-honcho-key"
assert config.api_key == "***"
# Host block workspace overrides root workspace
assert config.workspace_id == "override-ws"
assert config.ai_peer == "override-ai"
@ -154,10 +163,31 @@ class TestFromGlobalConfig:
def test_session_strategy_default_from_global_config(self, tmp_path):
"""from_global_config with no sessionStrategy should match dataclass default."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "key"}))
config_file.write_text(json.dumps({"apiKey": "***"}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.session_strategy == "per-directory"
def test_context_tokens_default_is_none(self, tmp_path):
"""Default context_tokens should be None (uncapped) unless explicitly set."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "***"}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.context_tokens is None
def test_context_tokens_explicit_sets_cap(self, tmp_path):
"""Explicit contextTokens in config sets the cap."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "***", "contextTokens": 1200}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.context_tokens == 1200
def test_context_tokens_explicit_overrides_default(self, tmp_path):
"""Explicit contextTokens in config should override the default."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "***", "contextTokens": 2000}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.context_tokens == 2000
def test_context_tokens_host_block_wins(self, tmp_path):
"""Host block contextTokens should override root."""
config_file = tmp_path / "config.json"
@ -232,6 +262,20 @@ class TestFromGlobalConfig:
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.base_url == "http://root:9000"
def test_timeout_from_config_root(self, tmp_path):
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"timeout": 75}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.timeout == 75.0
def test_request_timeout_alias_from_config_root(self, tmp_path):
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"requestTimeout": "82.5"}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.timeout == 82.5
class TestResolveSessionName:
def test_manual_override(self):
@ -333,13 +377,14 @@ class TestResolveConfigPath:
hermes_home.mkdir()
local_cfg = hermes_home / "honcho.json"
local_cfg.write_text(json.dumps({
"apiKey": "local-key",
"apiKey": "***",
"workspace": "local-ws",
}))
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}):
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}), \
patch.object(Path, "home", return_value=tmp_path):
config = HonchoClientConfig.from_global_config()
assert config.api_key == "local-key"
assert config.api_key == "***"
assert config.workspace_id == "local-ws"
@ -500,46 +545,115 @@ class TestObservationModeMigration:
assert cfg.ai_observe_others is True
class TestInitOnSessionStart:
"""Tests for the initOnSessionStart config field."""
class TestGetHonchoClient:
def teardown_method(self):
reset_honcho_client()
def test_default_is_false(self):
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_passes_timeout_from_config(self):
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
timeout=91.0,
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho:
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 91.0
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_hermes_config_timeout_override_used_when_config_timeout_missing(self):
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
patch("hermes_cli.config.load_config", return_value={"honcho": {"timeout": 88}}):
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 88.0
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_hermes_request_timeout_alias_used(self):
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
patch("hermes_cli.config.load_config", return_value={"honcho": {"request_timeout": "77.5"}}):
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 77.5
class TestResolveSessionNameGatewayKey:
"""Regression tests for gateway_session_key priority in resolve_session_name.
Ensures gateway platforms get stable per-chat Honcho sessions even when
sessionStrategy=per-session would otherwise create ephemeral sessions.
Regression: plugin refactor 924bc67e dropped gateway key plumbing.
"""
def test_gateway_key_overrides_per_session_strategy(self):
"""gateway_session_key must win over per-session session_id."""
config = HonchoClientConfig(session_strategy="per-session")
result = config.resolve_session_name(
session_id="20260412_171002_69bb38",
gateway_session_key="agent:main:telegram:dm:8439114563",
)
assert result == "agent-main-telegram-dm-8439114563"
def test_session_title_still_wins_over_gateway_key(self):
"""Explicit /title remap takes priority over gateway_session_key."""
config = HonchoClientConfig(session_strategy="per-session")
result = config.resolve_session_name(
session_title="my-custom-title",
session_id="20260412_171002_69bb38",
gateway_session_key="agent:main:telegram:dm:8439114563",
)
assert result == "my-custom-title"
def test_per_session_fallback_without_gateway_key(self):
"""Without gateway_session_key, per-session returns session_id (CLI path)."""
config = HonchoClientConfig(session_strategy="per-session")
result = config.resolve_session_name(
session_id="20260412_171002_69bb38",
gateway_session_key=None,
)
assert result == "20260412_171002_69bb38"
def test_gateway_key_sanitizes_special_chars(self):
"""Colons and other non-alphanumeric chars are replaced with hyphens."""
config = HonchoClientConfig()
assert config.init_on_session_start is False
def test_root_level_true(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({
"apiKey": "k",
"initOnSessionStart": True,
}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is True
def test_host_block_overrides_root(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({
"apiKey": "k",
"initOnSessionStart": True,
"hosts": {"hermes": {"initOnSessionStart": False}},
}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is False
def test_host_block_true_overrides_root_absent(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({
"apiKey": "k",
"hosts": {"hermes": {"initOnSessionStart": True}},
}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is True
def test_absent_everywhere_defaults_false(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({"apiKey": "k"}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is False
result = config.resolve_session_name(
gateway_session_key="agent:main:telegram:dm:8439114563",
)
assert result == "agent-main-telegram-dm-8439114563"
assert ":" not in result
class TestResetHonchoClient:
@ -549,3 +663,91 @@ class TestResetHonchoClient:
assert mod._honcho_client is not None
reset_honcho_client()
assert mod._honcho_client is None
class TestDialecticDepthParsing:
    """Tests for _parse_dialectic_depth and _parse_dialectic_depth_levels."""

    @staticmethod
    def _load(tmp_path, payload):
        """Write *payload* as JSON config and parse it via from_global_config."""
        config_file = tmp_path / "config.json"
        config_file.write_text(json.dumps(payload))
        return HonchoClientConfig.from_global_config(config_path=config_file)

    def test_default_depth_is_1(self, tmp_path):
        """Default dialecticDepth should be 1."""
        cfg = self._load(tmp_path, {"apiKey": "***"})
        assert cfg.dialectic_depth == 1

    def test_depth_from_root(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***", "dialecticDepth": 2})
        assert cfg.dialectic_depth == 2

    def test_depth_host_block_wins(self, tmp_path):
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 1,
            "hosts": {"hermes": {"dialecticDepth": 3}},
        })
        assert cfg.dialectic_depth == 3

    def test_depth_clamped_high(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***", "dialecticDepth": 10})
        assert cfg.dialectic_depth == 3

    def test_depth_clamped_low(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***", "dialecticDepth": -1})
        assert cfg.dialectic_depth == 1

    def test_depth_levels_default_none(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***"})
        assert cfg.dialectic_depth_levels is None

    def test_depth_levels_from_config(self, tmp_path):
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 2,
            "dialecticDepthLevels": ["minimal", "high"],
        })
        assert cfg.dialectic_depth_levels == ["minimal", "high"]

    def test_depth_levels_padded_if_short(self, tmp_path):
        """Array shorter than depth gets padded with 'low'."""
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 3,
            "dialecticDepthLevels": ["high"],
        })
        assert cfg.dialectic_depth_levels == ["high", "low", "low"]

    def test_depth_levels_truncated_if_long(self, tmp_path):
        """Array longer than depth gets truncated."""
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 1,
            "dialecticDepthLevels": ["high", "max", "medium"],
        })
        assert cfg.dialectic_depth_levels == ["high"]

    def test_depth_levels_invalid_values_default_to_low(self, tmp_path):
        """Invalid reasoning levels in the array fall back to 'low'."""
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 2,
            "dialecticDepthLevels": ["invalid", "high"],
        })
        assert cfg.dialectic_depth_levels == ["low", "high"]

View file

@ -205,27 +205,62 @@ class TestPeerLookupHelpers:
def test_get_peer_card_uses_direct_peer_lookup(self):
mgr, session = self._make_cached_manager()
user_peer = MagicMock()
user_peer.get_card.return_value = ["Name: Robert"]
mgr._get_or_create_peer = MagicMock(return_value=user_peer)
assistant_peer = MagicMock()
assistant_peer.get_card.return_value = ["Name: Robert"]
mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
assert mgr.get_peer_card(session.key) == ["Name: Robert"]
user_peer.get_card.assert_called_once_with()
assistant_peer.get_card.assert_called_once_with(target=session.user_peer_id)
def test_search_context_uses_peer_context_response(self):
def test_search_context_uses_assistant_perspective_with_target(self):
mgr, session = self._make_cached_manager()
user_peer = MagicMock()
user_peer.context.return_value = SimpleNamespace(
assistant_peer = MagicMock()
assistant_peer.context.return_value = SimpleNamespace(
representation="Robert runs neuralancer",
peer_card=["Location: Melbourne"],
)
mgr._get_or_create_peer = MagicMock(return_value=user_peer)
mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
result = mgr.search_context(session.key, "neuralancer")
assert "Robert runs neuralancer" in result
assert "- Location: Melbourne" in result
user_peer.context.assert_called_once_with(search_query="neuralancer")
assistant_peer.context.assert_called_once_with(
target=session.user_peer_id,
search_query="neuralancer",
)
def test_search_context_unified_mode_uses_user_self_context(self):
mgr, session = self._make_cached_manager()
mgr._ai_observe_others = False
user_peer = MagicMock()
user_peer.context.return_value = SimpleNamespace(
representation="Unified self context",
peer_card=["Name: Robert"],
)
mgr._get_or_create_peer = MagicMock(return_value=user_peer)
result = mgr.search_context(session.key, "self")
assert "Unified self context" in result
user_peer.context.assert_called_once_with(search_query="self")
def test_search_context_accepts_explicit_ai_peer_id(self):
mgr, session = self._make_cached_manager()
ai_peer = MagicMock()
ai_peer.context.return_value = SimpleNamespace(
representation="Assistant self context",
peer_card=["Role: Assistant"],
)
mgr._get_or_create_peer = MagicMock(return_value=ai_peer)
result = mgr.search_context(session.key, "assistant", peer=session.assistant_peer_id)
assert "Assistant self context" in result
ai_peer.context.assert_called_once_with(
target=session.assistant_peer_id,
search_query="assistant",
)
def test_get_prefetch_context_fetches_user_and_ai_from_peer_api(self):
mgr, session = self._make_cached_manager()
@ -235,9 +270,15 @@ class TestPeerLookupHelpers:
peer_card=["Name: Robert"],
)
ai_peer = MagicMock()
ai_peer.context.return_value = SimpleNamespace(
representation="AI representation",
peer_card=["Owner: Robert"],
ai_peer.context.side_effect = lambda **kwargs: SimpleNamespace(
representation=(
"AI representation" if kwargs.get("target") == session.assistant_peer_id
else "Mixed representation"
),
peer_card=(
["Role: Assistant"] if kwargs.get("target") == session.assistant_peer_id
else ["Name: Robert"]
),
)
mgr._get_or_create_peer = MagicMock(side_effect=[user_peer, ai_peer])
@ -247,17 +288,23 @@ class TestPeerLookupHelpers:
"representation": "User representation",
"card": "Name: Robert",
"ai_representation": "AI representation",
"ai_card": "Owner: Robert",
"ai_card": "Role: Assistant",
}
user_peer.context.assert_called_once_with()
ai_peer.context.assert_called_once_with()
user_peer.context.assert_called_once_with(target=session.user_peer_id)
ai_peer.context.assert_called_once_with(target=session.assistant_peer_id)
def test_get_ai_representation_uses_peer_api(self):
mgr, session = self._make_cached_manager()
ai_peer = MagicMock()
ai_peer.context.return_value = SimpleNamespace(
representation="AI representation",
peer_card=["Owner: Robert"],
ai_peer.context.side_effect = lambda **kwargs: SimpleNamespace(
representation=(
"AI representation" if kwargs.get("target") == session.assistant_peer_id
else "Mixed representation"
),
peer_card=(
["Role: Assistant"] if kwargs.get("target") == session.assistant_peer_id
else ["Name: Robert"]
),
)
mgr._get_or_create_peer = MagicMock(return_value=ai_peer)
@ -265,9 +312,167 @@ class TestPeerLookupHelpers:
assert result == {
"representation": "AI representation",
"card": "Owner: Robert",
"card": "Role: Assistant",
}
ai_peer.context.assert_called_once_with()
ai_peer.context.assert_called_once_with(target=session.assistant_peer_id)
def test_create_conclusion_defaults_to_user_target(self):
    mgr, session = self._make_cached_manager()
    assistant_peer = MagicMock()
    scope = MagicMock()
    assistant_peer.conclusions_of.return_value = scope
    mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
    # No peer argument → the conclusion is scoped to the user peer.
    assert mgr.create_conclusion(session.key, "User prefers dark mode") is True
    assistant_peer.conclusions_of.assert_called_once_with(session.user_peer_id)
    expected = {
        "content": "User prefers dark mode",
        "session_id": session.honcho_session_id,
    }
    scope.create.assert_called_once_with([expected])
def test_create_conclusion_can_target_ai_peer(self):
    mgr, session = self._make_cached_manager()
    assistant_peer = MagicMock()
    scope = MagicMock()
    assistant_peer.conclusions_of.return_value = scope
    mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
    # peer="ai" routes the conclusion onto the assistant peer instead.
    assert mgr.create_conclusion(session.key, "Assistant prefers terse summaries", peer="ai") is True
    assistant_peer.conclusions_of.assert_called_once_with(session.assistant_peer_id)
    expected = {
        "content": "Assistant prefers terse summaries",
        "session_id": session.honcho_session_id,
    }
    scope.create.assert_called_once_with([expected])
def test_create_conclusion_accepts_explicit_user_peer_id(self):
    mgr, session = self._make_cached_manager()
    assistant_peer = MagicMock()
    scope = MagicMock()
    assistant_peer.conclusions_of.return_value = scope
    mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
    # Passing the user's concrete peer id behaves like the default target.
    assert mgr.create_conclusion(session.key, "Robert prefers vinyl", peer=session.user_peer_id) is True
    assistant_peer.conclusions_of.assert_called_once_with(session.user_peer_id)
    expected = {
        "content": "Robert prefers vinyl",
        "session_id": session.honcho_session_id,
    }
    scope.create.assert_called_once_with([expected])
class TestConcludeToolDispatch:
    """Tool-call dispatch for honcho_conclude/profile/search/reasoning."""

    @staticmethod
    def _make_provider():
        """Return a provider with an initialized session and a mocked manager."""
        provider = HonchoMemoryProvider()
        provider._session_initialized = True
        provider._session_key = "telegram:123"
        provider._manager = MagicMock()
        return provider

    def test_honcho_conclude_defaults_to_user_peer(self):
        provider = self._make_provider()
        provider._manager.create_conclusion.return_value = True
        result = provider.handle_tool_call(
            "honcho_conclude",
            {"conclusion": "User prefers dark mode"},
        )
        assert "Conclusion saved for user" in result
        provider._manager.create_conclusion.assert_called_once_with(
            "telegram:123",
            "User prefers dark mode",
            peer="user",
        )

    def test_honcho_conclude_can_target_ai_peer(self):
        provider = self._make_provider()
        provider._manager.create_conclusion.return_value = True
        result = provider.handle_tool_call(
            "honcho_conclude",
            {"conclusion": "Assistant likes terse replies", "peer": "ai"},
        )
        assert "Conclusion saved for ai" in result
        provider._manager.create_conclusion.assert_called_once_with(
            "telegram:123",
            "Assistant likes terse replies",
            peer="ai",
        )

    def test_honcho_profile_can_target_explicit_peer_id(self):
        provider = self._make_provider()
        provider._manager.get_peer_card.return_value = ["Role: Assistant"]
        result = provider.handle_tool_call("honcho_profile", {"peer": "hermes"})
        assert "Role: Assistant" in result
        provider._manager.get_peer_card.assert_called_once_with("telegram:123", peer="hermes")

    def test_honcho_search_can_target_explicit_peer_id(self):
        provider = self._make_provider()
        provider._manager.search_context.return_value = "Assistant self context"
        result = provider.handle_tool_call(
            "honcho_search",
            {"query": "assistant", "peer": "hermes"},
        )
        assert "Assistant self context" in result
        provider._manager.search_context.assert_called_once_with(
            "telegram:123",
            "assistant",
            max_tokens=800,
            peer="hermes",
        )

    def test_honcho_reasoning_can_target_explicit_peer_id(self):
        provider = self._make_provider()
        provider._manager.dialectic_query.return_value = "Assistant answer"
        result = provider.handle_tool_call(
            "honcho_reasoning",
            {"query": "who are you", "peer": "hermes"},
        )
        assert "Assistant answer" in result
        provider._manager.dialectic_query.assert_called_once_with(
            "telegram:123",
            "who are you",
            reasoning_level=None,
            peer="hermes",
        )

    def test_honcho_conclude_missing_both_params_returns_error(self):
        """Calling honcho_conclude with neither conclusion nor delete_id returns a tool error."""
        import json

        provider = self._make_provider()
        result = provider.handle_tool_call("honcho_conclude", {})
        parsed = json.loads(result)
        assert "error" in parsed or "Missing required" in parsed.get("result", "")
        provider._manager.create_conclusion.assert_not_called()
        provider._manager.delete_conclusion.assert_not_called()
# ---------------------------------------------------------------------------
@ -366,6 +571,54 @@ class TestToolsModeInitBehavior:
assert cfg.peer_name == "8439114563"
class TestPerSessionMigrateGuard:
    """Verify migrate_memory_files is skipped under per-session strategy.

    per-session creates a fresh Honcho session every Hermes run. Uploading
    MEMORY.md/USER.md/SOUL.md to each short-lived session floods the backend
    with duplicate content. The guard was added to prevent orphan sessions
    containing only <prior_memory_file> wrappers.
    """

    def _make_provider_with_strategy(self, strategy, init_on_session_start=True):
        """Create a HonchoMemoryProvider and track migrate_memory_files calls."""
        from plugins.memory.honcho.client import HonchoClientConfig
        from unittest.mock import patch, MagicMock

        cfg = HonchoClientConfig(
            api_key="test-key",
            enabled=True,
            recall_mode="tools",
            init_on_session_start=init_on_session_start,
            session_strategy=strategy,
        )
        provider = HonchoMemoryProvider()
        manager = MagicMock()
        new_session = MagicMock()
        new_session.messages = []  # empty = new session → triggers migration path
        manager.get_or_create.return_value = new_session
        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=manager), \
             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
            provider.initialize(session_id="test-session-001")
        return provider, manager

    def test_migrate_skipped_for_per_session(self):
        """per-session strategy must NOT call migrate_memory_files."""
        _, manager = self._make_provider_with_strategy("per-session")
        manager.migrate_memory_files.assert_not_called()

    def test_migrate_runs_for_per_directory(self):
        """per-directory strategy with empty session SHOULD call migrate_memory_files."""
        _, manager = self._make_provider_with_strategy("per-directory")
        manager.migrate_memory_files.assert_called_once()
class TestChunkMessage:
def test_short_message_single_chunk(self):
result = HonchoMemoryProvider._chunk_message("hello world", 100)
@ -420,6 +673,60 @@ class TestChunkMessage:
assert len(chunk) <= 25000
# ---------------------------------------------------------------------------
# Context token budget enforcement
# ---------------------------------------------------------------------------
class TestTruncateToBudget:
    """Enforcement of the context_tokens budget in _truncate_to_budget."""

    def test_truncates_oversized_context(self):
        """Text exceeding context_tokens budget is truncated at a word boundary."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=10)
        long_text = "word " * 200  # ~1000 chars, well over 10*4=40 char budget
        result = provider._truncate_to_budget(long_text)
        assert len(result) <= 50  # budget_chars + ellipsis + word boundary slack
        # BUG FIX: the original asserted result.endswith(""), which is vacuously
        # true for every string. Truncation appends the ellipsis marker (see the
        # " …" suffix documented in test_context_tokens_cap_bounds_prefetch), so
        # assert the ellipsis explicitly.
        assert result.endswith("…")

    def test_no_truncation_within_budget(self):
        """Text within budget passes through unchanged."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=1000)
        short_text = "Name: Robert, Location: Melbourne"
        assert provider._truncate_to_budget(short_text) == short_text

    def test_no_truncation_when_context_tokens_none(self):
        """When context_tokens is None (explicit opt-out), no truncation."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=None)
        long_text = "word " * 500
        assert provider._truncate_to_budget(long_text) == long_text

    def test_context_tokens_cap_bounds_prefetch(self):
        """With an explicit token budget, oversized prefetch is bounded."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=1200)
        # Simulate a massive representation (10k chars)
        huge_text = "x" * 10000
        result = provider._truncate_to_budget(huge_text)
        # 1200 tokens * 4 chars = 4800 chars + " …"
        assert len(result) <= 4805
# ---------------------------------------------------------------------------
# Dialectic input guard
# ---------------------------------------------------------------------------
@ -452,3 +759,387 @@ class TestDialecticInputGuard:
# The query passed to chat() should be truncated
actual_query = mock_peer.chat.call_args[0][0]
assert len(actual_query) <= 100
# ---------------------------------------------------------------------------
class TestDialecticCadenceDefaults:
    """Regression tests for the dialectic_cadence default value."""

    @staticmethod
    def _make_provider(cfg_extra=None):
        """Build a HonchoMemoryProvider whose Honcho layer is fully mocked."""
        from unittest.mock import patch, MagicMock
        from plugins.memory.honcho.client import HonchoClientConfig

        cfg_kwargs = {"api_key": "test-key", "enabled": True, "recall_mode": "hybrid"}
        cfg_kwargs.update(cfg_extra or {})
        cfg = HonchoClientConfig(**cfg_kwargs)
        mgr = MagicMock()
        empty_session = MagicMock()
        empty_session.messages = []
        mgr.get_or_create.return_value = empty_session
        prov = HonchoMemoryProvider()
        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mgr), \
             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
            prov.initialize(session_id="test-session-001")
        return prov

    def test_default_is_3(self):
        """Default dialectic_cadence should be 3 to avoid per-turn LLM calls."""
        assert self._make_provider()._dialectic_cadence == 3

    def test_config_override(self):
        """dialecticCadence from config overrides the default."""
        prov = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 5}})
        assert prov._dialectic_cadence == 5
class TestBaseContextSummary:
    """Base context injection should include the session summary when present."""

    def test_format_includes_summary(self):
        """The summary section leads the formatted first-turn context."""
        provider = HonchoMemoryProvider()
        formatted = provider._format_first_turn_context({
            "summary": "Testing Honcho tools and dialectic depth.",
            "representation": "Eri is a developer.",
            "card": "Name: Eri Barrett",
        })
        assert "## Session Summary" in formatted
        assert formatted.index("Session Summary") < formatted.index("User Representation")

    def test_format_without_summary(self):
        """An absent summary key produces no summary section."""
        provider = HonchoMemoryProvider()
        formatted = provider._format_first_turn_context(
            {"representation": "Eri is a developer.", "card": "Name: Eri"}
        )
        assert "Session Summary" not in formatted
        assert "User Representation" in formatted

    def test_format_empty_summary_skipped(self):
        """An empty summary string must not produce a section."""
        provider = HonchoMemoryProvider()
        formatted = provider._format_first_turn_context(
            {"summary": "", "representation": "rep", "card": "card"}
        )
        assert "Session Summary" not in formatted
class TestDialecticDepth:
    """Tests for the dialecticDepth multi-pass system."""

    @staticmethod
    def _make_provider(cfg_extra=None):
        # Shared builder: patches the Honcho client/session layer so
        # provider.initialize() runs without any network access.
        from unittest.mock import patch, MagicMock
        from plugins.memory.honcho.client import HonchoClientConfig
        defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid")
        if cfg_extra:
            defaults.update(cfg_extra)
        cfg = HonchoClientConfig(**defaults)
        provider = HonchoMemoryProvider()
        mock_manager = MagicMock()
        mock_session = MagicMock()
        mock_session.messages = []  # empty -> treated as a brand-new session
        mock_manager.get_or_create.return_value = mock_session
        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
            provider.initialize(session_id="test-session-001")
        return provider

    def test_default_depth_is_1(self):
        """Default dialecticDepth should be 1 — single .chat() call."""
        provider = self._make_provider()
        assert provider._dialectic_depth == 1

    def test_depth_from_config(self):
        """dialecticDepth from config sets the depth."""
        provider = self._make_provider(cfg_extra={"dialectic_depth": 2})
        assert provider._dialectic_depth == 2

    def test_depth_clamped_to_3(self):
        """dialecticDepth > 3 gets clamped to 3."""
        provider = self._make_provider(cfg_extra={"dialectic_depth": 7})
        assert provider._dialectic_depth == 3

    def test_depth_clamped_to_1(self):
        """dialecticDepth < 1 gets clamped to 1."""
        provider = self._make_provider(cfg_extra={"dialectic_depth": 0})
        assert provider._dialectic_depth == 1

    def test_depth_levels_from_config(self):
        """dialecticDepthLevels array is read from config."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 2,
            "dialectic_depth_levels": ["minimal", "high"],
        })
        assert provider._dialectic_depth_levels == ["minimal", "high"]

    def test_depth_levels_none_by_default(self):
        """When dialecticDepthLevels is not configured, it's None."""
        provider = self._make_provider()
        assert provider._dialectic_depth_levels is None

    def test_resolve_pass_level_uses_depth_levels(self):
        """Per-pass levels from dialecticDepthLevels override proportional."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 2,
            "dialectic_depth_levels": ["minimal", "high"],
        })
        assert provider._resolve_pass_level(0) == "minimal"
        assert provider._resolve_pass_level(1) == "high"

    def test_resolve_pass_level_proportional_depth_1(self):
        """Depth 1 pass 0 uses the base reasoning level."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 1,
            "dialectic_reasoning_level": "medium",
        })
        assert provider._resolve_pass_level(0) == "medium"

    def test_resolve_pass_level_proportional_depth_2(self):
        """Depth 2: pass 0 is minimal, pass 1 is base level."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 2,
            "dialectic_reasoning_level": "high",
        })
        assert provider._resolve_pass_level(0) == "minimal"
        assert provider._resolve_pass_level(1) == "high"

    def test_cold_start_prompt(self):
        """Cold start (no base context) uses general user query."""
        provider = self._make_provider()
        prompt = provider._build_dialectic_prompt(0, [], is_cold=True)
        assert "preferences" in prompt.lower()
        assert "session" not in prompt.lower()

    def test_warm_session_prompt(self):
        """Warm session (has context) uses session-scoped query."""
        provider = self._make_provider()
        prompt = provider._build_dialectic_prompt(0, [], is_cold=False)
        assert "session" in prompt.lower()
        assert "current conversation" in prompt.lower()

    def test_signal_sufficient_short_response(self):
        """Short responses are not sufficient signal."""
        assert not HonchoMemoryProvider._signal_sufficient("ok")
        assert not HonchoMemoryProvider._signal_sufficient("")
        assert not HonchoMemoryProvider._signal_sufficient(None)

    def test_signal_sufficient_structured_response(self):
        """Structured responses with bullets/headers are sufficient."""
        result = "## Current State\n- Working on Honcho PR\n- Testing dialectic depth\n" + "x" * 50
        assert HonchoMemoryProvider._signal_sufficient(result)

    def test_signal_sufficient_long_unstructured(self):
        """Long responses are sufficient even without structure."""
        assert HonchoMemoryProvider._signal_sufficient("a" * 301)

    def test_run_dialectic_depth_single_pass(self):
        """Depth 1 makes exactly one .chat() call."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = "user prefers zero-fluff"
        provider._session_key = "test"
        provider._base_context_cache = None  # cold start
        result = provider._run_dialectic_depth("hello")
        assert result == "user prefers zero-fluff"
        assert provider._manager.dialectic_query.call_count == 1

    def test_run_dialectic_depth_two_passes(self):
        """Depth 2 makes two .chat() calls when pass 1 signal is weak."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 2})
        provider._manager = MagicMock()
        # side_effect ordering matters: the weak first pass forces pass 2.
        provider._manager.dialectic_query.side_effect = [
            "thin response",  # pass 0: weak signal
            "## Synthesis\n- Grounded in evidence\n- Current PR work\n" + "x" * 100,  # pass 1: strong
        ]
        provider._session_key = "test"
        provider._base_context_cache = "existing context"
        result = provider._run_dialectic_depth("test query")
        assert provider._manager.dialectic_query.call_count == 2
        assert "Synthesis" in result

    def test_first_turn_runs_dialectic_synchronously(self):
        """First turn should fire the dialectic synchronously (cold start)."""
        from unittest.mock import MagicMock, patch
        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = "cold start synthesis"
        provider._manager.get_prefetch_context.return_value = None
        provider._manager.pop_context_result.return_value = None
        provider._session_key = "test"
        provider._base_context_cache = ""  # cold start
        provider._last_dialectic_turn = -999  # never fired
        result = provider.prefetch("hello world")
        assert "cold start synthesis" in result
        assert provider._manager.dialectic_query.call_count == 1
        # After first-turn sync, _last_dialectic_turn should be updated
        assert provider._last_dialectic_turn != -999

    def test_first_turn_dialectic_does_not_double_fire(self):
        """After first-turn sync dialectic, queue_prefetch should skip (cadence)."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = "cold start synthesis"
        provider._manager.get_prefetch_context.return_value = None
        provider._manager.pop_context_result.return_value = None
        provider._session_key = "test"
        provider._base_context_cache = ""
        provider._last_dialectic_turn = -999
        provider._turn_count = 0
        # First turn fires sync dialectic
        provider.prefetch("hello")
        assert provider._manager.dialectic_query.call_count == 1
        # Now queue_prefetch on same turn should skip (cadence: 0 - 0 < 3)
        provider._manager.dialectic_query.reset_mock()
        provider.queue_prefetch("hello")
        assert provider._manager.dialectic_query.call_count == 0

    def test_run_dialectic_depth_bails_early_on_strong_signal(self):
        """Depth 2 skips pass 1 when pass 0 returns strong signal."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 2})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = (
            "## Full Assessment\n- Strong structured response\n- With evidence\n" + "x" * 200
        )
        provider._session_key = "test"
        provider._base_context_cache = "existing context"
        result = provider._run_dialectic_depth("test query")
        # Only 1 call because pass 0 had sufficient signal
        assert provider._manager.dialectic_query.call_count == 1
# ---------------------------------------------------------------------------
# set_peer_card None guard
# ---------------------------------------------------------------------------
class TestSetPeerCardNoneGuard:
    """set_peer_card must return None (not raise) when peer ID cannot be resolved."""

    def _make_manager(self):
        """Construct a bare HonchoSessionManager without running __init__."""
        from plugins.memory.honcho.client import HonchoClientConfig
        from plugins.memory.honcho.session import HonchoSessionManager

        mgr = HonchoSessionManager.__new__(HonchoSessionManager)
        mgr._cache = {}
        mgr._sessions_cache = {}
        mgr._config = HonchoClientConfig(api_key="test-key", enabled=True)
        return mgr

    def test_returns_none_when_peer_resolves_to_none(self):
        """set_peer_card returns None when _resolve_peer_id returns None."""
        from unittest.mock import patch

        mgr = self._make_manager()
        mgr._cache["test"] = HonchoSession(
            key="test",
            honcho_session_id="sid",
            user_peer_id="user-peer",
            assistant_peer_id="ai-peer",
        )
        with patch.object(mgr, "_resolve_peer_id", return_value=None):
            outcome = mgr.set_peer_card("test", ["fact 1", "fact 2"], peer="ghost")
        assert outcome is None

    def test_returns_none_when_session_missing(self):
        """set_peer_card returns None when session key is not in cache."""
        mgr = self._make_manager()
        assert mgr.set_peer_card("nonexistent", ["fact"], peer="user") is None
# ---------------------------------------------------------------------------
# get_session_context cache-miss fallback respects peer param
# ---------------------------------------------------------------------------
class TestGetSessionContextFallback:
    """get_session_context fallback must honour the peer param when honcho_session is absent."""

    def _make_manager_with_session(self, user_peer_id="user-peer", assistant_peer_id="ai-peer"):
        # Builds a manager whose session exists in the local _cache but NOT in
        # _sessions_cache, which is what drives get_session_context down the
        # direct-fetch fallback path under test.
        from plugins.memory.honcho.client import HonchoClientConfig
        from plugins.memory.honcho.session import HonchoSessionManager
        cfg = HonchoClientConfig(api_key="test-key", enabled=True)
        # __new__ bypasses __init__ so no real Honcho client is constructed.
        mgr = HonchoSessionManager.__new__(HonchoSessionManager)
        mgr._cache = {}
        mgr._sessions_cache = {}
        mgr._config = cfg
        mgr._dialectic_dynamic = True
        mgr._dialectic_reasoning_level = "low"
        mgr._dialectic_max_input_chars = 10000
        mgr._ai_observe_others = True
        session = HonchoSession(
            key="test",
            honcho_session_id="sid-missing-from-sessions-cache",
            user_peer_id=user_peer_id,
            assistant_peer_id=assistant_peer_id,
        )
        mgr._cache["test"] = session
        # Deliberately NOT adding to _sessions_cache to trigger fallback path
        return mgr

    def test_fallback_uses_user_peer_for_user(self):
        """On cache miss, peer='user' fetches user peer context."""
        mgr = self._make_manager_with_session()
        fetch_calls = []
        def _fake_fetch(peer_id, search_query=None, *, target=None):
            # Record (peer_id, target) for later assertions instead of fetching.
            fetch_calls.append((peer_id, target))
            return {"representation": "user rep", "card": []}
        mgr._fetch_peer_context = _fake_fetch
        mgr.get_session_context("test", peer="user")
        assert len(fetch_calls) == 1
        peer_id, target = fetch_calls[0]
        assert peer_id == "user-peer"
        assert target == "user-peer"

    def test_fallback_uses_ai_peer_for_ai(self):
        """On cache miss, peer='ai' fetches assistant peer context, not user."""
        mgr = self._make_manager_with_session()
        fetch_calls = []
        def _fake_fetch(peer_id, search_query=None, *, target=None):
            fetch_calls.append((peer_id, target))
            return {"representation": "ai rep", "card": []}
        mgr._fetch_peer_context = _fake_fetch
        mgr.get_session_context("test", peer="ai")
        assert len(fetch_calls) == 1
        peer_id, target = fetch_calls[0]
        assert peer_id == "ai-peer", f"expected ai-peer, got {peer_id}"
        assert target == "ai-peer"

View file

@ -0,0 +1,139 @@
"""Tests for interrupt handling in concurrent tool execution."""
import concurrent.futures
import threading
import time
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture(autouse=True)
def _isolate_hermes(tmp_path, monkeypatch):
    """Redirect HERMES_HOME into tmp_path so tests never touch real state."""
    hermes_home = tmp_path / ".hermes"
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    hermes_home.mkdir(exist_ok=True)
def _make_agent(monkeypatch):
    """Create a minimal AIAgent-like object with just the methods under test."""
    # Blank out provider env vars so no accidental credentials leak into the test.
    monkeypatch.setenv("OPENROUTER_API_KEY", "")
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "")
    # Avoid full AIAgent init — just import the class and build a stub
    import run_agent as _ra

    class _Stub:
        # Class-level attributes cover every field _execute_tool_calls_concurrent
        # reads. One _Stub instance exists per _make_agent call, so sharing the
        # class-level MagicMocks across instances is not a concern here.
        _interrupt_requested = False
        log_prefix = ""
        quiet_mode = True
        verbose_logging = False
        log_prefix_chars = 200
        _checkpoint_mgr = MagicMock(enabled=False)
        _subdirectory_hints = MagicMock()
        tool_progress_callback = None
        tool_start_callback = None
        tool_complete_callback = None
        _todo_store = MagicMock()
        _session_db = None
        valid_tool_names = set()
        _turns_since_memory = 0
        _iters_since_skill = 0
        _current_tool = None
        _last_activity = 0
        _print_fn = print

        def _touch_activity(self, desc):
            # Mirrors the real agent's activity heartbeat.
            self._last_activity = time.time()

        def _vprint(self, msg, force=False):
            pass

        def _safe_print(self, msg):
            pass

        def _should_emit_quiet_tool_messages(self):
            return False

        def _should_start_quiet_spinner(self):
            return False

        def _has_stream_consumers(self):
            return False

    stub = _Stub()
    # Bind the real methods
    stub._execute_tool_calls_concurrent = _ra.AIAgent._execute_tool_calls_concurrent.__get__(stub)
    stub._invoke_tool = MagicMock(side_effect=lambda *a, **kw: '{"ok": true}')
    return stub
class _FakeToolCall:
def __init__(self, name, args="{}", call_id="tc_1"):
self.function = MagicMock(name=name, arguments=args)
self.function.name = name
self.id = call_id
class _FakeAssistantMsg:
def __init__(self, tool_calls):
self.tool_calls = tool_calls
def test_concurrent_interrupt_cancels_pending(monkeypatch):
    """An interrupt raised mid-flight should end the wait loop early while
    every dispatched tool still records a result message."""
    agent = _make_agent(monkeypatch)
    release = threading.Event()

    def fake_invoke(name, args, task_id, call_id=None):
        if name == "slow_one":
            # Park until the test raises the interrupt flag.
            release.wait(timeout=10)
            return '{"slow": true}'
        return '{"fast": true}'

    agent._invoke_tool = MagicMock(side_effect=fake_invoke)
    assistant_msg = _FakeAssistantMsg([
        _FakeToolCall("fast_one", call_id="tc_fast"),
        _FakeToolCall("slow_one", call_id="tc_slow"),
    ])
    messages = []

    def trip_interrupt():
        time.sleep(0.3)
        agent._interrupt_requested = True
        release.set()  # unblock the slow tool

    worker = threading.Thread(target=trip_interrupt)
    worker.start()
    agent._execute_tool_calls_concurrent(assistant_msg, messages, "test_task")
    worker.join()

    # Both tools should have results in messages
    assert len(messages) == 2
    # The interrupt was detected
    assert agent._interrupt_requested is True
def test_concurrent_preflight_interrupt_skips_all(monkeypatch):
    """With the interrupt flag already raised before execution starts, every
    tool is skipped with a cancellation message and nothing is invoked."""
    agent = _make_agent(monkeypatch)
    agent._interrupt_requested = True
    assistant_msg = _FakeAssistantMsg([
        _FakeToolCall("tool_a", call_id="tc_a"),
        _FakeToolCall("tool_b", call_id="tc_b"),
    ])
    messages = []
    agent._execute_tool_calls_concurrent(assistant_msg, messages, "test_task")
    assert len(messages) == 2
    for entry in messages:
        assert "skipped due to user interrupt" in entry["content"]
    # _invoke_tool should never have been called
    agent._invoke_tool.assert_not_called()

View file

@ -9,6 +9,8 @@ def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-
if custom_providers is not None:
cfg["custom_providers"] = custom_providers
base_url = model_cfg.get("base_url", "")
with (
patch("hermes_cli.config.load_config", return_value=cfg),
patch("agent.model_metadata.get_model_context_length", return_value=128_000),
@ -21,6 +23,7 @@ def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-
agent = AIAgent(
model=model,
api_key="test-key-1234567890",
base_url=base_url,
quiet_mode=True,
skip_context_files=True,
skip_memory=True,

View file

@ -805,7 +805,10 @@ class TestCodexReasoningPreflight:
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
assert reasoning_items[0]["id"] == "r_001"
# Note: "id" is intentionally excluded from normalized output —
# with store=False the API returns 404 on server-side id resolution.
# The id is only used for local deduplication via seen_ids.
assert "id" not in reasoning_items[0]
assert reasoning_items[0]["summary"] == [{"type": "summary_text", "text": "Thinking about it"}]
def test_reasoning_item_without_id(self, monkeypatch):

View file

@ -928,6 +928,7 @@ class TestBuildApiKwargs:
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 4096
def test_qwen_portal_formats_messages_and_metadata(self, agent):
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
@ -984,6 +985,46 @@ class TestBuildApiKwargs:
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 65536
def test_ollama_think_false_on_effort_none(self, agent):
    """Custom (Ollama) provider with effort=none should inject think=false."""
    agent.provider = "custom"
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.reasoning_config = {"effort": "none"}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is False
def test_ollama_think_false_on_enabled_false(self, agent):
    """Custom (Ollama) provider with enabled=false should inject think=false."""
    agent.provider = "custom"
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.reasoning_config = {"enabled": False}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is False
def test_ollama_no_think_param_when_reasoning_enabled(self, agent):
    """Custom provider with reasoning enabled should NOT inject think=false."""
    agent.provider = "custom"
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.reasoning_config = {"enabled": True, "effort": "medium"}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is None
def test_non_custom_provider_unaffected(self, agent):
    """OpenRouter provider with effort=none should NOT inject think=false."""
    agent.provider = "openrouter"
    agent.model = "qwen/qwen3.5-plus-02-15"
    agent.reasoning_config = {"effort": "none"}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is None
class TestBuildAssistantMessage:
def test_basic_message(self, agent):
@ -2202,6 +2243,114 @@ class TestRunConversation:
assert second_call_messages[-1]["role"] == "user"
assert "truncated by the output length limit" in second_call_messages[-1]["content"]
def test_ollama_glm_stop_after_tools_without_terminal_boundary_requests_continuation(self, agent):
    """Ollama-hosted GLM responses can misreport truncated output as stop."""
    self._setup_agent(agent)
    # Local Ollama endpoint + GLM model: the combination the workaround targets.
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "glm-5.1:cloud"
    # Call 1: the model requests a tool.
    tool_turn = _mock_response(
        content="",
        finish_reason="tool_calls",
        tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
    )
    # Call 2: output cut mid-sentence, yet finish_reason falsely claims "stop".
    misreported_stop = _mock_response(
        content="Based on the search results, the best next",
        finish_reason="stop",
    )
    # Call 3: the continuation the workaround is expected to request.
    continued = _mock_response(
        content=" step is to update the config.",
        finish_reason="stop",
    )
    agent.client.chat.completions.create.side_effect = [
        tool_turn,
        misreported_stop,
        continued,
    ]
    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello")
    assert result["completed"] is True
    assert result["api_calls"] == 3  # tool turn + misreported stop + continuation
    # The two stop-turn fragments must be stitched into one final response.
    assert (
        result["final_response"]
        == "Based on the search results, the best next step is to update the config."
    )
    # The continuation request arrives as a trailing user message on call 3.
    third_call_messages = agent.client.chat.completions.create.call_args_list[2].kwargs["messages"]
    assert third_call_messages[-1]["role"] == "user"
    assert "truncated by the output length limit" in third_call_messages[-1]["content"]
def test_ollama_glm_stop_with_terminal_boundary_does_not_continue(self, agent):
    """Complete Ollama/GLM responses should not be reclassified as truncated."""
    self._setup_agent(agent)
    # Same Ollama/GLM backend combination, but the reply is genuinely complete.
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "glm-5.1:cloud"
    tool_turn = _mock_response(
        content="",
        finish_reason="tool_calls",
        tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
    )
    # Ends at a terminal boundary ('.'), so no continuation should be requested.
    complete_stop = _mock_response(
        content="Based on the search results, the best next step is to update the config.",
        finish_reason="stop",
    )
    agent.client.chat.completions.create.side_effect = [tool_turn, complete_stop]
    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello")
    assert result["completed"] is True
    assert result["api_calls"] == 2  # no third (continuation) API call
    assert (
        result["final_response"]
        == "Based on the search results, the best next step is to update the config."
    )
def test_non_ollama_stop_without_terminal_boundary_does_not_continue(self, agent):
    """The stop->length workaround should stay scoped to Ollama/GLM backends."""
    self._setup_agent(agent)
    # Non-Ollama backend: even a mid-sentence "stop" must be taken at face value.
    agent.base_url = "https://api.openai.com/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "gpt-4o-mini"
    tool_turn = _mock_response(
        content="",
        finish_reason="tool_calls",
        tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
    )
    # Cut mid-sentence, but on this backend no reclassification happens.
    normal_stop = _mock_response(
        content="Based on the search results, the best next",
        finish_reason="stop",
    )
    agent.client.chat.completions.create.side_effect = [tool_turn, normal_stop]
    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello")
    assert result["completed"] is True
    assert result["api_calls"] == 2  # no continuation requested
    assert result["final_response"] == "Based on the search results, the best next"
def test_length_thinking_exhausted_skips_continuation(self, agent):
"""When finish_reason='length' but content is only thinking, skip retries."""
self._setup_agent(agent)
@ -3998,3 +4147,63 @@ class TestDeadRetryCode:
f"Expected 2 occurrences of 'if retry_count >= max_retries:' "
f"but found {occurrences}"
)
class TestMemoryContextSanitization:
    """run_conversation() must strip leaked <memory-context> blocks from user input."""

    def test_memory_context_stripped_from_user_message(self):
        """sanitize_context must run in run_conversation's preamble so stale
        Honcho injection cannot leak into user text."""
        import inspect

        src = inspect.getsource(AIAgent.run_conversation)
        # The sanitize_context call must appear in run_conversation's preamble
        assert "sanitize_context(user_message)" in src
        assert "sanitize_context(persist_user_message)" in src

    def test_sanitize_context_strips_full_block(self):
        """End-to-end: a message carrying an embedded memory-context block is
        reduced to just the genuine user text."""
        from agent.memory_manager import sanitize_context

        user_text = "how is the honcho working"
        memory_block = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, "
            "NOT new user input. Treat as informational background data.]\n\n"
            "## User Representation\n"
            "[2026-01-13 02:13:00] stale observation about AstroMap\n"
            "</memory-context>"
        )
        cleaned = sanitize_context(user_text + "\n\n" + memory_block)
        assert "memory-context" not in cleaned.lower()
        assert "stale observation" not in cleaned
        assert user_text in cleaned
class TestMemoryProviderTurnStart:
    """run_conversation() must call memory_manager.on_turn_start() before prefetch_all().

    Without this call, providers like Honcho never update _turn_count, so cadence
    checks (contextCadence, dialecticCadence) are always satisfied: every turn
    fires both context refresh and dialectic, ignoring the configured cadence.
    """

    def test_on_turn_start_called_before_prefetch(self):
        """Source-level check: on_turn_start precedes prefetch_all."""
        import inspect

        src = inspect.getsource(AIAgent.run_conversation)
        # Match the actual method-call tokens, not comments.
        pos_turn_start = src.index(".on_turn_start(")
        pos_prefetch = src.index(".prefetch_all(")
        assert pos_turn_start < pos_prefetch, (
            "on_turn_start() must be called before prefetch_all() in run_conversation "
            "so that memory providers have the correct turn count for cadence checks"
        )

    def test_on_turn_start_uses_user_turn_count(self):
        """Source-level check: on_turn_start receives self._user_turn_count."""
        import inspect

        src = inspect.getsource(AIAgent.run_conversation)
        assert "on_turn_start(self._user_turn_count" in src

View file

@ -160,7 +160,9 @@ class TestExchangeAuthCode:
assert flow.state == "saved-state"
assert flow.code_verifier == "saved-verifier"
assert flow.fetch_token_calls == [{"code": "4/test-auth-code"}]
assert json.loads(setup_module.TOKEN_PATH.read_text())["token"] == "access-token"
saved = json.loads(setup_module.TOKEN_PATH.read_text())
assert saved["token"] == "access-token"
assert saved["type"] == "authorized_user"
assert not setup_module.PENDING_AUTH_PATH.exists()
def test_extracts_code_from_redirect_url_and_checks_state(self, setup_module):

View file

@ -46,6 +46,12 @@ def api_module(monkeypatch, tmp_path):
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
# Ensure the gws CLI code path is taken even when the binary isn't
# installed (CI). Without this, calendar_list() falls through to the
# Python SDK path which imports ``googleapiclient`` — not in deps.
module._gws_binary = lambda: "/usr/bin/gws"
# Bypass authentication check — no real token file in CI.
module._ensure_authenticated = lambda: None
return module
@ -94,6 +100,7 @@ def test_bridge_refreshes_expired_token(bridge_module, tmp_path):
# Verify persisted
saved = json.loads(token_path.read_text())
assert saved["token"] == "ya29.refreshed"
assert saved["type"] == "authorized_user"
def test_bridge_exits_on_missing_token(bridge_module):
@ -124,35 +131,41 @@ def test_bridge_main_injects_token_env(bridge_module, tmp_path):
assert captured["cmd"] == ["gws", "gmail", "+triage"]
def test_api_calendar_list_uses_agenda_by_default(api_module):
"""calendar list without dates uses +agenda helper."""
def test_api_calendar_list_uses_events_list(api_module):
"""calendar_list calls _run_gws with events list + params."""
captured = {}
def capture_run(cmd, **kwargs):
captured["cmd"] = cmd
return MagicMock(returncode=0)
return MagicMock(returncode=0, stdout="{}", stderr="")
args = api_module.argparse.Namespace(
start="", end="", max=25, calendar="primary", func=api_module.calendar_list,
)
with patch.object(subprocess, "run", side_effect=capture_run):
with pytest.raises(SystemExit):
api_module.calendar_list(args)
with patch.object(api_module.subprocess, "run", side_effect=capture_run):
api_module.calendar_list(args)
gws_args = captured["cmd"][2:] # skip python + bridge path
assert "calendar" in gws_args
assert "+agenda" in gws_args
assert "--days" in gws_args
cmd = captured["cmd"]
# _gws_binary() returns "/usr/bin/gws", so cmd[0] is that binary
assert cmd[0] == "/usr/bin/gws"
assert "calendar" in cmd
assert "events" in cmd
assert "list" in cmd
assert "--params" in cmd
params = json.loads(cmd[cmd.index("--params") + 1])
assert "timeMin" in params
assert "timeMax" in params
assert params["calendarId"] == "primary"
def test_api_calendar_list_respects_date_range(api_module):
"""calendar list with --start/--end uses raw events list API."""
"""calendar list with --start/--end passes correct time bounds."""
captured = {}
def capture_run(cmd, **kwargs):
captured["cmd"] = cmd
return MagicMock(returncode=0)
return MagicMock(returncode=0, stdout="{}", stderr="")
args = api_module.argparse.Namespace(
start="2026-04-01T00:00:00Z",
@ -162,14 +175,62 @@ def test_api_calendar_list_respects_date_range(api_module):
func=api_module.calendar_list,
)
with patch.object(subprocess, "run", side_effect=capture_run):
with pytest.raises(SystemExit):
api_module.calendar_list(args)
with patch.object(api_module.subprocess, "run", side_effect=capture_run):
api_module.calendar_list(args)
gws_args = captured["cmd"][2:]
assert "events" in gws_args
assert "list" in gws_args
params_idx = gws_args.index("--params")
params = json.loads(gws_args[params_idx + 1])
cmd = captured["cmd"]
params_idx = cmd.index("--params")
params = json.loads(cmd[params_idx + 1])
assert params["timeMin"] == "2026-04-01T00:00:00Z"
assert params["timeMax"] == "2026-04-07T23:59:59Z"
def test_api_get_credentials_refresh_persists_authorized_user_type(api_module, monkeypatch):
token_path = api_module.TOKEN_PATH
_write_token(token_path, token="ya29.old")
class FakeCredentials:
def __init__(self):
self.expired = True
self.refresh_token = "1//refresh"
self.valid = True
def refresh(self, request):
self.expired = False
def to_json(self):
return json.dumps({
"token": "ya29.refreshed",
"refresh_token": "1//refresh",
"client_id": "123.apps.googleusercontent.com",
"client_secret": "secret",
"token_uri": "https://oauth2.googleapis.com/token",
})
class FakeCredentialsModule:
@staticmethod
def from_authorized_user_file(filename, scopes):
assert filename == str(token_path)
assert scopes == api_module.SCOPES
return FakeCredentials()
google_module = types.ModuleType("google")
oauth2_module = types.ModuleType("google.oauth2")
credentials_module = types.ModuleType("google.oauth2.credentials")
credentials_module.Credentials = FakeCredentialsModule
transport_module = types.ModuleType("google.auth.transport")
requests_module = types.ModuleType("google.auth.transport.requests")
requests_module.Request = lambda: object()
monkeypatch.setitem(sys.modules, "google", google_module)
monkeypatch.setitem(sys.modules, "google.oauth2", oauth2_module)
monkeypatch.setitem(sys.modules, "google.oauth2.credentials", credentials_module)
monkeypatch.setitem(sys.modules, "google.auth.transport", transport_module)
monkeypatch.setitem(sys.modules, "google.auth.transport.requests", requests_module)
creds = api_module.get_credentials()
saved = json.loads(token_path.read_text())
assert isinstance(creds, FakeCredentials)
assert saved["token"] == "ya29.refreshed"
assert saved["type"] == "authorized_user"

View file

@ -1,8 +1,8 @@
"""Persistence tests for the Camofox browser backend.
Tests that managed persistence uses stable identity while default mode
uses random identity. The actual browser profile persistence is handled
by the Camofox server (when CAMOFOX_PROFILE_DIR is set).
uses random identity. Camofox automatically maps each userId to a
dedicated persistent Firefox profile on the server side.
"""
import json

View file

@ -0,0 +1,166 @@
"""Tests for cloud browser provider runtime fallback to local Chromium.
Covers the fallback logic in _get_session_info() when a cloud provider
is configured but fails at runtime (issue #10883).
"""
import logging
from unittest.mock import Mock, patch
import pytest
import tools.browser_tool as browser_tool
def _reset_session_state(monkeypatch):
"""Clear caches so each test starts fresh."""
monkeypatch.setattr(browser_tool, "_active_sessions", {})
monkeypatch.setattr(browser_tool, "_cached_cloud_provider", None)
monkeypatch.setattr(browser_tool, "_cloud_provider_resolved", False)
monkeypatch.setattr(browser_tool, "_start_browser_cleanup_thread", lambda: None)
monkeypatch.setattr(browser_tool, "_update_session_activity", lambda t: None)
class TestCloudProviderRuntimeFallback:
"""Tests for _get_session_info cloud → local fallback."""
def test_cloud_failure_falls_back_to_local(self, monkeypatch):
"""When cloud provider.create_session raises, fall back to local."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.side_effect = RuntimeError("401 Unauthorized")
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-1")
assert session["fallback_from_cloud"] is True
assert "401 Unauthorized" in session["fallback_reason"]
assert session["fallback_provider"] == "Mock"
assert session["features"]["local"] is True
assert session["cdp_url"] is None
def test_cloud_success_no_fallback(self, monkeypatch):
"""When cloud succeeds, no fallback markers are present."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.return_value = {
"session_name": "cloud-sess",
"bb_session_id": "bb_123",
"cdp_url": None,
"features": {"browser_use": True},
}
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-2")
assert session["session_name"] == "cloud-sess"
assert "fallback_from_cloud" not in session
assert "fallback_reason" not in session
def test_cloud_and_local_both_fail(self, monkeypatch):
"""When both cloud and local fail, raise RuntimeError with both contexts."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.side_effect = RuntimeError("cloud boom")
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
monkeypatch.setattr(
browser_tool, "_create_local_session",
Mock(side_effect=OSError("no chromium")),
)
with pytest.raises(RuntimeError, match="cloud boom.*local.*no chromium"):
browser_tool._get_session_info("task-3")
def test_no_provider_uses_local_directly(self, monkeypatch):
"""When no cloud provider is configured, local mode is used with no fallback markers."""
_reset_session_state(monkeypatch)
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-4")
assert session["features"]["local"] is True
assert "fallback_from_cloud" not in session
def test_cdp_override_bypasses_provider(self, monkeypatch):
"""CDP override takes priority — cloud provider is never consulted."""
_reset_session_state(monkeypatch)
provider = Mock()
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "ws://host:9222/devtools/browser/abc")
session = browser_tool._get_session_info("task-5")
provider.create_session.assert_not_called()
assert session["cdp_url"] == "ws://host:9222/devtools/browser/abc"
def test_fallback_logs_warning_with_provider_name(self, monkeypatch, caplog):
"""Fallback emits a warning log with the provider class name and error."""
_reset_session_state(monkeypatch)
BrowserUseProviderFake = type("BrowserUseProvider", (), {
"create_session": Mock(side_effect=ConnectionError("timeout")),
})
provider = BrowserUseProviderFake()
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
with caplog.at_level(logging.WARNING, logger="tools.browser_tool"):
session = browser_tool._get_session_info("task-6")
assert session["fallback_from_cloud"] is True
assert any("BrowserUseProvider" in r.message and "timeout" in r.message
for r in caplog.records)
def test_cloud_failure_does_not_poison_next_task(self, monkeypatch):
"""A fallback for one task_id doesn't affect a new task_id when cloud recovers."""
_reset_session_state(monkeypatch)
call_count = 0
def create_session_flaky(task_id):
nonlocal call_count
call_count += 1
if call_count == 1:
raise RuntimeError("transient failure")
return {
"session_name": "cloud-ok",
"bb_session_id": "bb_999",
"cdp_url": None,
"features": {"browser_use": True},
}
provider = Mock()
provider.create_session.side_effect = create_session_flaky
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
# First call fails → fallback
s1 = browser_tool._get_session_info("task-a")
assert s1["fallback_from_cloud"] is True
# Second call (different task) → cloud succeeds
s2 = browser_tool._get_session_info("task-b")
assert "fallback_from_cloud" not in s2
assert s2["session_name"] == "cloud-ok"
def test_cloud_returns_invalid_session_triggers_fallback(self, monkeypatch):
"""Cloud provider returning None or empty dict triggers fallback."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.return_value = None
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-7")
assert session["fallback_from_cloud"] is True
assert "invalid session" in session["fallback_reason"]

View file

@ -123,7 +123,7 @@ class TestSendMatrix:
session.put.assert_called_once()
call_kwargs = session.put.call_args
url = call_kwargs[0][0]
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/!room:example.com/send/m.room.message/")
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/%21room%3Aexample.com/send/m.room.message/")
assert call_kwargs[1]["headers"]["Authorization"] == "Bearer syt_tok"
payload = call_kwargs[1]["json"]
assert payload["msgtype"] == "m.text"

View file

@ -12,6 +12,7 @@ from gateway.config import Platform
from tools.send_message_tool import (
_parse_target_ref,
_send_discord,
_send_matrix_via_adapter,
_send_telegram,
_send_to_platform,
send_message_tool,
@ -576,7 +577,7 @@ class TestSendToPlatformChunking:
sent_calls = []
async def fake_send(token, chat_id, message, media_files=None, thread_id=None):
async def fake_send(token, chat_id, message, media_files=None, thread_id=None, disable_link_previews=False):
sent_calls.append(media_files or [])
return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(len(sent_calls))}
@ -594,6 +595,103 @@ class TestSendToPlatformChunking:
assert all(call == [] for call in sent_calls[:-1])
assert sent_calls[-1] == media
def test_matrix_media_uses_native_adapter_helper(self):
doc_path = Path("/tmp/test-send-message-matrix.pdf")
doc_path.write_bytes(b"%PDF-1.4 test")
try:
helper = AsyncMock(return_value={"success": True, "platform": "matrix", "chat_id": "!room:example.com", "message_id": "$evt"})
with patch("tools.send_message_tool._send_matrix_via_adapter", helper):
result = asyncio.run(
_send_to_platform(
Platform.MATRIX,
SimpleNamespace(enabled=True, token="tok", extra={"homeserver": "https://matrix.example.com"}),
"!room:example.com",
"here you go",
media_files=[(str(doc_path), False)],
)
)
assert result["success"] is True
helper.assert_awaited_once()
call = helper.await_args
assert call.args[1] == "!room:example.com"
assert call.args[2] == "here you go"
assert call.kwargs["media_files"] == [(str(doc_path), False)]
finally:
doc_path.unlink(missing_ok=True)
def test_matrix_text_only_uses_lightweight_path(self):
"""Text-only Matrix sends should NOT go through the heavy adapter path."""
helper = AsyncMock()
lightweight = AsyncMock(return_value={"success": True, "platform": "matrix", "chat_id": "!room:ex.com", "message_id": "$txt"})
with patch("tools.send_message_tool._send_matrix_via_adapter", helper), \
patch("tools.send_message_tool._send_matrix", lightweight):
result = asyncio.run(
_send_to_platform(
Platform.MATRIX,
SimpleNamespace(enabled=True, token="tok", extra={"homeserver": "https://matrix.example.com"}),
"!room:ex.com",
"just text, no files",
)
)
assert result["success"] is True
helper.assert_not_awaited()
lightweight.assert_awaited_once()
def test_send_matrix_via_adapter_sends_document(self, tmp_path):
file_path = tmp_path / "report.pdf"
file_path.write_bytes(b"%PDF-1.4 test")
calls = []
class FakeAdapter:
def __init__(self, _config):
self.connected = False
async def connect(self):
self.connected = True
calls.append(("connect",))
return True
async def send(self, chat_id, message, metadata=None):
calls.append(("send", chat_id, message, metadata))
return SimpleNamespace(success=True, message_id="$text")
async def send_document(self, chat_id, file_path, metadata=None):
calls.append(("send_document", chat_id, file_path, metadata))
return SimpleNamespace(success=True, message_id="$file")
async def disconnect(self):
calls.append(("disconnect",))
fake_module = SimpleNamespace(MatrixAdapter=FakeAdapter)
with patch.dict(sys.modules, {"gateway.platforms.matrix": fake_module}):
result = asyncio.run(
_send_matrix_via_adapter(
SimpleNamespace(enabled=True, token="tok", extra={"homeserver": "https://matrix.example.com"}),
"!room:example.com",
"report attached",
media_files=[(str(file_path), False)],
)
)
assert result == {
"success": True,
"platform": "matrix",
"chat_id": "!room:example.com",
"message_id": "$file",
}
assert calls == [
("connect",),
("send", "!room:example.com", "report attached", None),
("send_document", "!room:example.com", str(file_path), None),
("disconnect",),
]
# ---------------------------------------------------------------------------
# HTML auto-detection in Telegram send
@ -658,6 +756,17 @@ class TestSendTelegramHtmlDetection:
kwargs = bot.send_message.await_args.kwargs
assert kwargs["parse_mode"] == "MarkdownV2"
def test_disable_link_previews_sets_disable_web_page_preview(self, monkeypatch):
bot = self._make_bot()
_install_telegram_mock(monkeypatch, bot)
asyncio.run(
_send_telegram("tok", "123", "https://example.com", disable_link_previews=True)
)
kwargs = bot.send_message.await_args.kwargs
assert kwargs["disable_web_page_preview"] is True
def test_html_with_code_and_pre_tags(self, monkeypatch):
bot = self._make_bot()
_install_telegram_mock(monkeypatch, bot)
@ -707,6 +816,23 @@ class TestSendTelegramHtmlDetection:
second_call = bot.send_message.await_args_list[1].kwargs
assert second_call["parse_mode"] is None
def test_transient_bad_gateway_retries_text_send(self, monkeypatch):
bot = self._make_bot()
bot.send_message = AsyncMock(
side_effect=[
Exception("502 Bad Gateway"),
SimpleNamespace(message_id=2),
]
)
_install_telegram_mock(monkeypatch, bot)
with patch("asyncio.sleep", new=AsyncMock()) as sleep_mock:
result = asyncio.run(_send_telegram("tok", "123", "hello"))
assert result["success"] is True
assert bot.send_message.await_count == 2
sleep_mock.assert_awaited_once()
# ---------------------------------------------------------------------------
# Tests for Discord thread_id support

View file

@ -873,12 +873,37 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
if provider is None:
session_info = _create_local_session(task_id)
else:
session_info = provider.create_session(task_id)
if session_info.get("cdp_url"):
# Some cloud providers (including Browser-Use v3) return an HTTP
# CDP discovery URL instead of a raw websocket endpoint.
session_info = dict(session_info)
session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"]))
try:
session_info = provider.create_session(task_id)
# Validate cloud provider returned a usable session
if not session_info or not isinstance(session_info, dict):
raise ValueError(f"Cloud provider returned invalid session: {session_info!r}")
if session_info.get("cdp_url"):
# Some cloud providers (including Browser-Use v3) return an HTTP
# CDP discovery URL instead of a raw websocket endpoint.
session_info = dict(session_info)
session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"]))
except Exception as e:
provider_name = type(provider).__name__
logger.warning(
"Cloud provider %s failed (%s); attempting fallback to local "
"Chromium for task %s",
provider_name, e, task_id,
exc_info=True,
)
try:
session_info = _create_local_session(task_id)
except Exception as local_error:
raise RuntimeError(
f"Cloud provider {provider_name} failed ({e}) and local "
f"fallback also failed ({local_error})"
) from e
# Mark session as degraded for observability
if isinstance(session_info, dict):
session_info = dict(session_info)
session_info["fallback_from_cloud"] = True
session_info["fallback_reason"] = str(e)
session_info["fallback_provider"] = provider_name
with _cleanup_lock:
# Double-check: another thread may have created a session while we

View file

@ -988,7 +988,8 @@ def execute_code(
# (terminal.env_passthrough) are passed through.
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
"HERMES_")
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
"PASSWD", "AUTH")
try:
@ -1015,10 +1016,13 @@ def execute_code(
_existing_pp = child_env.get("PYTHONPATH", "")
child_env["PYTHONPATH"] = _hermes_root + (os.pathsep + _existing_pp if _existing_pp else "")
# Inject user's configured timezone so datetime.now() in sandboxed
# code reflects the correct wall-clock time.
# code reflects the correct wall-clock time. Only TZ is set —
# HERMES_TIMEZONE is an internal Hermes setting and must not leak
# into child processes.
_tz_name = os.getenv("HERMES_TIMEZONE", "").strip()
if _tz_name:
child_env["TZ"] = _tz_name
child_env.pop("HERMES_TIMEZONE", None)
# Per-profile HOME isolation: redirect system tool configs into
# {HERMES_HOME}/home/ when that directory exists.

View file

@ -807,21 +807,61 @@ def delegate_task(
)
futures[future] = i
for future in as_completed(futures):
try:
entry = future.result()
except Exception as exc:
idx = futures[future]
entry = {
"task_index": idx,
"status": "error",
"summary": None,
"error": str(exc),
"api_calls": 0,
"duration_seconds": 0,
}
results.append(entry)
completed_count += 1
# Poll futures with interrupt checking. as_completed() blocks
# until ALL futures finish — if a child agent gets stuck,
# the parent blocks forever even after interrupt propagation.
# Instead, use wait() with a short timeout so we can bail
# when the parent is interrupted.
pending = set(futures.keys())
while pending:
if getattr(parent_agent, "_interrupt_requested", False) is True:
# Parent interrupted — collect whatever finished and
# abandon the rest. Children already received the
# interrupt signal; we just can't wait forever.
for f in pending:
idx = futures[f]
if f.done():
try:
entry = f.result()
except Exception as exc:
entry = {
"task_index": idx,
"status": "error",
"summary": None,
"error": str(exc),
"api_calls": 0,
"duration_seconds": 0,
}
else:
entry = {
"task_index": idx,
"status": "interrupted",
"summary": None,
"error": "Parent agent interrupted — child did not finish in time",
"api_calls": 0,
"duration_seconds": 0,
}
results.append(entry)
completed_count += 1
break
from concurrent.futures import wait as _cf_wait, FIRST_COMPLETED
done, pending = _cf_wait(pending, timeout=0.5, return_when=FIRST_COMPLETED)
for future in done:
try:
entry = future.result()
except Exception as exc:
idx = futures[future]
entry = {
"task_index": idx,
"status": "error",
"summary": None,
"error": str(exc),
"api_calls": 0,
"duration_seconds": 0,
}
results.append(entry)
completed_count += 1
# Print per-task completion line above the spinner
idx = entry["task_index"]

View file

@ -1166,6 +1166,14 @@ class MCPServerTask:
_servers: Dict[str, MCPServerTask] = {}
# Circuit breaker: consecutive error counts per server. After
# _CIRCUIT_BREAKER_THRESHOLD consecutive failures, the handler returns
# a "server unreachable" message that tells the model to stop retrying,
# preventing the 90-iteration burn loop described in #10447.
# Reset to 0 on any successful call.
_server_error_counts: Dict[str, int] = {}
_CIRCUIT_BREAKER_THRESHOLD = 3
# Dedicated event loop running in a background daemon thread.
_mcp_loop: Optional[asyncio.AbstractEventLoop] = None
_mcp_thread: Optional[threading.Thread] = None
@ -1356,9 +1364,23 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
"""
def _handler(args: dict, **kwargs) -> str:
# Circuit breaker: if this server has failed too many times
# consecutively, short-circuit with a clear message so the model
# stops retrying and uses alternative approaches (#10447).
if _server_error_counts.get(server_name, 0) >= _CIRCUIT_BREAKER_THRESHOLD:
return json.dumps({
"error": (
f"MCP server '{server_name}' is unreachable after "
f"{_CIRCUIT_BREAKER_THRESHOLD} consecutive failures. "
f"Do NOT retry this tool — use alternative approaches "
f"or ask the user to check the MCP server."
)
}, ensure_ascii=False)
with _lock:
server = _servers.get(server_name)
if not server or not server.session:
_server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1
return json.dumps({
"error": f"MCP server '{server_name}' is not connected"
}, ensure_ascii=False)
@ -1399,10 +1421,21 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
return json.dumps({"result": text_result}, ensure_ascii=False)
try:
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
result = _run_on_mcp_loop(_call(), timeout=tool_timeout)
# Check if the MCP tool itself returned an error
try:
parsed = json.loads(result)
if "error" in parsed:
_server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1
else:
_server_error_counts[server_name] = 0 # success — reset
except (json.JSONDecodeError, TypeError):
_server_error_counts[server_name] = 0 # non-JSON = success
return result
except InterruptedError:
return _interrupted_call_result()
except Exception as exc:
_server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1
logger.error(
"MCP tool %s/%s call failed: %s",
server_name, tool_name, exc,

View file

@ -345,7 +345,7 @@ class ProcessRegistry:
pty_env = _sanitize_subprocess_env(os.environ, env_vars)
pty_env["PYTHONUNBUFFERED"] = "1"
pty_proc = _PtyProcessCls.spawn(
[user_shell, "-lic", command],
[user_shell, "-lic", f"set +m; {command}"],
cwd=session.cwd,
env=pty_env,
dimensions=(30, 120),
@ -386,7 +386,7 @@ class ProcessRegistry:
bg_env = _sanitize_subprocess_env(os.environ, env_vars)
bg_env["PYTHONUNBUFFERED"] = "1"
proc = subprocess.Popen(
[user_shell, "-lic", command],
[user_shell, "-lic", f"set +m; {command}"],
text=True,
cwd=session.cwd,
env=bg_env,

View file

@ -5,6 +5,7 @@ Sends a message to a user or channel on any connected messaging platform
human-friendly channel names to IDs. Works in both CLI and gateway contexts.
"""
import asyncio
import json
import logging
import os
@ -48,6 +49,49 @@ def _error(message: str) -> dict:
return {"error": _sanitize_error_text(message)}
def _telegram_retry_delay(exc: Exception, attempt: int) -> float | None:
retry_after = getattr(exc, "retry_after", None)
if retry_after is not None:
try:
return max(float(retry_after), 0.0)
except (TypeError, ValueError):
return 1.0
text = str(exc).lower()
if "timed out" in text or "timeout" in text:
return None
if (
"bad gateway" in text
or "502" in text
or "too many requests" in text
or "429" in text
or "service unavailable" in text
or "503" in text
or "gateway timeout" in text
or "504" in text
):
return float(2 ** attempt)
return None
async def _send_telegram_message_with_retry(bot, *, attempts: int = 3, **kwargs):
for attempt in range(attempts):
try:
return await bot.send_message(**kwargs)
except Exception as exc:
delay = _telegram_retry_delay(exc, attempt)
if delay is None or attempt >= attempts - 1:
raise
logger.warning(
"Transient Telegram send failure (attempt %d/%d), retrying in %.1fs: %s",
attempt + 1,
attempts,
delay,
_sanitize_error_text(exc),
)
await asyncio.sleep(delay)
SEND_MESSAGE_SCHEMA = {
"name": "send_message",
"description": (
@ -327,10 +371,16 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
"""
from gateway.config import Platform
from gateway.platforms.base import BasePlatformAdapter, utf16_len
from gateway.platforms.telegram import TelegramAdapter
from gateway.platforms.discord import DiscordAdapter
from gateway.platforms.slack import SlackAdapter
# Telegram adapter import is optional (requires python-telegram-bot)
try:
from gateway.platforms.telegram import TelegramAdapter
_telegram_available = True
except ImportError:
_telegram_available = False
# Feishu adapter import is optional (requires lark-oapi)
try:
from gateway.platforms.feishu import FeishuAdapter
@ -349,7 +399,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
# Platform message length limits (from adapter class attributes)
_MAX_LENGTHS = {
Platform.TELEGRAM: TelegramAdapter.MAX_MESSAGE_LENGTH,
Platform.TELEGRAM: TelegramAdapter.MAX_MESSAGE_LENGTH if _telegram_available else 4096,
Platform.DISCORD: DiscordAdapter.MAX_MESSAGE_LENGTH,
Platform.SLACK: SlackAdapter.MAX_MESSAGE_LENGTH,
}
@ -369,6 +419,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
# --- Telegram: special handling for media attachments ---
if platform == Platform.TELEGRAM:
last_result = None
disable_link_previews = bool(getattr(pconfig, "extra", {}) and pconfig.extra.get("disable_link_previews"))
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _send_telegram(
@ -377,6 +428,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
chunk,
media_files=media_files if is_last else [],
thread_id=thread_id,
disable_link_previews=disable_link_previews,
)
if isinstance(result, dict) and result.get("error"):
return result
@ -404,11 +456,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
last_result = result
return last_result
# --- Matrix: use the native adapter helper when media is present ---
if platform == Platform.MATRIX and media_files:
last_result = None
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _send_matrix_via_adapter(
pconfig,
chat_id,
chunk,
media_files=media_files if is_last else [],
thread_id=thread_id,
)
if isinstance(result, dict) and result.get("error"):
return result
last_result = result
return last_result
# --- Non-Telegram/Discord platforms ---
if media_files and not message.strip():
return {
"error": (
f"send_message MEDIA delivery is currently only supported for telegram, discord, and weixin; "
f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, and weixin; "
f"target {platform.value} had only media attachments"
)
}
@ -416,7 +485,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if media_files:
warning = (
f"MEDIA attachments were omitted for {platform.value}; "
"native send_message media delivery is currently only supported for telegram, discord, and weixin"
"native send_message media delivery is currently only supported for telegram, discord, matrix, and weixin"
)
last_result = None
@ -461,7 +530,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
return last_result
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None):
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None, disable_link_previews=False):
"""Send via Telegram Bot API (one-shot, no polling needed).
Applies markdownMarkdownV2 formatting (same as the gateway adapter)
@ -497,13 +566,16 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
thread_kwargs = {}
if thread_id is not None:
thread_kwargs["message_thread_id"] = int(thread_id)
if disable_link_previews:
thread_kwargs["disable_web_page_preview"] = True
last_msg = None
warnings = []
if formatted.strip():
try:
last_msg = await bot.send_message(
last_msg = await _send_telegram_message_with_retry(
bot,
chat_id=int_chat_id, text=formatted,
parse_mode=send_parse_mode, **thread_kwargs
)
@ -523,7 +595,8 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
plain = message
else:
plain = message
last_msg = await bot.send_message(
last_msg = await _send_telegram_message_with_retry(
bot,
chat_id=int_chat_id, text=plain,
parse_mode=None, **thread_kwargs
)
@ -907,6 +980,66 @@ async def _send_matrix(token, extra, chat_id, message):
return _error(f"Matrix send failed: {e}")
async def _send_matrix_via_adapter(pconfig, chat_id, message, media_files=None, thread_id=None):
    """Deliver text and media through the Matrix adapter.

    Routing through the adapter (rather than a raw HTTP call) preserves
    native Matrix media uploads — images, video, voice notes, documents —
    exactly as the gateway performs them.
    """
    try:
        from gateway.platforms.matrix import MatrixAdapter
    except ImportError:
        return {"error": "Matrix dependencies not installed. Run: pip install 'mautrix[encryption]'"}

    attachments = media_files or []
    try:
        adapter = MatrixAdapter(pconfig)
        if not await adapter.connect():
            return _error("Matrix connect failed")

        metadata = {"thread_id": thread_id} if thread_id else None
        result = None

        if message.strip():
            result = await adapter.send(chat_id, message, metadata=metadata)
            if not result.success:
                return _error(f"Matrix send failed: {result.error}")

        for path, as_voice in attachments:
            if not os.path.exists(path):
                return _error(f"Media file not found: {path}")
            suffix = os.path.splitext(path)[1].lower()
            # Pick the adapter call that matches the attachment type.
            if suffix in _IMAGE_EXTS:
                sender = adapter.send_image_file
            elif suffix in _VIDEO_EXTS:
                sender = adapter.send_video
            elif (suffix in _VOICE_EXTS and as_voice) or suffix in _AUDIO_EXTS:
                sender = adapter.send_voice
            else:
                sender = adapter.send_document
            result = await sender(chat_id, path, metadata=metadata)
            if not result.success:
                return _error(f"Matrix media send failed: {result.error}")

        if result is None:
            return {"error": "No deliverable text or media remained after processing MEDIA tags"}
        return {
            "success": True,
            "platform": "matrix",
            "chat_id": chat_id,
            "message_id": result.message_id,
        }
    except Exception as e:
        return _error(f"Matrix send failed: {e}")
    finally:
        # Best-effort teardown; if construction itself raised, `adapter`
        # is unbound and the NameError is swallowed here too.
        try:
            await adapter.disconnect()
        except Exception:
            pass
async def _send_homeassistant(token, extra, chat_id, message):
"""Send via Home Assistant notify service."""
try:

View file

@ -1263,6 +1263,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
"related_skills": related_skills,
"content": content,
"path": rel_path,
"skill_dir": str(skill_dir) if skill_dir else None,
"linked_files": linked_files if linked_files else None,
"usage_hint": "To view linked files, call skill_view(name, file_path) where file_path is e.g. 'references/api.md' or 'assets/config.yaml'"
if linked_files

View file

@ -45,6 +45,7 @@ from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
from tools.xai_http import hermes_xai_user_agent
# ---------------------------------------------------------------------------
# Lazy imports -- providers are imported only when actually used to avoid
@ -93,6 +94,11 @@ DEFAULT_MINIMAX_VOICE_ID = "English_Graceful_Lady"
DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1/t2a_v2"
DEFAULT_MISTRAL_TTS_MODEL = "voxtral-mini-tts-2603"
DEFAULT_MISTRAL_TTS_VOICE_ID = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" # Paul - Neutral
DEFAULT_XAI_VOICE_ID = "eve"
DEFAULT_XAI_LANGUAGE = "en"
DEFAULT_XAI_SAMPLE_RATE = 24000
DEFAULT_XAI_BIT_RATE = 128000
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
def _get_default_output_dir() -> str:
from hermes_constants import get_hermes_dir
@ -299,6 +305,71 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
close()
# ===========================================================================
# Provider: xAI TTS
# ===========================================================================
def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str:
    """
    Generate audio with xAI TTS and write it to *output_path*.

    xAI exposes a dedicated POST /v1/tts endpoint instead of the OpenAI
    ``audio.speech`` API shape, so it gets its own backend rather than
    reusing the OpenAI one. Raises ``ValueError`` when XAI_API_KEY is
    unset; HTTP-level failures surface via ``raise_for_status``.
    Returns the path that was written.
    """
    import requests

    api_key = os.getenv("XAI_API_KEY", "").strip()
    if not api_key:
        raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/")

    cfg = tts_config.get("xai", {})
    voice = str(cfg.get("voice_id", DEFAULT_XAI_VOICE_ID)).strip() or DEFAULT_XAI_VOICE_ID
    lang = str(cfg.get("language", DEFAULT_XAI_LANGUAGE)).strip() or DEFAULT_XAI_LANGUAGE
    rate = int(cfg.get("sample_rate", DEFAULT_XAI_SAMPLE_RATE))
    bitrate = int(cfg.get("bit_rate", DEFAULT_XAI_BIT_RATE))
    base = str(
        cfg.get("base_url")
        or os.getenv("XAI_BASE_URL")
        or DEFAULT_XAI_BASE_URL
    ).strip().rstrip("/")

    codec = "wav" if output_path.endswith(".wav") else "mp3"
    payload: Dict[str, Any] = {"text": text, "voice_id": voice, "language": lang}

    # Keep the request matching the documented minimal POST /v1/tts shape;
    # attach output_format only when something deviates from the defaults.
    needs_format = (
        codec != "mp3"
        or rate != DEFAULT_XAI_SAMPLE_RATE
        or (codec == "mp3" and bitrate != DEFAULT_XAI_BIT_RATE)
    )
    if needs_format:
        fmt: Dict[str, Any] = {"codec": codec}
        if rate:
            fmt["sample_rate"] = rate
        if codec == "mp3" and bitrate:
            fmt["bit_rate"] = bitrate
        payload["output_format"] = fmt

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "User-Agent": hermes_xai_user_agent(),
    }
    resp = requests.post(f"{base}/tts", headers=headers, json=payload, timeout=60)
    resp.raise_for_status()

    with open(output_path, "wb") as f:
        f.write(resp.content)
    return output_path
# ===========================================================================
# Provider: MiniMax TTS
# ===========================================================================
@ -600,6 +671,10 @@ def text_to_speech_tool(
logger.info("Generating speech with MiniMax TTS...")
_generate_minimax_tts(text, file_str, tts_config)
elif provider == "xai":
logger.info("Generating speech with xAI TTS...")
_generate_xai_tts(text, file_str, tts_config)
elif provider == "mistral":
try:
_import_mistral_client()
@ -661,7 +736,7 @@ def text_to_speech_tool(
# Try Opus conversion for Telegram compatibility
    # These providers emit MP3/WAV rather than OGG/Opus — convert via ffmpeg
voice_compatible = False
if provider in ("edge", "neutts", "minimax") and not file_str.endswith(".ogg"):
if provider in ("edge", "neutts", "minimax", "xai") and not file_str.endswith(".ogg"):
opus_path = _convert_to_opus(file_str)
if opus_path:
file_str = opus_path
@ -734,6 +809,8 @@ def check_tts_requirements() -> bool:
pass
if os.getenv("MINIMAX_API_KEY"):
return True
if os.getenv("XAI_API_KEY"):
return True
try:
_import_mistral_client()
if os.getenv("MISTRAL_API_KEY"):

12
tools/xai_http.py Normal file
View file

@ -0,0 +1,12 @@
"""Shared helpers for direct xAI HTTP integrations."""
from __future__ import annotations
def hermes_xai_user_agent() -> str:
    """Return a stable Hermes-specific User-Agent for xAI HTTP calls.

    Falls back to ``unknown`` when the CLI package (and thus its version)
    is not importable, so the header is always well-formed.
    """
    version = "unknown"
    try:
        from hermes_cli import __version__ as version
    except Exception:
        pass
    return f"Hermes-Agent/{version}"

View file

@ -151,7 +151,7 @@ TOOLSETS = {
},
"tts": {
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, or OpenAI",
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, OpenAI, or xAI",
"tools": ["text_to_speech"],
"includes": []
},

View file

@ -3,6 +3,7 @@
import json
import logging
import os
import stat
import tempfile
from pathlib import Path
from typing import Any, Union
@ -31,6 +32,31 @@ def env_var_enabled(name: str, default: str = "") -> bool:
return is_truthy_value(os.getenv(name, default), default=False)
def _preserve_file_mode(path: Path) -> "int | None":
"""Capture the permission bits of *path* if it exists, else ``None``."""
try:
return stat.S_IMODE(path.stat().st_mode) if path.exists() else None
except OSError:
return None
def _restore_file_mode(path: Path, mode: "int | None") -> None:
"""Re-apply *mode* to *path* after an atomic replace.
``tempfile.mkstemp`` creates files with 0o600 (owner-only). After
``os.replace`` swaps the temp file into place the target inherits
those restrictive permissions, breaking Docker / NAS volume mounts
that rely on broader permissions set by the user. Calling this
right after ``os.replace`` restores the original permissions.
"""
if mode is None:
return
try:
os.chmod(path, mode)
except OSError:
pass
def atomic_json_write(
path: Union[str, Path],
data: Any,
@ -54,6 +80,8 @@ def atomic_json_write(
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent),
prefix=f".{path.stem}_",
@ -71,6 +99,7 @@ def atomic_json_write(
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException:
# Intentionally catch BaseException so temp-file cleanup still runs for
# KeyboardInterrupt/SystemExit before re-raising the original signal.
@ -106,6 +135,8 @@ def atomic_yaml_write(
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent),
prefix=f".{path.stem}_",
@ -119,6 +150,7 @@ def atomic_yaml_write(
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException:
# Match atomic_json_write: cleanup must also happen for process-level
# interruptions before we re-raise them.

Some files were not shown because too many files have changed in this diff Show more