Merge remote-tracking branch 'origin/main' into bb/tui-long-session-perf

# Conflicts:
#	ui-tui/src/app/interfaces.ts
This commit is contained in:
Brooklyn Nicholson 2026-04-26 13:39:57 -05:00
commit cc16d0ef77
82 changed files with 6072 additions and 712 deletions

View file

@ -14,6 +14,7 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.config import get_env_value
import hermes_cli.auth as auth_mod
from hermes_cli.auth import (
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@ -1273,7 +1274,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
token = os.getenv("OPENROUTER_API_KEY", "").strip()
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
@ -1299,7 +1301,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
env_url = ""
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
env_vars = list(pconfig.api_key_env_vars)
if provider == "anthropic":
@ -1310,7 +1312,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
]
for env_var in env_vars:
token = os.getenv(env_var, "").strip()
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value(env_var) or "").strip()
if not token:
continue
source = f"env:{env_var}"

View file

@ -145,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = {
"claude": 200000,
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
"gpt-5.5": 400000,
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
# This hardcoded value is only reached when every probe misses.
"gpt-5.5": 1050000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
@ -164,7 +165,17 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemma-4-31b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek
# DeepSeek — V4 family ships with a 1M context window. The legacy
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
# and inherit the same 1M window. The ``deepseek`` substring entry
# below remains as a 128K fallback for older / unknown DeepSeek model
# ids (e.g. via custom endpoints).
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
"deepseek-v4-pro": 1_000_000,
"deepseek-v4-flash": 1_000_000,
"deepseek-chat": 1_000_000,
"deepseek-reasoner": 1_000_000,
"deepseek": 128000,
# Meta
"llama": 131072,

View file

@ -180,3 +180,145 @@ def format_remaining(seconds: float) -> str:
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
# Buckets with reset windows shorter than this are treated as transient
# (upstream jitter, secondary throttling) rather than a genuine quota
# exhaustion worth a cross-session breaker trip.
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
def is_genuine_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    last_known_state: Optional[Any] = None,
) -> bool:
    """Classify a 429 from Nous Portal: real account limit, or upstream blip?

    Nous Portal fronts several upstream providers (DeepSeek, Kimi, MiMo,
    Hermes, ...) behind a single endpoint, so a 429 has two possible causes:

      (a) One of the caller's own RPM / RPH / TPM / TPH buckets on Nous is
          exhausted. This is a genuine rate limit that lasts until the
          bucket resets.
      (b) An upstream provider is out of capacity for one specific model.
          This is transient, clears in seconds, and says nothing about the
          caller's quota on Nous.

    Only (a) justifies tripping the cross-session breaker. Tripping it on
    (b) blocks every Nous request (and every model, because Nous is one
    provider key) for minutes even though the account is healthy and a
    different model would have worked -- e.g. DeepSeek V4 Pro 429s ending
    up blocking Kimi 2.6 and MiMo V2.5 Pro.

    Two signals are consulted, in order:

      1. The ``x-ratelimit-*`` headers on the 429 response itself. A bucket
         reporting ``remaining == 0`` with a reset window of at least
         ``_MIN_RESET_FOR_BREAKER_SECONDS`` is proof of (a).
      2. The last-known-good rate-limit state captured from the previous
         successful response. An exhausted bucket with a substantial reset
         window there means the 429 is almost certainly (a) continuing.

    When neither signal fires the 429 is treated as (b): the single request
    fails, the retry loop or model switch proceeds, and the cross-session
    breaker file is NOT written.

    Args:
        headers: Headers of the 429 response, if available.
        last_known_state: Rate-limit state from a recent successful
            response, if available. Its buckets are read via attribute
            access by ``_has_exhausted_bucket_in_object``.

    Returns:
        True when the evidence points at (a), False otherwise.
    """
    # Signal 1: what the 429 response itself reports.
    buckets_on_429 = _parse_buckets_from_headers(headers)
    if _has_exhausted_bucket(buckets_on_429):
        return True

    # Signal 2: the state remembered from the last successful response.
    if last_known_state is None:
        return False
    return bool(_has_exhausted_bucket_in_object(last_known_state))
def _parse_buckets_from_headers(
    headers: Optional[Mapping[str, str]],
) -> dict[str, tuple[Optional[int], Optional[float]]]:
    """Extract ``(remaining, reset_seconds)`` per bucket from ``x-ratelimit-*`` headers.

    Header names are matched case-insensitively. The four buckets looked up
    are ``requests``, ``requests-1h``, ``tokens`` and ``tokens-1h``, read
    from ``x-ratelimit-remaining-<bucket>`` and ``x-ratelimit-reset-<bucket>``.

    Args:
        headers: Response headers, or ``None``.

    Returns:
        A dict keyed by bucket tag. A bucket is included only when at least
        one of its two values parsed successfully; a value that is missing
        or unparseable is reported as ``None``. Returns an empty dict when
        ``headers`` is empty or contains no ``x-ratelimit-*`` header at all.
    """
    if not headers:
        return {}

    lowered = {name.lower(): value for name, value in headers.items()}
    if not any(name.startswith("x-ratelimit-") for name in lowered):
        return {}

    def _maybe_float(raw: Optional[str]) -> Optional[float]:
        if raw is None:
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    def _maybe_int(raw: Optional[str]) -> Optional[int]:
        if raw is None:
            return None
        try:
            # Go through float() so values such as "12.0" are accepted.
            return int(float(raw))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) / int(float("1e999")). This
            # parser runs while handling an error response, so a malformed
            # header must never raise out of it.
            return None

    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
        if remaining is not None or reset is not None:
            result[tag] = (remaining, reset)
    return result
def _has_exhausted_bucket(
    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
    """Report whether any parsed bucket is both empty and slow to reset.

    A bucket counts only when its ``remaining`` value is known and not
    positive AND its ``reset`` value is known and at least
    ``_MIN_RESET_FOR_BREAKER_SECONDS``. Buckets with an unknown (``None``)
    value on either side are ignored.
    """
    return any(
        remaining is not None
        and not remaining > 0
        and reset is not None
        and reset >= _MIN_RESET_FOR_BREAKER_SECONDS
        for remaining, reset in buckets.values()
    )
def _has_exhausted_bucket_in_object(state: Any) -> bool:
    """Check a RateLimitState-like object for an exhausted bucket.

    Buckets are read as the attributes ``requests_min``, ``requests_hour``,
    ``tokens_min`` and ``tokens_hour`` of ``state``. Each bucket is in turn
    read through its ``limit``, ``remaining`` and ``remaining_seconds_now``
    (falling back to ``reset_seconds``) attributes.

    A bucket is considered exhausted only when all of the following hold:

      * ``limit`` is known and positive,
      * ``remaining`` is known and not positive,
      * the reset window is at least ``_MIN_RESET_FOR_BREAKER_SECONDS``.

    Any object missing these attributes -- on ``state`` or on an individual
    bucket -- simply contributes no evidence. In particular an absent or
    ``None`` ``remaining`` is treated as "unknown", never as "zero left",
    matching how ``_has_exhausted_bucket`` treats header-derived buckets.

    Returns:
        True if at least one bucket is exhausted, False otherwise.
    """
    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
        bucket = getattr(state, attr, None)
        if bucket is None:
            continue

        limit = getattr(bucket, "limit", 0) or 0
        if limit <= 0:
            # No limit was ever captured for this bucket.
            continue

        remaining = getattr(bucket, "remaining", None)
        if remaining is None or remaining > 0:
            # Unknown or still has headroom: not proof of exhaustion.
            continue

        # Prefer the adjusted "remaining_seconds_now" value when present;
        # fall back to the raw reset_seconds.
        reset = getattr(bucket, "remaining_seconds_now", None)
        if reset is None:
            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
            return True
    return False

144
agent/onboarding.py Normal file
View file

@ -0,0 +1,144 @@
"""
Contextual first-touch onboarding hints.
Instead of blocking first-run questionnaires, show a one-time hint the *first*
time a user hits a behavior fork — message-while-running, first long-running
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
``onboarding.seen.<flag>``) and then never again.
Keep this module tiny and dependency-free so both the CLI and gateway can import
it without pulling in heavy modules.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
# -------------------------------------------------------------------------
# Flag names (stable — used as config.yaml keys under onboarding.seen)
# -------------------------------------------------------------------------
BUSY_INPUT_FLAG = "busy_input_prompt"
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
# -------------------------------------------------------------------------
# Hint content
# -------------------------------------------------------------------------
def busy_input_hint_gateway(mode: str) -> str:
    """Build the gateway's one-time tip for a message sent while the agent is busy.

    ``mode`` is the effective busy_input_mode that was just applied, so the
    wording matches what actually happened: ``"queue"`` yields the
    "I queued your message" variant, any other value yields the
    "I just interrupted" variant.
    """
    closing = "or `/busy status` to check. This notice won't appear again."
    if mode != "queue":
        return (
            "💡 First-time tip — I just interrupted my current task to answer you. "
            "Send `/busy queue` to queue follow-ups for after the current task instead, "
            + closing
        )
    return (
        "💡 First-time tip — I queued your message instead of interrupting. "
        "Send `/busy interrupt` to make new messages stop the current task immediately, "
        + closing
    )
def busy_input_hint_cli(mode: str) -> str:
    """Build the CLI's one-time busy-input tip (plain text, no markdown).

    ``"queue"`` selects the "was queued" wording; every other ``mode``
    value selects the "interrupted" wording.
    """
    if mode == "queue":
        what_happened = "(tip) Your message was queued for the next turn."
        alternative = "Use /busy interrupt to make Enter stop the current run instead."
    else:
        what_happened = "(tip) Your message interrupted the current run."
        alternative = "Use /busy queue to queue messages for the next turn instead."
    return " ".join((what_happened, alternative, "This tip only shows once."))
def tool_progress_hint_gateway() -> str:
    """Return the gateway's one-time tip pointing at ``/verbose`` after a slow tool."""
    sentences = (
        "💡 First-time tip — that tool took a while and I'm streaming every step.",
        "If the progress messages feel noisy, send `/verbose` to cycle modes (all → new → off).",
        "This notice won't appear again.",
    )
    return " ".join(sentences)
def tool_progress_hint_cli() -> str:
    """Return the CLI's one-time plain-text tip pointing at ``/verbose`` after a slow tool."""
    sentences = (
        "(tip) That tool ran for a while.",
        "Use /verbose to cycle tool-progress display modes (all -> new -> off -> verbose).",
        "This tip only shows once.",
    )
    return " ".join(sentences)
# -------------------------------------------------------------------------
# State read / write
# -------------------------------------------------------------------------
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
    """Return the ``onboarding.seen`` mapping from ``config``.

    Yields an empty dict whenever ``config``, its ``onboarding`` entry, or
    the nested ``seen`` entry is not a mapping, so callers can always call
    ``.get()`` on the result.
    """
    if not isinstance(config, Mapping):
        return {}
    section = config.get("onboarding")
    if not isinstance(section, Mapping):
        return {}
    flags = section.get("seen")
    if isinstance(flags, Mapping):
        return flags
    return {}
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
    """Tell whether the first-touch hint identified by ``flag`` was already shown.

    Looks ``flag`` up in the ``onboarding.seen`` mapping of ``config`` and
    returns the truthiness of the stored value; a missing flag is False.
    """
    seen_flags = _get_seen_dict(config)
    return bool(seen_flags.get(flag))
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The file is loaded with ``yaml.safe_load``, the flag is set, and the
    result is written back through ``utils.atomic_yaml_write`` (the
    project's atomic YAML writer) so a concurrent reader should not observe
    a partially-written file. Onboarding is best-effort: every failure is
    logged at debug level and reported as ``False`` rather than raised.

    Behavior worth knowing:
      * If the flag is already ``True`` the file is not rewritten.
      * A non-dict ``onboarding`` or ``onboarding.seen`` entry is replaced
        with a fresh dict.
      * If ``config_path`` does not exist, the update starts from an empty
        mapping and the write is still attempted.
        NOTE(review): earlier documentation described an absent config file
        as a failure case; confirm which behavior is intended.

    Returns:
        True when the flag is (now or already) recorded, False on any error.
    """
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as e:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", e)
        return False

    try:
        if config_path.exists():
            with open(config_path, encoding="utf-8") as handle:
                document = yaml.safe_load(handle) or {}
        else:
            document: dict = {}

        section = document.get("onboarding")
        if not isinstance(section, dict):
            section = document["onboarding"] = {}

        seen_flags = section.get("seen")
        if not isinstance(seen_flags, dict):
            seen_flags = section["seen"] = {}

        if seen_flags.get(flag) is not True:
            seen_flags[flag] = True
            atomic_yaml_write(config_path, document)
        # Either freshly written or already marked — both count as success.
        return True
    except Exception as e:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
        return False
__all__ = [
"BUSY_INPUT_FLAG",
"TOOL_PROGRESS_FLAG",
"busy_input_hint_gateway",
"busy_input_hint_cli",
"tool_progress_hint_gateway",
"tool_progress_hint_cli",
"is_seen",
"mark_seen",
]

View file

@ -329,7 +329,7 @@ def build_skill_invocation_message(
loaded_skill, skill_dir, skill_name = loaded
activation_note = (
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
"you to follow its instructions. The full skill content is loaded below.]"
)
return _build_skill_message(
@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(
loaded_skill, skill_dir, skill_name = loaded
activation_note = (
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
"preloaded. Treat its instructions as active guidance for the duration of this "
"session unless the user overrides them.]"
)

View file

@ -824,7 +824,9 @@ delegation:
# Display
# =============================================================================
display:
# Use compact banner mode
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
# true: Compact single-line banner
# false: Full ASCII banner with tool/skill summary (default)
compact: false
# Tool progress display level (CLI and gateway)
@ -838,12 +840,15 @@ display:
# Gateway-only natural mid-turn assistant updates.
# When true, completed assistant status messages are sent as separate chat
# messages. This is independent of tool_progress and gateway streaming.
# true: Send mid-turn assistant updates as separate messages (default)
# false: Only send the final response
interim_assistant_messages: true
# What Enter does when Hermes is already busy in the CLI.
# What Enter does when Hermes is already busy (CLI and gateway platforms).
# interrupt: Interrupt the current run and redirect Hermes (default)
# queue: Queue your message for the next turn
# Ctrl+C always interrupts regardless of this setting.
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
# Toggle at runtime with /busy <interrupt|queue>; /busy status shows the current mode.
busy_input_mode: interrupt
# Background process notifications (gateway/messaging only).
@ -859,17 +864,22 @@ display:
# Play terminal bell when agent finishes a response.
# Useful for long-running tasks — your terminal will ding when the agent is done.
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
# true: Ring the terminal bell on each response
# false: Silent (default)
bell_on_complete: false
# Show model reasoning/thinking before each response.
# When enabled, a dim box shows the model's thought process above the response.
# Toggle at runtime with /reasoning show or /reasoning hide.
# true: Show the reasoning box
# false: Hide reasoning (default)
show_reasoning: false
# Stream tokens to the terminal as they arrive instead of waiting for the
# full response. The response box opens on first token and text appears
# line-by-line. Tool calls are still captured silently.
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
# true: Stream tokens as they arrive (default)
# false: Wait for the full response before rendering
streaming: true
# ───────────────────────────────────────────────────────────────────────────
@ -879,10 +889,15 @@ display:
# response box label, and branding text. Change at runtime with /skin <name>.
#
# Built-in skins:
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
# daylight — Bright light-mode theme
# warm-lightmode — Warm paper-tone light-mode theme
# poseidon — Sea-green/teal Olympian theme
# sisyphus — Earthy stone-and-moss theme
# charizard — Fiery orange dragon theme
#
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
# Schema (all fields optional, missing values inherit from default):

214
cli.py
View file

@ -417,6 +417,11 @@ def load_cli_config() -> Dict[str, Any]:
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
},
"onboarding": {
# First-touch hint flags (see agent/onboarding.py). Each hint is
# shown once per install then latched here.
"seen": {},
},
}
# Track whether the config file explicitly set terminal config.
@ -1373,7 +1378,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
def _format_process_notification(evt: dict) -> "str | None":
"""Format a process notification event into a [SYSTEM: ...] message.
"""Format a process notification event into a [IMPORTANT: ...] message.
Handles both completion events (notify_on_complete) and watch pattern
match events from the unified completion_queue.
@ -1383,14 +1388,14 @@ def _format_process_notification(evt: dict) -> "str | None":
_cmd = evt.get("command", "unknown")
if evt_type == "watch_disabled":
return f"[SYSTEM: {evt.get('message', '')}]"
return f"[IMPORTANT: {evt.get('message', '')}]"
if evt_type == "watch_match":
_pat = evt.get("pattern", "?")
_out = evt.get("output", "")
_sup = evt.get("suppressed", 0)
text = (
f"[SYSTEM: Background process {_sid} matched "
f"[IMPORTANT: Background process {_sid} matched "
f"watch pattern \"{_pat}\".\n"
f"Command: {_cmd}\n"
f"Matched output:\n{_out}"
@ -1404,7 +1409,7 @@ def _format_process_notification(evt: dict) -> "str | None":
_exit = evt.get("exit_code", "?")
_out = evt.get("output", "")
return (
f"[SYSTEM: Background process {_sid} completed "
f"[IMPORTANT: Background process {_sid} completed "
f"(exit code {_exit}).\n"
f"Command: {_cmd}\n"
f"Output:\n{_out}]"
@ -4910,6 +4915,12 @@ class HermesCLI:
if self.agent:
self.agent.session_id = new_session_id
self.agent.session_start = now
# Redirect the JSON session log to the new branch session file so
# messages written after branching land in the correct file.
if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"):
self.agent.session_log_file = (
self.agent.logs_dir / f"session_{new_session_id}.json"
)
self.agent.reset_session_state()
if hasattr(self.agent, "_last_flushed_db_idx"):
self.agent._last_flushed_db_idx = len(self.conversation_history)
@ -5153,27 +5164,29 @@ class HermesCLI:
_cprint(f" ✓ Model switched: {result.new_model}")
_cprint(f" Provider: {provider_label}")
# Context: always resolve via the provider-aware chain so Codex OAuth,
# Copilot, and Nous-enforced caps win over the raw models.dev entry
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
mi = result.model_info
try:
from hermes_cli.model_switch import resolve_display_context_length
ctx = resolve_display_context_length(
result.new_model,
result.target_provider,
base_url=result.base_url or self.base_url or "",
api_key=result.api_key or self.api_key or "",
model_info=mi,
)
if ctx:
_cprint(f" Context: {ctx:,} tokens")
except Exception:
pass
if mi:
if mi.context_window:
_cprint(f" Context: {mi.context_window:,} tokens")
if mi.max_output:
_cprint(f" Max output: {mi.max_output:,} tokens")
if mi.has_cost_data():
_cprint(f" Cost: {mi.format_cost()}")
_cprint(f" Capabilities: {mi.format_capabilities()}")
else:
try:
from agent.model_metadata import get_model_context_length
ctx = get_model_context_length(
result.new_model,
base_url=result.base_url or self.base_url,
api_key=result.api_key or self.api_key,
provider=result.target_provider,
)
_cprint(f" Context: {ctx:,} tokens")
except Exception:
pass
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
@ -6122,8 +6135,6 @@ class HermesCLI:
self._handle_agents_command()
elif canonical == "background":
self._handle_background_command(cmd_original)
elif canonical == "btw":
self._handle_btw_command(cmd_original)
elif canonical == "queue":
# Extract prompt after "/queue " or "/q "
parts = cmd_original.split(None, 1)
@ -6410,122 +6421,6 @@ class HermesCLI:
self._background_tasks[task_id] = thread
thread.start()
def _handle_btw_command(self, cmd: str):
"""Handle /btw <question> — ephemeral side question using session context.
Snapshots the current conversation history, spawns a no-tools agent in
a background thread, and prints the answer without persisting anything
to the main session.
"""
parts = cmd.strip().split(maxsplit=1)
if len(parts) < 2 or not parts[1].strip():
_cprint(" Usage: /btw <question>")
_cprint(" Example: /btw what module owns session title sanitization?")
_cprint(" Answers using session context. No tools, not persisted.")
return
question = parts[1].strip()
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
if not self._ensure_runtime_credentials():
_cprint(" (>_<) Cannot start /btw: no valid credentials.")
return
turn_route = self._resolve_turn_agent_config(question)
history_snapshot = list(self.conversation_history)
preview = question[:60] + ("..." if len(question) > 60 else "")
_cprint(f' 💬 /btw: "{preview}"')
def run_btw():
try:
btw_agent = AIAgent(
model=turn_route["model"],
api_key=turn_route["runtime"].get("api_key"),
base_url=turn_route["runtime"].get("base_url"),
provider=turn_route["runtime"].get("provider"),
api_mode=turn_route["runtime"].get("api_mode"),
acp_command=turn_route["runtime"].get("command"),
acp_args=turn_route["runtime"].get("args"),
max_iterations=8,
enabled_toolsets=[],
quiet_mode=True,
verbose_logging=False,
session_id=task_id,
platform="cli",
reasoning_config=self.reasoning_config,
service_tier=self.service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=self._providers_only,
providers_ignored=self._providers_ignore,
providers_order=self._providers_order,
provider_sort=self._provider_sort,
provider_require_parameters=self._provider_require_params,
provider_data_collection=self._provider_data_collection,
fallback_model=self._fallback_model,
session_db=None,
skip_memory=True,
skip_context_files=True,
persist_session=False,
)
btw_prompt = (
"[Ephemeral /btw side question. Answer using the conversation "
"context. No tools available. Be direct and concise.]\n\n"
+ question
)
result = btw_agent.run_conversation(
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
)
response = (result.get("final_response") or "") if result else ""
if not response and result and result.get("error"):
response = f"Error: {result['error']}"
# TUI refresh before printing
if self._app:
self._app.invalidate()
time.sleep(0.05)
print()
if response:
try:
from hermes_cli.skin_engine import get_active_skin
_skin = get_active_skin()
_resp_color = _skin.get_color("response_border", "#4F6D4A")
except Exception:
_resp_color = "#4F6D4A"
ChatConsole().print(Panel(
_render_final_assistant_content(response, mode=self.final_response_markdown),
title=f"[{_resp_color} bold]⚕ /btw[/]",
title_align="left",
border_style=_resp_color,
box=rich_box.HORIZONTALS,
padding=(1, 4),
))
else:
_cprint(" 💬 /btw: (no response)")
if self.bell_on_complete:
sys.stdout.write("\a")
sys.stdout.flush()
except Exception as e:
if self._app:
self._app.invalidate()
time.sleep(0.05)
print()
_cprint(f" ❌ /btw failed: {e}")
finally:
if self._app:
self._invalidate(min_interval=0)
thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
thread.start()
@staticmethod
def _try_launch_chrome_debug(port: int, system: str) -> bool:
"""Try to launch Chrome/Chromium with remote debugging enabled.
@ -7328,7 +7223,7 @@ class HermesCLI:
change_detail = ". ".join(change_parts) + ". " if change_parts else ""
self.conversation_history.append({
"role": "user",
"content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
"content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
})
# Persist session immediately so the session log reflects the
@ -7410,6 +7305,31 @@ class HermesCLI:
_cprint(f" {line}")
except Exception:
pass
# First-touch onboarding: on the first tool in this process
# that takes longer than the threshold while we're in the
# noisiest progress mode, print a one-time hint about
# /verbose. Latched on self so it fires at most once per
# process; persisted to config.yaml so it never fires again
# across processes either.
try:
if (
not getattr(self, "_long_tool_hint_fired", False)
and self.tool_progress_mode == "all"
and duration >= 30.0
):
from agent.onboarding import (
TOOL_PROGRESS_FLAG,
is_seen,
mark_seen,
tool_progress_hint_cli,
)
if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
self._long_tool_hint_fired = True
_cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}")
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
except Exception:
pass
self._invalidate()
return
if event_type != "tool.started":
@ -9293,6 +9213,24 @@ class HermesCLI:
f"agent_running={self._agent_running}\n")
except Exception:
pass
# First-touch onboarding: on the very first busy-while-running
# event for this install, print a one-line tip explaining the
# /busy knob. Flag persists to config.yaml and never fires
# again. Guarded for exceptions so onboarding can't break
# the input loop.
try:
from agent.onboarding import (
BUSY_INPUT_FLAG,
busy_input_hint_cli,
is_seen,
mark_seen,
)
if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
_cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
except Exception:
pass
else:
self._pending_input.put(payload)
event.app.current_buffer.reset(append_to_history=True)
@ -9909,7 +9847,7 @@ class HermesCLI:
status = cli_ref._command_status or "Processing command..."
return f"{frame} {status}"
if cli_ref._agent_running:
return "type a message + Enter to interrupt, Ctrl+C to cancel"
return "msg=interrupt · /queue · /bg · /steer · Ctrl+C cancel"
if cli_ref._voice_mode:
return "type or Ctrl+B to record"
return ""

View file

@ -715,7 +715,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
# Always prepend cron execution guidance so the agent knows how
# delivery works and can suppress delivery when appropriate.
cron_hint = (
"[SYSTEM: You are running as a scheduled cron job. "
"[IMPORTANT: You are running as a scheduled cron job. "
"DELIVERY: Your final response will be automatically delivered "
"to the user — do NOT use send_message or try to deliver "
"the output yourself. Just produce your report/output as your "
@ -751,7 +751,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
parts.append("")
parts.extend(
[
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
"",
content,
]
@ -759,7 +759,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
if skipped:
notice = (
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
f"and were skipped: {', '.join(skipped)}. "
f"Start your response with a brief notice so the user is aware, e.g.: "
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"

View file

@ -41,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
echo "Warning: chown failed (rootless container?) — continuing anyway"
fi
# Ensure config.yaml is readable by the hermes runtime user even if it was
# edited on the host after initial ownership setup. Must run here (as root)
# rather than after the gosu drop, otherwise a non-root caller like
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
fi
echo "Dropping root privileges"
exec gosu hermes "$0" "$@"
fi
@ -67,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
fi
# Ensure the main config file remains accessible to the hermes runtime user
# even if it was edited on the host after initial ownership setup.
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml"
chmod 640 "$HERMES_HOME/config.yaml"
fi
# SOUL.md
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"

View file

@ -1025,7 +1025,20 @@ class BasePlatformAdapter(ABC):
self._post_delivery_callbacks: Dict[str, Any] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
# Per-chat overrides live in two sets populated from ``_voice_mode``:
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
# the global default is False.
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
# global default is True.
# The gate in _process_message() is:
# fire if chat in _auto_tts_enabled_chats
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
self._auto_tts_default: bool = False
self._auto_tts_enabled_chats: set = set()
self._auto_tts_disabled_chats: set = set()
# Chats where typing indicator is paused (e.g. during approval waits).
# _keep_typing skips send_typing when the chat_id is in this set.
@ -1047,6 +1060,21 @@ class BasePlatformAdapter(ABC):
def fatal_error_retryable(self) -> bool:
return self._fatal_error_retryable
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
    """Decide whether auto-TTS on voice input applies to ``chat_id``.

    Precedence (Issue #16007):
      1. A chat in ``_auto_tts_enabled_chats`` (explicit ``/voice on`` or
         ``/voice tts``) always fires, even when ``voice.auto_tts`` is False.
      2. A chat in ``_auto_tts_disabled_chats`` (explicit ``/voice off``)
         never fires.
      3. Otherwise the global ``_auto_tts_default`` decides.
    """
    explicitly_on = chat_id in self._auto_tts_enabled_chats
    if not explicitly_on and chat_id in self._auto_tts_disabled_chats:
        return False
    return explicitly_on or bool(self._auto_tts_default)
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
self._fatal_error_handler = handler
@ -2214,12 +2242,14 @@ class BasePlatformAdapter(ABC):
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
# Skipped when the chat has voice mode disabled (/voice off)
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
# True globally and no ``/voice off`` has been issued.
_tts_path = None
if (event.message_type == MessageType.VOICE
if (self._should_auto_tts_for_chat(event.source.chat_id)
and event.message_type == MessageType.VOICE
and text_content
and not media_files
and event.source.chat_id not in self._auto_tts_disabled_chats):
and not media_files):
try:
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
if check_tts_requirements():

View file

@ -2315,11 +2315,6 @@ class DiscordAdapter(BasePlatformAdapter):
async def slash_background(interaction: discord.Interaction, prompt: str):
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
@tree.command(name="btw", description="Ephemeral side question using session context")
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
async def slash_btw(interaction: discord.Interaction, question: str):
await self._run_simple_slash(interaction, f"/btw {question}")
# ── Auto-register any gateway-available commands not yet on the tree ──
# This ensures new commands added to COMMAND_REGISTRY in
# hermes_cli/commands.py automatically appear as Discord slash

View file

@ -207,8 +207,31 @@ class SlackAdapter(BasePlatformAdapter):
async def handle_assistant_thread_context_changed(event, say):
await self._handle_assistant_thread_lifecycle_event(event)
# Register slash command handler
@self._app.command("/hermes")
# Register slash command handler(s)
#
# Every gateway command from COMMAND_REGISTRY is a native Slack
# slash, matching Discord and Telegram's model (e.g. /btw, /stop,
# /model work directly without /hermes prefix). A single regex
# matcher dispatches all of them to one handler so we don't need
# N identical @app.command() decorators.
#
# The slash commands must ALSO be declared in the Slack app
# manifest (see `hermes slack manifest`). In Socket Mode, Slack
# routes the command event through the socket regardless of the
# manifest's request URL, but it will not deliver an event for
# a slash command the manifest doesn't declare.
from hermes_cli.commands import slack_native_slashes
import re as _re
_slash_names = [name for name, _d, _h in slack_native_slashes()]
if _slash_names:
_slash_pattern = _re.compile(
r"^/(?:" + "|".join(_re.escape(n) for n in _slash_names) + r")$"
)
else: # pragma: no cover - registry always non-empty
_slash_pattern = _re.compile(r"^/hermes$")
@self._app.command(_slash_pattern)
async def handle_hermes_command(ack, command):
await ack()
await self._handle_slash_command(command)
@ -1561,7 +1584,20 @@ class SlackAdapter(BasePlatformAdapter):
return ""
async def _handle_slash_command(self, command: dict) -> None:
"""Handle /hermes slash command."""
"""Handle Slack slash commands.
Every gateway command in COMMAND_REGISTRY is registered as a native
Slack slash (``/btw``, ``/stop``, ``/model``, etc.), matching the
Discord and Telegram model. The slash name itself is the command;
any text after it is the argument list.
The legacy ``/hermes <subcommand> [args]`` form is preserved for
backward compatibility with older workspace manifests and for users
who want a single entry point for free-form questions (``/hermes
what's the weather`` — non-slash text is treated as a regular
message).
"""
slash_name = (command.get("command") or "").lstrip("/").strip()
text = command.get("text", "").strip()
user_id = command.get("user_id", "")
channel_id = command.get("channel_id", "")
@ -1571,20 +1607,25 @@ class SlackAdapter(BasePlatformAdapter):
if team_id and channel_id:
self._channel_team[channel_id] = team_id
# Map subcommands to gateway commands — derived from central registry.
# Also keep "compact" as a Slack-specific alias for /compress.
from hermes_cli.commands import slack_subcommand_map
subcommand_map = slack_subcommand_map()
subcommand_map["compact"] = "/compress"
first_word = text.split()[0] if text else ""
if first_word in subcommand_map:
# Preserve arguments after the subcommand
rest = text[len(first_word):].strip()
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
elif text:
pass # Treat as a regular question
if slash_name in ("hermes", ""):
# Legacy /hermes <subcommand> [args] routing + free-form questions.
# Empty slash_name falls into this branch for backward compat
# with any caller that didn't populate command["command"].
from hermes_cli.commands import slack_subcommand_map
subcommand_map = slack_subcommand_map()
subcommand_map["compact"] = "/compress"
first_word = text.split()[0] if text else ""
if first_word in subcommand_map:
rest = text[len(first_word):].strip()
text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word]
elif text:
pass # Treat as a regular question
else:
text = "/help"
else:
text = "/help"
# Native slash — /<slash_name> [args]. Route directly through the
# gateway command dispatcher by prepending the slash.
text = f"/{slash_name} {text}".strip()
source = self.build_source(
chat_id=channel_id,

View file

@ -591,20 +591,20 @@ def _parse_session_key(session_key: str) -> "dict | None":
def _format_gateway_process_notification(evt: dict) -> "str | None":
"""Format a watch pattern event from completion_queue into a [SYSTEM:] message."""
"""Format a watch pattern event from completion_queue into a [IMPORTANT:] message."""
evt_type = evt.get("type", "completion")
_sid = evt.get("session_id", "unknown")
_cmd = evt.get("command", "unknown")
if evt_type == "watch_disabled":
return f"[SYSTEM: {evt.get('message', '')}]"
return f"[IMPORTANT: {evt.get('message', '')}]"
if evt_type == "watch_match":
_pat = evt.get("pattern", "?")
_out = evt.get("output", "")
_sup = evt.get("suppressed", 0)
text = (
f"[SYSTEM: Background process {_sid} matched "
f"[IMPORTANT: Background process {_sid} matched "
f"watch pattern \"{_pat}\".\n"
f"Command: {_cmd}\n"
f"Matched output:\n{_out}"
@ -881,23 +881,74 @@ class GatewayRunner:
return
if disabled:
disabled_chats.add(chat_id)
# ``/voice off`` also clears any explicit enable — it's a hard override.
enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
if isinstance(enabled_chats, set):
enabled_chats.discard(chat_id)
else:
disabled_chats.discard(chat_id)
def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
"""Restore persisted /voice off state into a live platform adapter."""
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
if not isinstance(disabled_chats, set):
def _set_adapter_auto_tts_enabled(self, adapter, chat_id: str, enabled: bool) -> None:
    """Add or remove ``chat_id`` from an adapter's auto-TTS opt-in set.

    Called for ``/voice on`` / ``/voice tts``, where the user explicitly
    wants auto-TTS even when ``voice.auto_tts`` is False globally.

    Does nothing when the adapter has no ``_auto_tts_enabled_chats`` set.
    When ``enabled`` is true the chat is also dropped from
    ``_auto_tts_disabled_chats`` (if the adapter has that set).
    """
    opt_in_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
    if not isinstance(opt_in_chats, set):
        return

    if not enabled:
        opt_in_chats.discard(chat_id)
        return

    opt_in_chats.add(chat_id)
    # An explicit opt-in supersedes any stale /voice off for this chat.
    opt_out_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
    if isinstance(opt_out_chats, set):
        opt_out_chats.discard(chat_id)
def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
    """Restore persisted /voice state into a live platform adapter.

    Populates three adapter fields from config + ``self._voice_mode``:

    - ``_auto_tts_default``: global default from ``voice.auto_tts``
    - ``_auto_tts_enabled_chats``: chats with mode ``voice_only``/``all``
    - ``_auto_tts_disabled_chats``: chats with mode ``off``

    The adapter is left untouched when its ``platform`` is not a
    ``Platform`` or when it exposes neither per-chat set. Each set is
    cleared and rebuilt only if the adapter actually has it as a ``set``.
    """
    platform = getattr(adapter, "platform", None)
    if not isinstance(platform, Platform):
        return

    # Bind both sets before touching either one; each is optional on the
    # adapter, so every later use is guarded by its own isinstance check.
    disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
    enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
    if not isinstance(disabled_chats, set) and not isinstance(enabled_chats, set):
        return

    # Push the global voice.auto_tts default (config.yaml) onto the adapter.
    # Lazy import to avoid adding a module-level dep from gateway -> hermes_cli.
    try:
        from hermes_cli.config import load_config as _load_full_config
        _full_cfg = _load_full_config()
        _auto_tts_default = bool(
            (_full_cfg.get("voice") or {}).get("auto_tts", False)
        )
    except Exception:
        # Best effort: an unreadable config falls back to "auto-TTS off".
        _auto_tts_default = False
    if hasattr(adapter, "_auto_tts_default"):
        adapter._auto_tts_default = _auto_tts_default

    # Keys in self._voice_mode are "<platform value>:<chat id>"; only this
    # adapter's platform is relevant, and the prefix is stripped off.
    prefix = f"{platform.value}:"
    if isinstance(disabled_chats, set):
        disabled_chats.clear()
        disabled_chats.update(
            key[len(prefix):] for key, mode in self._voice_mode.items()
            if mode == "off" and key.startswith(prefix)
        )
    if isinstance(enabled_chats, set):
        enabled_chats.clear()
        enabled_chats.update(
            key[len(prefix):] for key, mode in self._voice_mode.items()
            if mode in ("voice_only", "all") and key.startswith(prefix)
        )
async def _safe_adapter_disconnect(self, adapter, platform) -> None:
"""Call adapter.disconnect() defensively, swallowing any error.
@ -1579,6 +1630,27 @@ class GatewayRunner:
f"I'll respond to your message shortly."
)
# First-touch onboarding: the very first time a user sends a message
# while the agent is busy, append a one-time hint explaining the
# queue/interrupt knob. Flag is persisted to config.yaml so it never
# fires again on this install.
try:
from agent.onboarding import (
BUSY_INPUT_FLAG,
busy_input_hint_gateway,
is_seen,
mark_seen,
)
_user_cfg = _load_gateway_config()
if not is_seen(_user_cfg, BUSY_INPUT_FLAG):
message = (
f"{message}\n\n"
f"{busy_input_hint_gateway('queue' if is_queue_mode else 'interrupt')}"
)
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
except Exception as _onb_err:
logger.debug("Failed to apply busy-input onboarding hint: %s", _onb_err)
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
try:
await adapter._send_with_retry(
@ -3426,6 +3498,8 @@ class GatewayRunner:
# /background must bypass the running-agent guard — it starts a
# parallel task and must never interrupt the active conversation.
# /btw is an alias of /background and resolves to the same canonical
# name, so this branch handles both commands.
if _cmd_def_inner and _cmd_def_inner.name == "background":
return await self._handle_background_command(event)
@ -3701,9 +3775,6 @@ class GatewayRunner:
if canonical == "background":
return await self._handle_background_command(event)
if canonical == "btw":
return await self._handle_btw_command(event)
if canonical == "steer":
# No active agent — /steer has no tool call to inject into.
# Strip the prefix so downstream treats it as a normal user
@ -4161,7 +4232,7 @@ class GatewayRunner:
if _loaded:
_loaded_skill, _skill_dir, _display_name = _loaded
_note = (
f'[SYSTEM: The "{_display_name}" skill is auto-loaded. '
f'[IMPORTANT: The "{_display_name}" skill is auto-loaded. '
f"Follow its instructions for this session.]"
)
_part = _build_skill_message(_loaded_skill, _skill_dir, _note)
@ -5977,7 +6048,7 @@ class GatewayRunner:
self._voice_mode[voice_key] = "voice_only"
self._save_voice_modes()
if adapter:
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
return (
"Voice mode enabled.\n"
"I'll reply with voice when you send voice messages.\n"
@ -5993,7 +6064,7 @@ class GatewayRunner:
self._voice_mode[voice_key] = "all"
self._save_voice_modes()
if adapter:
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
return (
"Auto-TTS enabled.\n"
"All replies will include a voice message."
@ -6032,7 +6103,7 @@ class GatewayRunner:
self._voice_mode[voice_key] = "voice_only"
self._save_voice_modes()
if adapter:
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
return "Voice mode enabled."
else:
self._voice_mode[voice_key] = "off"
@ -6083,7 +6154,7 @@ class GatewayRunner:
adapter._voice_sources[guild_id] = event.source.to_dict()
self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
self._save_voice_modes()
self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
self._set_adapter_auto_tts_enabled(adapter, event.source.chat_id, enabled=True)
return (
f"Joined voice channel **{voice_channel.name}**.\n"
f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
@ -6601,177 +6672,6 @@ class GatewayRunner:
except Exception:
pass
async def _handle_btw_command(self, event: MessageEvent) -> str:
"""Handle /btw <question> — ephemeral side question in the same chat."""
question = event.get_command_args().strip()
if not question:
return (
"Usage: /btw <question>\n"
"Example: /btw what module owns session title sanitization?\n\n"
"Answers using session context. No tools, not persisted."
)
source = event.source
session_key = self._session_key_for_source(source)
# Guard: one /btw at a time per session
existing = getattr(self, "_active_btw_tasks", {}).get(session_key)
if existing and not existing.done():
return "A /btw is already running for this chat. Wait for it to finish."
if not hasattr(self, "_active_btw_tasks"):
self._active_btw_tasks: dict = {}
import uuid as _uuid
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}"
_task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id))
self._background_tasks.add(_task)
self._active_btw_tasks[session_key] = _task
def _cleanup(task):
self._background_tasks.discard(task)
if self._active_btw_tasks.get(session_key) is task:
self._active_btw_tasks.pop(session_key, None)
_task.add_done_callback(_cleanup)
preview = question[:60] + ("..." if len(question) > 60 else "")
return f'💬 /btw: "{preview}"\nReply will appear here shortly.'
async def _run_btw_task(
self, question: str, source, session_key: str, task_id: str,
) -> None:
"""Execute an ephemeral /btw side question and deliver the answer."""
from run_agent import AIAgent
adapter = self.adapters.get(source.platform)
if not adapter:
logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id)
return
_thread_meta = {"thread_id": source.thread_id} if source.thread_id else None
try:
user_config = _load_gateway_config()
model, runtime_kwargs = self._resolve_session_agent_runtime(
source=source,
session_key=session_key,
user_config=user_config,
)
if not runtime_kwargs.get("api_key"):
await adapter.send(
source.chat_id,
"❌ /btw failed: no provider credentials configured.",
metadata=_thread_meta,
)
return
platform_key = _platform_config_key(source.platform)
reasoning_config = self._resolve_session_reasoning_config(
source=source,
session_key=session_key,
)
self._service_tier = self._load_service_tier()
turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
pr = self._provider_routing
# Snapshot history from running agent or stored transcript
running_agent = self._running_agents.get(session_key)
if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
history_snapshot = list(getattr(running_agent, "_session_messages", []) or [])
else:
session_entry = self.session_store.get_or_create_session(source)
history_snapshot = self.session_store.load_transcript(session_entry.session_id)
btw_prompt = (
"[Ephemeral /btw side question. Answer using the conversation "
"context. No tools available. Be direct and concise.]\n\n"
+ question
)
def run_sync():
agent = AIAgent(
model=turn_route["model"],
**turn_route["runtime"],
max_iterations=8,
quiet_mode=True,
verbose_logging=False,
enabled_toolsets=[],
reasoning_config=reasoning_config,
service_tier=self._service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=pr.get("only"),
providers_ignored=pr.get("ignore"),
providers_order=pr.get("order"),
provider_sort=pr.get("sort"),
provider_require_parameters=pr.get("require_parameters", False),
provider_data_collection=pr.get("data_collection"),
session_id=task_id,
platform=platform_key,
session_db=None,
fallback_model=self._fallback_model,
skip_memory=True,
skip_context_files=True,
persist_session=False,
)
try:
return agent.run_conversation(
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
)
finally:
self._cleanup_agent_resources(agent)
result = await self._run_in_executor_with_context(run_sync)
response = (result.get("final_response") or "") if result else ""
if not response and result and result.get("error"):
response = f"Error: {result['error']}"
if not response:
response = "(No response generated)"
media_files, response = adapter.extract_media(response)
images, text_content = adapter.extract_images(response)
preview = question[:60] + ("..." if len(question) > 60 else "")
header = f'💬 /btw: "{preview}"\n\n'
if text_content:
await adapter.send(
chat_id=source.chat_id,
content=header + text_content,
metadata=_thread_meta,
)
elif not images and not media_files:
await adapter.send(
chat_id=source.chat_id,
content=header + "(No response generated)",
metadata=_thread_meta,
)
for image_url, alt_text in (images or []):
try:
await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text)
except Exception:
pass
for media_path, _is_voice in (media_files or []):
try:
await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
except Exception:
pass
except Exception as e:
logger.exception("/btw task %s failed", task_id)
try:
await adapter.send(
chat_id=source.chat_id,
content=f"❌ /btw failed: {e}",
metadata=_thread_meta,
)
except Exception:
pass
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
"""Handle /reasoning command — manage reasoning effort and display toggle.
@ -7573,7 +7473,7 @@ class GatewayRunner:
change_detail = ". ".join(change_parts) + ". " if change_parts else ""
reload_msg = {
"role": "user",
"content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
"content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]",
}
try:
session_entry = self.session_store.get_or_create_session(event.source)
@ -8512,7 +8412,7 @@ class GatewayRunner:
from tools.ansi_strip import strip_ansi
_out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""
synth_text = (
f"[SYSTEM: Background process {session_id} completed "
f"[IMPORTANT: Background process {session_id} completed "
f"(exit code {session.exit_code}).\n"
f"Command: {session.command}\n"
f"Output:\n{_out}]"
@ -8822,6 +8722,25 @@ class GatewayRunner:
with _lock:
self._agent_cache.pop(session_key, None)
@staticmethod
def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None:
"""Reset per-turn state on a cached agent before a new turn starts.
Both _last_activity_ts and _last_activity_desc are only reset for
fresh external turns (depth 0); they are semantically paired
desc describes the activity *at* ts, so updating one without the
other would make get_activity_summary() misleading.
For interrupt-recursive turns both are preserved so the inactivity
watchdog can accumulate stuck-turn idle time and fire the 30-min
timeout (#15654). The depth-0 reset is still needed: a session
idle for 29 min would otherwise trip the watchdog before the new
turn makes its first API call (#9051).
"""
if interrupt_depth == 0:
agent._last_activity_ts = time.time()
agent._last_activity_desc = "starting new turn (cached)"
agent._api_call_count = 0
def _release_evicted_agent_soft(self, agent: Any) -> None:
"""Soft cleanup for cache-evicted agents — preserves session tool state.
@ -9360,16 +9279,62 @@ class GatewayRunner:
last_tool = [None] # Mutable container for tracking in closure
last_progress_msg = [None] # Track last message for dedup
repeat_count = [0] # How many times the same message repeated
# First-touch onboarding latch: fires at most once per run, even if
# several tools exceed the threshold.
long_tool_hint_fired = [False]
_LONG_TOOL_THRESHOLD_S = 30.0
def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
"""Callback invoked by agent on tool lifecycle events."""
if not progress_queue or not _run_still_current():
return
# First-touch onboarding: the first time a tool takes longer than
# _LONG_TOOL_THRESHOLD_S during a run that's streaming every tool
# (progress_mode == "all"), append a one-time hint suggesting
# /verbose. We only fire when (a) the user hasn't seen the hint
# before and (b) /verbose is actually usable on this platform
# (gateway gate must be open). The CLI has its own trigger.
if event_type == "tool.completed" and not long_tool_hint_fired[0]:
try:
duration = kwargs.get("duration") or 0
if duration >= _LONG_TOOL_THRESHOLD_S and progress_mode == "all":
from agent.onboarding import (
TOOL_PROGRESS_FLAG,
is_seen,
mark_seen,
tool_progress_hint_gateway,
)
_cfg = _load_gateway_config()
gate_on = bool(_cfg.get("display", {}).get("tool_progress_command", False))
if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
long_tool_hint_fired[0] = True
progress_queue.put(tool_progress_hint_gateway())
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
except Exception as _hint_err:
logger.debug("tool-progress onboarding hint failed: %s", _hint_err)
return
# Only act on tool.started events (ignore tool.completed, reasoning.available, etc.)
if event_type not in ("tool.started",):
return
# Suppress tool-progress bubbles once the user has sent `stop`.
# When the LLM response carries N parallel tool calls, the agent
# fires N "tool.started" events back-to-back before checking for
# interrupts — without this guard, a late `stop` still renders
# all N as 🔍 bubbles, making the interrupt feel ignored.
# (agent lives in run_sync's scope; agent_holder[0] is the shared
# handle across nested scopes — see line ~9607.)
try:
_agent_for_interrupt = agent_holder[0] if agent_holder else None
if _agent_for_interrupt is not None and getattr(
_agent_for_interrupt, "is_interrupted", False
):
return
except Exception:
pass
# "new" mode: only report when tool changes
if progress_mode == "new" and tool_name == last_tool[0]:
return
@ -9476,6 +9441,22 @@ class GatewayRunner:
raw = progress_queue.get_nowait()
# Drain silently when interrupted: events queued in the
# window between tool parse and interrupt processing
# should not render as bubbles. The "⚡ Interrupting
# current task" message is sent separately and is the
# last progress-flavored bubble the user should see.
try:
_agent_for_interrupt = agent_holder[0] if agent_holder else None
if _agent_for_interrupt is not None and getattr(
_agent_for_interrupt, "is_interrupted", False
):
# Drop this event and continue draining.
await asyncio.sleep(0)
continue
except Exception:
pass
# Handle dedup messages: update last line with repeat counter
if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
_, base_msg, count = raw
@ -9804,12 +9785,7 @@ class GatewayRunner:
_cache.move_to_end(session_key)
except KeyError:
pass
# Reset activity timestamp so the inactivity timeout
# handler doesn't see stale idle time from the previous
# turn and immediately kill this agent. (#9051)
agent._last_activity_ts = time.time()
agent._last_activity_desc = "starting new turn (cached)"
agent._api_call_count = 0
self._init_cached_agent_for_turn(agent, _interrupt_depth)
logger.debug("Reusing cached agent for session %s", session_key)
if agent is None:

View file

@ -467,11 +467,27 @@ def _resolve_api_key_provider_secret(
pass
return "", ""
from hermes_cli.config import get_env_value
for env_var in pconfig.api_key_env_vars:
val = os.getenv(env_var, "").strip()
# Check both os.environ and ~/.hermes/.env file
val = (get_env_value(env_var) or "").strip()
if has_usable_secret(val):
return val, env_var
# Fallback: try credential pool (e.g. zai key stored via auth.json)
try:
from agent.credential_pool import load_pool
pool = load_pool(provider_id)
if pool and pool.has_credentials():
entry = pool.peek()
if entry:
key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
key = str(key).strip()
if has_usable_secret(key):
return key, f"credential_pool:{provider_id}"
except Exception:
pass
return "", ""
@ -4244,10 +4260,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
)
from hermes_cli.models import (
_PROVIDER_MODELS, get_pricing_for_provider,
get_curated_nous_model_ids, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
)
model_ids = _PROVIDER_MODELS.get("nous", [])
model_ids = get_curated_nous_model_ids()
print()
unavailable_models: list = []

View file

@ -84,9 +84,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("deny", "Deny a pending dangerous command", "Session",
gateway_only=True),
CommandDef("background", "Run a prompt in the background", "Session",
aliases=("bg",), args_hint="<prompt>"),
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
args_hint="<question>"),
aliases=("bg", "btw"), args_hint="<prompt>"),
CommandDef("agents", "Show active agents and running tasks", "Session",
aliases=("tasks",)),
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
@ -808,6 +806,114 @@ def discord_skill_commands_by_category(
return trimmed_categories, uncategorized, hidden
# ---------------------------------------------------------------------------
# Slack native slash commands
# ---------------------------------------------------------------------------
# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
# commands per app.
# Upper bound on the number of slash commands generated for one Slack app.
_SLACK_MAX_SLASH_COMMANDS = 50
# Maximum length of a single slash command name.
_SLACK_NAME_LIMIT = 32
# Matches every character that is not a lowercase letter, digit, "_" or "-".
_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")


def _sanitize_slack_name(raw: str) -> str:
    """Normalize ``raw`` into a valid Slack slash command name.

    The name is lowercased, every character outside ``a-z``, ``0-9``,
    ``-`` and ``_`` is removed, leading/trailing ``-``/``_`` are stripped,
    and the result is cut to ``_SLACK_NAME_LIMIT`` characters. May return
    an empty string when nothing valid remains.
    """
    cleaned = _SLACK_INVALID_CHARS.sub("", raw.lower()).strip("-_")
    return cleaned[:_SLACK_NAME_LIMIT]
def slack_native_slashes() -> list[tuple[str, str, str]]:
    """Return (slash_name, description, usage_hint) triples for Slack.

    Every gateway-available command in ``COMMAND_REGISTRY`` is surfaced as
    a standalone Slack slash command (e.g. ``/btw``, ``/stop``, ``/model``),
    matching Discord's and Telegram's model where every command is a
    first-class slash and not a ``/hermes <verb>`` subcommand.

    Both canonical names and aliases are included so users can type any
    documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
    Plugin-registered slash commands are included too.

    Results are clamped to ``_SLACK_MAX_SLASH_COMMANDS`` entries with
    duplicate-name avoidance. ``/hermes`` is always reserved as the first
    entry so the legacy ``/hermes <subcommand>`` form keeps working for
    anything that gets dropped by the clamp or for free-form questions.

    Returns:
        Triples in priority order: ``hermes``, canonical command names,
        aliases, then plugin commands. Names are sanitized via
        ``_sanitize_slack_name``; descriptions are cut to 140 characters
        and usage hints to 100.
    """
    overrides = _resolve_config_gates()
    entries: list[tuple[str, str, str]] = []
    seen: set[str] = set()

    # Reserve /hermes as the catch-all top-level command.
    entries.append(("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"))
    seen.add("hermes")

    def _add(name: str, desc: str, hint: str) -> None:
        """Append one entry unless its name is empty, a duplicate, or over the cap."""
        slack_name = _sanitize_slack_name(name)
        if not slack_name or slack_name in seen:
            return
        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
            return
        # Slack description cap is 2000 chars; keep it short.
        entries.append((slack_name, desc[:140], hint[:100]))
        seen.add(slack_name)

    # Evaluate the gateway-availability filter once; both passes below
    # iterate the same subset of the registry.
    available = [cmd for cmd in COMMAND_REGISTRY if _is_gateway_available(cmd, overrides)]

    # First pass: canonical names (so they win slots if we hit the cap).
    for cmd in available:
        _add(cmd.name, cmd.description, cmd.args_hint or "")

    # Second pass: aliases. An alias that sanitizes to an already-registered
    # name is dropped by the dedup in _add.
    for cmd in available:
        for alias in cmd.aliases:
            # Separate the canonical name from its description; without the
            # separator the text runs together ("/backgroundRun a prompt...").
            _add(alias, f"Alias for /{cmd.name} — {cmd.description}", cmd.args_hint or "")

    # Third pass: plugin commands.
    for name, description, args_hint in _iter_plugin_command_entries():
        _add(name, description, args_hint or "")

    return entries
def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
    """Build the ``features.slash_commands`` part of a Slack app manifest.

    One manifest entry is produced per triple returned by
    ``slack_native_slashes()``.

    ``request_url`` is required by Slack's manifest schema for every slash
    command, but in Socket Mode (which we use) Slack ignores it and routes
    the command event through the WebSocket. A placeholder URL is fine.

    The returned dict is the ``features.slash_commands`` portion only;
    callers compose it into a full manifest (or merge into an existing
    one). Keeping it narrow avoids coupling us to the rest of the manifest
    schema (display_information, oauth_config, settings, etc.) which users
    set up once in the Slack UI and rarely change.
    """

    def _manifest_entry(name: str, desc: str, usage: str) -> dict[str, Any]:
        """Render a single (name, description, usage_hint) triple."""
        item: dict[str, Any] = {
            "command": f"/{name}",
            "description": desc or f"Run /{name}",
            "should_escape": False,
            "url": request_url,
        }
        # usage_hint is omitted entirely when there is nothing to show.
        if usage:
            item["usage_hint"] = usage
        return item

    return {
        "features": {
            "slash_commands": [
                _manifest_entry(*triple) for triple in slack_native_slashes()
            ],
        },
    }
def slack_subcommand_map() -> dict[str, str]:
"""Return subcommand -> /command mapping for Slack /hermes handler.

View file

@ -465,6 +465,7 @@ DEFAULT_CONFIG = {
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
"record_sessions": False, # Auto-record browser sessions as WebM videos
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
# Active only when a CDP-capable backend is attached (Browserbase or
@ -959,6 +960,27 @@ DEFAULT_CONFIG = {
"backup_count": 3, # Number of rotated backup files to keep
},
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
# curated model lists for OpenRouter and Nous Portal from this URL,
# falling back to the in-repo snapshot on network failure. Lets us
# update model picker lists without shipping a hermes-agent release.
# The default URL is served by the docs site GitHub Pages deploy.
"model_catalog": {
"enabled": True,
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
# next /model or `hermes model` invocation; network failures
# silently fall back to the stale cache.
"ttl_hours": 24,
# Optional per-provider override URLs for third parties that want
# to self-host their own curation list using the same schema.
# Example:
# providers:
# openrouter:
# url: https://example.com/my-curation.json
"providers": {},
},
# Network settings — workarounds for connectivity issues.
"network": {
# Force IPv4 connections. On servers with broken or unreachable IPv6,
@ -995,6 +1017,13 @@ DEFAULT_CONFIG = {
"min_interval_hours": 24,
},
# Contextual first-touch onboarding hints (see agent/onboarding.py).
# Each hint is shown once per install and then latched here so it
# never fires again. Users can wipe the section to re-see all hints.
"onboarding": {
"seen": {},
},
# Config schema version - bump this when adding new required fields
"_config_version": 22,
}

361
hermes_cli/fallback_cmd.py Normal file
View file

@ -0,0 +1,361 @@
"""
hermes fallback — manage the fallback provider chain.
Fallback providers are tried in order when the primary model fails with
rate-limit, overload, or connection errors. See:
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
Subcommands:
hermes fallback [list] Show the current fallback chain (default when no subcommand)
hermes fallback add Pick provider + model via the same picker as `hermes model`,
then append the selection to the chain
hermes fallback remove Pick an entry to delete from the chain
hermes fallback clear Remove all fallback entries
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
``fallback_model`` format is migrated to the new list format on first add.
"""
from __future__ import annotations
import copy
from typing import Any, Dict, List, Optional
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Return the normalized fallback chain as a list of dicts.
Accepts both the new list format (``fallback_providers``) and the legacy
single-dict format (``fallback_model``). The returned list is always a
fresh copy callers can mutate without touching the config dict.
"""
chain = config.get("fallback_providers") or []
if isinstance(chain, list):
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
if result:
return result
legacy = config.get("fallback_model")
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
return [dict(legacy)]
if isinstance(legacy, list):
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
return []
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
config["fallback_providers"] = chain
# Drop the legacy single-dict key on write so there's only one source of truth.
if "fallback_model" in config:
config.pop("fallback_model", None)
def _format_entry(entry: Dict[str, Any]) -> str:
"""One-line human-readable rendering of a fallback entry."""
provider = entry.get("provider", "?")
model = entry.get("model", "?")
base = entry.get("base_url")
suffix = f" [{base}]" if base else ""
return f"{model} (via {provider}){suffix}"
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
if not isinstance(model_cfg, dict):
return None
provider = (model_cfg.get("provider") or "").strip()
# The picker writes the selected model to ``model.default``.
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
if not provider or not model:
return None
entry: Dict[str, Any] = {"provider": provider, "model": model}
base_url = (model_cfg.get("base_url") or "").strip()
if base_url:
entry["base_url"] = base_url
api_mode = (model_cfg.get("api_mode") or "").strip()
if api_mode:
entry["api_mode"] = api_mode
return entry
def _snapshot_auth_active_provider() -> Any:
"""Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
return store.get("active_provider")
except Exception:
return None
def _restore_auth_active_provider(value: Any) -> None:
"""Write back a previously snapshotted ``active_provider`` value."""
try:
from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
with _auth_store_lock():
store = _load_auth_store()
store["active_provider"] = value
_save_auth_store(store)
except Exception:
# Best-effort — if auth.json can't be restored, the user's primary
# provider may have been deactivated by the picker. They can re-run
# `hermes model` to fix it. Don't fail the fallback add.
pass
# ---------------------------------------------------------------------------
# Subcommand handlers
# ---------------------------------------------------------------------------
def cmd_fallback_list(args) -> None:  # noqa: ARG001
    """Show the configured fallback chain, preceded by the primary model.

    Prints a hint for ``hermes fallback add`` when the chain is empty.
    ``args`` is accepted for dispatcher compatibility and is not read.
    """
    from hermes_cli.config import load_config

    config = load_config()
    entries = _read_chain(config)

    print()
    if not entries:
        print(" No fallback providers configured.")
        print()
        print(" Add one with: hermes fallback add")
        print()
        return

    primary_line = _describe_primary(config)
    if primary_line:
        print(f" Primary: {primary_line}")
        print()

    count = len(entries)
    noun = "entry" if count == 1 else "entries"
    print(f" Fallback chain ({count} {noun}):")
    for position, item in enumerate(entries, 1):
        print(f" {position}. {_format_entry(item)}")
    print()
    print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
    print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
    print()
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
"""One-line description of the primary model for display purposes."""
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
provider = (model_cfg.get("provider") or "?").strip() or "?"
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
return f"{model} (via {provider})"
if isinstance(model_cfg, str) and model_cfg.strip():
return model_cfg.strip()
return None
def cmd_fallback_add(args) -> None:
    """Launch the same picker as `hermes model`, then append the selection to the chain.

    Flow:
      1. Snapshot ``config["model"]`` and the auth store's ``active_provider``.
      2. Run ``select_provider_and_model``; the selection is read back from
         the reloaded config afterwards.
      3. Restore the snapshotted primary model and active provider.
      4. Append the selection to ``fallback_providers`` unless it equals the
         primary or is already present in the chain.

    Args:
        args: Parsed CLI namespace, forwarded unchanged to the picker.

    Raises:
        SystemExit: Re-raised from the picker after the primary model and
            active provider have been restored.
    """
    from hermes_cli.main import _require_tty, select_provider_and_model
    from hermes_cli.config import load_config, save_config
    _require_tty("fallback add")
    # Snapshot BEFORE the picker runs so we can distinguish "user actually
    # picked something" from "user cancelled" by comparing before/after.
    before_cfg = load_config()
    # Deep copy so later config mutations can't alias into the snapshot.
    model_before = copy.deepcopy(before_cfg.get("model"))
    active_provider_before = _snapshot_auth_active_provider()
    print()
    print(" Adding a fallback provider. The picker below is the same one used by")
    print(" `hermes model` — select the provider + model you want as a fallback.")
    print()
    try:
        select_provider_and_model(args=args)
    except SystemExit:
        # Some provider flows exit on auth failure — restore state and re-raise.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)
        raise
    # Read the post-picker state to see what the user selected.
    after_cfg = load_config()
    model_after = after_cfg.get("model")
    new_entry = _extract_fallback_from_model_cfg(model_after)
    if not new_entry:
        # Picker didn't complete (user cancelled or flow bailed). Nothing to do.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)
        print()
        print(" No fallback added.")
        return
    # Picker picked the same thing that's already the primary → nothing changed,
    # and there's nothing useful to add as a fallback to itself.
    # NOTE(review): a cancelled picker that leaves config["model"] untouched
    # presumably also lands in this branch rather than the one above — confirm.
    primary_entry = _extract_fallback_from_model_cfg(model_before)
    if primary_entry and primary_entry["provider"] == new_entry["provider"] \
            and primary_entry["model"] == new_entry["model"]:
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)
        print()
        print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
        print(" A provider cannot be a fallback for itself — no change.")
        return
    # Reload the config with the primary restored, then append the new entry
    # to ``fallback_providers``. We deliberately re-load (rather than mutating
    # ``after_cfg``) because the picker may have touched other top-level keys
    # (custom_providers, providers credentials) that we want to keep.
    _restore_model_cfg(model_before)
    _restore_auth_active_provider(active_provider_before)
    final_cfg = load_config()
    chain = _read_chain(final_cfg)
    # Reject exact-duplicate fallback entries.
    # Only provider and model are compared; base_url / api_mode are ignored.
    for existing in chain:
        if existing.get("provider") == new_entry["provider"] \
                and existing.get("model") == new_entry["model"]:
            print()
            print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
            return
    chain.append(new_entry)
    # Writes ``fallback_providers`` and drops the legacy ``fallback_model`` key.
    _write_chain(final_cfg, chain)
    save_config(final_cfg)
    print()
    print(f" Added fallback: {_format_entry(new_entry)}")
    print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
    print()
    print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
def _restore_model_cfg(model_before: Any) -> None:
    """Put ``config["model"]`` back to an earlier snapshot and save the config.

    A ``None`` snapshot means the key was absent, so it is removed. Any other
    snapshot is deep-copied into place so the caller's copy stays independent.
    """
    from hermes_cli.config import load_config, save_config

    current = load_config()
    if model_before is not None:
        current["model"] = copy.deepcopy(model_before)
    else:
        current.pop("model", None)
    save_config(current)
def cmd_fallback_remove(args) -> None:  # noqa: ARG001
    """Let the user choose one fallback entry and delete it from the chain.

    The curses picker is tried first; if it raises for any reason the plain
    numbered prompt is used instead. Choosing "Cancel", or any index outside
    the chain, leaves the config untouched. ``args`` is not read.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    chain = _read_chain(config)
    if not chain:
        print()
        print(" No fallback providers configured — nothing to remove.")
        print()
        return

    # "Cancel" sits one past the last real entry, so it fails the range check.
    menu = [_format_entry(item) for item in chain] + ["Cancel"]
    try:
        from hermes_cli.setup import _curses_prompt_choice
        picked = _curses_prompt_choice("Select a fallback to remove:", menu, 0)
    except Exception:
        picked = _numbered_pick("Select a fallback to remove:", menu)

    if picked is None or not (0 <= picked < len(chain)):
        print()
        print(" Cancelled — no change.")
        return

    dropped = chain.pop(picked)
    _write_chain(config, chain)
    save_config(config)

    print()
    print(f" Removed fallback: {_format_entry(dropped)}")
    if not chain:
        print(" Fallback chain is now empty.")
    else:
        remaining = len(chain)
        print(f" Chain is now {remaining} {'entry' if remaining == 1 else 'entries'} long.")
    print()
def cmd_fallback_clear(args) -> None:  # noqa: ARG001
    """Empty the fallback chain after an interactive yes/no confirmation.

    The current chain is listed first. Only an answer of ``y`` or ``yes``
    (case-insensitive) clears it; Ctrl-C or EOF at the prompt cancels.
    ``args`` is not read.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    entries = _read_chain(config)
    if not entries:
        print()
        print(" No fallback providers configured — nothing to clear.")
        print()
        return

    total = len(entries)
    noun = "entry" if total == 1 else "entries"
    print()
    print(f" Current fallback chain ({total} {noun}):")
    for position, item in enumerate(entries, 1):
        print(f" {position}. {_format_entry(item)}")
    print()

    try:
        answer = input(" Clear all entries? [y/N]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        print()
        print(" Cancelled.")
        return

    confirmed = answer in ("y", "yes")
    if not confirmed:
        print(" Cancelled — no change.")
        return

    _write_chain(config, [])
    save_config(config)
    print()
    print(" Fallback chain cleared.")
    print()
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
    """Prompt for a choice with a plain numbered list (no curses needed).

    Returns the zero-based index of the selected choice, or ``None`` when the
    user submits an empty line or interrupts the prompt (Ctrl-C / EOF).
    Out-of-range or non-numeric input re-prompts.
    """
    print(question)
    for number, label in enumerate(choices, 1):
        print(f" {number}. {label}")
    print()

    while True:
        try:
            raw = input(f"Choice [1-{len(choices)}]: ").strip()
            if raw == "":
                return None
            selected = int(raw) - 1
            if selected in range(len(choices)):
                return selected
            print(f"Please enter 1-{len(choices)}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
            print()
            return None
# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
def cmd_fallback(args) -> None:
    """Route ``hermes fallback [subcommand]`` to the matching handler.

    A missing or empty subcommand behaves like ``list``. ``ls`` and ``rm``
    are aliases for ``list`` and ``remove``.

    Raises:
        SystemExit: With exit code 2 when the subcommand is not recognized.
    """
    sub = getattr(args, "fallback_command", None)

    if sub in (None, "", "list", "ls"):
        cmd_fallback_list(args)
        return
    if sub == "add":
        cmd_fallback_add(args)
        return
    if sub in ("remove", "rm"):
        cmd_fallback_remove(args)
        return
    if sub == "clear":
        cmd_fallback_clear(args)
        return

    print(f"Unknown fallback subcommand: {sub}")
    print("Use one of: list, add, remove, clear")
    raise SystemExit(2)

View file

@ -2315,13 +2315,13 @@ def _model_flow_nous(config, current_model="", args=None):
# The live /models endpoint returns hundreds of models; the curated list
# shows only agentic models users recognize from OpenRouter.
from hermes_cli.models import (
_PROVIDER_MODELS,
get_curated_nous_model_ids,
get_pricing_for_provider,
check_nous_free_tier,
partition_nous_models_by_tier,
)
model_ids = _PROVIDER_MODELS.get("nous", [])
model_ids = get_curated_nous_model_ids()
if not model_ids:
print("No curated models available for Nous Portal.")
return
@ -4780,6 +4780,37 @@ def cmd_webhook(args):
webhook_command(args)
def cmd_slack(args):
    """Dispatch ``hermes slack <subcommand>``.

    The only supported subcommand is ``manifest``, which is delegated to
    ``hermes_cli.slack_cli.slack_manifest_command``. With no subcommand a
    usage hint is written to stderr. Both the missing and the unknown
    subcommand cases return 1.
    """
    sub = getattr(args, "slack_command", None)

    if sub == "manifest":
        from hermes_cli.slack_cli import slack_manifest_command
        return slack_manifest_command(args)

    if sub in (None, ""):
        # No subcommand — print usage hint.
        usage = (
            "usage: hermes slack <subcommand>\n"
            "\n"
            "subcommands:\n"
            " manifest Generate a Slack app manifest with every gateway\n"
            " command registered as a native slash\n"
            "\n"
            "Run `hermes slack manifest -h` for details."
        )
        print(usage, file=sys.stderr)
        return 1

    print(f"Unknown slack subcommand: {sub}", file=sys.stderr)
    return 1
def cmd_hooks(args):
"""Shell-hook inspection and management."""
from hermes_cli.hooks import hooks_command
@ -7223,6 +7254,9 @@ Examples:
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
hermes auth reset <provider> Clear exhaustion status for a provider
hermes model Select default model
hermes fallback [list] Show fallback provider chain
hermes fallback add Add a fallback provider (same picker as `hermes model`)
hermes fallback remove Remove a fallback provider from the chain
hermes config View configuration
hermes config edit Edit config in $EDITOR
hermes config set model gpt-4 Set a config value
@ -7564,6 +7598,42 @@ For more help on a command:
)
model_parser.set_defaults(func=cmd_model)
# =========================================================================
# fallback command — manage the fallback provider chain
# =========================================================================
from hermes_cli.fallback_cmd import cmd_fallback
fallback_parser = subparsers.add_parser(
"fallback",
help="Manage fallback providers (tried when the primary model fails)",
description=(
"Manage the fallback provider chain. Fallback providers are tried "
"in order when the primary model fails with rate-limit, overload, or "
"connection errors. See: "
"https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers"
),
)
fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command")
fallback_subparsers.add_parser(
"list",
aliases=["ls"],
help="Show the current fallback chain (default when no subcommand)",
)
fallback_subparsers.add_parser(
"add",
help="Pick a provider + model (same picker as `hermes model`) and append to the chain",
)
fallback_subparsers.add_parser(
"remove",
aliases=["rm"],
help="Pick an entry to delete from the chain",
)
fallback_subparsers.add_parser(
"clear",
help="Remove all fallback entries",
)
fallback_parser.set_defaults(func=cmd_fallback)
# =========================================================================
# gateway command
# =========================================================================
@ -7759,6 +7829,54 @@ For more help on a command:
)
whatsapp_parser.set_defaults(func=cmd_whatsapp)
# =========================================================================
# slack command
# =========================================================================
slack_parser = subparsers.add_parser(
"slack",
help="Slack integration helpers (manifest generation, etc.)",
description="Slack integration helpers for Hermes.",
)
slack_sub = slack_parser.add_subparsers(dest="slack_command")
slack_manifest = slack_sub.add_parser(
"manifest",
help="Print or write a Slack app manifest with every gateway command "
"registered as a native slash (/btw, /stop, /model, ...)",
description=(
"Generate a Slack app manifest that registers every gateway "
"command in COMMAND_REGISTRY as a first-class Slack slash "
"command (matching Discord and Telegram parity). Paste the "
"output into Slack app config → Features → App Manifest → "
"Edit, then Save. Reinstall the app if Slack prompts for it."
),
)
slack_manifest.add_argument(
"--write",
nargs="?",
const=True,
default=None,
metavar="PATH",
help="Write manifest to a file instead of stdout. With no PATH "
"writes to $HERMES_HOME/slack-manifest.json.",
)
slack_manifest.add_argument(
"--name",
default=None,
help='Bot display name (default: "Hermes")',
)
slack_manifest.add_argument(
"--description",
default=None,
help="Bot description shown in Slack's app directory.",
)
slack_manifest.add_argument(
"--slashes-only",
action="store_true",
help="Emit only the features.slash_commands array (for merging "
"into an existing manifest manually).",
)
slack_parser.set_defaults(func=cmd_slack)
# =========================================================================
# login command
# =========================================================================
@ -8414,6 +8532,12 @@ Examples:
skills_list.add_argument(
"--source", default="all", choices=["all", "hub", "builtin", "local"]
)
skills_list.add_argument(
"--enabled-only",
action="store_true",
help="Hide disabled skills. Use with -p <profile> to see exactly "
"which skills will load for that profile.",
)
skills_check = skills_subparsers.add_parser(
"check", help="Check installed hub skills for updates"

329
hermes_cli/model_catalog.py Normal file
View file

@ -0,0 +1,329 @@
"""Remote model catalog fetcher.
The Hermes docs site hosts a JSON manifest of curated models for providers
we want to update without shipping a release (currently OpenRouter and
Nous Portal). This module fetches, validates, and caches that manifest,
falling back to the in-repo hardcoded lists when the network is unavailable.
Pipeline
--------
1. ``get_catalog()`` returns a parsed manifest dict.
- Checks in-process cache (invalidated by TTL).
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
- Fetches the master URL if disk cache is stale or missing.
- On any fetch failure, keeps using the stale cache (or empty dict).
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` are
thin accessors returning the shapes existing callers expect. Each
returns ``None`` on any lookup failure so callers can fall back to the in-repo hardcoded list.
Schema (version 1)
------------------
::
{
"version": 1,
"updated_at": "2026-04-25T22:00:00Z",
"metadata": {...}, # free-form
"providers": {
"openrouter": {
"metadata": {...}, # free-form
"models": [
{"id": "vendor/model", "description": "recommended",
"metadata": {...}} # free-form, model-level
]
},
"nous": {...}
}
}
Unknown fields are ignored — extra metadata can be added at either level
without bumping ``version``. ``version`` bumps are reserved for
breaking changes (renaming ``providers``, changing ``models`` shape).
"""
from __future__ import annotations
import json
import logging
import os
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any
from hermes_cli import __version__ as _HERMES_VERSION
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Master manifest URL, used when the ``model_catalog.url`` config value is
# missing or empty.
DEFAULT_CATALOG_URL = (
    "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
)
# Disk-cache freshness window in hours, used when ``model_catalog.ttl_hours``
# is missing or falsy.
DEFAULT_TTL_HOURS = 24
# Timeout handed to ``urllib.request.urlopen`` for every manifest fetch.
DEFAULT_FETCH_TIMEOUT = 8.0
# Highest manifest ``version`` that ``_validate_manifest`` accepts.
SUPPORTED_SCHEMA_VERSION = 1
# Sent as the ``User-Agent`` header on manifest requests.
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
# In-process cache to avoid repeated disk + parse work across multiple
# calls within the same session. Invalidated by TTL against the disk file's
# mtime, so calling code never has to think about this.
_catalog_cache: dict[str, Any] | None = None
# mtime of the disk cache file that ``_catalog_cache`` was loaded from.
_catalog_cache_source_mtime: float = 0.0
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
def _load_catalog_config() -> dict[str, Any]:
"""Load the ``model_catalog`` config block with defaults filled in."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
except Exception:
cfg = {}
raw = cfg.get("model_catalog")
if not isinstance(raw, dict):
raw = {}
return {
"enabled": bool(raw.get("enabled", True)),
"url": str(raw.get("url") or DEFAULT_CATALOG_URL),
"ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS),
"providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {},
}
def _cache_path() -> Path:
    """Locate the on-disk manifest cache under the Hermes home directory.

    ``get_hermes_home`` is imported inside the function so tests can
    monkeypatch the home directory before it is resolved.
    """
    from hermes_constants import get_hermes_home

    cache_dir = get_hermes_home() / "cache"
    return cache_dir / "model_catalog.json"
# ---------------------------------------------------------------------------
# Fetch + validate + cache
# ---------------------------------------------------------------------------
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
    """Download and parse the manifest at ``url``.

    Returns the parsed dict when the request succeeds and the payload passes
    ``_validate_manifest``. Any network, decoding, parsing or validation
    failure is logged at INFO level and reported as ``None``.
    """
    try:
        request = urllib.request.Request(
            url,
            headers={
                "Accept": "application/json",
                "User-Agent": _HERMES_USER_AGENT,
            },
        )
        with urllib.request.urlopen(request, timeout=timeout) as response:
            body = response.read()
            manifest = json.loads(body.decode())
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
        logger.info("model catalog fetch failed (%s): %s", url, exc)
        return None
    except Exception as exc:  # pragma: no cover — defensive
        logger.info("model catalog fetch errored (%s): %s", url, exc)
        return None

    if _validate_manifest(manifest):
        return manifest
    logger.info("model catalog at %s failed schema validation", url)
    return None
def _validate_manifest(data: Any) -> bool:
"""Return True when ``data`` matches the minimum manifest shape."""
if not isinstance(data, dict):
return False
version = data.get("version")
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
# Future schema version we don't understand — refuse rather than
# guess. Older schemas (version < 1) aren't supported either.
return False
providers = data.get("providers")
if not isinstance(providers, dict):
return False
for pname, pblock in providers.items():
if not isinstance(pname, str) or not isinstance(pblock, dict):
return False
models = pblock.get("models")
if not isinstance(models, list):
return False
for m in models:
if not isinstance(m, dict):
return False
if not isinstance(m.get("id"), str) or not m["id"].strip():
return False
return True
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    """Return ``(data_or_none, mtime)`` for the on-disk manifest cache.

    ``(None, 0.0)`` is returned when the file is missing, unreadable, not
    valid UTF-8, not valid JSON, or fails ``_validate_manifest``. This never
    raises for a corrupt cache file, so callers can treat the cache as
    best-effort.
    """
    path = _cache_path()
    try:
        mtime = path.stat().st_mtime
    except OSError:  # FileNotFoundError is a subclass of OSError
        return (None, 0.0)
    try:
        # Read as UTF-8 regardless of locale. The cache is written with
        # json.dump's default ASCII-only output, which is valid UTF-8.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError
        # from a cache file containing undecodable bytes.
        return (None, 0.0)
    if not _validate_manifest(data):
        return (None, 0.0)
    return (data, mtime)
def _write_disk_cache(data: dict[str, Any]) -> None:
    """Persist ``data`` to the disk cache via a temp file and ``os.replace``.

    The parent directory is created when missing. The JSON is written to a
    sibling ``.tmp`` file and then moved over the real cache path. Any
    ``OSError`` is logged at INFO level and otherwise ignored.
    """
    target = _cache_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        staging = target.with_suffix(target.suffix + ".tmp")
        with staging.open("w") as out:
            json.dump(data, out, indent=2)
            out.write("\n")
        os.replace(staging, target)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
    """Return the parsed model catalog manifest, or an empty dict on failure.

    Resolution order:
      1. ``{}`` immediately when the catalog is disabled in config.
      2. The in-process copy, when the disk cache is fresh and its mtime
         still matches the one recorded at load time.
      3. The disk cache, when it is younger than the configured TTL.
      4. A network fetch of the configured URL, written back to disk.
      5. The stale disk cache, when the fetch fails.
      6. ``{}`` when nothing is available.

    Args:
        force_refresh: Skip steps 2 and 3 and go straight to the network.

    Callers should treat a missing provider/model as "use the in-repo
    fallback". This function is meant never to raise, so the CLI keeps
    working offline.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return {}
    # Negative TTLs are clamped to zero, which makes the disk cache never fresh.
    ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)
    # NOTE(review): the disk cache is read and parsed on every call, before
    # the in-process check below, so the in-process copy does not avoid the
    # disk read itself — confirm whether that is intended.
    disk_data, disk_mtime = _read_disk_cache()
    now = time.time()
    disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds
    # In-process cache hit: disk hasn't changed since we loaded it and still fresh.
    if (
        not force_refresh
        and _catalog_cache is not None
        and disk_data is not None
        and disk_mtime == _catalog_cache_source_mtime
        and disk_fresh
    ):
        return _catalog_cache
    # Disk is fresh enough — use it without a network hit.
    if not force_refresh and disk_fresh and disk_data is not None:
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data
    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
    # NOTE(review): a failed fetch is not remembered, so with a stale or
    # missing cache each call retries the network (up to
    # DEFAULT_FETCH_TIMEOUT) — confirm that is acceptable offline.
    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
    if fetched is not None:
        _write_disk_cache(fetched)
        # Re-read so the recorded mtime matches the file just written.
        new_disk_data, new_mtime = _read_disk_cache()
        if new_disk_data is not None:
            _catalog_cache = new_disk_data
            _catalog_cache_source_mtime = new_mtime
            return new_disk_data
        # The disk write or re-read failed: keep the fetched manifest in
        # memory only, stamped with the current time.
        _catalog_cache = fetched
        _catalog_cache_source_mtime = now
        return fetched
    if disk_data is not None:
        # Fetch failed but a (stale) disk copy exists — better than nothing.
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data
    return {}
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
    """Fetch the manifest from ``model_catalog.providers.<provider>.url``, if set.

    Returns ``None`` when the catalog is disabled, when no non-blank override
    URL is configured for ``provider``, or when the fetch itself fails.
    """
    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return None

    provider_cfg = cfg["providers"].get(provider)
    override_url = provider_cfg.get("url") if isinstance(provider_cfg, dict) else None
    if isinstance(override_url, str) and override_url.strip():
        # Override fetches skip the disk cache because they're usually
        # third-party self-hosted. Re-request on every call but with a short
        # timeout so they don't block the picker.
        return _fetch_manifest(override_url.strip(), DEFAULT_FETCH_TIMEOUT)
    return None
def _get_provider_block(provider: str) -> dict[str, Any] | None:
    """Look up ``provider``'s manifest block, preferring a per-provider override.

    The override manifest is used when it exists and contains a dict block
    for ``provider``. Otherwise the main catalog is consulted. Returns
    ``None`` when neither source has a dict block for the provider.
    """

    def _block_from(manifest):
        found = manifest.get("providers", {}).get(provider)
        return found if isinstance(found, dict) else None

    override = _fetch_provider_override(provider)
    if override is not None:
        from_override = _block_from(override)
        if from_override is not None:
            return from_override

    catalog = get_catalog()
    return _block_from(catalog) if catalog else None
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
    """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.

    Models with a blank id are skipped, and a missing description becomes
    ``""``. Returns ``None`` when no provider block is available or when no
    usable model remains, so callers can fall back to their hardcoded list.
    """
    block = _get_provider_block("openrouter")
    if not block:
        return None

    # Pair each model with its stripped id, lazily, then keep non-blank ids.
    with_ids = ((str(m.get("id") or "").strip(), m) for m in block.get("models", []))
    curated = [(mid, str(m.get("description") or "")) for mid, m in with_ids if mid]
    return curated or None
def get_curated_nous_models() -> list[str] | None:
    """Return Nous Portal's curated list of model ids from the manifest.

    Blank ids are skipped. Returns ``None`` when no provider block is
    available or when no usable id remains.
    """
    block = _get_provider_block("nous")
    if not block:
        return None

    stripped_ids = (str(m.get("id") or "").strip() for m in block.get("models", []))
    curated = [mid for mid in stripped_ids if mid]
    return curated or None
def reset_cache() -> None:
    """Drop the in-process catalog copy and its recorded mtime.

    Used by tests and ``hermes model --refresh``. The disk cache is left
    untouched.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    _catalog_cache, _catalog_cache_source_mtime = None, 0.0

View file

@ -876,7 +876,16 @@ def fetch_openrouter_models(
if _openrouter_catalog_cache is not None and not force_refresh:
return list(_openrouter_catalog_cache)
fallback = list(OPENROUTER_MODELS)
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
# snapshot when the manifest is unreachable. Both are curated lists that
# drive the picker; the OpenRouter live /v1/models filter (tool support,
# free pricing) is applied on top either way.
try:
from hermes_cli.model_catalog import get_curated_openrouter_models
remote = get_curated_openrouter_models()
except Exception:
remote = None
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:
@ -929,6 +938,24 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
def get_curated_nous_model_ids() -> list[str]:
    """Return the curated Nous Portal model ids as a new list.

    The remotely-hosted catalog manifest is preferred. When it yields nothing,
    or when importing or calling the catalog module raises, the in-repo
    ``_PROVIDER_MODELS["nous"]`` snapshot is used. Always returns a list,
    never ``None``.
    """
    try:
        from hermes_cli.model_catalog import get_curated_nous_models
        remote = get_curated_nous_models()
    except Exception:
        remote = None

    source = remote if remote else _PROVIDER_MODELS.get("nous", [])
    return list(source)
def _ai_gateway_model_is_free(pricing: Any) -> bool:
"""Return True if an AI Gateway model has $0 input AND output pricing."""
if not isinstance(pricing, dict):

View file

@ -1856,27 +1856,32 @@ def _setup_slack():
if existing:
print_info("Slack: already configured")
if not prompt_yes_no("Reconfigure Slack?", False):
# Even without reconfiguring, offer to refresh the manifest so
# new commands (e.g. /btw, /stop, ...) get registered in Slack.
if prompt_yes_no(
"Regenerate the Slack app manifest with the latest command "
"list? (recommended after `hermes update`)",
True,
):
_write_slack_manifest_and_instruct()
return
print_info("Steps to create a Slack app:")
print_info(" 1. Go to https://api.slack.com/apps → Create New App (from scratch)")
print_info(" 1. Go to https://api.slack.com/apps → Create New App")
print_info(" Pick 'From an app manifest' — we'll generate one for you below.")
print_info(" 2. Enable Socket Mode: Settings → Socket Mode → Enable")
print_info(" • Create an App-Level Token with 'connections:write' scope")
print_info(" 3. Add Bot Token Scopes: Features → OAuth & Permissions")
print_info(" Required scopes: chat:write, app_mentions:read,")
print_info(" channels:history, channels:read, im:history,")
print_info(" im:read, im:write, users:read, files:read, files:write")
print_info(" Optional for private channels: groups:history")
print_info(" 4. Subscribe to Events: Features → Event Subscriptions → Enable")
print_info(" Required events: message.im, message.channels, app_mention")
print_info(" Optional for private channels: message.groups")
print_warning(" ⚠ Without message.channels the bot will ONLY work in DMs,")
print_warning(" not public channels.")
print_info(" 5. Install to Workspace: Settings → Install App")
print_info(" 6. Reinstall the app after any scope or event changes")
print_info(" 7. After installing, invite the bot to channels: /invite @YourBot")
print_info(" 3. Install to Workspace: Settings → Install App")
print_info(" 4. After installing, invite the bot to channels: /invite @YourBot")
print()
print_info(" Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/")
print()
# Generate and write manifest up-front so the user can paste it into
# the "Create from manifest" flow instead of clicking through scopes /
# events / slash commands one at a time.
_write_slack_manifest_and_instruct()
print()
bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
if not bot_token:
@ -1902,6 +1907,49 @@ def _setup_slack():
print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")
def _write_slack_manifest_and_instruct():
    """Write the Slack app manifest under HERMES_HOME and tell the user what to do with it.

    The manifest is built by ``hermes_cli.slack_cli._build_full_manifest`` with
    the default bot name and description, serialized as indented JSON, and
    saved as ``slack-manifest.json`` inside the directory returned by
    ``get_hermes_home()``.

    This is a best-effort helper: any exception raised while importing,
    building, writing, or printing is caught, reported as a warning together
    with the manual fallback command, and never propagated to the caller.
    """
    try:
        import json as _json

        from hermes_cli.slack_cli import _build_full_manifest
        from hermes_constants import get_hermes_home

        manifest_doc = _build_full_manifest(
            bot_name="Hermes",
            bot_description="Your Hermes agent on Slack",
        )
        manifest_path = Path(get_hermes_home()) / "slack-manifest.json"
        manifest_path.parent.mkdir(parents=True, exist_ok=True)

        serialized = _json.dumps(manifest_doc, indent=2, ensure_ascii=False) + "\n"
        manifest_path.write_text(serialized, encoding="utf-8")

        print_success(f"Slack app manifest written to: {manifest_path}")
        paste_hint = (
            " Paste it into https://api.slack.com/apps → your app → Features "
            "→ App Manifest → Edit, then Save. Slack will prompt to "
            "reinstall if scopes or slash commands changed."
        )
        print_info(paste_hint)
        refresh_hint = (
            " Re-run `hermes slack manifest --write` anytime to refresh after "
            "Hermes adds new commands."
        )
        print_info(refresh_hint)
    except Exception as err:  # pragma: no cover - best-effort UX helper
        # Deliberately broad: a failed manifest write must not abort Slack setup.
        print_warning(f"Couldn't write Slack manifest: {err}")
        manual_hint = (
            " You can generate it manually later with: "
            "hermes slack manifest --write"
        )
        print_info(manual_hint)
def _setup_matrix():
"""Configure Matrix credentials."""
print_header("Matrix")

View file

@ -599,11 +599,24 @@ def inspect_skill(identifier: str) -> Optional[dict]:
return out
def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
"""List installed skills, distinguishing hub, builtin, and local skills."""
def do_list(source_filter: str = "all",
enabled_only: bool = False,
console: Optional[Console] = None) -> None:
"""List installed skills, distinguishing hub, builtin, and local skills.
Args:
source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``.
enabled_only: If True, hide disabled skills from the output.
Enabled/disabled state is resolved against the currently active profile's
config ``hermes -p <profile> skills list`` reads that profile's
``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process
start. No explicit profile flag needed here.
"""
from tools.skills_hub import HubLockFile, ensure_hub_dirs
from tools.skills_sync import _read_manifest
from tools.skills_tool import _find_all_skills
from agent.skill_utils import get_disabled_skill_names
c = console or _console
ensure_hub_dirs()
@ -611,17 +624,26 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No
hub_installed = {e["name"]: e for e in lock.list_installed()}
builtin_names = set(_read_manifest())
all_skills = _find_all_skills()
# Pull ALL skills (including disabled ones) so we can annotate status.
all_skills = _find_all_skills(skip_disabled=True)
disabled_names = get_disabled_skill_names()
table = Table(title="Installed Skills")
title = "Installed Skills"
if enabled_only:
title += " (enabled only)"
table = Table(title=title)
table.add_column("Name", style="bold cyan")
table.add_column("Category", style="dim")
table.add_column("Source", style="dim")
table.add_column("Trust", style="dim")
table.add_column("Status", style="dim")
hub_count = 0
builtin_count = 0
local_count = 0
enabled_count = 0
disabled_count = 0
for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])):
name = skill["name"]
@ -632,29 +654,48 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No
source_type = "hub"
source_display = hub_entry.get("source", "hub")
trust = hub_entry.get("trust_level", "community")
hub_count += 1
elif name in builtin_names:
source_type = "builtin"
source_display = "builtin"
trust = "builtin"
builtin_count += 1
else:
source_type = "local"
source_display = "local"
trust = "local"
local_count += 1
if source_filter != "all" and source_filter != source_type:
continue
is_enabled = name not in disabled_names
if enabled_only and not is_enabled:
continue
if source_type == "hub":
hub_count += 1
elif source_type == "builtin":
builtin_count += 1
else:
local_count += 1
if is_enabled:
enabled_count += 1
status_cell = "[bold green]enabled[/]"
else:
disabled_count += 1
status_cell = "[dim red]disabled[/]"
trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim")
trust_label = "official" if source_display == "official" else trust
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]")
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell)
c.print(table)
c.print(
f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n"
)
summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local"
if enabled_only:
summary += f"{enabled_count} enabled shown"
else:
summary += f"{enabled_count} enabled, {disabled_count} disabled"
summary += "[/]\n"
c.print(summary)
def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None:
@ -1127,7 +1168,10 @@ def skills_command(args) -> None:
elif action == "inspect":
do_inspect(args.identifier)
elif action == "list":
do_list(source_filter=args.source)
do_list(
source_filter=args.source,
enabled_only=getattr(args, "enabled_only", False),
)
elif action == "check":
do_check(name=getattr(args, "name", None))
elif action == "update":
@ -1279,11 +1323,12 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
elif action == "list":
source_filter = "all"
enabled_only = "--enabled-only" in args or "--enabled" in args
if "--source" in args:
idx = args.index("--source")
if idx + 1 < len(args):
source_filter = args[idx + 1]
do_list(source_filter=source_filter, console=c)
do_list(source_filter=source_filter, enabled_only=enabled_only, console=c)
elif action == "check":
name = args[0] if args else None
@ -1371,7 +1416,8 @@ def _print_skills_help(console: Console) -> None:
" [cyan]search[/] <query> Search registries for skills\n"
" [cyan]install[/] <identifier> Install a skill (with security scan)\n"
" [cyan]inspect[/] <identifier> Preview a skill without installing\n"
" [cyan]list[/] [--source hub|builtin|local] List installed skills\n"
" [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n"
" List installed skills; --enabled-only filters to the active profile's live set\n"
" [cyan]check[/] [name] Check hub skills for upstream updates\n"
" [cyan]update[/] [name] Update hub skills with upstream changes\n"
" [cyan]audit[/] [name] Re-scan hub skills for security\n"

152
hermes_cli/slack_cli.py Normal file
View file

@ -0,0 +1,152 @@
"""``hermes slack ...`` CLI subcommands.

Today only ``hermes slack manifest`` is implemented — it generates the
Slack app manifest JSON for registering every gateway command as a native
Slack slash (``/btw``, ``/stop``, ``/model``, …) so users get the same
first-class slash UX Discord and Telegram already have.

Typical workflow::

    $ hermes slack manifest > slack-manifest.json
    # or:
    $ hermes slack manifest --write

Then paste the printed JSON into the Slack app config (Features → App
Manifest → Edit) and click Save. Slack diffs the manifest and prompts
for reinstall when scopes/commands change.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
    """Build a full Slack app manifest: display info plus the Hermes slash list.

    The slash-command list is taken from
    ``hermes_cli.commands.slack_app_manifest()``; every other section
    (display info, OAuth scopes, event subscriptions, socket mode) is a
    fixed default that users can tweak in the Slack UI after pasting.

    Args:
        bot_name: Name used for both the app and its bot user. An empty or
            ``None`` value falls back to ``"Hermes"``. Truncated to 35
            characters for the app name and 80 for the bot display name.
        bot_description: Short app description. An empty or ``None`` value
            falls back to ``"Your Hermes agent on Slack"``. Truncated to 140
            characters.

    Returns:
        A JSON-serializable dict in Slack's app-manifest layout.
    """
    from hermes_cli.commands import slack_app_manifest

    # Default the name the same way the description is defaulted: slicing
    # ``None`` would raise, and an empty name yields a manifest with a blank
    # app/bot name.
    name = bot_name or "Hermes"
    description = bot_description or "Your Hermes agent on Slack"

    slashes = slack_app_manifest()["features"]["slash_commands"]

    # NOTE(review): the 35/140/80 truncation lengths presumably mirror
    # Slack's manifest field limits — confirm against the manifest reference.
    return {
        "_metadata": {
            "major_version": 1,
            "minor_version": 1,
        },
        "display_information": {
            "name": name[:35],
            "description": description[:140],
            "background_color": "#1a1a2e",
        },
        "features": {
            "bot_user": {
                "display_name": name[:80],
                "always_online": True,
            },
            "slash_commands": slashes,
            "assistant_view": {
                "assistant_description": "Chat with Hermes in threads and DMs.",
            },
        },
        "oauth_config": {
            "scopes": {
                "bot": [
                    "app_mentions:read",
                    "assistant:write",
                    "channels:history",
                    "channels:read",
                    "chat:write",
                    "commands",
                    "files:read",
                    "files:write",
                    "groups:history",
                    "im:history",
                    "im:read",
                    "im:write",
                    "users:read",
                ],
            },
        },
        "settings": {
            "event_subscriptions": {
                "bot_events": [
                    "app_mention",
                    "assistant_thread_context_changed",
                    "assistant_thread_started",
                    "message.channels",
                    "message.groups",
                    "message.im",
                ],
            },
            "interactivity": {
                "is_enabled": True,
            },
            "org_deploy_enabled": False,
            "socket_mode_enabled": True,
            "token_rotation_enabled": False,
        },
    }
def slack_manifest_command(args) -> int:
    """Print or write a Slack app manifest JSON.

    Flags (all parsed in ``hermes_cli/main.py``):
        --write [PATH]      Write to file instead of stdout (default path:
                            ``$HERMES_HOME/slack-manifest.json``)
        --name NAME         Override the bot display name (default: "Hermes")
        --description DESC  Override the bot description
        --slashes-only      Emit only the ``features.slash_commands`` array (for
                            merging into an existing manifest manually)

    Args:
        args: Parsed-arguments namespace. Missing attributes are tolerated;
            ``write`` may be ``None``/``False`` (print to stdout), ``True``
            (write to the default location) or a path-like value.

    Returns:
        ``0`` on success. Filesystem errors while writing propagate.
    """
    name = getattr(args, "name", None) or "Hermes"
    description = getattr(args, "description", None) or "Your Hermes agent on Slack"

    if getattr(args, "slashes_only", False):
        from hermes_cli.commands import slack_app_manifest

        manifest = slack_app_manifest()["features"]["slash_commands"]
    else:
        manifest = _build_full_manifest(name, description)

    payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"

    write_target = getattr(args, "write", None)
    # ``False`` (e.g. a store_true-style default or a programmatic caller)
    # means "not requested", same as ``None``. Without this guard it would
    # reach ``Path(False)`` below and raise TypeError.
    if write_target is None or write_target is False:
        sys.stdout.write(payload)
        return 0

    if write_target is True:
        # --write with no value → default location
        try:
            from hermes_constants import get_hermes_home

            target = Path(get_hermes_home()) / "slack-manifest.json"
        except Exception:
            # Fall back to the conventional home if the constants module is
            # unavailable or cannot resolve HERMES_HOME.
            target = Path.home() / ".hermes" / "slack-manifest.json"
    else:
        target = Path(write_target).expanduser()

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(payload, encoding="utf-8")

    # Status and instructions go to stderr so stdout stays clean for piping.
    print(f"Slack manifest written to: {target}", file=sys.stderr)
    print(
        "\nNext steps:\n"
        " 1. Open https://api.slack.com/apps and pick your Hermes app\n"
        " (or create a new one: Create New App → From an app manifest).\n"
        " 2. Features → App Manifest → paste the contents of\n"
        f" {target}\n"
        " 3. Save; Slack will prompt to reinstall the app if scopes or\n"
        " slash commands changed.\n"
        " 4. Make sure Socket Mode is enabled and you have a bot token\n"
        " (xoxb-...) and app token (xapp-...) configured via\n"
        " `hermes setup`.\n",
        file=sys.stderr,
    )
    return 0

View file

@ -10,8 +10,7 @@ import random
TIPS = [
# --- Slash Commands ---
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
"/background <prompt> runs a task in a separate session while your current one stays free.",
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
"/branch forks the current session so you can explore a different direction without losing progress.",
"/compress manually compresses conversation context when things get long.",
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",

View file

@ -832,7 +832,18 @@ class SessionDB:
params = []
if not include_children:
where_clauses.append("s.parent_session_id IS NULL")
# Show root sessions and branch sessions (whose parent ended with
# end_reason='branched' before the child was created), while still
# hiding sub-agent runs and compression continuations (which also
# carry a parent_session_id but were spawned while the parent was
# still live — i.e., started_at < parent.ended_at).
where_clauses.append(
"(s.parent_session_id IS NULL"
" OR EXISTS (SELECT 1 FROM sessions p"
" WHERE p.id = s.parent_session_id"
" AND p.end_reason = 'branched'"
" AND s.started_at >= p.ended_at))"
)
if source:
where_clauses.append("s.source = ?")

View file

@ -892,7 +892,6 @@ class AIAgent:
checkpoints_enabled: bool = False,
checkpoint_max_snapshots: int = 50,
pass_session_id: bool = False,
persist_session: bool = True,
):
"""
Initialize the AI Agent.
@ -964,7 +963,6 @@ class AIAgent:
self.background_review_callback = None # Optional sync callback for gateway delivery
self.skip_context_files = skip_context_files
self.pass_session_id = pass_session_id
self.persist_session = persist_session
self._credential_pool = credential_pool
self.log_prefix_chars = log_prefix_chars
self.log_prefix = f"{log_prefix} " if log_prefix else ""
@ -3109,13 +3107,28 @@ class AIAgent:
)
_SKILL_REVIEW_PROMPT = (
"Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
"Focus on: was a non-trivial approach used to complete a task that required trial "
"and error, or changing course due to experiential findings along the way, or did "
"the user expect or desire a different method or outcome?\n\n"
"If a relevant skill already exists, update it with what you learned. "
"Otherwise, create a new skill if the approach is reusable.\n"
"If nothing is worth saving, just say 'Nothing to save.' and stop."
"Review the conversation above and consider whether a skill should be saved or updated.\n\n"
"Work in this order — do not skip steps:\n\n"
"1. SURVEY the existing skill landscape first. Call skills_list to see what you "
"have. If anything looks potentially relevant, skill_view it before deciding. "
"You are looking for the CLASS of task that just happened, not the exact task. "
"Example: a successful Tauri build is in the class \"desktop app build "
"troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
"2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
"What conditions will trigger this pattern again? Describe the class in one "
"sentence before looking at what to save.\n\n"
"3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
"already covers the class — even partially — update it (skill_manage patch) "
"with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
"4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
"When you create one, name and scope it at the class level "
"(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
"section must describe the class of situations, not this one session.\n\n"
"5. If you notice two existing skills that overlap, note it in your response "
"so a future review can consolidate them. Do not consolidate now unless the "
"overlap is obvious and low-risk.\n\n"
"Only act when something is genuinely worth saving. "
"If nothing stands out, just say 'Nothing to save.' and stop."
)
_COMBINED_REVIEW_PROMPT = (
@ -3125,9 +3138,16 @@ class AIAgent:
"about how you should behave, their work style, or ways they want you to operate? "
"If so, save using the memory tool.\n\n"
"**Skills**: Was a non-trivial approach used to complete a task that required trial "
"and error, or changing course due to experiential findings along the way, or did "
"the user expect or desire a different method or outcome? If a relevant skill "
"already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
"and error, changing course due to experiential findings, or a different method "
"or outcome than the user expected? If so, work in this order:\n"
" a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
" b. Identify the CLASS of task, not the specific task "
"(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
" c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
" d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
"the class level, not this one session.\n"
" e. If you notice overlapping skills during the survey, note it so a future "
"review can consolidate them.\n\n"
"Only act if there's something genuinely worth saving. "
"If nothing stands out, just say 'Nothing to save.' and stop."
)
@ -3225,12 +3245,25 @@ class AIAgent:
with open(os.devnull, "w") as _devnull, \
contextlib.redirect_stdout(_devnull), \
contextlib.redirect_stderr(_devnull):
# Inherit the parent agent's live runtime (provider, model,
# base_url, api_key, api_mode) so the fork uses the exact
# same credentials the main turn is using. Without this,
# AIAgent.__init__ re-runs auto-resolution from env vars,
# which fails for OAuth-only providers, session-scoped
# creds, or credential-pool setups where the resolver can't
# reconstruct auth from scratch -- producing the spurious
# "No LLM provider configured" warning at end of turn.
_parent_runtime = self._current_main_runtime()
review_agent = AIAgent(
model=self.model,
max_iterations=8,
quiet_mode=True,
platform=self.platform,
provider=self.provider,
api_mode=_parent_runtime.get("api_mode") or None,
base_url=_parent_runtime.get("base_url") or None,
api_key=_parent_runtime.get("api_key") or None,
credential_pool=getattr(self, "_credential_pool", None),
parent_session_id=self.session_id,
)
review_agent._memory_write_origin = "background_review"
@ -3331,10 +3364,7 @@ class AIAgent:
"""Save session state to both JSON log and SQLite on any exit path.
Ensures conversations are never lost, even on errors or early returns.
Skipped when ``persist_session=False`` (ephemeral helper flows).
"""
if not self.persist_session:
return
self._apply_persist_user_message_override(messages)
self._session_messages = messages
self._save_session_log(messages)
@ -7851,7 +7881,17 @@ class AIAgent:
api_msg["reasoning_content"] = existing
return
# 2. DeepSeek / Kimi thinking mode: tool-call turns that lack
# 2. Healthy session: promote 'reasoning' field to 'reasoning_content'
# for providers that use the internal 'reasoning' key.
# This must happen BEFORE the DeepSeek/Kimi tool-call check so that
# genuine reasoning content is not overwritten by the empty-string
# fallback (#15812 regression in PR #15478).
normalized_reasoning = source_msg.get("reasoning")
if isinstance(normalized_reasoning, str) and normalized_reasoning:
api_msg["reasoning_content"] = normalized_reasoning
return
# 3. DeepSeek / Kimi thinking mode: tool-call turns that lack
# reasoning_content are "poisoned history" — a prior provider (MiniMax,
# etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
# is absent on replay; inject "" to satisfy the provider's requirement
@ -7867,13 +7907,6 @@ class AIAgent:
api_msg["reasoning_content"] = ""
return
# 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
# for providers that use the internal 'reasoning' key.
normalized_reasoning = source_msg.get("reasoning")
if isinstance(normalized_reasoning, str) and normalized_reasoning:
api_msg["reasoning_content"] = normalized_reasoning
return
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
# reasoning_content. Inject "" to satisfy the provider's requirement
# when no explicit reasoning content is present.
@ -11007,36 +11040,69 @@ class AIAgent:
continue
# ── Nous Portal: record rate limit & skip retries ─────
# When Nous returns a 429, record the reset time to a
# shared file so ALL sessions (cron, gateway, auxiliary)
# know not to pile on. Then skip further retries —
# each one burns another RPH request and deepens the
# rate limit hole. The retry loop's top-of-iteration
# guard will catch this on the next pass and try
# fallback or bail with a clear message.
# When Nous returns a 429 that is a genuine account-
# level rate limit, record the reset time to a shared
# file so ALL sessions (cron, gateway, auxiliary) know
# not to pile on, then skip further retries -- each
# one burns another RPH request and deepens the hole.
# The retry loop's top-of-iteration guard will catch
# this on the next pass and try fallback or bail.
#
# IMPORTANT: Nous Portal multiplexes multiple upstream
# providers (DeepSeek, Kimi, MiMo, Hermes). A 429 can
# also mean an UPSTREAM provider is out of capacity
# for one specific model -- transient, clears in
# seconds, nothing to do with the caller's quota.
# Tripping the cross-session breaker on that would
# block every Nous model for minutes. We use
# ``is_genuine_nous_rate_limit`` to tell the two
# apart via the 429's own x-ratelimit-* headers and
# the last-known-good state captured on the previous
# successful response.
if (
is_rate_limited
and self.provider == "nous"
and classified.reason == FailoverReason.rate_limit
and not recovered_with_pool
):
_genuine_nous_rate_limit = False
try:
from agent.nous_rate_guard import record_nous_rate_limit
from agent.nous_rate_guard import (
is_genuine_nous_rate_limit,
record_nous_rate_limit,
)
_err_resp = getattr(api_error, "response", None)
_err_hdrs = (
getattr(_err_resp, "headers", None)
if _err_resp else None
)
record_nous_rate_limit(
_genuine_nous_rate_limit = is_genuine_nous_rate_limit(
headers=_err_hdrs,
error_context=error_context,
last_known_state=self._rate_limit_state,
)
if _genuine_nous_rate_limit:
record_nous_rate_limit(
headers=_err_hdrs,
error_context=error_context,
)
else:
logging.info(
"Nous 429 looks like upstream capacity "
"(no exhausted bucket in headers or "
"last-known state) -- not tripping "
"cross-session breaker."
)
except Exception:
pass
# Skip straight to max_retries — the top-of-loop
# guard will handle fallback or bail cleanly.
retry_count = max_retries
continue
if _genuine_nous_rate_limit:
# Skip straight to max_retries -- the
# top-of-loop guard will handle fallback or
# bail cleanly.
retry_count = max_retries
continue
# Upstream capacity 429: fall through to normal
# retry logic. A different model (or the same
# model a moment later) will typically succeed.
is_payload_too_large = (
classified.reason == FailoverReason.payload_too_large

95
scripts/build_model_catalog.py Executable file
View file

@ -0,0 +1,95 @@
#!/usr/bin/env python3
"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.

This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
Hermes CLI fetches at runtime. Publishing the catalog through the docs site
lets maintainers update model lists without shipping a Hermes release.

The runtime fetcher falls back to the same in-repo hardcoded lists if the
manifest is unreachable, so this script is a convenience for keeping the
manifest in sync — not a source of truth.

Usage::

    python scripts/build_model_catalog.py

Output: ``website/static/api/model-catalog.json``

Live URL (after ``deploy-site.yml`` runs on merge to main):
    ``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
"""
from __future__ import annotations
import json
import os
import sys
from datetime import datetime, timezone
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, REPO_ROOT)
# Ensure HERMES_HOME is set for imports that touch it at module level.
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
CATALOG_VERSION = 1
def build_catalog() -> dict:
    """Assemble the model-catalog manifest from the in-repo curated lists.

    Returns:
        A dict with ``version``, a UTC ``updated_at`` timestamp, top-level
        ``metadata``, and a ``providers`` mapping holding one block each for
        ``openrouter`` (id + description per model) and ``nous`` (id only).
    """
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # OPENROUTER_MODELS is iterated as (id, description) pairs.
    openrouter_entries = [
        {"id": model_id, "description": blurb}
        for model_id, blurb in OPENROUTER_MODELS
    ]
    openrouter_block = {
        "metadata": {
            "display_name": "OpenRouter",
            "note": (
                "Descriptions drive picker badges. Live /api/v1/models "
                "filters curated ids by tool-calling support and free pricing."
            ),
        },
        "models": openrouter_entries,
    }

    # A missing "nous" key yields an empty model list rather than an error.
    nous_entries = [
        {"id": model_id}
        for model_id in _PROVIDER_MODELS.get("nous", [])
    ]
    nous_block = {
        "metadata": {
            "display_name": "Nous Portal",
            "note": (
                "Free-tier gating is determined live via Portal pricing "
                "(partition_nous_models_by_tier), not this manifest."
            ),
        },
        "models": nous_entries,
    }

    return {
        "version": CATALOG_VERSION,
        "updated_at": generated_at,
        "metadata": {
            "source": "hermes-agent repo",
            "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
        },
        "providers": {
            "openrouter": openrouter_block,
            "nous": nous_block,
        },
    }
def main() -> int:
    """Build the catalog, write it to ``OUTPUT_PATH``, and print a summary.

    Returns:
        ``0`` as the process exit code.
    """
    manifest = build_catalog()

    # Create the destination directory on first run.
    destination_dir = os.path.dirname(OUTPUT_PATH)
    os.makedirs(destination_dir, exist_ok=True)

    with open(OUTPUT_PATH, "w") as out:
        json.dump(manifest, out, indent=2)
        # json.dump emits no trailing newline; add one.
        out.write("\n")

    print(f"Wrote {OUTPUT_PATH}")
    for provider_name, section in manifest["providers"].items():
        model_total = len(section["models"])
        print(f" {provider_name}: {model_total} models")
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -43,6 +43,7 @@ AUTHOR_MAP = {
"teknium1@gmail.com": "teknium1",
"teknium@nousresearch.com": "teknium1",
"127238744+teknium1@users.noreply.github.com": "teknium1",
"focusflow.app.help@gmail.com": "yes999zc",
"343873859@qq.com": "DrStrangerUJN",
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
"jefferson@heimdallstrategy.com": "Mind-Dragon",
@ -69,6 +70,8 @@ AUTHOR_MAP = {
"keira.voss94@gmail.com": "keiravoss94",
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
"fqsy1416@gmail.com": "EKKOLearnAI",
"octo-patch@github.com": "octo-patch",
"math0r-be@github.com": "math0r-be",
"simbamax99@gmail.com": "simbam99",
"iris@growthpillars.co": "irispillars",
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
@ -118,6 +121,7 @@ AUTHOR_MAP = {
"nocoo@users.noreply.github.com": "nocoo",
"30841158+n-WN@users.noreply.github.com": "n-WN",
"tsuijinglei@gmail.com": "hiddenpuppy",
"buraysandro9@gmail.com": "ygd58",
"jerome@clawwork.ai": "HiddenPuppy",
"jerome.benoit@sap.com": "jerome-benoit",
"wysie@users.noreply.github.com": "Wysie",

View file

@ -281,7 +281,6 @@ Type these during an interactive chat session.
### Utility
```
/branch (/fork) Branch the current session
/btw Ephemeral side question (doesn't interrupt main task)
/fast Toggle priority/fast processing
/browser Open CDP browser connection
/history Show conversation history (CLI)

View file

@ -1,3 +0,0 @@
---
description: Skills for monitoring, aggregating, and processing RSS feeds, blogs, and web content sources.
---

View file

@ -192,6 +192,43 @@ class TestDefaultContextLengths:
f"{model_id}: expected {expected_ctx}, got {actual}"
)
def test_deepseek_v4_models_1m_context(self):
    """DeepSeek V4 ids and their legacy aliases carry a 1M default context."""
    from agent.model_metadata import get_model_context_length
    from unittest.mock import patch as mock_patch

    one_million = 1_000_000
    table_entries = {
        "deepseek-v4-pro": one_million,
        "deepseek-v4-flash": one_million,
        "deepseek-chat": one_million,
        "deepseek-reasoner": one_million,
    }
    # First check the raw table entries themselves.
    for key, value in table_entries.items():
        assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
        assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
            f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
        )

    # Then check resolution with every live/cached lookup stubbed out: both
    # bare ids (native DeepSeek) and vendor-prefixed ids (OpenRouter / Nous
    # Portal) must land on 1M instead of dropping to the legacy 128K
    # ``deepseek`` substring fallback.
    lookups = [
        ("deepseek-v4-pro", one_million),
        ("deepseek-v4-flash", one_million),
        ("deepseek/deepseek-v4-pro", one_million),
        ("deepseek/deepseek-v4-flash", one_million),
        ("deepseek-chat", one_million),
        ("deepseek-reasoner", one_million),
    ]
    with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
         mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
         mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
        for model_id, expected_ctx in lookups:
            actual = get_model_context_length(model_id)
            assert actual == expected_ctx, (
                f"{model_id}: expected {expected_ctx}, got {actual}"
            )
def test_all_values_positive(self):
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
assert value > 0, f"{key} has non-positive context length"
@ -303,7 +340,9 @@ class TestCodexOAuthContextLength:
from agent.model_metadata import get_model_context_length
# OpenRouter — should hit its own catalog path first; when mocked
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
# matching the real direct-API value — Codex OAuth's 272k cap is
# provider-specific and must not leak here).
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
@ -314,7 +353,7 @@ class TestCodexOAuthContextLength:
api_key="",
provider="openrouter",
)
assert ctx == 400_000, (
assert ctx == 1_050_000, (
f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
"leaked outside openai-codex provider"
)

View file

@ -251,3 +251,141 @@ class TestAuxiliaryClientIntegration:
monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
result = aux._try_nous()
assert result == (None, None)
class TestIsGenuineNousRateLimit:
"""Tell a real account-level 429 apart from an upstream-capacity 429.
Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
A 429 from an upstream out of capacity should NOT trip the
cross-session breaker; a real user-quota 429 should.
"""
def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
    """A 429 whose hourly request bucket shows zero remaining is genuine."""
    from agent.nous_rate_guard import is_genuine_nous_rate_limit

    response_headers = {
        # Hourly request bucket: exhausted, long reset.
        "x-ratelimit-limit-requests-1h": "800",
        "x-ratelimit-remaining-requests-1h": "0",
        "x-ratelimit-reset-requests-1h": "3100",
        # Short-window request bucket: still healthy.
        "x-ratelimit-limit-requests": "200",
        "x-ratelimit-remaining-requests": "198",
        "x-ratelimit-reset-requests": "40",
    }
    verdict = is_genuine_nous_rate_limit(headers=response_headers)
    assert verdict is True
def test_exhausted_tokens_bucket_is_genuine(self):
    """An exhausted hourly token bucket with a long reset counts as genuine."""
    from agent.nous_rate_guard import is_genuine_nous_rate_limit

    response_headers = {
        "x-ratelimit-limit-tokens": "800000",
        "x-ratelimit-remaining-tokens": "0",
        "x-ratelimit-reset-tokens": "45",  # < 60s threshold -> not genuine
        "x-ratelimit-limit-tokens-1h": "8000000",
        "x-ratelimit-remaining-tokens-1h": "0",
        "x-ratelimit-reset-tokens-1h": "1800",  # >= 60s threshold -> genuine
    }
    verdict = is_genuine_nous_rate_limit(headers=response_headers)
    assert verdict is True
def test_healthy_headers_on_429_are_upstream_capacity(self):
    """A 429 with headroom on every bucket is classified as upstream capacity."""
    # The typical upstream-capacity picture: the Nous edge reports ample
    # headroom on all four buckets yet still answers 429 because the
    # upstream (DeepSeek / Kimi / ...) has run out of capacity.
    from agent.nous_rate_guard import is_genuine_nous_rate_limit

    response_headers = {
        "x-ratelimit-limit-requests": "200",
        "x-ratelimit-remaining-requests": "198",
        "x-ratelimit-reset-requests": "40",
        "x-ratelimit-limit-requests-1h": "800",
        "x-ratelimit-remaining-requests-1h": "750",
        "x-ratelimit-reset-requests-1h": "3100",
        "x-ratelimit-limit-tokens": "800000",
        "x-ratelimit-remaining-tokens": "790000",
        "x-ratelimit-reset-tokens": "40",
        "x-ratelimit-limit-tokens-1h": "8000000",
        "x-ratelimit-remaining-tokens-1h": "7800000",
        "x-ratelimit-reset-tokens-1h": "3100",
    }
    verdict = is_genuine_nous_rate_limit(headers=response_headers)
    assert verdict is False
def test_bare_429_with_no_headers_is_upstream(self):
    """Missing, empty, or rate-limit-free headers never mark a 429 as genuine."""
    from agent.nous_rate_guard import is_genuine_nous_rate_limit

    header_variants = (
        None,
        {},
        {"content-type": "application/json"},
    )
    for candidate in header_variants:
        assert is_genuine_nous_rate_limit(headers=candidate) is False
def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
    """Zero remaining with a sub-minute reset is not treated as genuine."""
    # remaining == 0 but the reset is under 60s away: most likely a
    # secondary per-minute throttle that clears almost immediately, so it
    # should not trip the cross-session breaker.
    from agent.nous_rate_guard import is_genuine_nous_rate_limit

    response_headers = {
        "x-ratelimit-limit-requests": "200",
        "x-ratelimit-remaining-requests": "0",
        "x-ratelimit-reset-requests": "30",
    }
    verdict = is_genuine_nous_rate_limit(headers=response_headers)
    assert verdict is False
def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
    """A header-less 429 is genuine when the prior state was exhausted.

    The 429 itself carries no rate-limit headers, but the state parsed
    from the previous response already shows the hourly request bucket at
    zero with a long reset, so the 429 is taken as that limit continuing.
    """
    from agent.nous_rate_guard import is_genuine_nous_rate_limit
    from agent.rate_limit_tracker import parse_rate_limit_headers

    previous_response_headers = {
        "x-ratelimit-limit-requests-1h": "800",
        "x-ratelimit-remaining-requests-1h": "0",
        "x-ratelimit-reset-requests-1h": "2000",
        "x-ratelimit-limit-requests": "200",
        "x-ratelimit-remaining-requests": "100",
        "x-ratelimit-reset-requests": "30",
        "x-ratelimit-limit-tokens": "800000",
        "x-ratelimit-remaining-tokens": "700000",
        "x-ratelimit-reset-tokens": "30",
        "x-ratelimit-limit-tokens-1h": "8000000",
        "x-ratelimit-remaining-tokens-1h": "7000000",
        "x-ratelimit-reset-tokens-1h": "2000",
    }
    remembered_state = parse_rate_limit_headers(
        previous_response_headers, provider="nous"
    )

    is_genuine = is_genuine_nous_rate_limit(
        headers=None, last_known_state=remembered_state
    )
    assert is_genuine is True
def test_last_known_state_all_healthy_stays_upstream(self):
    """A header-less 429 after a healthy prior state is not a genuine limit."""
    from agent.nous_rate_guard import is_genuine_nous_rate_limit
    from agent.rate_limit_tracker import parse_rate_limit_headers

    # Every bucket in the remembered state still has quota left.
    previous_response_headers = {
        "x-ratelimit-limit-requests-1h": "800",
        "x-ratelimit-remaining-requests-1h": "750",
        "x-ratelimit-reset-requests-1h": "2000",
        "x-ratelimit-limit-requests": "200",
        "x-ratelimit-remaining-requests": "180",
        "x-ratelimit-reset-requests": "30",
        "x-ratelimit-limit-tokens": "800000",
        "x-ratelimit-remaining-tokens": "790000",
        "x-ratelimit-reset-tokens": "30",
        "x-ratelimit-limit-tokens-1h": "8000000",
        "x-ratelimit-remaining-tokens-1h": "7900000",
        "x-ratelimit-reset-tokens-1h": "2000",
    }
    remembered_state = parse_rate_limit_headers(
        previous_response_headers, provider="nous"
    )

    is_genuine = is_genuine_nous_rate_limit(
        headers=None, last_known_state=remembered_state
    )
    assert is_genuine is False
def test_none_last_state_and_no_headers_is_upstream(self):
    """With neither headers nor a remembered state, the 429 is not genuine."""
    from agent.nous_rate_guard import is_genuine_nous_rate_limit

    is_genuine = is_genuine_nous_rate_limit(headers=None, last_known_state=None)
    assert is_genuine is False

View file

@ -0,0 +1,164 @@
"""Tests for agent/onboarding.py — contextual first-touch hint helpers."""
from __future__ import annotations
import yaml
import pytest
from agent.onboarding import (
BUSY_INPUT_FLAG,
TOOL_PROGRESS_FLAG,
busy_input_hint_cli,
busy_input_hint_gateway,
is_seen,
mark_seen,
tool_progress_hint_cli,
tool_progress_hint_gateway,
)
class TestIsSeen:
    """``is_seen`` is True only when the exact flag is stored as True."""

    def test_empty_config_unseen(self):
        empty_config = {}
        assert is_seen(empty_config, BUSY_INPUT_FLAG) is False

    def test_missing_onboarding_unseen(self):
        # A config with unrelated sections but no "onboarding" key.
        config = {"display": {}}
        assert is_seen(config, BUSY_INPUT_FLAG) is False

    def test_onboarding_not_dict_unseen(self):
        # A malformed "onboarding" value must not raise.
        config = {"onboarding": "nope"}
        assert is_seen(config, BUSY_INPUT_FLAG) is False

    def test_seen_dict_missing_flag(self):
        config = {"onboarding": {"seen": {}}}
        assert is_seen(config, BUSY_INPUT_FLAG) is False

    def test_seen_flag_true(self):
        config = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
        result = is_seen(config, BUSY_INPUT_FLAG)
        assert result is True

    def test_seen_flag_falsy(self):
        config = {"onboarding": {"seen": {BUSY_INPUT_FLAG: False}}}
        result = is_seen(config, BUSY_INPUT_FLAG)
        assert result is False

    def test_other_flags_isolated(self):
        # Marking one flag must not make a different flag look seen.
        config = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
        result = is_seen(config, TOOL_PROGRESS_FLAG)
        assert result is False
class TestMarkSeen:
    """``mark_seen`` persists a flag to a YAML file without losing other data."""

    def test_creates_missing_file_and_sets_flag(self, tmp_path):
        config_file = tmp_path / "config.yaml"

        assert mark_seen(config_file, BUSY_INPUT_FLAG) is True

        on_disk = yaml.safe_load(config_file.read_text())
        assert on_disk["onboarding"]["seen"][BUSY_INPUT_FLAG] is True

    def test_preserves_other_config(self, tmp_path):
        config_file = tmp_path / "config.yaml"
        unrelated_sections = {
            "model": {"default": "claude-sonnet-4.6"},
            "display": {"skin": "default"},
        }
        config_file.write_text(yaml.safe_dump(unrelated_sections))

        assert mark_seen(config_file, BUSY_INPUT_FLAG) is True

        on_disk = yaml.safe_load(config_file.read_text())
        assert on_disk["model"]["default"] == "claude-sonnet-4.6"
        assert on_disk["display"]["skin"] == "default"
        assert on_disk["onboarding"]["seen"][BUSY_INPUT_FLAG] is True

    def test_preserves_other_seen_flags(self, tmp_path):
        config_file = tmp_path / "config.yaml"
        already_seen = {"onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}}}
        config_file.write_text(yaml.safe_dump(already_seen))

        assert mark_seen(config_file, BUSY_INPUT_FLAG) is True

        seen_flags = yaml.safe_load(config_file.read_text())["onboarding"]["seen"]
        assert seen_flags[TOOL_PROGRESS_FLAG] is True
        assert seen_flags[BUSY_INPUT_FLAG] is True

    def test_idempotent(self, tmp_path):
        config_file = tmp_path / "config.yaml"

        mark_seen(config_file, BUSY_INPUT_FLAG)
        after_first_call = config_file.read_text()
        # The file may be rewritten by the second call; only the parsed
        # YAML content has to stay the same.
        mark_seen(config_file, BUSY_INPUT_FLAG)
        after_second_call = config_file.read_text()

        assert yaml.safe_load(after_first_call) == yaml.safe_load(after_second_call)

    def test_handles_non_dict_onboarding(self, tmp_path):
        config_file = tmp_path / "config.yaml"
        config_file.write_text(yaml.safe_dump({"onboarding": "corrupted"}))

        assert mark_seen(config_file, BUSY_INPUT_FLAG) is True

        on_disk = yaml.safe_load(config_file.read_text())
        assert on_disk["onboarding"]["seen"][BUSY_INPUT_FLAG] is True

    def test_handles_non_dict_seen(self, tmp_path):
        config_file = tmp_path / "config.yaml"
        config_file.write_text(yaml.safe_dump({"onboarding": {"seen": "corrupted"}}))

        assert mark_seen(config_file, BUSY_INPUT_FLAG) is True

        on_disk = yaml.safe_load(config_file.read_text())
        assert on_disk["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
class TestHintMessages:
    """Each hint names the slash command the user can switch to."""

    def test_busy_input_hint_gateway_interrupt(self):
        hint = busy_input_hint_gateway("interrupt")
        assert "/busy queue" in hint
        assert "interrupted" in hint.lower()

    def test_busy_input_hint_gateway_queue(self):
        hint = busy_input_hint_gateway("queue")
        assert "/busy interrupt" in hint
        assert "queued" in hint.lower()

    def test_busy_input_hint_cli_interrupt(self):
        hint = busy_input_hint_cli("interrupt")
        assert "/busy queue" in hint

    def test_busy_input_hint_cli_queue(self):
        hint = busy_input_hint_cli("queue")
        assert "/busy interrupt" in hint

    def test_tool_progress_hints_mention_verbose(self):
        gateway_hint = tool_progress_hint_gateway()
        assert "/verbose" in gateway_hint
        cli_hint = tool_progress_hint_cli()
        assert "/verbose" in cli_hint

    def test_hints_are_not_empty(self):
        every_hint = [
            busy_input_hint_gateway("queue"),
            busy_input_hint_gateway("interrupt"),
            busy_input_hint_cli("queue"),
            busy_input_hint_cli("interrupt"),
            tool_progress_hint_gateway(),
            tool_progress_hint_cli(),
        ]
        # A hint made only of whitespace counts as empty.
        assert all(text.strip() for text in every_hint)
class TestRoundTrip:
    """A flag written by ``mark_seen`` must be read back as seen by ``is_seen``."""

    def test_mark_then_is_seen(self, tmp_path):
        config_file = tmp_path / "config.yaml"

        assert mark_seen(config_file, BUSY_INPUT_FLAG) is True

        reloaded = yaml.safe_load(config_file.read_text())
        assert is_seen(reloaded, BUSY_INPUT_FLAG) is True
        # The flag that was never marked stays unseen.
        assert is_seen(reloaded, TOOL_PROGRESS_FLAG) is False

    def test_mark_both_flags_independently(self, tmp_path):
        config_file = tmp_path / "config.yaml"

        for flag in (BUSY_INPUT_FLAG, TOOL_PROGRESS_FLAG):
            mark_seen(config_file, flag)

        reloaded = yaml.safe_load(config_file.read_text())
        assert is_seen(reloaded, BUSY_INPUT_FLAG) is True
        assert is_seen(reloaded, TOOL_PROGRESS_FLAG) is True

View file

@ -160,6 +160,30 @@ class TestBranchCommandCLI:
assert agent.reset_session_state.called
assert agent._last_flushed_db_idx == 4 # len(conversation_history)
def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path):
    """Branching must redirect the agent's session_log_file to the new session's path."""
    from cli import HermesCLI

    logs_dir = tmp_path / "sessions"
    logs_dir.mkdir()

    # Stand-in agent whose log file initially points at the pre-branch session.
    agent = MagicMock()
    agent._last_flushed_db_idx = 0
    agent.logs_dir = logs_dir
    agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json"
    cli_instance.agent = agent

    old_log_file = agent.session_log_file

    HermesCLI._handle_branch_command(cli_instance, "/branch")

    # The branch command is expected to have replaced cli_instance.session_id.
    new_session_id = cli_instance.session_id
    expected_log = logs_dir / f"session_{new_session_id}.json"
    assert agent.session_log_file == expected_log, (
        "session_log_file must point to the branch session, not the original"
    )
    assert agent.session_log_file != old_log_file
def test_branch_sets_resumed_flag(self, cli_instance, session_db):
"""Branch should set _resumed=True to prevent auto-title generation."""
from cli import HermesCLI

View file

@ -1043,3 +1043,132 @@ class TestAgentCacheIdleResume:
new_agent.close()
except Exception:
pass
_FAKE_NOW = 10_000.0  # Fixed epoch for deterministic time assertions


class TestCachedAgentInactivityReset:
    """The inactivity clock may only be reset when ``interrupt_depth == 0``.

    When a turn re-enters through an interrupt (``interrupt_depth > 0``) the
    clock has to keep running, otherwise a turn stuck in an interrupt loop
    never reaches the inactivity timeout (#15654). The reset at depth 0 is
    still required so that a session which sat idle before a new turn does
    not trip the watchdog before that turn's first API call (#9051).
    """

    def _fake_agent(self, stale_seconds: float = 1800.0):
        """Return a mock agent last active ``stale_seconds`` before ``_FAKE_NOW``."""
        agent = MagicMock()
        agent._last_activity_ts = _FAKE_NOW - stale_seconds
        agent._api_call_count = 10
        agent._last_activity_desc = "previous turn activity"
        return agent

    def test_fresh_turn_resets_idle_clock(self):
        """interrupt_depth=0: the timestamp moves to "now" (guard for #9051)."""
        from gateway.run import GatewayRunner

        agent = self._fake_agent(stale_seconds=1800.0)
        ts_before = agent._last_activity_ts

        with patch("gateway.run.time") as fake_time:
            fake_time.time.return_value = _FAKE_NOW
            GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)

        assert agent._last_activity_ts == _FAKE_NOW, (
            "_last_activity_ts was not reset on a fresh turn (interrupt_depth=0)"
        )
        assert agent._last_activity_ts > ts_before, (
            "Stale idle time should be cleared so the new turn gets a fresh window"
        )

    def test_fresh_turn_resets_desc(self):
        """interrupt_depth=0: the activity description reflects the new turn."""
        from gateway.run import GatewayRunner

        agent = self._fake_agent()

        with patch("gateway.run.time") as fake_time:
            fake_time.time.return_value = _FAKE_NOW
            GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)

        assert agent._last_activity_desc == "starting new turn (cached)"

    def test_interrupt_turn_preserves_idle_clock(self):
        """interrupt_depth=1: the timestamp is left untouched (#15654)."""
        from gateway.run import GatewayRunner

        agent = self._fake_agent(stale_seconds=1200.0)
        ts_before = agent._last_activity_ts

        GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)

        assert agent._last_activity_ts == ts_before, (
            "_last_activity_ts must not be reset on interrupt-recursive turns "
            "(interrupt_depth>0) — the watchdog needs the accumulated idle time"
        )

    def test_interrupt_turn_preserves_desc(self):
        """interrupt_depth=1: the description stays paired with the timestamp."""
        from gateway.run import GatewayRunner

        agent = self._fake_agent(stale_seconds=1200.0)

        GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)

        assert agent._last_activity_desc == "previous turn activity", (
            "_last_activity_desc must not change on interrupt-recursive turns; "
            "it describes the activity *at* _last_activity_ts"
        )

    def test_deep_interrupt_recursion_preserves_idle_clock(self):
        """interrupt_depth=4: the timestamp is preserved at a deeper depth too."""
        from gateway.run import GatewayRunner

        agent = self._fake_agent(stale_seconds=600.0)
        ts_before = agent._last_activity_ts

        GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=4)

        assert agent._last_activity_ts == ts_before

    def test_api_call_count_reset_regardless_of_depth(self):
        """``_api_call_count`` drops to 0 for the new turn at depth 0 and depth 1."""
        from gateway.run import GatewayRunner

        fresh_agent = self._fake_agent()
        interrupted_agent = self._fake_agent()

        with patch("gateway.run.time") as fake_time:
            fake_time.time.return_value = _FAKE_NOW
            GatewayRunner._init_cached_agent_for_turn(fresh_agent, interrupt_depth=0)
            GatewayRunner._init_cached_agent_for_turn(interrupted_agent, interrupt_depth=1)

        assert fresh_agent._api_call_count == 0
        assert interrupted_agent._api_call_count == 0

    def test_watchdog_accumulation_across_recursive_turns(self):
        """Scenario: a stuck turn is interrupted and re-entered at depth 1.

        The idle time computed from ``_last_activity_ts`` afterwards must
        still cover the whole stuck period instead of starting from zero.
        """
        from gateway.run import GatewayRunner

        STUCK_FOR = 1750.0
        agent = self._fake_agent(stale_seconds=STUCK_FOR)

        # The user sees "Still working..." and sends another message, which
        # interrupts the turn and re-enters agent setup at depth=1.
        GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)

        # Same subtraction the watchdog performs: now - _last_activity_ts.
        idle_secs = _FAKE_NOW - agent._last_activity_ts
        assert idle_secs >= STUCK_FOR - 1.0, (
            f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. "
            "Inactivity timeout could not fire for a stuck interrupted turn."
        )

View file

@ -349,3 +349,121 @@ class TestBusySessionAck:
result = await runner._handle_active_session_busy_message(event, sk)
assert result is False # not handled, let default path try
class TestBusySessionOnboardingHint:
    """The busy-ack carries a one-off onboarding hint the first time it is sent."""

    @staticmethod
    def _agent_with_activity():
        """Return a mock agent whose activity summary looks like a live turn."""
        agent = MagicMock()
        agent.get_activity_summary.return_value = {
            "api_call_count": 3, "max_iterations": 60,
            "current_tool": None, "last_activity_ts": time.time(),
            "last_activity_desc": "api", "seconds_since_activity": 0.1,
        }
        return agent

    @pytest.mark.asyncio
    async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch):
        """The first message sent while a turn is running gets the /busy hint."""
        import gateway.run as _gr

        # Redirect the gateway home to tmp_path (the flag file is asserted
        # there below) and start from a config with nothing marked as seen.
        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})

        runner, _sentinel = _make_runner()
        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="ping")
        session_key = build_session_key(event.source)

        running_agent = self._agent_with_activity()
        runner._running_agents[session_key] = running_agent
        runner._running_agents_ts[session_key] = time.time() - 5
        runner.adapters[event.source.platform] = adapter

        await runner._handle_active_session_busy_message(event, session_key)

        sent_call = adapter._send_with_retry.call_args
        ack_text = sent_call.kwargs.get("content", "")
        # Regular acknowledgement body.
        assert "Interrupting" in ack_text
        # One-off hint appended after it.
        assert "First-time tip" in ack_text
        assert "/busy queue" in ack_text

        # The seen-flag must have been written to tmp_path/config.yaml.
        import yaml
        persisted = yaml.safe_load((tmp_path / "config.yaml").read_text())
        assert persisted["onboarding"]["seen"]["busy_input_prompt"] is True

    @pytest.mark.asyncio
    async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch):
        """With the flag already recorded, the ack is sent without the hint."""
        import gateway.run as _gr
        import yaml

        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)

        # Seed the config file so the flag counts as seen from the start.
        config_file = tmp_path / "config.yaml"
        config_file.write_text(yaml.safe_dump({
            "onboarding": {"seen": {"busy_input_prompt": True}},
        }))
        monkeypatch.setattr(
            _gr, "_load_gateway_config",
            lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()),
        )

        runner, _sentinel = _make_runner()
        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="ping again")
        session_key = build_session_key(event.source)

        running_agent = self._agent_with_activity()
        runner._running_agents[session_key] = running_agent
        runner._running_agents_ts[session_key] = time.time() - 5
        runner.adapters[event.source.platform] = adapter

        await runner._handle_active_session_busy_message(event, session_key)

        sent_call = adapter._send_with_retry.call_args
        ack_text = sent_call.kwargs.get("content", "")
        assert "Interrupting" in ack_text
        assert "First-time tip" not in ack_text
        assert "/busy queue" not in ack_text

    @pytest.mark.asyncio
    async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch):
        """In queue mode the hint must suggest /busy interrupt, not /busy queue."""
        import gateway.run as _gr

        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})

        runner, _sentinel = _make_runner()
        runner._busy_input_mode = "queue"
        adapter = _make_adapter()

        event = _make_event(text="queue me")
        session_key = build_session_key(event.source)
        runner.adapters[event.source.platform] = adapter

        running_agent = MagicMock()
        runner._running_agents[session_key] = running_agent

        with patch("gateway.run.merge_pending_message_event"):
            await runner._handle_active_session_busy_message(event, session_key)

        ack_text = adapter._send_with_retry.call_args.kwargs.get("content", "")
        assert "Queued for the next turn" in ack_text
        assert "First-time tip" in ack_text
        assert "/busy interrupt" in ack_text
        # A user already in queue mode must not be told to run /busy queue.
        assert "/busy queue" not in ack_text

View file

@ -0,0 +1,215 @@
"""Tests for interrupt-aware tool-progress suppression in gateway.
When a user sends `stop` while the agent is executing a batch of parallel
tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles
and the drain loop should drop any already-queued events. Without this
guard, the stop acknowledgement appears first but is followed by a trail
of tool-progress bubbles for calls that were already parsed from the LLM
response, making the interrupt feel ignored.
"""
import asyncio
import importlib
import sys
import time
import types
from types import SimpleNamespace
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter, SendResult
from gateway.session import SessionSource
class ProgressCaptureAdapter(BasePlatformAdapter):
    """Platform adapter stub that records sends, edits and typing calls in lists."""

    def __init__(self, platform=Platform.TELEGRAM):
        config = PlatformConfig(enabled=True, token="***")
        super().__init__(config, platform)
        # Recorded traffic, in call order, for the tests to inspect.
        self.sent = []
        self.edits = []
        self.typing = []

    async def connect(self) -> bool:
        """Report a successful connection without doing any work."""
        return True

    async def disconnect(self) -> None:
        """Do nothing."""
        return None

    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
        """Record the outgoing message and report success with a fixed message id."""
        record = dict(chat_id=chat_id, content=content)
        self.sent.append(record)
        return SendResult(success=True, message_id="progress-1")

    async def edit_message(self, chat_id, message_id, content) -> SendResult:
        """Record the edit and report success, echoing the given message id."""
        record = dict(message_id=message_id, content=content)
        self.edits.append(record)
        return SendResult(success=True, message_id=message_id)

    async def send_typing(self, chat_id, metadata=None) -> None:
        """Record which chat a typing indicator was requested for."""
        self.typing.append(chat_id)

    async def stop_typing(self, chat_id) -> None:
        """Do nothing."""
        return None

    async def get_chat_info(self, chat_id: str):
        """Return a minimal chat-info dict containing only the id."""
        return dict(id=chat_id)
class PreInterruptAgent:
    """Agent stub that emits one tool-progress event and is never interrupted.

    Serves as the baseline for the interrupted case: if this event does
    not render, the harness itself is broken.
    """

    def __init__(self, **kwargs):
        self.tool_progress_callback = kwargs.get("tool_progress_callback")
        self.tools = []
        self._interrupt_requested = False

    @property
    def is_interrupted(self) -> bool:
        """Whether an interrupt has been requested (always False for this stub)."""
        return self._interrupt_requested

    def run_conversation(self, message, conversation_history=None, task_id=None):
        """Emit a single ``tool.started`` event, pause briefly, return a canned result."""
        emit = self.tool_progress_callback
        emit("tool.started", "web_search", "first search", {})
        time.sleep(0.35)  # let the drain loop process
        return {"final_response": "done", "messages": [], "api_calls": 1}
class InterruptedAgent:
    """Agent stub that is already interrupted when it emits its tool events.

    Reproduces the reported failure mode: a batch of parallel web_search
    calls whose ``tool.started`` events fire after the interrupt flag is
    set. None of them should be rendered.
    """

    def __init__(self, **kwargs):
        self.tool_progress_callback = kwargs.get("tool_progress_callback")
        self.tools = []
        # The flag is set from construction, as if the stop had landed
        # before the batch began emitting tool.started events.
        self._interrupt_requested = True

    @property
    def is_interrupted(self) -> bool:
        """Whether an interrupt has been requested (always True for this stub)."""
        return self._interrupt_requested

    def run_conversation(self, message, conversation_history=None, task_id=None):
        """Emit five post-interrupt ``tool.started`` events, then a canned result."""
        # Stands in for one LLM response carrying 5 parallel tool_calls.
        post_interrupt_queries = (
            "cognee hermes",
            "McBee deer hunting",
            "kuzu graph db",
            "moonshot kimi api",
            "platform.moonshot.cn",
        )
        for query in post_interrupt_queries:
            self.tool_progress_callback("tool.started", "web_search", query, {})
        time.sleep(0.35)  # let the drain loop attempt to process the queue
        return {"final_response": "interrupted", "messages": [], "api_calls": 1}
def _make_runner(adapter):
    """Build a ``GatewayRunner`` without running ``__init__`` and wire in ``adapter``.

    The instance is created with ``object.__new__`` and then given the
    attributes assigned below.
    """
    runner_cls = importlib.import_module("gateway.run").GatewayRunner
    runner = object.__new__(runner_cls)

    initial_state = {
        "adapters": {adapter.platform: adapter},
        "_voice_mode": {},
        "_prefill_messages": [],
        "_ephemeral_system_prompt": "",
        "_reasoning_config": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_session_db": None,
        "_running_agents": {},
        "_session_run_generation": {},
        "hooks": SimpleNamespace(loaded_hooks=False),
        "config": SimpleNamespace(
            thread_sessions_per_user=False,
            group_sessions_per_user=False,
            stt_enabled=False,
        ),
    }
    for attr_name, attr_value in initial_state.items():
        setattr(runner, attr_name, attr_value)
    return runner
async def _run_once(monkeypatch, tmp_path, agent_cls, session_id):
    """Run one gateway turn with ``agent_cls`` standing in for ``AIAgent``.

    Returns ``(adapter, result)``: the capturing adapter and whatever
    ``runner._run_agent`` returned.
    """
    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")

    # Replace ``dotenv`` with a stub module whose load_dotenv is a no-op.
    dotenv_stub = types.ModuleType("dotenv")
    dotenv_stub.load_dotenv = lambda *args, **kwargs: None
    monkeypatch.setitem(sys.modules, "dotenv", dotenv_stub)

    # Replace ``run_agent`` so that its AIAgent name is the stub agent class.
    run_agent_stub = types.ModuleType("run_agent")
    run_agent_stub.AIAgent = agent_cls
    monkeypatch.setitem(sys.modules, "run_agent", run_agent_stub)

    adapter = ProgressCaptureAdapter()
    runner = _make_runner(adapter)

    gateway_run = importlib.import_module("gateway.run")
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    monkeypatch.setattr(
        gateway_run,
        "_resolve_runtime_agent_kwargs",
        lambda: {"api_key": "fake"},
    )

    group_thread_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1001",
        chat_type="group",
        thread_id="17585",
    )
    turn_result = await runner._run_agent(
        message="hi",
        context_prompt="",
        history=[],
        source=group_thread_source,
        session_id=session_id,
        session_key="agent:main:telegram:group:-1001:17585",
    )
    return adapter, turn_result
@pytest.mark.asyncio
async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path):
    """Sanity check: with no interrupt active, the tool-progress event is rendered."""
    adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline")
    assert result["final_response"] == "done"

    # Flatten everything that was sent or edited into one searchable string.
    sent_text = " ".join(item["content"] for item in adapter.sent)
    edited_text = " ".join(item["content"] for item in adapter.edits)
    rendered = sent_text + " " + edited_text

    assert "first search" in rendered, (
        "baseline agent should render its tool-progress event — "
        "if this fails the test harness is broken, not the fix"
    )
@pytest.mark.asyncio
async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path):
    """``tool.started`` events emitted after an interrupt must not be rendered.

    Scenario: the user sends ``stop`` and the agent acknowledges, but five
    more web_search bubbles would still appear because their events had
    already been parsed from the LLM response. None of those queries may
    show up in anything the adapter sent or edited.
    """
    adapter, result = await _run_once(
        monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted"
    )
    assert result["final_response"] == "interrupted"

    # Flatten everything that was sent or edited into one searchable string.
    sent_text = " ".join(item["content"] for item in adapter.sent)
    edited_text = " ".join(item["content"] for item in adapter.edits)
    rendered = sent_text + " " + edited_text

    post_interrupt_queries = (
        "cognee hermes",
        "McBee deer hunting",
        "kuzu graph db",
        "moonshot kimi api",
        "platform.moonshot.cn",
    )
    for leaked_query in post_interrupt_queries:
        assert leaked_query not in rendered, (
            f"event '{leaked_query}' leaked into the UI after interrupt — "
            f"progress_callback / drain loop is not checking is_interrupted"
        )

View file

@ -165,3 +165,26 @@ async def test_reasoning_rejected_mid_run():
assert result is not None
assert "can't run mid-turn" in result
assert "/reasoning" in result
@pytest.mark.asyncio
async def test_btw_dispatches_mid_run():
    """/btw sent mid-run must reach the /background handler, not the catch-all.

    /btw is an alias of /background (see hermes_cli/commands.py), and
    spawning a parallel background task mid-turn is the whole point of the
    command. Before /background was allowed to bypass the mid-turn guard,
    /btw fell through to the "Agent is running — wait or /stop first"
    catch-all. The alias and the bypass together make it work.
    """
    runner = _make_runner()
    background_handler = AsyncMock(
        return_value='🚀 Background task started: "what module owns titles?"'
    )
    runner._handle_background_command = background_handler

    btw_event = _make_event("/btw what module owns titles?")
    result = await runner._handle_message(btw_event)

    runner._handle_background_command.assert_awaited_once()
    assert result is not None
    assert "can't run mid-turn" not in result

View file

@ -147,7 +147,20 @@ class TestAppMentionHandler:
assert "app_mention" in registered_events
assert "assistant_thread_started" in registered_events
assert "assistant_thread_context_changed" in registered_events
assert "/hermes" in registered_commands
# Slack slash commands are registered via a single regex matcher
# covering every COMMAND_REGISTRY entry (e.g. /hermes, /btw, /stop,
# /model, ...) so users get native-slash parity with Discord and
# Telegram. Verify the regex matches the key expected slashes.
assert len(registered_commands) == 1, (
f"expected 1 combined slash matcher, got {registered_commands!r}"
)
slash_matcher = registered_commands[0]
import re as _re
assert isinstance(slash_matcher, _re.Pattern)
for expected in ("/hermes", "/btw", "/stop", "/model", "/help"):
assert slash_matcher.match(expected), (
f"Slack slash regex does not match {expected}"
)
class TestSlackConnectCleanup:
@ -1544,6 +1557,83 @@ class TestSlashCommands:
msg = adapter.handle_message.call_args[0][0]
assert msg.text == "/reasoning"
# ------------------------------------------------------------------
# Native slash commands — /btw, /stop, /model, ... dispatched directly
# instead of as /hermes subcommands. This is the Discord/Telegram parity
# fix: the slash name itself becomes the command.
# ------------------------------------------------------------------
@pytest.mark.asyncio
async def test_native_btw_slash(self, adapter):
    """/btw with args is delivered as "/btw <args>", not as a /hermes subcommand."""
    slash_payload = dict(
        command="/btw",
        text="fix the failing test",
        user_id="U1",
        channel_id="C1",
    )

    await adapter._handle_slash_command(slash_payload)

    # Only delivery is checked here: the handler must hand "/btw <args>"
    # to handle_message. Resolving /btw to /background happens later in
    # the gateway command dispatcher.
    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == "/btw fix the failing test"
@pytest.mark.asyncio
async def test_native_stop_slash_no_args(self, adapter):
    """/stop with empty text is delivered as the bare command."""
    slash_payload = dict(
        command="/stop",
        text="",
        user_id="U1",
        channel_id="C1",
    )

    await adapter._handle_slash_command(slash_payload)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == "/stop"
@pytest.mark.asyncio
async def test_native_model_slash_with_args(self, adapter):
    """/model with an argument is delivered as "/model <argument>"."""
    slash_payload = dict(
        command="/model",
        text="anthropic/claude-sonnet-4",
        user_id="U1",
        channel_id="C1",
    )

    await adapter._handle_slash_command(slash_payload)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == "/model anthropic/claude-sonnet-4"
@pytest.mark.asyncio
async def test_legacy_hermes_prefix_still_works(self, adapter):
    """Backward compat: ``/hermes btw foo`` must still be delivered as ``/btw foo``.

    Older workspace manifests declared /hermes as the only slash command.
    Users get /btw natively once they refresh their manifest, but the
    legacy form has to keep working during the transition.
    """
    slash_payload = dict(
        command="/hermes",
        text="btw run the tests",
        user_id="U1",
        channel_id="C1",
    )

    await adapter._handle_slash_command(slash_payload)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == "/btw run the tests"
@pytest.mark.asyncio
async def test_legacy_hermes_freeform_question(self, adapter):
    """/hermes followed by free-form text is delivered as that raw text."""
    slash_payload = dict(
        command="/hermes",
        text="what's the weather today?",
        user_id="U1",
        channel_id="C1",
    )

    await adapter._handle_slash_command(slash_payload)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == "what's the weather today?"
# ---------------------------------------------------------------------------
# TestMessageSplitting

View file

@ -177,6 +177,53 @@ class TestHandleVoiceCommand:
assert adapter._auto_tts_disabled_chats == {"123"}
def test_sync_populates_enabled_chats_from_voice_modes(self, runner):
    """Issue #16007: sync also restores per-chat /voice on|tts opt-ins.

    ``_auto_tts_enabled_chats`` on the adapter must mirror the chats whose
    persisted voice_mode is ``voice_only`` or ``all``. Without this,
    ``/voice on`` relied on a "not in disabled set" default that silently
    enabled auto-TTS for every chat.
    """
    from gateway.config import Platform

    runner._voice_mode = {
        "telegram:off_chat": "off",
        "telegram:on_chat": "voice_only",
        "telegram:tts_chat": "all",
        "slack:999": "voice_only",  # wrong platform, must be ignored
    }
    telegram_adapter = SimpleNamespace(
        _auto_tts_default=False,
        _auto_tts_disabled_chats=set(),
        _auto_tts_enabled_chats=set(),
        platform=Platform.TELEGRAM,
    )

    runner._sync_voice_mode_state_to_adapter(telegram_adapter)

    assert telegram_adapter._auto_tts_disabled_chats == {"off_chat"}
    assert telegram_adapter._auto_tts_enabled_chats == {"on_chat", "tts_chat"}
def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch):
    """Issue #16007: ``voice.auto_tts`` must propagate to ``_auto_tts_default``."""
    from gateway.config import Platform

    # Make load_config report auto-TTS as enabled in the config.
    config_with_auto_tts = {"voice": {"auto_tts": True}}
    monkeypatch.setattr(
        "hermes_cli.config.load_config",
        lambda: config_with_auto_tts,
    )
    telegram_adapter = SimpleNamespace(
        _auto_tts_default=False,
        _auto_tts_disabled_chats=set(),
        _auto_tts_enabled_chats=set(),
        platform=Platform.TELEGRAM,
    )

    runner._sync_voice_mode_state_to_adapter(telegram_adapter)

    assert telegram_adapter._auto_tts_default is True
def test_restart_restores_voice_off_state(self, runner, tmp_path):
from gateway.config import Platform
runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"}))
@ -2706,3 +2753,56 @@ class TestUDPKeepalive:
mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
finally:
DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
# =====================================================================
# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS
# on voice input. Regression test for Issue #16007.
# =====================================================================
class TestShouldAutoTtsForChat:
    """Three-layer gate: per-chat enable > per-chat disable > config default."""

    def _make_adapter(self, *, default: bool, enabled=(), disabled=()):
        """Return ``(gate_function, stub_adapter)`` for direct, unbound calls.

        The stub carries only the three attributes set below.
        """
        from gateway.platforms.base import BasePlatformAdapter

        stub = SimpleNamespace(
            _auto_tts_default=default,
            _auto_tts_enabled_chats=set(enabled),
            _auto_tts_disabled_chats=set(disabled),
        )
        # The method is fetched from the class and called with the stub as
        # ``self``, so no real adapter instance has to be constructed.
        return BasePlatformAdapter._should_auto_tts_for_chat, stub

    def test_default_false_no_override_suppresses(self):
        """Issue #16007: voice.auto_tts=False and no per-chat state → no TTS."""
        gate, stub = self._make_adapter(default=False)
        assert gate(stub, "chat1") is False

    def test_default_true_no_override_fires(self):
        """With a True default and no per-chat state, TTS fires."""
        gate, stub = self._make_adapter(default=True)
        assert gate(stub, "chat1") is True

    def test_explicit_enable_overrides_false_default(self):
        """``/voice on`` with config auto_tts=False still fires."""
        gate, stub = self._make_adapter(default=False, enabled={"chat1"})
        assert gate(stub, "chat1") is True

    def test_explicit_disable_overrides_true_default(self):
        """``/voice off`` with config auto_tts=True still suppresses."""
        gate, stub = self._make_adapter(default=True, disabled={"chat1"})
        assert gate(stub, "chat1") is False

    def test_enabled_wins_over_disabled(self):
        """A chat present in both sets is treated as enabled."""
        gate, stub = self._make_adapter(
            default=False, enabled={"chat1"}, disabled={"chat1"}
        )
        assert gate(stub, "chat1") is True

    def test_per_chat_isolation(self):
        """Enabling chat1 must not enable chat2."""
        gate, stub = self._make_adapter(default=False, enabled={"chat1"})
        assert gate(stub, "chat1") is True
        assert gate(stub, "chat2") is False

View file

@ -0,0 +1,152 @@
"""Regression test for the `/model` picker confirmation display.
Bug (April 2026): after choosing a model from the interactive `/model` picker,
``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
straight from models.dev, which always reports the vendor-wide value (e.g.
gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in
particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
``resolve_display_context_length()``; the picker path was missed, causing
"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
Fix: both display paths now go through ``resolve_display_context_length()``.
"""
from __future__ import annotations
from unittest.mock import patch
from hermes_cli.model_switch import ModelSwitchResult
class _FakeModelInfo:
context_window = 1_050_000
max_output = 0
def has_cost_data(self):
return False
def format_capabilities(self):
return ""
class _StubCLI:
"""Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
agent = None
model = ""
provider = ""
requested_provider = ""
api_key = ""
_explicit_api_key = ""
base_url = ""
_explicit_base_url = ""
api_mode = ""
_pending_model_switch_note = ""
def _run_display(monkeypatch, result):
    """Apply *result* through ``HermesCLI._apply_model_switch_result`` on a stub.

    Returns the list of strings the method passed to ``_cprint``.
    """
    import cli as cli_mod

    printed: list[str] = []

    def _record(s, *a, **k):
        # Keep only the first positional argument, stringified.
        printed.append(str(s))

    monkeypatch.setattr(cli_mod, "_cprint", _record)
    # Keep the test from writing to ~/.hermes/config.yaml.
    monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None)

    cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False)
    return printed
def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
    """On Codex the picker confirmation must show the resolver's 272K.

    The raw models.dev figure for gpt-5.5 (1.05M) must not appear.
    """
    switch_fields = dict(
        success=True,
        new_model="gpt-5.5",
        target_provider="openai-codex",
        provider_changed=True,
        api_key="",
        base_url="https://chatgpt.com/backend-api/codex",
        api_mode="codex_responses",
        warning_message="",
        provider_label="ChatGPT Codex",
        resolved_via_alias=False,
        capabilities=None,
        model_info=_FakeModelInfo(),  # models.dev says 1.05M
        is_global=False,
    )
    result = ModelSwitchResult(**switch_fields)

    resolver_stub = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=272_000,
    )
    with resolver_stub:
        printed = _run_display(monkeypatch, result)

    # First printed line mentioning the context size, or "" if none did.
    ctx_line = ""
    for line in printed:
        if "Context:" in line:
            ctx_line = line
            break

    assert "272,000" in ctx_line, (
        f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
    )
    assert "1,050,000" not in ctx_line, (
        f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
    )
def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
    """With no enforced provider cap (e.g. OpenRouter) the picker shows 1.05M.

    Here the resolver and models.dev agree on the gpt-5.5 context size.
    """
    switch_fields = dict(
        success=True,
        new_model="openai/gpt-5.5",
        target_provider="openrouter",
        provider_changed=True,
        api_key="",
        base_url="https://openrouter.ai/api/v1",
        api_mode="chat_completions",
        warning_message="",
        provider_label="OpenRouter",
        resolved_via_alias=False,
        capabilities=None,
        model_info=_FakeModelInfo(),
        is_global=False,
    )
    result = ModelSwitchResult(**switch_fields)

    resolver_stub = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=1_050_000,
    )
    with resolver_stub:
        printed = _run_display(monkeypatch, result)

    # First printed line mentioning the context size, or "" if none did.
    ctx_line = ""
    for line in printed:
        if "Context:" in line:
            ctx_line = line
            break

    assert "1,050,000" in ctx_line, (
        f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
    )
def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
    """When the resolver yields nothing, ``ModelInfo.context_window`` is shown.

    The user should see *something* rather than a silent blank.
    """
    switch_fields = dict(
        success=True,
        new_model="some-model",
        target_provider="some-provider",
        provider_changed=True,
        api_key="",
        base_url="",
        api_mode="chat_completions",
        warning_message="",
        provider_label="Some Provider",
        resolved_via_alias=False,
        capabilities=None,
        model_info=_FakeModelInfo(),  # context_window = 1_050_000
        is_global=False,
    )
    result = ModelSwitchResult(**switch_fields)

    resolver_stub = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=None,
    )
    with resolver_stub:
        printed = _run_display(monkeypatch, result)

    # First printed line mentioning the context size, or "" if none did.
    ctx_line = ""
    for line in printed:
        if "Context:" in line:
            ctx_line = line
            break

    assert "1,050,000" in ctx_line, (
        f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
    )

View file

@ -20,6 +20,8 @@ from hermes_cli.commands import (
discord_skill_commands,
gateway_help_lines,
resolve_command,
slack_app_manifest,
slack_native_slashes,
slack_subcommand_map,
telegram_bot_commands,
telegram_menu_commands,
@ -256,6 +258,115 @@ class TestSlackSubcommandMap:
assert cmd.name not in mapping
class TestSlackNativeSlashes:
    """Slack native slash command generation — used to register every
    COMMAND_REGISTRY entry as a first-class Slack slash, matching Discord
    and Telegram."""

    def test_returns_triples(self):
        """Each entry is a ``(name, description, usage_hint)`` tuple of strings."""
        slashes = slack_native_slashes()
        assert len(slashes) >= 10
        for entry in slashes:
            assert isinstance(entry, tuple) and len(entry) == 3
            name, desc, hint = entry
            assert isinstance(name, str) and name
            assert isinstance(desc, str)
            assert isinstance(hint, str)

    def test_hermes_catchall_is_first(self):
        """``/hermes`` must be reserved as the first slot so the legacy
        ``/hermes <subcommand>`` form keeps working after we add new
        commands and hit the 50-slash cap."""
        slashes = slack_native_slashes()
        assert slashes[0][0] == "hermes"

    def test_names_respect_slack_limits(self):
        """Names use only ASCII a-z, 0-9, ``-`` and ``_`` and are at most 32 chars."""
        import string

        # Explicit ASCII allow-list. ``str.isalnum()`` is Unicode-aware and
        # would also accept letters and digits outside ASCII, so it cannot be
        # used to enforce the character set described below.
        allowed = frozenset(string.ascii_lowercase + string.digits + "-_")
        for name, _desc, _hint in slack_native_slashes():
            # Slack: lowercase a-z, 0-9, hyphens, underscores; max 32 chars
            assert len(name) <= 32, f"slash {name!r} exceeds 32 chars"
            assert name == name.lower()
            for ch in name:
                assert ch in allowed, f"invalid char {ch!r} in {name!r}"

    def test_under_fifty_command_cap(self):
        """Slack allows at most 50 slash commands per app."""
        assert len(slack_native_slashes()) <= 50

    def test_unique_names(self):
        """No slash name is generated twice."""
        names = [n for n, _d, _h in slack_native_slashes()]
        assert len(names) == len(set(names)), "duplicate Slack slash names"

    def test_includes_canonical_commands(self):
        """A sample of canonical gateway commands is present."""
        names = {n for n, _d, _h in slack_native_slashes()}
        # Sample of gateway-available canonical commands
        for expected in ("new", "stop", "background", "model", "help", "status"):
            assert expected in names, f"missing canonical /{expected}"

    def test_includes_aliases_as_first_class_slashes(self):
        """Aliases (/btw, /bg, /reset, /q) must be registered as standalone
        slashes — this is the whole point of native-slashes parity."""
        names = {n for n, _d, _h in slack_native_slashes()}
        assert "btw" in names
        assert "bg" in names
        assert "reset" in names
        assert "q" in names

    def test_telegram_parity(self):
        """Every Telegram bot command must be registerable on Slack too.

        This catches the old behavior where Slack users couldn't invoke
        commands like /btw natively. If a future command surfaces on
        Telegram but not Slack (because of Slack's 50-slash cap), this
        test fails loudly so we can curate the list rather than silently
        dropping parity.
        """
        slack_names = {n for n, _d, _h in slack_native_slashes()}
        tg_names = {n for n, _d in telegram_bot_commands()}

        # Telegram names may carry underscores where Slack uses hyphens.
        # Map hyphens to underscores, collapse doubled underscores and trim
        # edge underscores on both sides before comparing.
        def _norm(s: str) -> str:
            return s.replace("-", "_").replace("__", "_").strip("_")

        slack_norm = {_norm(n) for n in slack_names}
        tg_norm = {_norm(n) for n in tg_names}
        missing = tg_norm - slack_norm
        assert not missing, (
            f"commands on Telegram but missing from Slack native slashes: {sorted(missing)}"
        )
class TestSlackAppManifest:
    """Generated Slack app manifest (used by `hermes slack manifest`)."""

    def test_returns_dict(self):
        """The manifest is a dict exposing ``features.slash_commands``."""
        manifest = slack_app_manifest()
        assert isinstance(manifest, dict)
        assert "features" in manifest
        assert "slash_commands" in manifest["features"]

    def test_each_slash_has_required_fields(self):
        """Every slash entry carries command, description, url and should_escape."""
        slash_entries = slack_app_manifest()["features"]["slash_commands"]
        for slash in slash_entries:
            assert slash["command"].startswith("/")
            # should_escape must be present (Slack defaults to True which
            # HTML-escapes args — we want the raw text).
            for required in ("description", "url", "should_escape"):
                assert required in slash

    def test_btw_is_in_manifest(self):
        """Regression: /btw must be a native Slack slash, not just a
        /hermes subcommand."""
        slash_entries = slack_app_manifest()["features"]["slash_commands"]
        assert any(slash["command"] == "/btw" for slash in slash_entries)

    def test_custom_request_url(self):
        """A caller-supplied request URL is applied to every slash entry."""
        target = "https://example.com/slack"
        manifest = slack_app_manifest(request_url=target)
        for slash in manifest["features"]["slash_commands"]:
            assert slash["url"] == "https://example.com/slack"
# ---------------------------------------------------------------------------
# Config-gated gateway commands
# ---------------------------------------------------------------------------

View file

@ -0,0 +1,486 @@
"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
from __future__ import annotations
import io
import types
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
# ---------------------------------------------------------------------------
# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path
# ---------------------------------------------------------------------------
@pytest.fixture()
def isolated_home(tmp_path, monkeypatch):
    """Point ``Path.home()`` and ``HERMES_HOME`` into ``tmp_path``; return ``tmp_path``."""
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir(exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Tests receive the fake home directory, not the .hermes directory.
    return tmp_path
def _write_config(home: Path, data: dict) -> None:
    """Dump *data* as YAML to ``<home>/.hermes/config.yaml``."""
    serialized = yaml.safe_dump(data)
    (home / ".hermes" / "config.yaml").write_text(serialized, encoding="utf-8")
def _read_config(home: Path) -> dict:
    """Load ``<home>/.hermes/config.yaml``; an empty document yields ``{}``."""
    raw_text = (home / ".hermes" / "config.yaml").read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    return parsed or {}
# ---------------------------------------------------------------------------
# _read_chain / _write_chain
# ---------------------------------------------------------------------------
class TestReadChain:
    """``_read_chain`` normalises the configured fallback chain into a list of dicts."""

    def test_returns_empty_list_when_unset(self):
        """An empty config yields an empty chain."""
        from hermes_cli.fallback_cmd import _read_chain

        assert _read_chain({}) == []

    def test_reads_new_list_format(self):
        """``fallback_providers`` entries are returned in order."""
        from hermes_cli.fallback_cmd import _read_chain

        first = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"}
        second = {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"}
        chain = _read_chain({"fallback_providers": [dict(first), dict(second)]})
        assert chain == [first, second]

    def test_migrates_legacy_single_dict(self):
        """A legacy ``fallback_model`` dict is surfaced as a one-entry chain."""
        from hermes_cli.fallback_cmd import _read_chain

        legacy = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
        chain = _read_chain(legacy)
        assert chain == [{"provider": "openrouter", "model": "gpt-5.4"}]

    def test_skips_incomplete_entries(self):
        """Entries missing provider or model, and non-dict entries, are dropped."""
        from hermes_cli.fallback_cmd import _read_chain

        noisy_entries = [
            {"provider": "openrouter"},            # missing model
            {"model": "gpt-5.4"},                  # missing provider
            {"provider": "nous", "model": "foo"},  # valid
            "not-a-dict",                          # noise
        ]
        chain = _read_chain({"fallback_providers": noisy_entries})
        assert chain == [{"provider": "nous", "model": "foo"}]

    def test_returns_copies_not_aliases(self):
        """Mutating the returned chain must not write through to the config."""
        from hermes_cli.fallback_cmd import _read_chain

        cfg = {"fallback_providers": [{"provider": "nous", "model": "foo"}]}
        chain = _read_chain(cfg)
        chain[0]["provider"] = "mutated"
        assert cfg["fallback_providers"][0]["provider"] == "nous"
# ---------------------------------------------------------------------------
# _extract_fallback_from_model_cfg
# ---------------------------------------------------------------------------
class TestExtractFallback:
    """``_extract_fallback_from_model_cfg`` turns a ``model`` config into a chain entry."""

    def test_extracts_from_default_field(self):
        """``default`` is surfaced under the ``model`` key."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg(
            {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}
        )
        assert extracted == {
            "provider": "openrouter",
            "model": "anthropic/claude-sonnet-4.6",
        }

    def test_extracts_optional_base_url_and_api_mode(self):
        """``base_url`` and ``api_mode`` are carried over when present."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg(
            {
                "provider": "custom",
                "default": "local-model",
                "base_url": "http://localhost:11434/v1",
                "api_mode": "chat_completions",
            }
        )
        assert extracted == {
            "provider": "custom",
            "model": "local-model",
            "base_url": "http://localhost:11434/v1",
            "api_mode": "chat_completions",
        }

    def test_returns_none_without_provider(self):
        """A config lacking ``provider`` yields ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg({"default": "foo"})
        assert extracted is None

    def test_returns_none_without_model(self):
        """A config lacking ``default`` yields ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg({"provider": "openrouter"})
        assert extracted is None

    def test_returns_none_for_non_dict(self):
        """Non-dict inputs yield ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        assert _extract_fallback_from_model_cfg("plain-string") is None
        assert _extract_fallback_from_model_cfg(None) is None
# ---------------------------------------------------------------------------
# cmd_fallback_list
# ---------------------------------------------------------------------------
class TestListCommand:
    """Output of ``cmd_fallback_list`` for empty, populated and legacy configs."""

    def test_list_empty(self, isolated_home, capsys):
        """An empty config prints the "nothing configured" hint."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())

        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed
        assert "hermes fallback add" in printed

    def test_list_with_entries(self, isolated_home, capsys):
        """Both fallbacks and the primary model appear in the listing."""
        config = {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
            "fallback_providers": [
                {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
                {"provider": "nous", "model": "Hermes-4"},
            ],
        }
        _write_config(isolated_home, config)
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())

        printed = capsys.readouterr().out
        assert "Fallback chain (2 entries)" in printed
        assert "anthropic/claude-sonnet-4.6" in printed
        assert "Hermes-4" in printed
        # Primary should be shown too
        assert "claude-sonnet-4-6" in printed

    def test_list_migrates_legacy_for_display(self, isolated_home, capsys):
        """A legacy ``fallback_model`` is listed as a single entry."""
        legacy = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
        _write_config(isolated_home, legacy)
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())

        printed = capsys.readouterr().out
        assert "1 entry" in printed
        assert "gpt-5.4" in printed
# ---------------------------------------------------------------------------
# cmd_fallback_add — mock select_provider_and_model
# ---------------------------------------------------------------------------
class TestAddCommand:
    """``cmd_fallback_add`` with the interactive provider/model picker mocked out."""

    @staticmethod
    def _picker_selecting(model_cfg):
        """Build a fake picker that stores a copy of *model_cfg* as ``config["model"]``.

        This mimics the real picker, which writes its selection to the config.
        """
        def fake_picker(args=None):
            from hermes_cli.config import load_config, save_config

            cfg = load_config()
            cfg["model"] = dict(model_cfg)
            save_config(cfg)

        return fake_picker

    @staticmethod
    def _invoke_add(picker):
        """Run ``cmd_fallback_add`` with *picker* as the picker and the TTY check stubbed."""
        with patch("hermes_cli.main.select_provider_and_model", side_effect=picker), \
                patch("hermes_cli.main._require_tty"):
            from hermes_cli.fallback_cmd import cmd_fallback_add

            cmd_fallback_add(types.SimpleNamespace())

    def test_add_appends_new_entry(self, isolated_home, capsys):
        """A fresh selection is appended and the primary is left alone."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        self._invoke_add(self._picker_selecting({
            "provider": "openrouter",
            "default": "anthropic/claude-sonnet-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        }))

        saved = _read_config(isolated_home)
        # Primary is preserved
        assert saved["model"]["provider"] == "anthropic"
        assert saved["model"]["default"] == "claude-sonnet-4-6"
        # Fallback was appended
        assert saved["fallback_providers"] == [
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "base_url": "https://openrouter.ai/api/v1",
                "api_mode": "chat_completions",
            }
        ]
        assert "Added fallback" in capsys.readouterr().out

    def test_add_rejects_duplicate(self, isolated_home, capsys):
        """Picking an entry already in the chain does not add a second copy."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
            "fallback_providers": [
                {"provider": "openrouter", "model": "gpt-5.4"},
            ],
        })

        self._invoke_add(
            self._picker_selecting({"provider": "openrouter", "default": "gpt-5.4"})
        )

        saved = _read_config(isolated_home)
        # Should still have exactly one entry
        assert len(saved["fallback_providers"]) == 1
        assert "already in the fallback chain" in capsys.readouterr().out

    def test_add_rejects_same_as_primary(self, isolated_home, capsys):
        """Picking the current primary is refused."""
        _write_config(isolated_home, {
            "model": {"provider": "openrouter", "default": "gpt-5.4"},
        })

        # User picks the same thing that's already the primary
        self._invoke_add(
            self._picker_selecting({"provider": "openrouter", "default": "gpt-5.4"})
        )

        saved = _read_config(isolated_home)
        assert saved.get("fallback_providers", []) == []
        assert "matches the current primary" in capsys.readouterr().out

    def test_add_preserves_primary_when_picker_changes_it(self, isolated_home):
        """The picker mutates config["model"]; fallback_add must restore the primary."""
        _write_config(isolated_home, {
            "model": {
                "provider": "anthropic",
                "default": "claude-sonnet-4-6",
                "base_url": "https://api.anthropic.com",
                "api_mode": "anthropic_messages",
            },
        })

        self._invoke_add(self._picker_selecting({
            "provider": "openrouter",
            "default": "anthropic/claude-sonnet-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        }))

        saved = _read_config(isolated_home)
        # Primary exactly as it was
        primary = saved["model"]
        assert primary["provider"] == "anthropic"
        assert primary["default"] == "claude-sonnet-4-6"
        assert primary["base_url"] == "https://api.anthropic.com"
        assert primary["api_mode"] == "anthropic_messages"
        # Fallback added
        assert len(saved["fallback_providers"]) == 1
        assert saved["fallback_providers"][0]["provider"] == "openrouter"

    def test_add_noop_when_picker_cancelled(self, isolated_home, capsys):
        """A picker that leaves the config untouched adds nothing."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        # User cancelled — no change to config
        self._invoke_add(lambda args=None: None)

        saved = _read_config(isolated_home)
        assert saved.get("fallback_providers", []) == []
        printed = capsys.readouterr().out
        # Either "No fallback added" (picker fully cancelled) or "matches the current primary"
        # (picker left config untouched) — both indicate a non-add outcome.
        assert ("No fallback added" in printed) or ("matches the current primary" in printed)

    def test_add_noop_when_picker_clears_model(self, isolated_home, capsys):
        """Simulate picker explicitly clearing model.default (unusual but possible)."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        self._invoke_add(self._picker_selecting({"provider": "", "default": ""}))

        assert "No fallback added" in capsys.readouterr().out
# ---------------------------------------------------------------------------
# cmd_fallback_remove
# ---------------------------------------------------------------------------
class TestRemoveCommand:
    """``cmd_fallback_remove`` with the curses chooser mocked out."""

    def test_remove_empty_chain(self, isolated_home, capsys):
        """With no chain configured the command reports there is nothing to remove."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_remove

        cmd_fallback_remove(types.SimpleNamespace())

        assert "nothing to remove" in capsys.readouterr().out

    def test_remove_selected_entry(self, isolated_home, capsys):
        """The chosen entry is dropped and the others keep their order."""
        head = {"provider": "openrouter", "model": "gpt-5.4"}
        middle = {"provider": "nous", "model": "Hermes-4"}
        tail = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
        _write_config(isolated_home, {"fallback_providers": [head, middle, tail]})

        # Picker returns index 1 (the middle entry, "nous / Hermes-4")
        chooser = patch("hermes_cli.setup._curses_prompt_choice", return_value=1)
        with chooser:
            from hermes_cli.fallback_cmd import cmd_fallback_remove

            cmd_fallback_remove(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert saved["fallback_providers"] == [
            {"provider": "openrouter", "model": "gpt-5.4"},
            {"provider": "anthropic", "model": "claude-sonnet-4-6"},
        ]
        printed = capsys.readouterr().out
        assert "Removed fallback" in printed
        assert "Hermes-4" in printed

    def test_remove_cancel_keeps_chain(self, isolated_home):
        """Choosing the trailing Cancel item leaves the chain as it was."""
        _write_config(isolated_home, {
            "fallback_providers": [
                {"provider": "openrouter", "model": "gpt-5.4"},
            ],
        })

        # Cancel = last item (index == len(chain) == 1 in our menu)
        chooser = patch("hermes_cli.setup._curses_prompt_choice", return_value=1)
        with chooser:
            from hermes_cli.fallback_cmd import cmd_fallback_remove

            cmd_fallback_remove(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert len(saved["fallback_providers"]) == 1
# ---------------------------------------------------------------------------
# cmd_fallback_clear
# ---------------------------------------------------------------------------
class TestClearCommand:
    """``cmd_fallback_clear`` with the confirmation prompt answered by a stub."""

    def test_clear_empty_chain(self, isolated_home, capsys):
        """With no chain configured the command reports there is nothing to clear."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        assert "nothing to clear" in capsys.readouterr().out

    def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch):
        """Answering "y" empties the stored chain."""
        chain = [
            {"provider": "openrouter", "model": "gpt-5.4"},
            {"provider": "nous", "model": "Hermes-4"},
        ]
        _write_config(isolated_home, {"fallback_providers": chain})
        monkeypatch.setattr("builtins.input", lambda *a, **kw: "y")
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert saved.get("fallback_providers") == []
        assert "Fallback chain cleared" in capsys.readouterr().out

    def test_clear_cancelled(self, isolated_home, monkeypatch):
        """Answering "n" leaves the stored chain untouched."""
        chain = [{"provider": "openrouter", "model": "gpt-5.4"}]
        _write_config(isolated_home, {"fallback_providers": chain})
        monkeypatch.setattr("builtins.input", lambda *a, **kw: "n")
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert len(saved["fallback_providers"]) == 1
# ---------------------------------------------------------------------------
# cmd_fallback dispatcher
# ---------------------------------------------------------------------------
class TestDispatcher:
    """Routing of ``fallback_command`` values by ``cmd_fallback``."""

    def test_no_subcommand_lists(self, isolated_home, capsys):
        """No subcommand behaves like ``list``."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command=None)
        cmd_fallback(args)

        assert "No fallback providers configured" in capsys.readouterr().out

    def test_list_alias(self, isolated_home, capsys):
        """``ls`` is accepted as an alias of ``list``."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command="ls")
        cmd_fallback(args)

        assert "No fallback providers configured" in capsys.readouterr().out

    def test_remove_alias(self, isolated_home, capsys):
        """``rm`` is accepted as an alias of ``remove``."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command="rm")
        cmd_fallback(args)

        assert "nothing to remove" in capsys.readouterr().out

    def test_unknown_subcommand_exits(self, isolated_home):
        """An unrecognised subcommand raises ``SystemExit``."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command="nope")
        with pytest.raises(SystemExit):
            cmd_fallback(args)
# ---------------------------------------------------------------------------
# argparse wiring — verify the subparser is registered
# ---------------------------------------------------------------------------
class TestArgparseWiring:
    """Verify `hermes fallback` is wired into main.py's argparse tree.

    main() builds the parser inline, so the CLI is launched in a subprocess
    with --help to inspect the registered subcommands without side effects.
    """

    def test_fallback_help_lists_subcommands(self):
        """``hermes fallback --help`` exits 0 and names all four subcommands."""
        import subprocess
        import sys

        command = [sys.executable, "-m", "hermes_cli.main", "fallback", "--help"]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
        )

        # --help exits 0
        assert completed.returncode == 0, f"stderr: {completed.stderr}"

        combined = completed.stdout + completed.stderr
        # All four subcommands should appear in help
        for subcommand in ("list", "add", "remove", "clear"):
            assert subcommand in combined

View file

@ -0,0 +1,284 @@
"""Tests for hermes_cli.model_catalog — remote manifest fetch + cache + fallback."""
from __future__ import annotations
import json
import time
from pathlib import Path
from unittest.mock import patch
import pytest
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
    """Give each test its own HERMES_HOME and a freshly reloaded catalog module."""
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Reload only after the environment is redirected so the module starts
    # from a clean state for this test.
    import importlib
    from hermes_cli import model_catalog

    importlib.reload(model_catalog)

    yield hermes_dir

    # Teardown: clear the catalog cache once the test has finished.
    model_catalog.reset_cache()
def _valid_manifest() -> dict:
return {
"version": 1,
"updated_at": "2026-04-25T22:00:00Z",
"metadata": {"source": "test"},
"providers": {
"openrouter": {
"metadata": {"display_name": "OpenRouter"},
"models": [
{"id": "anthropic/claude-opus-4.7", "description": "recommended"},
{"id": "openai/gpt-5.4", "description": ""},
{"id": "openrouter/elephant-alpha", "description": "free"},
],
},
"nous": {
"metadata": {"display_name": "Nous Portal"},
"models": [
{"id": "anthropic/claude-opus-4.7"},
{"id": "moonshotai/kimi-k2.6"},
],
},
},
}
class TestValidation:
    """``_validate_manifest`` accepts the baseline manifest and rejects broken variants."""

    def test_accepts_well_formed_manifest(self, isolated_home):
        """The baseline manifest validates."""
        from hermes_cli.model_catalog import _validate_manifest

        assert _validate_manifest(_valid_manifest()) is True

    def test_rejects_non_dict(self, isolated_home):
        """Strings, lists and ``None`` are all rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        assert _validate_manifest("string") is False
        assert _validate_manifest([]) is False
        assert _validate_manifest(None) is False

    def test_rejects_missing_version(self, isolated_home):
        """A manifest without ``version`` is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        broken = _valid_manifest()
        broken.pop("version")
        assert _validate_manifest(broken) is False

    def test_rejects_future_version(self, isolated_home):
        """Version 999 is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        broken = _valid_manifest()
        broken["version"] = 999
        assert _validate_manifest(broken) is False

    def test_rejects_missing_providers(self, isolated_home):
        """A manifest without ``providers`` is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        broken = _valid_manifest()
        broken.pop("providers")
        assert _validate_manifest(broken) is False

    def test_rejects_malformed_model_entry(self, isolated_home):
        """A model entry whose id is the empty string is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        broken = _valid_manifest()
        broken["providers"]["openrouter"]["models"][0] = {"id": ""}  # empty id
        assert _validate_manifest(broken) is False

    def test_rejects_non_string_model_id(self, isolated_home):
        """A model entry whose id is not a string is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        broken = _valid_manifest()
        broken["providers"]["openrouter"]["models"][0] = {"id": 42}
        assert _validate_manifest(broken) is False
class TestFetchSuccess:
    """``get_catalog`` behaviour when the manifest fetch succeeds."""

    def test_fetch_and_cache_writes_disk(self, isolated_home):
        """A successful fetch is returned and also written to the cache file."""
        from hermes_cli import model_catalog

        expected = _valid_manifest()
        fetch_stub = patch.object(model_catalog, "_fetch_manifest", return_value=expected)
        with fetch_stub as fetch:
            catalog = model_catalog.get_catalog(force_refresh=True)

        assert catalog == expected
        assert fetch.called

        cache_file = model_catalog._cache_path()
        assert cache_file.exists()
        assert json.loads(cache_file.read_text()) == expected

    def test_second_call_uses_in_process_cache(self, isolated_home):
        """A plain second call is served without fetching again."""
        from hermes_cli import model_catalog

        fetch_stub = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_stub as fetch:
            model_catalog.get_catalog(force_refresh=True)
            model_catalog.get_catalog()  # should not hit network again

        assert fetch.call_count == 1

    def test_force_refresh_always_refetches(self, isolated_home):
        """Each ``force_refresh=True`` call triggers its own fetch."""
        from hermes_cli import model_catalog

        fetch_stub = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_stub as fetch:
            for _ in range(2):
                model_catalog.get_catalog(force_refresh=True)

        assert fetch.call_count == 2
class TestFetchFailure:
    """``get_catalog`` behaviour when the manifest fetch returns ``None``."""

    def test_network_failure_returns_empty_when_no_cache(self, isolated_home):
        """With no cache to fall back on, a failed fetch yields ``{}``."""
        from hermes_cli import model_catalog

        failing_fetch = patch.object(model_catalog, "_fetch_manifest", return_value=None)
        with failing_fetch:
            catalog = model_catalog.get_catalog(force_refresh=True)

        assert catalog == {}

    def test_network_failure_falls_back_to_disk_cache(self, isolated_home):
        """A failed refetch is answered from the previously written disk cache."""
        from hermes_cli import model_catalog

        # Prime disk cache with a fresh copy.
        expected = _valid_manifest()
        with patch.object(model_catalog, "_fetch_manifest", return_value=expected):
            model_catalog.get_catalog(force_refresh=True)

        # Now wipe in-process cache and simulate network failure on refetch.
        model_catalog.reset_cache()
        failing_fetch = patch.object(model_catalog, "_fetch_manifest", return_value=None)
        with failing_fetch:
            catalog = model_catalog.get_catalog(force_refresh=True)

        assert catalog == expected

    def test_fetch_failure_falls_back_to_stale_cache(self, isolated_home):
        """A 30-day-old cache file is still used when the fetch fails."""
        import os

        from hermes_cli import model_catalog

        expected = _valid_manifest()

        # Write stale cache directly (mtime in the past).
        cache_file = model_catalog._cache_path()
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(json.dumps(expected))
        thirty_days_ago = time.time() - 30 * 24 * 3600
        os.utime(cache_file, (thirty_days_ago, thirty_days_ago))

        failing_fetch = patch.object(model_catalog, "_fetch_manifest", return_value=None)
        with failing_fetch:
            catalog = model_catalog.get_catalog()

        # Stale cache is better than nothing.
        assert catalog == expected
class TestCuratedAccessors:
    """Per-provider curated accessors built on top of the catalog."""

    def test_openrouter_returns_tuples(self, isolated_home):
        """OpenRouter models come back as ``(id, description)`` pairs in manifest order."""
        from hermes_cli import model_catalog

        fetch_stub = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_stub:
            curated = model_catalog.get_curated_openrouter_models()

        assert curated == [
            ("anthropic/claude-opus-4.7", "recommended"),
            ("openai/gpt-5.4", ""),
            ("openrouter/elephant-alpha", "free"),
        ]

    def test_nous_returns_ids(self, isolated_home):
        """Nous models come back as bare ids in manifest order."""
        from hermes_cli import model_catalog

        fetch_stub = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_stub:
            curated = model_catalog.get_curated_nous_models()

        assert curated == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]

    def test_openrouter_returns_none_when_catalog_empty(self, isolated_home):
        """With nothing fetched, the OpenRouter accessor returns ``None``."""
        from hermes_cli import model_catalog

        failing_fetch = patch.object(model_catalog, "_fetch_manifest", return_value=None)
        with failing_fetch:
            assert model_catalog.get_curated_openrouter_models() is None

    def test_nous_returns_none_when_catalog_empty(self, isolated_home):
        """With nothing fetched, the Nous accessor returns ``None``."""
        from hermes_cli import model_catalog

        failing_fetch = patch.object(model_catalog, "_fetch_manifest", return_value=None)
        with failing_fetch:
            assert model_catalog.get_curated_nous_models() is None
class TestDisabled:
    """Catalog behaviour when the config has ``enabled: False``."""

    def test_disabled_config_short_circuits(self, isolated_home):
        """A disabled catalog returns ``{}`` and never calls the fetcher."""
        from hermes_cli import model_catalog

        disabled_cfg = {
            "enabled": False,
            "url": "http://ignored",
            "ttl_hours": 24.0,
            "providers": {},
        }
        cfg_patch = patch.object(
            model_catalog, "_load_catalog_config", return_value=disabled_cfg
        )
        fetch_patch = patch.object(model_catalog, "_fetch_manifest")
        with cfg_patch, fetch_patch as fetch_mock:
            catalog = model_catalog.get_catalog()
            assert catalog == {}
            fetch_mock.assert_not_called()
class TestProviderOverride:
    """Per-provider override URLs configured under ``providers``."""

    def test_override_url_takes_precedence(self, isolated_home):
        """Models for openrouter come from its override URL, not the master."""
        from hermes_cli import model_catalog

        override_models = [{"id": "override/model", "description": "custom"}]
        override_payload = {
            "version": 1,
            "providers": {"openrouter": {"models": override_models}},
        }

        def fake_fetch(url, timeout):
            # Only the override URL serves the custom payload.
            return override_payload if "override" in url else _valid_manifest()

        catalog_cfg = {
            "enabled": True,
            "url": "http://master",
            "ttl_hours": 24.0,
            "providers": {"openrouter": {"url": "http://override"}},
        }
        cfg_patch = patch.object(
            model_catalog, "_load_catalog_config", return_value=catalog_cfg
        )
        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", side_effect=fake_fetch
        )
        with cfg_patch, fetch_patch:
            curated = model_catalog.get_curated_openrouter_models()
        assert curated == [("override/model", "custom")]
class TestIntegrationWithModelsModule:
    """Exercise the fallback paths via the real callers in hermes_cli.models."""

    def test_curated_nous_ids_falls_back_to_hardcoded_on_empty_catalog(
        self, isolated_home
    ):
        """With no manifest, the ids equal the hardcoded ``nous`` list."""
        from hermes_cli import model_catalog
        from hermes_cli.models import _PROVIDER_MODELS, get_curated_nous_model_ids

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            ids = get_curated_nous_model_ids()
        hardcoded = list(_PROVIDER_MODELS["nous"])
        assert ids == hardcoded

    def test_curated_nous_ids_prefers_manifest(self, isolated_home):
        """With a valid manifest, the ids come from the manifest."""
        from hermes_cli import model_catalog
        from hermes_cli.models import get_curated_nous_model_ids

        manifest = _valid_manifest()
        with patch.object(model_catalog, "_fetch_manifest", return_value=manifest):
            ids = get_curated_nous_model_ids()
        assert ids == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]

View file

@ -56,7 +56,7 @@ def three_source_env(monkeypatch, hub_env):
import tools.skills_tool as skills_tool
monkeypatch.setattr(hub, "HubLockFile", lambda: _DummyLockFile([_HUB_ENTRY]))
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: list(_ALL_THREE_SKILLS))
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: list(_ALL_THREE_SKILLS))
monkeypatch.setattr(skills_sync, "_read_manifest", lambda: dict(_BUILTIN_MANIFEST))
return hub_env
@ -107,7 +107,7 @@ def test_do_list_initializes_hub_dir(monkeypatch, hub_env):
import tools.skills_sync as skills_sync
import tools.skills_tool as skills_tool
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: [])
monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: [])
monkeypatch.setattr(skills_sync, "_read_manifest", lambda: {})
hub_dir = hub_env
@ -154,6 +154,74 @@ def test_do_list_filter_builtin(three_source_env):
assert "local-skill" not in output
def test_do_list_renders_status_column(three_source_env, monkeypatch):
    """With nothing disabled, the list shows a Status column and counts
    all three skills as enabled."""
    from agent import skill_utils

    def _nothing_disabled(platform=None):
        return set()

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _nothing_disabled)

    rendered = _capture()

    assert "Status" in rendered
    assert "enabled" in rendered.lower()
    # The summary line counts enabled vs. disabled skills.
    assert "3 enabled, 0 disabled" in rendered
def test_do_list_marks_disabled_skills(three_source_env, monkeypatch):
    """A disabled skill is still listed, marked disabled, and counted."""
    from agent import skill_utils

    # Equivalent of `skills.disabled: [hub-skill]` in config.
    def _hub_skill_disabled(platform=None):
        return {"hub-skill"}

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _hub_skill_disabled)

    rendered = _capture()

    # Without --enabled-only the row stays, carrying a disabled marker.
    assert "hub-skill" in rendered
    assert "disabled" in rendered.lower()
    assert "2 enabled, 1 disabled" in rendered
def test_do_list_enabled_only_hides_disabled(three_source_env, monkeypatch):
    """``enabled_only=True`` drops disabled rows and adjusts the summary."""
    from agent import skill_utils

    def _hub_skill_disabled(platform=None):
        return {"hub-skill"}

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _hub_skill_disabled)

    buffer = StringIO()
    do_list(
        enabled_only=True,
        console=Console(file=buffer, force_terminal=False, color_system=None),
    )
    rendered = buffer.getvalue()

    assert "hub-skill" not in rendered
    for visible in ("builtin-skill", "local-skill"):
        assert visible in rendered
    assert "enabled only" in rendered.lower()
    assert "2 enabled shown" in rendered
def test_do_list_platform_env_is_ignored(three_source_env, monkeypatch):
    """`hermes skills list` reads the active profile's config via
    HERMES_HOME (swapped by -p), so it must NOT pass a platform arg to
    ``get_disabled_skill_names`` — otherwise per-platform overrides
    would silently leak in from HERMES_PLATFORM env."""
    from agent import skill_utils

    # Put a platform in the environment so the assertion below can only hold
    # if the list command does not forward it. With the variable unset, a
    # caller that forwarded the env value would also pass None and the test
    # would succeed vacuously.
    monkeypatch.setenv("HERMES_PLATFORM", "telegram")

    seen = {}

    def _fake(platform=None):
        # Record whatever platform argument the caller supplied.
        seen["platform"] = platform
        return set()

    monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _fake)
    _capture()
    assert seen["platform"] is None
def test_do_check_reports_available_updates(monkeypatch):
output = _capture_check(monkeypatch, [
{"name": "hub-skill", "source": "skills.sh", "status": "update_available"},

View file

@ -0,0 +1,78 @@
"""Behavior tests for the class-first skill review prompts.
The skill review / combined review prompts steer the background review agent
toward generalizing existing skills rather than accumulating near-duplicates.
These tests assert the behavioral *instructions* are present — they do NOT
snapshot the full prompt text (change-detector).
"""
from run_agent import AIAgent
def test_skill_review_prompt_instructs_survey_first():
    """The skill review prompt must name the survey step and its tools."""
    review_prompt = AIAgent._SKILL_REVIEW_PROMPT
    required = (
        ("skills_list", "must instruct the reviewer to call skills_list"),
        ("skill_view", "must instruct the reviewer to skill_view candidates"),
        ("SURVEY", "must name the survey step explicitly"),
    )
    for needle, why in required:
        assert needle in review_prompt, why
def test_skill_review_prompt_is_class_first():
    """The skill review prompt must talk about the task CLASS and class-level naming."""
    review_prompt = AIAgent._SKILL_REVIEW_PROMPT
    required = (
        ("CLASS", "must tell the reviewer to think about the task class"),
        ("class level", "must anchor naming at the class level"),
    )
    for needle, why in required:
        assert needle in review_prompt, why
def test_skill_review_prompt_prefers_updating_existing():
    """The prompt must favour changing an existing skill and gate new ones."""
    review_prompt = AIAgent._SKILL_REVIEW_PROMPT

    # Either wording of the preference is accepted.
    states_preference = any(
        phrase in review_prompt
        for phrase in ("PREFER GENERALIZING", "PREFER UPDATING")
    )
    assert states_preference, "must state the update-over-create preference"

    gates_creation = "ONLY CREATE A NEW SKILL" in review_prompt
    assert gates_creation, "must gate new-skill creation behind a last-resort clause"
def test_skill_review_prompt_flags_overlap_for_followup():
    """The prompt must mention overlap (case-insensitively)."""
    lowered_prompt = AIAgent._SKILL_REVIEW_PROMPT.lower()
    mentions_overlap = "overlap" in lowered_prompt
    assert mentions_overlap, "must mention the overlap-flagging protocol"
def test_skill_review_prompt_preserves_opt_out_clause():
    """The skill review prompt still contains the 'Nothing to save.' clause."""
    opt_out = "Nothing to save."
    assert opt_out in AIAgent._SKILL_REVIEW_PROMPT
def test_combined_review_prompt_keeps_memory_section():
    """The combined prompt still has a Memory section and names the memory tool."""
    combined = AIAgent._COMBINED_REVIEW_PROMPT
    for needle in ("**Memory**", "memory tool"):
        assert needle in combined
def test_combined_review_prompt_skills_section_is_class_first():
    """The combined prompt carries the same skills-protocol markers."""
    combined = AIAgent._COMBINED_REVIEW_PROMPT
    markers = (
        "**Skills**",
        "SURVEY",
        "CLASS",
        "skills_list",
        "ONLY CREATE A NEW SKILL",
    )
    for marker in markers:
        assert marker in combined
def test_combined_review_prompt_preserves_opt_out_clause():
    """The combined prompt still contains the 'Nothing to save.' clause."""
    opt_out = "Nothing to save."
    assert opt_out in AIAgent._COMBINED_REVIEW_PROMPT
def test_memory_review_prompt_unchanged_in_structure():
    """The memory-only prompt names the memory tool and no skills machinery."""
    memory_prompt = AIAgent._MEMORY_REVIEW_PROMPT
    # Guardrail: skills/survey vocabulary must stay out of this prompt.
    for forbidden in ("skills_list", "SURVEY"):
        assert forbidden not in memory_prompt
    assert "memory tool" in memory_prompt

View file

@ -1485,6 +1485,48 @@ class TestListSessionsRich:
assert "\n" not in sessions[0]["preview"]
assert "Line one Line two" in sessions[0]["preview"]
def test_branch_session_visible_in_list(self, db):
    """A child whose parent ended with 'branched' appears in list_sessions_rich."""
    db.create_session("parent", "cli")
    db.end_session("parent", "branched")
    db.create_session("branch", "cli", parent_session_id="parent")
    db.append_message("branch", "user", "Exploring the alternative approach")

    listed_ids = [row["id"] for row in db.list_sessions_rich()]

    assert "branch" in listed_ids, "Branch session should be visible in default list"
def test_subagent_session_still_hidden(self, db):
    """A child whose parent was never ended as 'branched' stays out of the list."""
    db.create_session("root", "cli")
    db.create_session("delegate", "cli", parent_session_id="root")

    listed_ids = [row["id"] for row in db.list_sessions_rich()]

    assert "delegate" not in listed_ids, "Delegate sub-agent should not appear in default list"
    assert "root" in listed_ids
def test_compression_child_still_hidden(self, db):
    """A child of a parent ended with 'compression' stays out of the list."""
    import time as _time

    base = _time.time()
    conn = db._conn

    # Parent: started at `base`, ended 30 minutes later due to compression.
    db.create_session("root", "cli")
    conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (base, "root"))
    conn.execute(
        "UPDATE sessions SET ended_at=?, end_reason='compression' WHERE id=?",
        (base + 1800, "root"),
    )
    conn.commit()

    # Child: starts one second after the parent ended.
    db.create_session("continuation", "cli", parent_session_id="root")
    conn.execute(
        "UPDATE sessions SET started_at=? WHERE id=?", (base + 1801, "continuation")
    )
    conn.commit()

    rows = db.list_sessions_rich(project_compression_tips=False)
    listed_ids = [row["id"] for row in rows]
    assert "continuation" not in listed_ids, "Compression continuation should stay hidden"
class TestCompressionChainProjection:
"""Tests for lineage-aware list_sessions_rich — compressed conversations

View file

@ -1835,3 +1835,112 @@ def test_model_options_propagates_list_exception(monkeypatch):
assert "error" in resp
assert resp["error"]["code"] == 5033
assert "catalog blew up" in resp["error"]["message"]
# ---------------------------------------------------------------------------
# prompt.submit — auto-title
# ---------------------------------------------------------------------------
class _ImmediateThread:
    """Synchronous stand-in for ``threading.Thread``.

    ``start()`` runs the target in the calling thread, so assertions can
    follow immediately without joining.

    The constructor accepts ``target``, ``daemon``, ``args``, ``kwargs`` and
    ``name`` like ``threading.Thread`` does; ``daemon`` and ``name`` are
    accepted and ignored. As with ``Thread.run``, a missing target makes
    ``start()`` a no-op instead of raising ``TypeError``.
    """

    def __init__(self, target=None, daemon=None, args=(), kwargs=None, name=None):
        self._target = target
        self._args = tuple(args)
        # Copy so later mutation of the caller's dict cannot change the call.
        self._kwargs = dict(kwargs) if kwargs else {}

    def start(self):
        """Invoke the target synchronously, if one was given."""
        if self._target is not None:
            self._target(*self._args, **self._kwargs)
def test_prompt_submit_auto_titles_session_on_complete(monkeypatch):
    """A completed prompt triggers exactly one maybe_auto_title call."""

    class _Agent:
        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
            transcript = [
                {"role": "user", "content": "Tell me about Rome"},
                {"role": "assistant", "content": "Rome was founded in 753 BC."},
            ]
            return {
                "final_response": "Rome was founded in 753 BC.",
                "messages": transcript,
            }

    server._sessions["sid"] = _session(agent=_Agent())

    # Run the worker inline and silence rendering, emission and persistence.
    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
    monkeypatch.setattr(server, "_get_db", lambda: None)

    request = {
        "id": "1",
        "method": "prompt.submit",
        "params": {"session_id": "sid", "text": "Tell me about Rome"},
    }
    with patch("agent.title_generator.maybe_auto_title") as title_mock:
        server.handle_request(request)

    title_mock.assert_called_once()
    positional = title_mock.call_args.args
    assert positional[1] == "session-key"
    assert positional[2] == "Tell me about Rome"
    assert positional[3] == "Rome was founded in 753 BC."
def test_prompt_submit_skips_auto_title_when_interrupted(monkeypatch):
    """An interrupted run must not reach maybe_auto_title."""

    class _Agent:
        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
            outcome = {"final_response": "partial answer"}
            outcome["interrupted"] = True
            outcome["messages"] = []
            return outcome

    server._sessions["sid"] = _session(agent=_Agent())

    # Run the worker inline and silence rendering, emission and persistence.
    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
    monkeypatch.setattr(server, "_get_db", lambda: None)

    request = {
        "id": "1",
        "method": "prompt.submit",
        "params": {"session_id": "sid", "text": "Tell me about Rome"},
    }
    with patch("agent.title_generator.maybe_auto_title") as title_mock:
        server.handle_request(request)

    title_mock.assert_not_called()
def test_prompt_submit_skips_auto_title_when_response_empty(monkeypatch):
    """An empty final response must not reach maybe_auto_title."""

    class _Agent:
        def run_conversation(self, prompt, conversation_history=None, stream_callback=None):
            return dict(final_response="", messages=[])

    server._sessions["sid"] = _session(agent=_Agent())

    # Run the worker inline and silence rendering, emission and persistence.
    monkeypatch.setattr(server.threading, "Thread", _ImmediateThread)
    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)
    monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None)
    monkeypatch.setattr(server, "render_message", lambda raw, cols: None)
    monkeypatch.setattr(server, "_get_db", lambda: None)

    request = {
        "id": "1",
        "method": "prompt.submit",
        "params": {"session_id": "sid", "text": "Tell me about Rome"},
    }
    with patch("agent.title_generator.maybe_auto_title") as title_mock:
        server.handle_request(request)

    title_mock.assert_not_called()

View file

@ -0,0 +1,248 @@
"""Tests for hybrid browser-backend routing (LAN/localhost auto-local).
When a cloud browser provider (Browserbase / Browser-Use / Firecrawl) is
configured globally, ``browser.auto_local_for_private_urls`` (default True)
causes ``browser_navigate`` to transparently spawn a local Chromium sidecar
for URLs whose host resolves to a private/loopback/LAN address, while
public URLs continue to hit the cloud session in the same conversation.
These tests cover the routing decision layer — session_key selection,
sidecar detection, last-active-session tracking, and the config toggle.
The downstream session creation is covered by test_browser_cloud_fallback.py.
"""
from unittest.mock import Mock
import pytest
import tools.browser_tool as browser_tool
@pytest.fixture(autouse=True)
def _reset_routing_state(monkeypatch):
    """Give every test fresh module-level state in ``browser_tool``."""
    baseline = {
        # Session bookkeeping and resolved-config caches.
        "_active_sessions": {},
        "_last_active_session_key": {},
        "_cached_cloud_provider": None,
        "_cloud_provider_resolved": False,
        "_auto_local_for_private_urls_resolved": False,
        "_cached_auto_local_for_private_urls": True,
        # Stub out the cleanup thread and activity tracking.
        "_start_browser_cleanup_thread": lambda: None,
        "_update_session_activity": lambda t: None,
        # Default: no CDP override, no Camofox.
        "_get_cdp_override": lambda: None,
        "_is_camofox_mode": lambda: False,
    }
    for attr_name, replacement in baseline.items():
        monkeypatch.setattr(browser_tool, attr_name, replacement)
class TestNavigationSessionKey:
    """Tests for _navigation_session_key URL-based routing decisions."""

    @staticmethod
    def _configure_cloud(monkeypatch):
        """Make ``_get_cloud_provider`` return a mock provider."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock())

    def test_public_url_uses_bare_task_id(self, monkeypatch):
        """Public URL with cloud provider configured → bare task_id (cloud)."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key("default", "https://github.com/x/y")
        assert routed == "default"

    def test_localhost_routes_to_local_sidecar(self, monkeypatch):
        """``localhost`` URL → ``::local`` suffix when cloud configured + flag on."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert routed == "default::local"

    def test_loopback_ipv4_routes_to_local_sidecar(self, monkeypatch):
        """``127.0.0.1`` → ``::local`` suffix."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key("default", "http://127.0.0.1:8080/")
        assert routed == "default::local"

    def test_rfc1918_lan_routes_to_local_sidecar(self, monkeypatch):
        """``192.168.x.x`` → ``::local`` suffix."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key("default", "http://192.168.1.50:8000/")
        assert routed == "default::local"

    def test_ipv6_loopback_routes_to_local_sidecar(self, monkeypatch):
        """``[::1]`` → ``::local`` suffix."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key("default", "http://[::1]:3000/")
        assert routed == "default::local"

    def test_public_ip_literal_uses_bare_task_id(self, monkeypatch):
        """A public IP literal (``8.8.8.8``) keeps the bare task_id."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key("default", "https://8.8.8.8/")
        assert routed == "default"

    def test_mdns_local_hostname_routes_to_sidecar(self, monkeypatch):
        """``*.local`` mDNS / ``*.lan`` / ``*.internal`` hostnames route to sidecar."""
        self._configure_cloud(monkeypatch)
        private_hosts = ("raspberrypi.local", "printer.lan", "db.internal")
        for host in private_hosts:
            routed = browser_tool._navigation_session_key("default", f"http://{host}/")
            assert routed == "default::local", f"host {host!r} did not route to sidecar"

    def test_no_cloud_provider_stays_on_bare_task_id(self, monkeypatch):
        """When cloud provider is not configured, no hybrid routing happens."""
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None)
        routed = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert routed == "default"

    def test_camofox_mode_stays_on_bare_task_id(self, monkeypatch):
        """Camofox is already local — no hybrid routing needed."""
        self._configure_cloud(monkeypatch)
        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: True)
        routed = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert routed == "default"

    def test_cdp_override_stays_on_bare_task_id(self, monkeypatch):
        """A user-supplied CDP endpoint owns the whole session — no hybrid."""
        self._configure_cloud(monkeypatch)
        monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "ws://localhost:9222")
        routed = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert routed == "default"

    def test_feature_flag_off_disables_hybrid_routing(self, monkeypatch):
        """``auto_local_for_private_urls: false`` keeps private URLs on cloud."""
        self._configure_cloud(monkeypatch)
        monkeypatch.setattr(browser_tool, "_auto_local_for_private_urls", lambda: False)
        routed = browser_tool._navigation_session_key("default", "http://localhost:3000/")
        assert routed == "default"

    def test_none_task_id_defaults(self, monkeypatch):
        """``None`` task_id resolves to 'default'."""
        self._configure_cloud(monkeypatch)
        routed = browser_tool._navigation_session_key(None, "http://localhost:3000/")
        assert routed == "default::local"
class TestSessionKeyHelpers:
    """Checks for ``_is_local_sidecar_key`` and ``_last_session_key``."""

    def test_is_local_sidecar_key(self):
        """Only keys carrying the ``::local`` suffix count as sidecar keys."""
        for sidecar_key in ("default::local", "my_task::local"):
            assert browser_tool._is_local_sidecar_key(sidecar_key)
        for plain_key in ("default", "my_task"):
            assert not browser_tool._is_local_sidecar_key(plain_key)

    def test_last_session_key_falls_back_to_task_id(self, monkeypatch):
        """Without a recorded last-active key, returns the bare task_id."""
        monkeypatch.setattr(browser_tool, "_last_active_session_key", {})
        expectations = (
            ("default", "default"),
            ("task-42", "task-42"),
            (None, "default"),
        )
        for task_id, expected in expectations:
            assert browser_tool._last_session_key(task_id) == expected

    def test_last_session_key_returns_recorded_key(self, monkeypatch):
        """A recorded key is returned; unrecorded task ids still fall back."""
        recorded = {"default": "default::local", "task-42": "task-42"}
        monkeypatch.setattr(browser_tool, "_last_active_session_key", recorded)

        assert browser_tool._last_session_key("default") == "default::local"
        assert browser_tool._last_session_key("task-42") == "task-42"
        # Unknown task_id still falls back
        assert browser_tool._last_session_key("other") == "other"
class TestHybridRoutingSessionCreation:
    """_get_session_info must force a local session when the key carries ``::local``."""

    def test_local_sidecar_key_skips_cloud_provider(self, monkeypatch):
        """A ``::local``-suffixed key creates a local session even when cloud is set."""
        cloud = Mock()
        cloud.create_session.return_value = {
            "session_name": "should_not_be_used",
            "bb_session_id": "bb_xxx",
            "cdp_url": "wss://fake.browserbase.com/ws",
        }
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: cloud)
        monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None)

        info = browser_tool._get_session_info("default::local")

        assert cloud.create_session.call_count == 0
        assert info["bb_session_id"] is None
        assert info["cdp_url"] is None
        assert info["features"]["local"] is True

    def test_bare_task_id_with_cloud_provider_uses_cloud(self, monkeypatch):
        """A bare task_id with cloud provider configured hits the cloud path."""
        cloud = Mock()
        cloud.create_session.return_value = {
            "session_name": "cloud-sess",
            "bb_session_id": "bb_123",
            "cdp_url": "wss://real.browserbase.com/ws",
        }
        monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: cloud)
        monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None)
        monkeypatch.setattr(browser_tool, "_resolve_cdp_override", lambda u: u)

        info = browser_tool._get_session_info("default")

        assert cloud.create_session.call_count == 1
        assert info["bb_session_id"] == "bb_123"
class TestCleanupHybridSessions:
    """cleanup_browser(bare_task_id) must reap both cloud + local sidecar sessions."""

    @staticmethod
    def _record_reaped(monkeypatch):
        """Swap the per-session cleanup for a recorder; return its list."""
        reaped = []
        monkeypatch.setattr(
            browser_tool, "_cleanup_single_browser_session", reaped.append
        )
        return reaped

    def test_cleanup_reaps_both_primary_and_sidecar(self, monkeypatch):
        """Given a bare task_id with both sessions alive, both get cleaned."""
        reaped = self._record_reaped(monkeypatch)
        live_sessions = {
            "default": {"session_name": "cloud_sess"},
            "default::local": {"session_name": "local_sess"},
        }
        monkeypatch.setattr(browser_tool, "_active_sessions", live_sessions)
        monkeypatch.setattr(
            browser_tool, "_last_active_session_key", {"default": "default::local"}
        )

        browser_tool.cleanup_browser("default")

        assert set(reaped) == {"default", "default::local"}
        # last-active pointer dropped
        assert "default" not in browser_tool._last_active_session_key

    def test_cleanup_reaps_only_primary_when_no_sidecar(self, monkeypatch):
        """When no sidecar exists, only the primary is reaped."""
        reaped = self._record_reaped(monkeypatch)
        monkeypatch.setattr(
            browser_tool,
            "_active_sessions",
            {"default": {"session_name": "cloud_sess"}},
        )

        browser_tool.cleanup_browser("default")

        assert reaped == ["default"]

    def test_cleanup_sidecar_directly_keeps_primary(self, monkeypatch):
        """Calling cleanup with a ``::local`` key reaps only the sidecar."""
        reaped = self._record_reaped(monkeypatch)
        live_sessions = {
            "default": {"session_name": "cloud_sess"},
            "default::local": {"session_name": "local_sess"},
        }
        monkeypatch.setattr(browser_tool, "_active_sessions", live_sessions)
        monkeypatch.setattr(
            browser_tool, "_last_active_session_key", {"default": "default::local"}
        )

        browser_tool.cleanup_browser("default::local")

        assert reaped == ["default::local"]
        # Last-active pointer NOT dropped (primary task is still alive)
        assert browser_tool._last_active_session_key.get("default") == "default::local"

View file

@ -0,0 +1,210 @@
"""Tests for credential_pool .env fallback and auth credential_pool lookup.
Covers the fix from #15914 / PR #15920:
- _seed_from_env reads API keys from ~/.hermes/.env when not in os.environ
- _resolve_api_key_provider_secret falls back to credential_pool when env vars are empty
- env vars take priority over .env file (handled by get_env_value itself)
- env vars take priority over credential pool (fallback only kicks in when env is empty)
"""
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
def _make_pconfig(provider_id="deepseek", env_vars=None):
    """Build a bare-bones ``api_key`` ProviderConfig for these tests.

    ``provider_id`` defaults to 'deepseek' because it's a real api_key
    provider in PROVIDER_REGISTRY (needed for _seed_from_env's generic path).
    When ``env_vars`` is empty or omitted, the single env var
    ``<PROVIDER_ID>_API_KEY`` is used.
    """
    from hermes_cli.auth import ProviderConfig

    key_vars = env_vars if env_vars else [f"{provider_id.upper()}_API_KEY"]
    return ProviderConfig(
        id=provider_id,
        name=provider_id.title(),
        auth_type="api_key",
        api_key_env_vars=tuple(key_vars),
    )
@pytest.fixture
def isolated_hermes_home(tmp_path, monkeypatch):
    """Redirect HERMES_HOME and ``Path.home()`` into ``tmp_path``.

    Known API-key environment variables are removed, so lookups in these
    tests can only be satisfied by the temp ``.env`` file. Returns the
    created ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Clear all known API key env vars so get_env_value falls through to .env
    scrubbed = (
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "OPENROUTER_API_KEY",
        "ZAI_API_KEY",
        "DEEPSEEK_API_KEY",
        "ANTHROPIC_TOKEN",
        "CLAUDE_CODE_OAUTH_TOKEN",
        "OPENAI_BASE_URL",
    )
    for var_name in scrubbed:
        monkeypatch.delenv(var_name, raising=False)
    return hermes_dir
def _write_env_file(home: Path, **kwargs) -> None:
    """Write each keyword argument as a ``KEY=value`` line to ``home/.env``.

    Lines are newline-separated and the file always ends with a newline.
    """
    env_path = home / ".env"
    rendered = "\n".join(f"{name}={value}" for name, value in kwargs.items())
    env_path.write_text(rendered + "\n")
class TestCredentialPoolSeedsFromDotEnv:
    """_seed_from_env must read keys from ~/.hermes/.env, not just os.environ.

    This is the behaviour the fix depends on: a key added to .env
    mid-session, or via an entry point that doesn't run load_hermes_dotenv,
    must still be discovered by the credential pool.
    """

    def test_deepseek_key_from_dotenv_only(self, isolated_hermes_home):
        """Key in .env but not os.environ → _seed_from_env adds a pool entry."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-only-12345")
        assert "DEEPSEEK_API_KEY" not in os.environ

        from agent.credential_pool import _seed_from_env

        entries = []
        changed, active_sources = _seed_from_env("deepseek", entries)

        assert changed is True
        assert "env:DEEPSEEK_API_KEY" in active_sources
        found = any(
            e.access_token == "sk-dotenv-only-12345"
            and e.source == "env:DEEPSEEK_API_KEY"
            for e in entries
        )
        assert found, f"Expected seeded entry with dotenv key, got: {[(e.source, e.access_token) for e in entries]}"

    def test_openrouter_key_from_dotenv_only(self, isolated_hermes_home):
        """OpenRouter path has its own branch — verify it also reads .env."""
        _write_env_file(isolated_hermes_home, OPENROUTER_API_KEY="sk-or-dotenv-abc")
        assert "OPENROUTER_API_KEY" not in os.environ

        from agent.credential_pool import _seed_from_env

        entries = []
        changed, active_sources = _seed_from_env("openrouter", entries)

        assert changed is True
        assert "env:OPENROUTER_API_KEY" in active_sources
        seeded_tokens = [e.access_token for e in entries]
        assert "sk-or-dotenv-abc" in seeded_tokens

    def test_empty_dotenv_no_entries(self, isolated_hermes_home):
        """No .env file, no env vars → no entries seeded (and no crash)."""
        from agent.credential_pool import _seed_from_env

        entries = []
        changed, active_sources = _seed_from_env("deepseek", entries)

        assert changed is False
        assert active_sources == set()
        assert entries == []

    def test_os_environ_still_wins_over_dotenv(self, isolated_hermes_home, monkeypatch):
        """get_env_value checks os.environ first — verify seeding picks that up."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-stale")
        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-fresh-xyz")

        from agent.credential_pool import _seed_from_env

        entries = []
        changed, _ = _seed_from_env("deepseek", entries)

        assert changed is True
        from_env = [e for e in entries if e.source == "env:DEEPSEEK_API_KEY"]
        assert len(from_env) == 1
        assert from_env[0].access_token == "sk-env-fresh-xyz"
class TestAuthResolvesFromDotEnv:
    """_resolve_api_key_provider_secret must also read from ~/.hermes/.env."""

    def test_key_from_dotenv_only(self, isolated_hermes_home):
        """Key in .env but not os.environ → _resolve returns it with the env var source."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-resolve-789")
        assert "DEEPSEEK_API_KEY" not in os.environ

        from hermes_cli.auth import _resolve_api_key_provider_secret

        resolved = _resolve_api_key_provider_secret(
            provider_id="deepseek",
            pconfig=_make_pconfig(),
        )
        key, source = resolved

        assert key == "sk-dotenv-resolve-789"
        assert source == "DEEPSEEK_API_KEY"
class TestAuthCredentialPoolFallback:
    """_resolve_api_key_provider_secret falls back to credential pool when env + dotenv are empty."""

    def test_credential_pool_fallback_structure(self, isolated_hermes_home):
        """Empty env + empty .env → auth falls back to credential pool."""
        pooled_entry = MagicMock()
        pooled_entry.access_token = "test-pool-key-12345"
        pooled_entry.runtime_api_key = ""

        pool = MagicMock()
        pool.has_credentials.return_value = True
        pool.peek.return_value = pooled_entry

        from hermes_cli.auth import _resolve_api_key_provider_secret

        with patch("agent.credential_pool.load_pool", return_value=pool):
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )

        assert "test-pool-key-12345" in key
        assert "credential_pool" in source

    def test_credential_pool_empty_returns_empty(self, isolated_hermes_home):
        """Empty env + empty .env + empty pool → empty string."""
        pool = MagicMock()
        pool.has_credentials.return_value = False

        from hermes_cli.auth import _resolve_api_key_provider_secret

        with patch("agent.credential_pool.load_pool", return_value=pool):
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )

        assert key == ""

    def test_env_var_takes_priority_over_pool(self, isolated_hermes_home, monkeypatch):
        """os.environ key wins — credential pool is NEVER consulted."""
        monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-key-first-abc123")

        pool = MagicMock()
        pool.has_credentials.return_value = True

        from hermes_cli.auth import _resolve_api_key_provider_secret

        with patch("agent.credential_pool.load_pool", return_value=pool) as load_pool_mock:
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )

        assert key == "sk-env-key-first-abc123"
        assert source == "DEEPSEEK_API_KEY"
        # The env var satisfied the request, so the pool was never loaded.
        load_pool_mock.assert_not_called()

    def test_dotenv_takes_priority_over_pool(self, isolated_hermes_home):
        """Key in .env beats credential pool — pool only fires when both env sources are empty."""
        _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-priority-xyz")
        assert "DEEPSEEK_API_KEY" not in os.environ

        pool = MagicMock()
        pool.has_credentials.return_value = True

        from hermes_cli.auth import _resolve_api_key_provider_secret

        with patch("agent.credential_pool.load_pool", return_value=pool) as load_pool_mock:
            key, source = _resolve_api_key_provider_secret(
                provider_id="deepseek",
                pconfig=_make_pconfig(),
            )

        assert key == "sk-dotenv-priority-xyz"
        assert source == "DEEPSEEK_API_KEY"
        load_pool_mock.assert_not_called()

View file

@ -491,11 +491,36 @@ def test_configure_callback_port_uses_explicit_port():
assert cfg["_resolved_port"] == 54321
def test_parse_base_url_strips_path():
"""_parse_base_url drops path components for OAuth discovery."""
from tools.mcp_oauth import _parse_base_url
def test_build_oauth_auth_preserves_server_url_path():
    """A path-bearing ``server_url`` reaches ``OAuthClientProvider`` untouched.

    Regression for #16015: ``_parse_base_url`` used to strip the path, turning
    ``https://mcp.notion.com/mcp`` into ``https://mcp.notion.com`` and breaking
    RFC 9728 protected-resource validation against servers whose PRM
    advertises a path-scoped resource (Notion). The MCP SDK strips the path
    itself for authorization-server discovery via
    ``OAuthContext.get_authorization_base_url``; Hermes must not pre-strip.
    """
    from tools import mcp_oauth

    seen_kwargs: dict = {}

    class _RecordingProvider:
        """Stand-in provider that records its constructor keyword arguments."""

        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    with patch.object(mcp_oauth, "_OAUTH_AVAILABLE", True), \
            patch.object(mcp_oauth, "OAuthClientProvider", _RecordingProvider), \
            patch.object(mcp_oauth, "_is_interactive", return_value=True), \
            patch.object(mcp_oauth, "_maybe_preregister_client"), \
            patch.object(mcp_oauth, "HermesTokenStorage") as storage_cls:
        # Storage reports cached tokens so the build proceeds to the provider.
        storage_cls.return_value = MagicMock(has_cached_tokens=lambda: True)
        build_oauth_auth(
            server_name="notion",
            server_url="https://mcp.notion.com/mcp",
            oauth_config={},
        )

    assert seen_kwargs["server_url"] == "https://mcp.notion.com/mcp"
assert _parse_base_url("https://example.com/mcp/v1") == "https://example.com"
assert _parse_base_url("https://example.com") == "https://example.com"
assert _parse_base_url("https://host.example.com:8080/api") == "https://host.example.com:8080"

View file

@ -483,6 +483,147 @@ def _is_local_backend() -> bool:
return _is_camofox_mode() or _get_cloud_provider() is None
_auto_local_for_private_urls_resolved = False
_cached_auto_local_for_private_urls: bool = True
def _auto_local_for_private_urls() -> bool:
    """Tell whether private/LAN URLs should be auto-routed to a local Chromium.

    The ``browser.auto_local_for_private_urls`` config value is read on the
    first call only and the result is cached in module globals for the rest
    of the process. If the key is absent, or the config cannot be read, the
    cached default is returned unchanged.

    When enabled, ``browser_navigate`` sends URLs whose host resolves to a
    private/loopback/LAN address to a local headless Chromium sidecar even
    though a cloud provider is configured; public URLs keep using the cloud
    provider.
    """
    global _auto_local_for_private_urls_resolved, _cached_auto_local_for_private_urls

    if not _auto_local_for_private_urls_resolved:
        # Mark as resolved up front so a failing read is not retried.
        _auto_local_for_private_urls_resolved = True
        try:
            from hermes_cli.config import read_raw_config

            browser_section = read_raw_config().get("browser", {})
            has_setting = (
                isinstance(browser_section, dict)
                and "auto_local_for_private_urls" in browser_section
            )
            if has_setting:
                _cached_auto_local_for_private_urls = bool(
                    browser_section.get("auto_local_for_private_urls")
                )
        except Exception as e:
            logger.debug("Could not read auto_local_for_private_urls from config: %s", e)

    return _cached_auto_local_for_private_urls
def _url_is_private(url: str) -> bool:
"""Return True when the URL's host resolves to a private/LAN/loopback address.
Reuses ``tools.url_safety.is_safe_url`` as the oracle if the SSRF check
would reject the URL, we treat it as "private" for routing purposes. DNS
resolution failures are treated as NOT private (fall through to whatever
backend is configured, which will surface the DNS error naturally).
"""
try:
from tools.url_safety import is_safe_url
# is_safe_url returns False for private/loopback/link-local/CGNAT AND
# for DNS failures. We only want the private-network case here, so
# we parse + check the host shape as a DNS-failure sieve first.
from urllib.parse import urlparse
import ipaddress
import socket
parsed = urlparse(url)
hostname = (parsed.hostname or "").strip().lower().rstrip(".")
if not hostname:
return False
# Literal IP → check directly
try:
ip = ipaddress.ip_address(hostname)
return (
ip.is_private
or ip.is_loopback
or ip.is_link_local
or ip in ipaddress.ip_network("100.64.0.0/10")
)
except ValueError:
pass
# Hostname — must resolve to confirm it's private (bare "localhost"
# resolves to 127.0.0.1 via /etc/hosts). Short-circuit on obvious
# names to avoid a DNS hop.
if hostname in ("localhost",) or hostname.endswith(".localhost"):
return True
if hostname.endswith(".local") or hostname.endswith(".lan") or hostname.endswith(".internal"):
return True
try:
addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
except socket.gaierror:
return False # DNS fail → not private, let the normal path fail
for _, _, _, _, sockaddr in addr_info:
try:
ip = ipaddress.ip_address(sockaddr[0])
except ValueError:
continue
if (
ip.is_private
or ip.is_loopback
or ip.is_link_local
or ip in ipaddress.ip_network("100.64.0.0/10")
):
return True
return False
except Exception as exc:
logger.debug("URL-privacy check failed for %s: %s", url, exc)
return False
def _navigation_session_key(task_id: str, url: str) -> str:
    """Choose the session key that should serve ``url`` for ``task_id``.

    The bare task_id (``"default"`` when ``task_id`` is None) is returned
    unless every one of the following holds, checked in this order:

    1. No CDP override is active (that path owns the whole session).
    2. Camofox mode is not active (Camofox is already local-only).
    3. A cloud provider is configured (``_get_cloud_provider()`` is not None).
    4. Auto-local routing is enabled (``browser.auto_local_for_private_urls``).
    5. The URL is private per ``_url_is_private``.

    In that case the key is ``task_id`` with the local-sidecar suffix
    appended, which makes the hybrid-routing path use a local Chromium
    sidecar for this navigation.
    """
    primary_key = "default" if task_id is None else task_id

    # Short-circuit order matters: the URL check (possible DNS lookup) is
    # only reached when every cheaper precondition has passed.
    stay_on_primary = (
        _get_cdp_override()
        or _is_camofox_mode()
        or _get_cloud_provider() is None
        or not _auto_local_for_private_urls()
        or not _url_is_private(url)
    )
    if stay_on_primary:
        return primary_key
    return f"{primary_key}{_LOCAL_SUFFIX}"
def _is_local_sidecar_key(session_key: str) -> bool:
    """Report whether ``session_key`` carries the local-sidecar suffix."""
    sidecar_suffix = _LOCAL_SUFFIX
    return session_key.endswith(sidecar_suffix)
def _last_session_key(task_id: str) -> str:
    """Resolve the session key for a non-navigation browser tool call.

    Looks up the key recorded in ``_last_active_session_key`` for this task so
    that snapshot/click/fill/etc. target the same session. When nothing has
    been recorded, the bare task_id (``"default"`` for None) is returned.
    """
    lookup_id = "default" if task_id is None else task_id
    return _last_active_session_key.get(lookup_id, lookup_id)
def _allow_private_urls() -> bool:
"""Return whether the browser is allowed to navigate to private/internal addresses.
@ -521,10 +662,25 @@ def _socket_safe_tmpdir() -> str:
return tempfile.gettempdir()
# Track active sessions per task
# Track active sessions per "session key".
#
# A "session key" is either the bare task_id (cloud/default path) OR a composite
# like f"{task_id}::local" when the hybrid-routing feature spawns a local sidecar
# browser for a LAN/localhost URL while a cloud provider is configured globally.
# Both forms flow through the same _active_sessions / _run_browser_command /
# cleanup_browser code paths — the key is opaque to those internals.
#
# Stores: session_name (always), bb_session_id + cdp_url (cloud mode only)
_active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, ...}
_recording_sessions: set = set() # task_ids with active recordings
_active_sessions: Dict[str, Dict[str, str]] = {} # session_key -> {session_name, ...}
_recording_sessions: set = set() # session_keys with active recordings
# Tracks the most recent session_key used per task_id. Set by browser_navigate()
# after it chooses a backend for a URL; read by every non-nav browser tool
# (snapshot/click/fill/eval/...) so they target the session that served the last
# navigation. Without this, a task that navigated to localhost on the local
# sidecar would fall back to the cloud session on its next snapshot call.
_last_active_session_key: Dict[str, str] = {} # task_id -> session_key
_LOCAL_SUFFIX = "::local"
# Flag to track if cleanup has been done
_cleanup_done = False
@ -1014,37 +1170,48 @@ def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]:
def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
"""
Get or create session info for the given task.
Get or create session info for the given session key.
In cloud mode, creates a Browserbase session with proxies enabled.
In local mode, generates a session name for agent-browser --session.
Also starts the inactivity cleanup thread and updates activity tracking.
Thread-safe: multiple subagents can call this concurrently.
Args:
task_id: Unique identifier for the task
task_id: Session key. Normally the task_id as-is, but may carry the
``::local`` suffix for the hybrid-routing local sidecar in that
case the cloud provider is skipped even when one is configured,
and a local Chromium session is created instead.
Returns:
Dict with session_name (always), bb_session_id + cdp_url (cloud only)
"""
if task_id is None:
task_id = "default"
# Start the cleanup thread if not running (handles inactivity timeouts)
_start_browser_cleanup_thread()
# Update activity timestamp for this session
_update_session_activity(task_id)
with _cleanup_lock:
# Check if we already have a session for this task
if task_id in _active_sessions:
return _active_sessions[task_id]
# Hybrid routing: session keys ending with ``::local`` force a local
# Chromium regardless of the globally-configured cloud provider. Public
# URLs in the same conversation continue to use the cloud session under
# the bare task_id key.
force_local = _is_local_sidecar_key(task_id)
# Create session outside the lock (network call in cloud mode)
cdp_override = _get_cdp_override()
if cdp_override:
if cdp_override and not force_local:
session_info = _create_cdp_session(task_id, cdp_override)
elif force_local:
session_info = _create_local_session(task_id)
else:
provider = _get_cloud_provider()
if provider is None:
@ -1081,7 +1248,7 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
session_info["fallback_from_cloud"] = True
session_info["fallback_reason"] = str(e)
session_info["fallback_provider"] = provider_name
with _cleanup_lock:
# Double-check: another thread may have created a session while we
# were doing the network call. Use the existing one to avoid leaking
@ -1093,7 +1260,9 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
# Lazy-start the CDP supervisor now that the session exists (if the
# backend surfaces a CDP URL via override or session_info["cdp_url"]).
# Idempotent; swallows errors. See _ensure_cdp_supervisor for details.
_ensure_cdp_supervisor(task_id)
# Skip for local sidecars — they have no CDP URL.
if not force_local:
_ensure_cdp_supervisor(task_id)
return session_info
@ -1521,9 +1690,21 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
# SSRF protection — block private/internal addresses before navigating.
# Skipped for local backends (Camofox, headless Chromium without a cloud
# provider) because the agent already has full local network access via
# the terminal tool. Can also be opted out for cloud mode via
# ``browser.allow_private_urls`` in config.
if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url):
# the terminal tool. Also skipped when hybrid routing will auto-spawn a
# local Chromium sidecar for this URL (cloud provider configured +
# private URL + ``browser.auto_local_for_private_urls`` enabled) — the
# cloud provider never sees the URL in that case. Can also be opted
# out globally via ``browser.allow_private_urls`` in config.
effective_task_id = task_id or "default"
nav_session_key = _navigation_session_key(effective_task_id, url)
auto_local_this_nav = _is_local_sidecar_key(nav_session_key)
if (
not _is_local_backend()
and not auto_local_this_nav
and not _allow_private_urls()
and not _is_safe_url(url)
):
return json.dumps({
"success": False,
"error": "Blocked: URL targets a private or internal address",
@ -1543,19 +1724,31 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
from tools.browser_camofox import camofox_navigate
return camofox_navigate(url, task_id)
effective_task_id = task_id or "default"
if auto_local_this_nav:
logger.info(
"browser_navigate: auto-routing %s to local Chromium sidecar "
"(cloud provider %s stays on cloud for public URLs; "
"set browser.auto_local_for_private_urls: false to disable)",
url,
type(_get_cloud_provider()).__name__ if _get_cloud_provider() else "none",
)
# Get session info to check if this is a new session
# (will create one with features logged if not exists)
session_info = _get_session_info(effective_task_id)
session_info = _get_session_info(nav_session_key)
is_first_nav = session_info.get("_first_nav", True)
# Auto-start recording if configured and this is first navigation
if is_first_nav:
session_info["_first_nav"] = False
_maybe_start_recording(effective_task_id)
_maybe_start_recording(nav_session_key)
result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60))
result = _run_browser_command(nav_session_key, "open", [url], timeout=max(_get_command_timeout(), 60))
# Remember which session served this nav so snapshot/click/fill/...
# on the same task_id hit it (critical when hybrid routing has both a
# cloud session and a local sidecar alive concurrently).
_last_active_session_key[effective_task_id] = nav_session_key
if result.get("success"):
data = result.get("data", {})
@ -1565,10 +1758,17 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
# Post-redirect SSRF check — if the browser followed a redirect to a
# private/internal address, block the result so the model can't read
# internal content via subsequent browser_snapshot calls.
# Skipped for local backends (same rationale as the pre-nav check).
if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url):
# Skipped for local backends (same rationale as the pre-nav check),
# and for the hybrid local sidecar (we're already on a local browser
# hitting a private URL by design).
if (
not _is_local_backend()
and not auto_local_this_nav
and not _allow_private_urls()
and final_url and final_url != url and not _is_safe_url(final_url)
):
# Navigate away to a blank page to prevent snapshot leaks
_run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10)
_run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10)
return json.dumps({
"success": False,
"error": "Blocked: redirect landed on a private/internal address",
@ -1612,7 +1812,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
# Auto-take a compact snapshot so the model can act immediately
# without a separate browser_snapshot call.
try:
snap_result = _run_browser_command(effective_task_id, "snapshot", ["-c"])
snap_result = _run_browser_command(nav_session_key, "snapshot", ["-c"])
if snap_result.get("success"):
snap_data = snap_result.get("data", {})
snapshot_text = snap_data.get("snapshot", "")
@ -1652,7 +1852,7 @@ def browser_snapshot(
from tools.browser_camofox import camofox_snapshot
return camofox_snapshot(full, task_id, user_task)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
# Build command args based on full flag
args = []
@ -1714,7 +1914,7 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
from tools.browser_camofox import camofox_click
return camofox_click(ref, task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
# Ensure ref starts with @
if not ref.startswith("@"):
@ -1750,7 +1950,7 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
from tools.browser_camofox import camofox_type
return camofox_type(ref, text, task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
# Ensure ref starts with @
if not ref.startswith("@"):
@ -1804,7 +2004,7 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
result = camofox_scroll(direction, task_id)
return result
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)])
if not result.get("success"):
@ -1833,7 +2033,7 @@ def browser_back(task_id: Optional[str] = None) -> str:
from tools.browser_camofox import camofox_back
return camofox_back(task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
result = _run_browser_command(effective_task_id, "back", [])
if result.get("success"):
@ -1864,7 +2064,7 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
from tools.browser_camofox import camofox_press
return camofox_press(key, task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
result = _run_browser_command(effective_task_id, "press", [key])
if result.get("success"):
@ -1906,7 +2106,7 @@ def browser_console(clear: bool = False, expression: Optional[str] = None, task_
from tools.browser_camofox import camofox_console
return camofox_console(clear, task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
console_args = ["--clear"] if clear else []
error_args = ["--clear"] if clear else []
@ -1945,7 +2145,7 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str:
if _is_camofox_mode():
return _camofox_eval(expression, task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
result = _run_browser_command(effective_task_id, "eval", [expression])
if not result.get("success"):
@ -2077,7 +2277,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
from tools.browser_camofox import camofox_get_images
return camofox_get_images(task_id)
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
# Use eval to run JavaScript that extracts images
js_code = """JSON.stringify(
@ -2147,7 +2347,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
import base64
import uuid as uuid_mod
effective_task_id = task_id or "default"
effective_task_id = _last_session_key(task_id or "default")
# Save screenshot to persistent location so it can be shared with users
from hermes_constants import get_hermes_dir
@ -2350,17 +2550,47 @@ def _cleanup_old_recordings(max_age_hours=72):
def cleanup_browser(task_id: Optional[str] = None) -> None:
    """Clean up browser session(s) for a task.

    When ``task_id`` is a bare task identifier (no ``::local`` suffix), reaps
    BOTH the primary session AND the hybrid-routing local sidecar if one is
    currently tracked in ``_active_sessions``, then drops the task's
    last-active session pointer.

    When ``task_id`` already carries the ``::local`` suffix, only that one
    session is reaped and the last-active pointer is left alone, since the
    task itself is not being torn down.

    Args:
        task_id: Task identifier or explicit session key. ``None`` means
            ``"default"``.
    """
    if task_id is None:
        task_id = "default"

    # Explicit sidecar key: reap just that session, keep the task's pointer.
    if _is_local_sidecar_key(task_id):
        _cleanup_single_browser_session(task_id)
        return

    # Bare task id: primary session plus the sidecar if one exists.
    session_keys = [task_id]
    sidecar_key = f"{task_id}{_LOCAL_SUFFIX}"
    with _cleanup_lock:
        if sidecar_key in _active_sessions:
            session_keys.append(sidecar_key)

    for session_key in session_keys:
        _cleanup_single_browser_session(session_key)

    # The whole task is being cleaned, so forget which session was last used.
    _last_active_session_key.pop(task_id, None)
def _cleanup_single_browser_session(task_id: str) -> None:
"""Internal: reap a single browser session by its exact session key."""
# Stop the CDP supervisor for this task FIRST so we close our WebSocket
# before the backend tears down the underlying CDP endpoint.
_stop_cdp_supervisor(task_id)
@ -2379,32 +2609,33 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
logger.debug("cleanup_browser called for task_id: %s", task_id)
logger.debug("Active sessions: %s", list(_active_sessions.keys()))
# Check if session exists (under lock), but don't remove yet -
# _run_browser_command needs it to build the close command.
with _cleanup_lock:
session_info = _active_sessions.get(task_id)
if session_info:
bb_session_id = session_info.get("bb_session_id", "unknown")
logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
# Stop auto-recording before closing (saves the file)
_maybe_stop_recording(task_id)
# Try to close via agent-browser first (needs session in _active_sessions)
try:
_run_browser_command(task_id, "close", [], timeout=10)
logger.debug("agent-browser close command completed for task %s", task_id)
except Exception as e:
logger.warning("agent-browser close failed for task %s: %s", task_id, e)
# Now remove from tracking under lock
with _cleanup_lock:
_active_sessions.pop(task_id, None)
_session_last_activity.pop(task_id, None)
# Cloud mode: close the cloud browser session via provider API
# Cloud mode: close the cloud browser session via provider API.
# Local sidecars have bb_session_id=None so this no-ops for them.
if bb_session_id:
provider = _get_cloud_provider()
if provider is not None:

View file

@ -519,12 +519,6 @@ def _maybe_preregister_client(
logger.debug("Pre-registered client_id=%s for '%s'", client_id, storage._server_name)
def _parse_base_url(server_url: str) -> str:
"""Strip path component from server URL, returning the base origin."""
parsed = urlparse(server_url)
return f"{parsed.scheme}://{parsed.netloc}"
def build_oauth_auth(
server_name: str,
server_url: str,
@ -570,7 +564,7 @@ def build_oauth_auth(
_maybe_preregister_client(storage, cfg, client_metadata)
return OAuthClientProvider(
server_url=_parse_base_url(server_url),
server_url=server_url,
client_metadata=client_metadata,
storage=storage,
redirect_handler=_redirect_handler,

View file

@ -362,7 +362,6 @@ class MCPOAuthManager:
_configure_callback_port,
_is_interactive,
_maybe_preregister_client,
_parse_base_url,
_redirect_handler,
_wait_for_callback,
)
@ -387,7 +386,7 @@ class MCPOAuthManager:
return _HERMES_PROVIDER_CLS(
server_name=server_name,
server_url=_parse_base_url(entry.server_url),
server_url=entry.server_url,
client_metadata=client_metadata,
storage=storage,
redirect_handler=_redirect_handler,

View file

@ -776,7 +776,7 @@ class ProcessRegistry:
# Only enqueue completion notification on the FIRST move. Without
# this guard, kill_process() and the reader thread can both call
# _move_to_finished(), producing duplicate [SYSTEM: ...] messages.
# _move_to_finished(), producing duplicate [IMPORTANT: ...] messages.
if was_running and session.notify_on_complete:
from tools.ansi_strip import strip_ansi
output_tail = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else ""

View file

@ -2321,6 +2321,26 @@ def _(rid, params: dict) -> dict:
payload["rendered"] = rendered
_emit("message.complete", sid, payload)
if (
status == "complete"
and isinstance(raw, str)
and raw.strip()
and isinstance(text, str)
and text.strip()
):
try:
from agent.title_generator import maybe_auto_title
maybe_auto_title(
_get_db(),
session.get("session_key") or sid,
text,
raw,
session.get("history", []),
)
except Exception:
pass
# CLI parity: when voice-mode TTS is on, speak the agent reply
# (cli.py:_voice_speak_response). Only the final text — tool
# calls / reasoning already stream separately and would be
@ -2550,48 +2570,6 @@ def _(rid, params: dict) -> dict:
return _ok(rid, {"task_id": task_id})
@method("prompt.btw")
def _(rid, params: dict) -> dict:
    """Handle ``prompt.btw``: run a side question on a background thread.

    Validates the session and the ``text`` parameter, copies the session
    history, then starts a daemon thread that runs a tool-less agent over
    that copy and emits ``btw.complete`` with the reply (or an ``error: ...``
    text on failure). Returns immediately with ``{"status": "running"}``.
    """
    session, err = _sess(params, rid)
    if err:
        return err

    text = params.get("text", "")
    sid = params.get("session_id", "")
    if not text:
        return _err(rid, 4012, "text required")

    # Copy the history list so the worker uses the list as it was at request time.
    history_snapshot = list(session.get("history", []))

    def run():
        session_tokens = _set_session_context(session["session_key"])
        try:
            from run_agent import AIAgent

            agent = AIAgent(
                model=_resolve_model(),
                quiet_mode=True,
                platform="tui",
                max_iterations=8,
                enabled_toolsets=[],
            )
            result = agent.run_conversation(text, conversation_history=history_snapshot)
            if isinstance(result, dict):
                reply = result.get("final_response", str(result))
            else:
                reply = str(result)
            _emit("btw.complete", sid, {"text": reply})
        except Exception as e:
            _emit("btw.complete", sid, {"text": f"error: {e}"})
        finally:
            _clear_session_context(session_tokens)

    worker = threading.Thread(target=run, daemon=True)
    worker.start()
    return _ok(rid, {"status": "running"})
# ── Methods: respond ─────────────────────────────────────────────────

View file

@ -252,7 +252,6 @@ Primary event types the client handles today:
| `sudo.request` | `{ request_id }` |
| `secret.request` | `{ prompt, env_var, request_id }` |
| `background.complete` | `{ task_id, text }` |
| `btw.complete` | `{ text }` |
| `error` | `{ message }` |
| `gateway.stderr` | synthesized from child stderr |
| `gateway.protocol_error` | synthesized from malformed stdout |

View file

@ -9,9 +9,9 @@ import { type FocusMove, type SelectionState, shiftAnchor } from '../selection.j
* Returns no-op functions when fullscreen mode is disabled.
*/
export function useSelection(): {
copySelection: () => string
copySelection: () => Promise<string>
/** Copy without clearing the highlight (for copy-on-select). */
copySelectionNoClear: () => string
copySelectionNoClear: () => Promise<string>
clearSelection: () => void
hasSelection: () => boolean
/** Read the raw mutable selection state (for drag-to-scroll). */
@ -48,8 +48,8 @@ export function useSelection(): {
return useMemo(() => {
if (!ink) {
return {
copySelection: () => '',
copySelectionNoClear: () => '',
copySelection: async () => '',
copySelectionNoClear: async () => '',
clearSelection: () => {},
hasSelection: () => false,
getState: () => null,

View file

@ -1302,11 +1302,13 @@ export default class Ink {
}
/**
* Copy the current selection to the clipboard without clearing the
* highlight. Matches iTerm2's copy-on-select behavior where the selected
* region stays visible after the automatic copy.
* Copy the current text selection to the system clipboard without clearing the
* selection. Returns the copied text when a clipboard path succeeded (native
* tool fired, tmux buffer loaded, or OSC 52 emitted), or '' when no path was
* taken (e.g. headless Linux without tmux). Matches iTerm2's copy-on-select
* behavior where the selected region stays visible after the automatic copy.
*/
copySelectionNoClear(): string {
async copySelectionNoClear(): Promise<string> {
if (!hasSelection(this.selection)) {
return ''
}
@ -1314,28 +1316,41 @@ export default class Ink {
const text = getSelectedText(this.selection, this.frontFrame.screen)
if (text) {
// Raw OSC 52, or DCS-passthrough-wrapped OSC 52 inside tmux (tmux
// drops it silently unless allow-passthrough is on — no regression).
void setClipboard(text).then(raw => {
if (raw) {
this.options.stdout.write(raw)
try {
const { sequence, success } = await setClipboard(text)
if (sequence) {
this.options.stdout.write(sequence)
}
})
if (success) {
return text
}
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
console.error('[clipboard] no path reached the clipboard (headless + no tmux?) — set HERMES_TUI_FORCE_OSC52=1 to force the escape sequence')
}
} catch (err) {
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
console.error('[clipboard] error:', err)
}
}
}
return text
return ''
}
/**
* Copy the current text selection to the system clipboard via OSC 52
* and clear the selection. Returns the copied text (empty if no selection).
* and clear the selection. Returns the copied text (empty if no selection
* or clipboard operation failed).
*/
copySelection(): string {
async copySelection(): Promise<string> {
if (!hasSelection(this.selection)) {
return ''
}
const text = this.copySelectionNoClear()
const text = await this.copySelectionNoClear()
clearSelection(this.selection)
this.notifySelectionChange()

View file

@ -26,4 +26,26 @@ describe('shouldEmitClipboardSequence', () => {
shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)
).toBe(false)
})
it('HERMES_TUI_FORCE_OSC52 takes precedence over TMUX suppression', () => {
// Without the override, local-in-tmux suppresses the OSC 52 sequence
// so the terminal multiplexer path wins. FORCE_OSC52=1 flips that
// back on for users whose tmux config supports passthrough.
expect(shouldEmitClipboardSequence({ TMUX: '/tmp/t,1,0' } as NodeJS.ProcessEnv)).toBe(false)
expect(
shouldEmitClipboardSequence({
HERMES_TUI_FORCE_OSC52: '1',
TMUX: '/tmp/t,1,0'
} as NodeJS.ProcessEnv)
).toBe(true)
})
it('HERMES_TUI_FORCE_OSC52=0 suppresses OSC 52 even for remote or plain terminals', () => {
expect(
shouldEmitClipboardSequence({
HERMES_TUI_FORCE_OSC52: '0',
SSH_CONNECTION: '1'
} as NodeJS.ProcessEnv)
).toBe(false)
})
})

View file

@ -84,7 +84,11 @@ export function getClipboardPath(): ClipboardPath {
}
export function shouldEmitClipboardSequence(env: NodeJS.ProcessEnv = process.env): boolean {
const override = (env.HERMES_TUI_CLIPBOARD_OSC52 ?? env.HERMES_TUI_COPY_OSC52 ?? '').trim()
const override = (
env.HERMES_TUI_FORCE_OSC52 ??
env.HERMES_TUI_CLIPBOARD_OSC52 ??
env.HERMES_TUI_COPY_OSC52 ?? ''
).trim()
if (ENV_ON_RE.test(override)) {
return true
@ -162,10 +166,23 @@ export async function tmuxLoadBuffer(text: string): Promise<boolean> {
* utilities (pbcopy/wl-copy/xclip/xsel/clip.exe) always work locally. Over
* SSH these would write to the remote clipboard — OSC 52 is the right path there.
*
* Returns the sequence for the caller to write to stdout (raw OSC 52
* outside tmux, DCS-wrapped inside).
* Returns { sequence, success }:
* - `sequence` is the bytes to write to stdout (raw OSC 52 outside tmux,
* DCS-wrapped inside; empty string when we shouldn't emit).
* - `success` is true when we believe SOME path reached the clipboard:
* native tool fired (local), tmux buffer loaded, or an OSC 52 sequence
* was emitted to the terminal. False only when no path was taken at
* all (headless Linux with no tmux + osc52 suppressed, effectively).
* This is best-effort — pbcopy/xclip are fire-and-forget, and OSC 52
* depends on the outer terminal honoring the sequence — but it lets
* callers distinguish "nothing attempted" from "attempted".
*/
export async function setClipboard(text: string): Promise<string> {
export type ClipboardResult = {
sequence: string
success: boolean
}
export async function setClipboard(text: string): Promise<ClipboardResult> {
const b64 = Buffer.from(text, 'utf8').toString('base64')
const raw = osc(OSC.CLIPBOARD, 'c', b64)
const emitSequence = shouldEmitClipboardSequence(process.env)
@ -177,20 +194,28 @@ export async function setClipboard(text: string): Promise<string> {
// (https://anthropic.slack.com/archives/C07VBSHV7EV/p1773943921788829).
// Gated on SSH_CONNECTION (not SSH_TTY) since tmux panes inherit SSH_TTY
// forever but SSH_CONNECTION is in tmux's default update-environment and
// clears on local attach. Fire-and-forget.
if (!process.env['SSH_CONNECTION']) {
copyNative(text)
}
// clears on local attach. Fire-and-forget, but `copyNativeAttempted`
// tells us whether ANY native path will be tried on this platform.
const nativeAttempted =
!process.env['SSH_CONNECTION'] && copyNative(text)
const tmuxBufferLoaded = await tmuxLoadBuffer(text)
// Inner OSC uses BEL directly (not osc()) — ST's ESC would need doubling
// too, and BEL works everywhere for OSC 52.
if (tmuxBufferLoaded) {
return emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : ''
}
const sequence = tmuxBufferLoaded
? (emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : '')
: (emitSequence ? raw : '')
return emitSequence ? raw : ''
// Success if any path was taken. Native and tmux are fire-and-forget,
// so we can't truly confirm the clipboard was written — but if native
// was attempted OR tmux buffer loaded OR we emitted OSC 52, the user's
// paste is likely to work. The only false case is "we did literally
// nothing" (e.g. local-in-tmux with osc52 suppressed and tmux buffer
// load failed), in which case reporting failure to the user is honest.
const success = nativeAttempted || tmuxBufferLoaded || sequence.length > 0
return { sequence, success }
}
// Linux clipboard tool: undefined = not yet probed, null = none available.
@ -198,65 +223,95 @@ export async function setClipboard(text: string): Promise<string> {
// Cached after first attempt so repeated mouse-ups skip the probe chain.
let linuxCopy: 'wl-copy' | 'xclip' | 'xsel' | null | undefined
/**
 * Internal: find a usable Linux clipboard tool.
 *
 * Candidates are tried strictly in order — wl-copy, then xclip, then xsel —
 * each run with a 500 ms timeout. The first one that exits with code 0 is
 * returned; null means none of them did. This function does not store the
 * result itself; caching is left to the caller.
 */
async function probeLinuxCopy(): Promise<'wl-copy' | 'xclip' | 'xsel' | null> {
  const probeOpts = { useCwd: false, timeout: 500 }

  // Tool name paired with the arguments it needs to target the clipboard.
  const candidates: ReadonlyArray<readonly ['wl-copy' | 'xclip' | 'xsel', string[]]> = [
    ['wl-copy', []],
    ['xclip', ['-selection', 'clipboard']],
    ['xsel', ['--clipboard', '--input']]
  ]

  for (const [tool, args] of candidates) {
    // Sequential on purpose: a later tool is only spawned if the earlier one failed.
    const { code } = await execFileNoThrow(tool, args, probeOpts)

    if (code === 0) {
      return tool
    }
  }

  return null
}
/**
* Shell out to a native clipboard utility as a safety net for OSC 52.
* Only called when not in an SSH session (over SSH, these would write to
* the remote machine's clipboard — OSC 52 is the right path there).
* Fire-and-forget: failures are silent since OSC 52 may have succeeded.
*
* Returns true when a native copy path was (or will be) attempted — i.e.
* we'll spawn pbcopy on macOS, clip on Windows, or a known-working Linux
* tool. Returns false only when we know no native tool is viable (Linux
* without DISPLAY/WAYLAND_DISPLAY, or previously-probed-to-null). The
* return value is used to decide whether to tell the user the copy
* succeeded — spawning is best-effort but good enough to claim success.
*
* Linux behaviour: if DISPLAY and WAYLAND_DISPLAY are both unset, native
* clipboard tools cannot work (they need a display server). In that case
* we skip probing entirely and treat linuxCopy as permanently null.
*/
function copyNative(text: string): void {
function copyNative(text: string): boolean {
const opts = { input: text, useCwd: false, timeout: 2000 }
switch (process.platform) {
case 'darwin':
void execFileNoThrow('pbcopy', [], opts)
return
return true
case 'linux': {
if (linuxCopy === null) {
return
}
if (linuxCopy === 'wl-copy') {
void execFileNoThrow('wl-copy', [], opts)
return
}
if (linuxCopy === 'xclip') {
void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts)
return
}
if (linuxCopy === 'xsel') {
void execFileNoThrow('xsel', ['--clipboard', '--input'], opts)
return
}
// First call: probe wl-copy (Wayland) then xclip/xsel (X11), cache winner.
void execFileNoThrow('wl-copy', [], opts).then(r => {
if (r.code === 0) {
linuxCopy = 'wl-copy'
return
// If we already probed (success or hard-fail), short-circuit.
if (linuxCopy !== undefined) {
if (linuxCopy === null) {
// No working native tool — skip silently.
return false
}
void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts).then(r2 => {
if (r2.code === 0) {
linuxCopy = 'xclip'
// linuxCopy is a known-working tool; fire-and-forget.
void execFileNoThrow(linuxCopy, linuxCopy === 'wl-copy' ? [] : ['-selection', 'clipboard'], opts)
return
}
return true
}
void execFileNoThrow('xsel', ['--clipboard', '--input'], opts).then(r3 => {
linuxCopy = r3.code === 0 ? 'xsel' : null
})
})
})
// No display server → native tools will fail immediately. Cache null.
if (!process.env.DISPLAY && !process.env.WAYLAND_DISPLAY) {
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
console.error('[clipboard] [native] Linux: no DISPLAY or WAYLAND_DISPLAY — native clipboard unavailable')
}
return
linuxCopy = null
return false
}
// First call: probe in the background and cache the result for future copies.
// We don't await — this is fire-and-forget. Treat as an attempt:
// the probe will discover a tool and spawn it. If probing finds
// nothing, the NEXT copy will short-circuit above.
void (async () => {
const winner = await probeLinuxCopy()
linuxCopy = winner
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
console.error(`[clipboard] [native] Linux: clipboard probe complete → ${winner ?? 'no tool available'}`)
}
// Actually perform the copy with the discovered tool.
if (winner) {
void execFileNoThrow(winner, winner === 'wl-copy' ? [] : ['-selection', 'clipboard'], opts)
}
})()
return true
}
case 'win32':
@ -264,8 +319,10 @@ function copyNative(text: string): void {
// imperfect (system locale encoding) but good enough for a fallback.
void execFileNoThrow('clip', [], opts)
return
return true
}
return false
}
/** @internal test-only */

View file

@ -392,7 +392,7 @@ const buildComposer = () => ({
hasSelection: false,
paste: vi.fn(),
queueRef: { current: [] as string[] },
selection: { copySelection: vi.fn(() => '') },
selection: { copySelection: vi.fn(async () => '') },
setInput: vi.fn()
})

View file

@ -429,12 +429,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
case 'btw.complete':
dropBgTask('btw:x')
sys(`[btw] ${ev.payload.text}`)
return
case 'subagent.spawn_requested':
// Child built but not yet running (waiting on ThreadPoolExecutor slot).
// Preserve completed state if a later event races in before this one.

View file

@ -33,7 +33,7 @@ export type StatusBarMode = 'bottom' | 'off' | 'top'
export interface SelectionApi {
captureScrolledRows: (firstRow: number, lastRow: number, side: 'above' | 'below') => void
clearSelection: () => void
copySelection: () => string
copySelection: () => Promise<string>
getState: () => unknown
shiftAnchor: (dRow: number, minRow: number, maxRow: number) => void
shiftSelection: (dRow: number, minRow: number, maxRow: number) => void

View file

@ -251,11 +251,17 @@ export const coreCommands: SlashCommand[] = [
{
help: 'copy selection or assistant message',
name: 'copy',
run: (arg, ctx) => {
run: async (arg, ctx) => {
const { sys } = ctx.transcript
if (!arg && ctx.composer.hasSelection && ctx.composer.selection.copySelection()) {
return sys('copied selection')
if (!arg && ctx.composer.hasSelection) {
const text = await ctx.composer.selection.copySelection()
if (text) {
return sys(`copied ${text.length} characters`)
} else {
return sys('clipboard copy failed — try HERMES_TUI_FORCE_OSC52=1 to force the escape sequence; HERMES_TUI_DEBUG_CLIPBOARD=1 for details')
}
}
if (arg && Number.isNaN(parseInt(arg, 10))) {

View file

@ -1,7 +1,6 @@
import { attachedImageNotice, introMsg, toTranscriptMessages } from '../../../domain/messages.js'
import type {
BackgroundStartResponse,
BtwStartResponse,
ConfigGetValueResponse,
ConfigSetResponse,
ImageAttachResponse,
@ -26,7 +25,7 @@ const persistedModelArg = (arg: string) => {
export const sessionCommands: SlashCommand[] = [
{
aliases: ['bg'],
aliases: ['bg', 'btw'],
help: 'launch a background prompt',
name: 'background',
run: (arg, ctx) => {
@ -47,23 +46,6 @@ export const sessionCommands: SlashCommand[] = [
}
},
{
help: 'by-the-way follow-up',
name: 'btw',
run: (arg, ctx) => {
if (!arg) {
return ctx.transcript.sys('/btw <question>')
}
ctx.gateway.rpc<BtwStartResponse>('prompt.btw', { session_id: ctx.sid, text: arg }).then(
ctx.guarded(() => {
patchUiState(state => ({ ...state, bgTasks: new Set(state.bgTasks).add('btw:x') }))
ctx.transcript.sys('btw running…')
})
)
}
},
{
help: 'change or show model',
aliases: ['provider'],

View file

@ -178,10 +178,6 @@ export interface BackgroundStartResponse {
task_id?: string
}
export interface BtwStartResponse {
ok?: boolean
}
export interface ClarifyRespondResponse {
ok?: boolean
}
@ -403,7 +399,6 @@ export type GatewayEvent =
| { payload: { request_id: string }; session_id?: string; type: 'sudo.request' }
| { payload: { env_var: string; prompt: string; request_id: string }; session_id?: string; type: 'secret.request' }
| { payload: { task_id: string; text: string }; session_id?: string; type: 'background.complete' }
| { payload: { text: string }; session_id?: string; type: 'btw.complete' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.spawn_requested' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.start' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.thinking' }

View file

@ -84,8 +84,8 @@ declare module '@hermes/ink' {
export function withInkSuspended(run: RunExternalProcess): Promise<void>
export function useInput(handler: InputHandler, options?: { readonly isActive?: boolean }): void
export function useSelection(): {
readonly copySelection: () => string
readonly copySelectionNoClear: () => string
readonly copySelection: () => Promise<string>
readonly copySelectionNoClear: () => Promise<string>
readonly clearSelection: () => void
readonly hasSelection: () => boolean
readonly getState: () => unknown

View file

@ -32,7 +32,6 @@ export type GatewayEventName =
| "sudo.request"
| "secret.request"
| "background.complete"
| "btw.complete"
| "error"
| "skin.changed"
| (string & {});

View file

@ -269,17 +269,17 @@ export default function ChatPage() {
const payload = data.slice(semi + 1);
if (payload === "?" || payload === "") return false; // read/clear — ignore
try {
// atob returns a binary string (one byte per char); we need UTF-8
// decode so multi-byte codepoints (≥, →, emoji, CJK) round-trip
// correctly. Without this step, the three UTF-8 bytes of `≥`
// would land in the clipboard as the three separate Latin-1
// characters `â‰¥`.
const binary = atob(payload);
const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
const text = new TextDecoder("utf-8").decode(bytes);
navigator.clipboard.writeText(text).catch(() => {});
} catch {
// Malformed base64 — silently drop.
navigator.clipboard.writeText(text).catch((err) => {
// Most common reason: the Clipboard API requires a user gesture.
// This can fail when the OSC 52 response arrives outside the
// original keydown event's activation. Log to aid debugging.
console.warn("[dashboard clipboard] OSC 52 write failed:", err.message);
});
} catch (e) {
console.warn("[dashboard clipboard] malformed OSC 52 payload");
}
return true;
});
@ -290,16 +290,31 @@ export default function ChatPage() {
term.attachCustomKeyEventHandler((ev) => {
if (ev.type !== "keydown") return true;
// Copy: Cmd+C on macOS, Ctrl+Shift+C on other platforms. Bare Ctrl+C
// is reserved for SIGINT to the TUI child — matches xterm / gnome-terminal /
// konsole / Windows Terminal. Ctrl+Shift+C only copies if a selection exists;
// without a selection it passes through to the TUI so agents can still
// react to the keypress.
// Paste: Cmd+Shift+V on macOS, Ctrl+Shift+V on others.
const copyModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
const pasteModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
if (copyModifier && ev.key.toLowerCase() === "c") {
const sel = term.getSelection();
if (sel) {
navigator.clipboard.writeText(sel).catch(() => {});
// Direct writeText inside the keydown handler preserves the user
// gesture — async round-trips through OSC 52 can lose activation
// and fail with "Document is not focused".
navigator.clipboard.writeText(sel).catch((err) => {
console.warn("[dashboard clipboard] direct copy failed:", err.message);
});
// Clear xterm.js's highlight after copy (matches gnome-terminal).
term.clearSelection();
ev.preventDefault();
return false;
}
// No selection → fall through so the TUI receives Ctrl+Shift+C
// (or the bare ev if the user used a different modifier).
}
if (pasteModifier && ev.key.toLowerCase() === "v") {
@ -308,7 +323,9 @@ export default function ChatPage() {
.then((text) => {
if (text) term.paste(text);
})
.catch(() => {});
.catch((err) => {
console.warn("[dashboard clipboard] paste failed:", err.message);
});
ev.preventDefault();
return false;
}

View file

@ -41,6 +41,7 @@ hermes [global-options] <command> [subcommand/options]
| `hermes gateway` | Run or manage the messaging gateway service. |
| `hermes setup` | Interactive setup wizard for all or part of the configuration. |
| `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
| `hermes slack` | Slack helpers (currently: generate the app manifest with every command as a native slash command). |
| `hermes auth` | Manage credentials — add, list, remove, reset, set strategy. Handles OAuth flows for Codex/Nous/Anthropic. |
| `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. |
| `hermes status` | Show agent, auth, and platform status. |
@ -221,6 +222,33 @@ hermes whatsapp
Runs the WhatsApp pairing/setup flow, including mode selection and QR-code pairing.
## `hermes slack`
```bash
hermes slack manifest # print manifest to stdout
hermes slack manifest --write # write to ~/.hermes/slack-manifest.json
hermes slack manifest --slashes-only # just the features.slash_commands array
```
Generates a Slack app manifest that registers every gateway command in
`COMMAND_REGISTRY` (`/btw`, `/stop`, `/model`, …) as a first-class
Slack slash command — matching Discord and Telegram parity. Paste the
output into your Slack app config at
[https://api.slack.com/apps](https://api.slack.com/apps) → your app →
**Features → App Manifest → Edit**, then **Save**. Slack prompts for
reinstall if scopes or slash commands changed.
| Flag | Default | Purpose |
|------|---------|---------|
| `--write [PATH]` | stdout | Write to a file instead of stdout. Bare `--write` writes `$HERMES_HOME/slack-manifest.json`. |
| `--name NAME` | `Hermes` | Bot display name in Slack. |
| `--description DESC` | default blurb | Bot description shown in the Slack app directory. |
| `--slashes-only` | off | Emit only `features.slash_commands` for merging into a manually-maintained manifest. |
Run `hermes slack manifest --write` again after `hermes update` to pick
up any new commands.
## `hermes login` / `hermes logout` *(Deprecated)*
:::caution

View file

@ -0,0 +1,103 @@
---
sidebar_position: 11
title: Model Catalog
description: Remotely-hosted manifest driving curated model picker lists for OpenRouter and Nous Portal.
---
# Model Catalog
Hermes fetches curated model lists for **OpenRouter** and **Nous Portal** from a JSON manifest hosted alongside the docs site. This lets maintainers update picker lists without shipping a new `hermes-agent` release.
When the manifest is unreachable (offline, network blocked, hosting failure), Hermes silently falls back to the in-repo snapshot that ships with the CLI. The manifest never breaks the picker — worst case you see whatever list was bundled with your installed version.
## Live manifest URL
```
https://hermes-agent.nousresearch.com/docs/api/model-catalog.json
```
Published on every merge to `main` via the existing `deploy-site.yml` GitHub Pages pipeline. The source of truth lives in the repo at `website/static/api/model-catalog.json`.
## Schema
```json
{
"version": 1,
"updated_at": "2026-04-25T22:00:00Z",
"metadata": {},
"providers": {
"openrouter": {
"metadata": {},
"models": [
{"id": "moonshotai/kimi-k2.6", "description": "recommended", "metadata": {}},
{"id": "openai/gpt-5.4", "description": ""}
]
},
"nous": {
"metadata": {},
"models": [
{"id": "anthropic/claude-opus-4.7"},
{"id": "moonshotai/kimi-k2.6"}
]
}
}
}
```
Field notes:
- **`version`** — integer schema version. Future schemas bump this; Hermes refuses manifests with versions it doesn't understand and falls back to the hardcoded snapshot.
- **`metadata`** — free-form dict at the manifest, provider, and model level. Any keys. Hermes ignores unknown fields, so you can annotate entries (`"tier": "paid"`, `"tags": [...]`, etc.) without coordinating a schema change.
- **`description`** — OpenRouter-only. Drives picker badge text (`"recommended"`, `"free"`, or empty). Nous Portal doesn't use this — free-tier gating is determined live from the Portal's pricing endpoint.
- **Pricing and context length** are NOT in the manifest. Those come from live provider APIs (`/v1/models` endpoints, models.dev) at fetch time.
## Fetch behavior
| When | What happens |
|---|---|
| `/model` or `hermes model` | Fetches if disk cache is stale, else uses cache |
| Disk cache fresh (< TTL) | No network hit |
| Network failure with cache | Silent fallback to cache, one log line |
| Network failure, no cache | Silent fallback to in-repo snapshot |
| Manifest fails schema validation | Treated as unreachable |
Cache location: `~/.hermes/cache/model_catalog.json`.
## Config
```yaml
model_catalog:
enabled: true
url: https://hermes-agent.nousresearch.com/docs/api/model-catalog.json
ttl_hours: 24
providers: {}
```
Set `enabled: false` to disable remote fetch entirely and always use the in-repo snapshot.
### Per-provider override URLs
Third parties can self-host their own curation list using the same schema. Point a provider at a custom URL:
```yaml
model_catalog:
providers:
openrouter:
url: https://example.com/my-openrouter-curation.json
```
The overriding manifest only needs to populate the provider block(s) it cares about. Other providers continue to resolve against the master URL.
## Updating the manifest
Maintainers:
```bash
# Re-generate from the in-repo hardcoded lists (keeps manifest in sync after
# editing OPENROUTER_MODELS or _PROVIDER_MODELS["nous"] in hermes_cli/models.py).
python scripts/build_model_catalog.py
```
Then open a PR against `main` with the resulting change to `website/static/api/model-catalog.json`. The docs site auto-deploys on merge and the new manifest is live within a few minutes.
You can also hand-edit the JSON directly for fine-grained metadata changes that don't belong in the in-repo snapshot — the generator script is a convenience, not the single source of truth.

View file

@ -36,8 +36,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
| `/resume [name]` | Resume a previously-named session |
| `/status` | Show session info |
| `/agents` (alias: `/tasks`) | Show active agents and running tasks across the current session. |
| `/background <prompt>` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
| `/btw <question>` | Ephemeral side question using session context (no tools, not persisted). Useful for quick clarifications without affecting the conversation history. |
| `/background <prompt>` (aliases: `/bg`, `/btw`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
| `/branch [name]` (alias: `/fork`) | Branch the current session (explore a different path) |
### Configuration

View file

@ -242,6 +242,10 @@ You can also change it inside the CLI:
/busy status
```
:::tip First-touch hint
The very first time you press Enter while Hermes is working, Hermes prints a one-line reminder explaining the `/busy` knob (`"(tip) Your message interrupted the current run…"`). It only fires once per install — a flag in `config.yaml` under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again.
:::
### Suspending to Background
On Unix systems, press **`Ctrl+Z`** to suspend Hermes to the background — just like any terminal process. The shell prints a confirmation:

View file

@ -146,7 +146,9 @@ terminal:
**Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle).
**Container lifecycle:** Each session starts a long-lived container (`docker run -d ... sleep 2h`). Commands run via `docker exec` with a login shell. On cleanup, the container is stopped and removed.
**Container lifecycle:** Hermes reuses a single long-lived container (`docker run -d ... sleep 2h`) for every terminal and file-tool call made by the top-level agent, across sessions, `/new`, and `/reset`, for the lifetime of the Hermes process. Commands run via `docker exec` with a login shell, so working-directory changes, installed packages, and files in `/workspace` all persist from one tool call to the next. The container is stopped and removed on Hermes shutdown (or when the idle-sweep reclaims it).
Subagents (`delegate_task`) and RL rollouts get their own isolated containers keyed by `task_id` — only the top-level agent shares the `default` container.
**Security hardening:**
- `--cap-drop ALL` with only `DAC_OVERRIDE`, `CHOWN`, `FOWNER` added back

View file

@ -86,6 +86,40 @@ FIRECRAWL_API_URL=http://localhost:3002
FIRECRAWL_BROWSER_TTL=600
```
### Hybrid routing: cloud for public URLs, local for LAN/localhost
When a cloud provider is configured, Hermes auto-spawns a **local Chromium sidecar**
for URLs that resolve to a private/loopback/LAN address (`localhost`, `127.0.0.1`,
`192.168.x.x`, `10.x.x.x`, `172.16-31.x.x`, `*.local`, `*.lan`, `*.internal`,
IPv6 loopback `::1`, link-local `169.254.x.x`). Public URLs continue to use the
cloud provider in the same conversation.
This solves the common "I'm developing locally but using Browserbase" workflow —
the agent can screenshot your dashboard at `http://localhost:3000` AND scrape
`https://github.com` without you switching providers or disabling the SSRF guard.
The cloud provider never sees the private URL.
The feature is **on by default**. To disable it (all URLs go to the configured
cloud provider, as before):
```yaml
# ~/.hermes/config.yaml
browser:
cloud_provider: browserbase
auto_local_for_private_urls: false
```
With auto-routing disabled, private URLs are rejected with
`"Blocked: URL targets a private or internal address"` unless you also set
`browser.allow_private_urls: true` (which lets the cloud provider attempt them —
usually won't work since Browserbase etc. can't reach your LAN).
Requirements: the local sidecar uses the same `agent-browser` CLI as pure local
mode, so you need it installed (`hermes setup tools → Browser Automation`
auto-installs it). Post-navigation redirects from a public URL onto a private
address are still blocked (you can't use a redirect-to-internal trick to reach
your LAN through the public path).
### Camofox local mode
[Camofox](https://github.com/jo-inc/camofox-browser) is a self-hosted Node.js server wrapping Camoufox (a Firefox fork with C++ fingerprint spoofing). It provides local anti-detection browsing without cloud dependencies.

View file

@ -219,6 +219,17 @@ Send any message while the agent is working to interrupt it. Key behaviors:
- **Multiple messages are combined** — messages sent during interruption are joined into one prompt
- **`/stop` command** — interrupts without queuing a follow-up message
### Queue vs interrupt (busy-input mode)
By default, messaging a busy agent interrupts it. To switch the whole install so follow-ups queue behind the current task instead, set:
```yaml
display:
busy_input_mode: queue # default: interrupt
```
The first time you message a busy agent on any platform, Hermes appends a one-line reminder to the busy-ack explaining the knob (`"💡 First-time tip — …"`). The reminder fires once per install — a flag under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again.
## Tool Progress Notifications
Control how much tool activity is displayed in `~/.hermes/config.yaml`:

View file

@ -29,13 +29,36 @@ the steps below.
## Step 1: Create a Slack App
The fastest path is to paste a manifest Hermes generates for you. It
declares every built-in slash command (`/btw`, `/stop`, `/model`, …),
every required OAuth scope, every event subscription, and enables Socket
Mode — all at once.
### Option A: From a Hermes-generated manifest (recommended)
1. Generate the manifest:
```bash
hermes slack manifest --write
```
This writes `~/.hermes/slack-manifest.json` and prints paste-in
instructions.
2. Go to [https://api.slack.com/apps](https://api.slack.com/apps) →
**Create New App** → **From an app manifest**
3. Pick your workspace, paste the JSON contents, review, click **Next**
→ **Create**
4. Skip ahead to **Step 6: Install App to Workspace**. The manifest
handled scopes, events, and slash commands for you.
### Option B: From scratch (manual)
1. Go to [https://api.slack.com/apps](https://api.slack.com/apps)
2. Click **Create New App**
3. Choose **From scratch**
4. Enter an app name (e.g., "Hermes Agent") and select your workspace
5. Click **Create App**
You'll land on the app's **Basic Information** page.
You'll land on the app's **Basic Information** page. Continue with
Steps 2–6 below.
---
@ -203,6 +226,57 @@ The bot will **not** automatically join channels. You must invite it to each cha
---
## Slash Commands
Every Hermes command (`/btw`, `/stop`, `/new`, `/model`, `/help`, ...)
is a native Slack slash command — exactly the way they work on Telegram
and Discord. Type `/` in Slack and the autocomplete picker lists every
Hermes command with its description.
Under the hood: Hermes ships with a generated Slack app manifest (see
Step 1, Option A) that declares every command in
[`COMMAND_REGISTRY`](https://github.com/NousResearch/hermes-agent/blob/main/hermes_cli/commands.py)
as a slash command. In Socket Mode, Slack routes the command event
through the WebSocket regardless of the manifest's `url` field.
### Refreshing slash commands after updates
When Hermes adds new commands (e.g. after `hermes update`), regenerate
the manifest and update your Slack app:
```bash
hermes slack manifest --write
```
Then in Slack:
1. Open [https://api.slack.com/apps](https://api.slack.com/apps) →
your Hermes app
2. **Features → App Manifest → Edit**
3. Paste the new contents of `~/.hermes/slack-manifest.json`
4. **Save**. Slack will prompt to reinstall the app if scopes or slash
commands changed.
### Legacy `/hermes <subcommand>` still works
For backward compatibility with older manifests, you can still type
`/hermes btw run the tests` — Hermes routes it the same way as `/btw
run the tests`. Free-form questions also work: `/hermes what's the
weather?` is treated as a regular message.
### Advanced: emit only the slash-commands array
If you maintain your Slack manifest by hand and just want the slash
command list:
```bash
hermes slack manifest --slashes-only > /tmp/slashes.json
```
Paste that array into the `features.slash_commands` key of your
existing manifest.
---
## How the Bot Responds
Understanding how Hermes behaves in different contexts:

View file

@ -298,7 +298,6 @@ Type these during an interactive chat session.
### Utility
```
/branch (/fork) Branch the current session
/btw Ephemeral side question (doesn't interrupt main task)
/fast Toggle priority/fast processing
/browser Open CDP browser connection
/history Show conversation history (CLI)

View file

@ -26,7 +26,6 @@ CATEGORY_LABELS = {
"dogfood": "Dogfood",
"domain": "Domain",
"email": "Email",
"feeds": "Feeds",
"gaming": "Gaming",
"gifs": "GIFs",
"github": "GitHub",

View file

@ -613,6 +613,7 @@ const sidebars: SidebarsConfig = {
'reference/tools-reference',
'reference/toolsets-reference',
'reference/mcp-config-reference',
'reference/model-catalog',
'reference/skills-catalog',
'reference/optional-skills-catalog',
'reference/faq',

View file

@ -0,0 +1,259 @@
{
"version": 1,
"updated_at": "2026-04-26T12:34:42Z",
"metadata": {
"source": "hermes-agent repo",
"docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
},
"providers": {
"openrouter": {
"metadata": {
"display_name": "OpenRouter",
"note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing."
},
"models": [
{
"id": "moonshotai/kimi-k2.6",
"description": "recommended"
},
{
"id": "deepseek/deepseek-v4-pro",
"description": ""
},
{
"id": "deepseek/deepseek-v4-flash",
"description": ""
},
{
"id": "anthropic/claude-opus-4.7",
"description": ""
},
{
"id": "anthropic/claude-opus-4.6",
"description": ""
},
{
"id": "anthropic/claude-sonnet-4.6",
"description": ""
},
{
"id": "qwen/qwen3.6-plus",
"description": ""
},
{
"id": "anthropic/claude-sonnet-4.5",
"description": ""
},
{
"id": "anthropic/claude-haiku-4.5",
"description": ""
},
{
"id": "openrouter/elephant-alpha",
"description": "free"
},
{
"id": "openai/gpt-5.5",
"description": ""
},
{
"id": "openai/gpt-5.4-mini",
"description": ""
},
{
"id": "xiaomi/mimo-v2.5-pro",
"description": ""
},
{
"id": "xiaomi/mimo-v2.5",
"description": ""
},
{
"id": "openai/gpt-5.3-codex",
"description": ""
},
{
"id": "google/gemini-3-pro-image-preview",
"description": ""
},
{
"id": "google/gemini-3-flash-preview",
"description": ""
},
{
"id": "google/gemini-3.1-pro-preview",
"description": ""
},
{
"id": "google/gemini-3.1-flash-lite-preview",
"description": ""
},
{
"id": "qwen/qwen3.5-plus-02-15",
"description": ""
},
{
"id": "qwen/qwen3.5-35b-a3b",
"description": ""
},
{
"id": "stepfun/step-3.5-flash",
"description": ""
},
{
"id": "minimax/minimax-m2.7",
"description": ""
},
{
"id": "minimax/minimax-m2.5",
"description": ""
},
{
"id": "minimax/minimax-m2.5:free",
"description": "free"
},
{
"id": "z-ai/glm-5.1",
"description": ""
},
{
"id": "z-ai/glm-5v-turbo",
"description": ""
},
{
"id": "z-ai/glm-5-turbo",
"description": ""
},
{
"id": "x-ai/grok-4.20",
"description": ""
},
{
"id": "nvidia/nemotron-3-super-120b-a12b",
"description": ""
},
{
"id": "nvidia/nemotron-3-super-120b-a12b:free",
"description": "free"
},
{
"id": "arcee-ai/trinity-large-preview:free",
"description": "free"
},
{
"id": "arcee-ai/trinity-large-thinking",
"description": ""
},
{
"id": "openai/gpt-5.5-pro",
"description": ""
},
{
"id": "openai/gpt-5.4-nano",
"description": ""
}
]
},
"nous": {
"metadata": {
"display_name": "Nous Portal",
"note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest."
},
"models": [
{
"id": "moonshotai/kimi-k2.6"
},
{
"id": "deepseek/deepseek-v4-pro"
},
{
"id": "deepseek/deepseek-v4-flash"
},
{
"id": "xiaomi/mimo-v2.5-pro"
},
{
"id": "xiaomi/mimo-v2.5"
},
{
"id": "anthropic/claude-opus-4.7"
},
{
"id": "anthropic/claude-opus-4.6"
},
{
"id": "anthropic/claude-sonnet-4.6"
},
{
"id": "anthropic/claude-sonnet-4.5"
},
{
"id": "anthropic/claude-haiku-4.5"
},
{
"id": "openai/gpt-5.5"
},
{
"id": "openai/gpt-5.4-mini"
},
{
"id": "openai/gpt-5.3-codex"
},
{
"id": "google/gemini-3-pro-preview"
},
{
"id": "google/gemini-3-flash-preview"
},
{
"id": "google/gemini-3.1-pro-preview"
},
{
"id": "google/gemini-3.1-flash-lite-preview"
},
{
"id": "qwen/qwen3.5-plus-02-15"
},
{
"id": "qwen/qwen3.5-35b-a3b"
},
{
"id": "stepfun/step-3.5-flash"
},
{
"id": "minimax/minimax-m2.7"
},
{
"id": "minimax/minimax-m2.5"
},
{
"id": "minimax/minimax-m2.5:free"
},
{
"id": "z-ai/glm-5.1"
},
{
"id": "z-ai/glm-5v-turbo"
},
{
"id": "z-ai/glm-5-turbo"
},
{
"id": "x-ai/grok-4.20-beta"
},
{
"id": "nvidia/nemotron-3-super-120b-a12b"
},
{
"id": "arcee-ai/trinity-large-thinking"
},
{
"id": "openai/gpt-5.5-pro"
},
{
"id": "openai/gpt-5.4-nano"
}
]
}
}
}