Merge origin/main, resolve conflicts (self._base_url_lower)

This commit is contained in:
Test 2026-03-18 04:09:00 -07:00
commit e7844e9c8d
54 changed files with 2281 additions and 179 deletions

View file

@ -55,8 +55,8 @@ logger = logging.getLogger(__name__)
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"zai": "glm-4.5-flash",
"kimi-coding": "kimi-k2-turbo-preview",
"minimax": "MiniMax-M2.5-highspeed",
"minimax-cn": "MiniMax-M2.5-highspeed",
"minimax": "MiniMax-M2.7-highspeed",
"minimax-cn": "MiniMax-M2.7-highspeed",
"anthropic": "claude-haiku-4-5-20251001",
"ai-gateway": "google/gemini-3-flash",
"opencode-zen": "gemini-3-flash",

View file

@ -45,16 +45,18 @@ class ContextCompressor:
quiet_mode: bool = False,
summary_model_override: str = None,
base_url: str = "",
api_key: str = "",
):
self.model = model
self.base_url = base_url
self.api_key = api_key
self.threshold_percent = threshold_percent
self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens
self.quiet_mode = quiet_mode
self.context_length = get_model_context_length(model, base_url=base_url)
self.context_length = get_model_context_length(model, base_url=base_url, api_key=api_key)
self.threshold_tokens = int(self.context_length * threshold_percent)
self.compression_count = 0
self._context_probed = False # True after a step-down from context error

View file

@ -10,6 +10,7 @@ import re
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
import requests
import yaml
@ -21,6 +22,9 @@ logger = logging.getLogger(__name__)
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
_model_metadata_cache_time: float = 0
_MODEL_CACHE_TTL = 3600
_endpoint_model_metadata_cache: Dict[str, Dict[str, Dict[str, Any]]] = {}
_endpoint_model_metadata_cache_time: Dict[str, float] = {}
_ENDPOINT_MODEL_CACHE_TTL = 300
# Descending tiers for context length probing when the model is unknown.
# We start high and step down on context-length errors until one works.
@ -77,6 +81,8 @@ DEFAULT_CONTEXT_LENGTHS = {
"kimi-k2-thinking-turbo": 262144,
"kimi-k2-turbo-preview": 262144,
"kimi-k2-0905-preview": 131072,
"MiniMax-M2.7": 204800,
"MiniMax-M2.7-highspeed": 204800,
"MiniMax-M2.5": 204800,
"MiniMax-M2.5-highspeed": 204800,
"MiniMax-M2.1": 204800,
@ -121,6 +127,128 @@ DEFAULT_CONTEXT_LENGTHS = {
"qwen-vl-max": 32768,
}
_CONTEXT_LENGTH_KEYS = (
"context_length",
"context_window",
"max_context_length",
"max_position_embeddings",
"max_model_len",
"max_input_tokens",
"max_sequence_length",
"max_seq_len",
)
_MAX_COMPLETION_KEYS = (
"max_completion_tokens",
"max_output_tokens",
"max_tokens",
)
def _normalize_base_url(base_url: str) -> str:
return (base_url or "").strip().rstrip("/")
def _is_openrouter_base_url(base_url: str) -> bool:
return "openrouter.ai" in _normalize_base_url(base_url).lower()
def _is_custom_endpoint(base_url: str) -> bool:
normalized = _normalize_base_url(base_url)
return bool(normalized) and not _is_openrouter_base_url(normalized)
def _is_known_provider_base_url(base_url: str) -> bool:
normalized = _normalize_base_url(base_url)
if not normalized:
return False
parsed = urlparse(normalized if "://" in normalized else f"https://{normalized}")
host = parsed.netloc.lower() or parsed.path.lower()
known_hosts = (
"api.openai.com",
"chatgpt.com",
"api.anthropic.com",
"api.z.ai",
"api.moonshot.ai",
"api.kimi.com",
"api.minimax",
)
return any(known_host in host for known_host in known_hosts)
def _iter_nested_dicts(value: Any):
if isinstance(value, dict):
yield value
for nested in value.values():
yield from _iter_nested_dicts(nested)
elif isinstance(value, list):
for item in value:
yield from _iter_nested_dicts(item)
def _coerce_reasonable_int(value: Any, minimum: int = 1024, maximum: int = 10_000_000) -> Optional[int]:
try:
if isinstance(value, bool):
return None
if isinstance(value, str):
value = value.strip().replace(",", "")
result = int(value)
except (TypeError, ValueError):
return None
if minimum <= result <= maximum:
return result
return None
def _extract_first_int(payload: Dict[str, Any], keys: tuple[str, ...]) -> Optional[int]:
keyset = {key.lower() for key in keys}
for mapping in _iter_nested_dicts(payload):
for key, value in mapping.items():
if str(key).lower() not in keyset:
continue
coerced = _coerce_reasonable_int(value)
if coerced is not None:
return coerced
return None
def _extract_context_length(payload: Dict[str, Any]) -> Optional[int]:
return _extract_first_int(payload, _CONTEXT_LENGTH_KEYS)
def _extract_max_completion_tokens(payload: Dict[str, Any]) -> Optional[int]:
return _extract_first_int(payload, _MAX_COMPLETION_KEYS)
def _extract_pricing(payload: Dict[str, Any]) -> Dict[str, Any]:
alias_map = {
"prompt": ("prompt", "input", "input_cost_per_token", "prompt_token_cost"),
"completion": ("completion", "output", "output_cost_per_token", "completion_token_cost"),
"request": ("request", "request_cost"),
"cache_read": ("cache_read", "cached_prompt", "input_cache_read", "cache_read_cost_per_token"),
"cache_write": ("cache_write", "cache_creation", "input_cache_write", "cache_write_cost_per_token"),
}
for mapping in _iter_nested_dicts(payload):
normalized = {str(key).lower(): value for key, value in mapping.items()}
if not any(any(alias in normalized for alias in aliases) for aliases in alias_map.values()):
continue
pricing: Dict[str, Any] = {}
for target, aliases in alias_map.items():
for alias in aliases:
if alias in normalized and normalized[alias] not in (None, ""):
pricing[target] = normalized[alias]
break
if pricing:
return pricing
return {}
def _add_model_aliases(cache: Dict[str, Dict[str, Any]], model_id: str, entry: Dict[str, Any]) -> None:
cache[model_id] = entry
if "/" in model_id:
bare_model = model_id.split("/", 1)[1]
cache.setdefault(bare_model, entry)
def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
"""Fetch model metadata from OpenRouter (cached for 1 hour)."""
@ -137,15 +265,16 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
cache = {}
for model in data.get("data", []):
model_id = model.get("id", "")
cache[model_id] = {
entry = {
"context_length": model.get("context_length", 128000),
"max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096),
"name": model.get("name", model_id),
"pricing": model.get("pricing", {}),
}
_add_model_aliases(cache, model_id, entry)
canonical = model.get("canonical_slug", "")
if canonical and canonical != model_id:
cache[canonical] = cache[model_id]
_add_model_aliases(cache, canonical, entry)
_model_metadata_cache = cache
_model_metadata_cache_time = time.time()
@ -157,6 +286,75 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
return _model_metadata_cache or {}
def fetch_endpoint_model_metadata(
base_url: str,
api_key: str = "",
force_refresh: bool = False,
) -> Dict[str, Dict[str, Any]]:
"""Fetch model metadata from an OpenAI-compatible ``/models`` endpoint.
This is used for explicit custom endpoints where hardcoded global model-name
defaults are unreliable. Results are cached in memory per base URL.
"""
normalized = _normalize_base_url(base_url)
if not normalized or _is_openrouter_base_url(normalized):
return {}
if not force_refresh:
cached = _endpoint_model_metadata_cache.get(normalized)
cached_at = _endpoint_model_metadata_cache_time.get(normalized, 0)
if cached is not None and (time.time() - cached_at) < _ENDPOINT_MODEL_CACHE_TTL:
return cached
candidates = [normalized]
if normalized.endswith("/v1"):
alternate = normalized[:-3].rstrip("/")
else:
alternate = normalized + "/v1"
if alternate and alternate not in candidates:
candidates.append(alternate)
headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
last_error: Optional[Exception] = None
for candidate in candidates:
url = candidate.rstrip("/") + "/models"
try:
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
payload = response.json()
cache: Dict[str, Dict[str, Any]] = {}
for model in payload.get("data", []):
if not isinstance(model, dict):
continue
model_id = model.get("id")
if not model_id:
continue
entry: Dict[str, Any] = {"name": model.get("name", model_id)}
context_length = _extract_context_length(model)
if context_length is not None:
entry["context_length"] = context_length
max_completion_tokens = _extract_max_completion_tokens(model)
if max_completion_tokens is not None:
entry["max_completion_tokens"] = max_completion_tokens
pricing = _extract_pricing(model)
if pricing:
entry["pricing"] = pricing
_add_model_aliases(cache, model_id, entry)
_endpoint_model_metadata_cache[normalized] = cache
_endpoint_model_metadata_cache_time[normalized] = time.time()
return cache
except Exception as exc:
last_error = exc
if last_error:
logger.debug("Failed to fetch model metadata from %s/models: %s", normalized, last_error)
_endpoint_model_metadata_cache[normalized] = {}
_endpoint_model_metadata_cache_time[normalized] = time.time()
return {}
def _get_context_cache_path() -> Path:
"""Return path to the persistent context length cache file."""
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
@ -241,14 +439,15 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
return None
def get_model_context_length(model: str, base_url: str = "") -> int:
def get_model_context_length(model: str, base_url: str = "", api_key: str = "") -> int:
"""Get the context length for a model.
Resolution order:
1. Persistent cache (previously discovered via probing)
2. OpenRouter API metadata
3. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match)
4. First probe tier (2M) will be narrowed on first context error
2. Active endpoint metadata (/models for explicit custom endpoints)
3. OpenRouter API metadata
4. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match for hosted routes only)
5. First probe tier (2M) will be narrowed on first context error
"""
# 1. Check persistent cache (model+provider)
if base_url:
@ -256,19 +455,31 @@ def get_model_context_length(model: str, base_url: str = "") -> int:
if cached is not None:
return cached
# 2. OpenRouter API metadata
# 2. Active endpoint metadata for explicit custom routes
if _is_custom_endpoint(base_url):
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
if model in endpoint_metadata:
context_length = endpoint_metadata[model].get("context_length")
if isinstance(context_length, int):
return context_length
if not _is_known_provider_base_url(base_url):
# Explicit third-party endpoints should not borrow fuzzy global
# defaults from unrelated providers with similarly named models.
return CONTEXT_PROBE_TIERS[0]
# 3. OpenRouter API metadata
metadata = fetch_model_metadata()
if model in metadata:
return metadata[model].get("context_length", 128000)
# 3. Hardcoded defaults (fuzzy match — longest key first for specificity)
# 4. Hardcoded defaults (fuzzy match — longest key first for specificity)
for default_model, length in sorted(
DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
):
if default_model in model or model in default_model:
return length
# 4. Unknown model — start at highest probe tier
# 5. Unknown model — start at highest probe tier
return CONTEXT_PROBE_TIERS[0]

View file

@ -330,28 +330,34 @@ def build_skills_system_prompt(
# Each entry: (skill_name, description)
# Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
# -> category "mlops/training", skill "axolotl"
# Load disabled skill names once for the entire scan
try:
from tools.skills_tool import _get_disabled_skill_names
disabled = _get_disabled_skill_names()
except Exception:
disabled = set()
skills_by_category: dict[str, list[tuple[str, str]]] = {}
for skill_file in skills_dir.rglob("SKILL.md"):
is_compatible, _, desc = _parse_skill_file(skill_file)
is_compatible, frontmatter, desc = _parse_skill_file(skill_file)
if not is_compatible:
continue
# Skip skills whose conditional activation rules exclude them
conditions = _read_skill_conditions(skill_file)
if not _skill_should_show(conditions, available_tools, available_toolsets):
continue
rel_path = skill_file.relative_to(skills_dir)
parts = rel_path.parts
if len(parts) >= 2:
# Category is everything between skills_dir and the skill folder
# e.g. parts = ("mlops", "training", "axolotl", "SKILL.md")
# → category = "mlops/training", skill_name = "axolotl"
# e.g. parts = ("github", "github-auth", "SKILL.md")
# → category = "github", skill_name = "github-auth"
skill_name = parts[-2]
category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
else:
category = "general"
skill_name = skill_file.parent.name
# Respect user's disabled skills config
fm_name = frontmatter.get("name", skill_name)
if fm_name in disabled or skill_name in disabled:
continue
# Skip skills whose conditional activation rules exclude them
conditions = _read_skill_conditions(skill_file)
if not _skill_should_show(conditions, available_tools, available_toolsets):
continue
skills_by_category.setdefault(category, []).append((skill_name, desc))
if not skills_by_category:

View file

@ -157,9 +157,10 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
global _skill_commands
_skill_commands = {}
try:
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform
from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
if not SKILLS_DIR.exists():
return _skill_commands
disabled = _get_disabled_skill_names()
for skill_md in SKILLS_DIR.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
@ -170,6 +171,9 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
if not skill_matches_platform(frontmatter):
continue
name = frontmatter.get('name', skill_md.parent.name)
# Respect user's disabled skills config
if name in disabled:
continue
description = frontmatter.get('description', '')
if not description:
for line in body.strip().split('\n'):

View file

@ -5,7 +5,7 @@ from datetime import datetime, timezone
from decimal import Decimal
from typing import Any, Dict, Literal, Optional
from agent.model_metadata import fetch_model_metadata
from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
@ -335,8 +335,21 @@ def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]
def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
metadata = fetch_model_metadata()
model_id = route.model
return _pricing_entry_from_metadata(
fetch_model_metadata(),
route.model,
source_url="https://openrouter.ai/docs/api/api-reference/models/get-models",
pricing_version="openrouter-models-api",
)
def _pricing_entry_from_metadata(
metadata: Dict[str, Dict[str, Any]],
model_id: str,
*,
source_url: str,
pricing_version: str,
) -> Optional[PricingEntry]:
if model_id not in metadata:
return None
pricing = metadata[model_id].get("pricing") or {}
@ -355,6 +368,7 @@ def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
)
if prompt is None and completion is None and request is None:
return None
def _per_token_to_per_million(value: Optional[Decimal]) -> Optional[Decimal]:
if value is None:
return None
@ -367,8 +381,8 @@ def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
cache_write_cost_per_million=_per_token_to_per_million(cache_write),
request_cost=request,
source="provider_models_api",
source_url="https://openrouter.ai/docs/api/api-reference/models/get-models",
pricing_version="openrouter-models-api",
source_url=source_url,
pricing_version=pricing_version,
fetched_at=_UTC_NOW(),
)
@ -377,6 +391,7 @@ def get_pricing_entry(
model_name: str,
provider: Optional[str] = None,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[PricingEntry]:
route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
if route.billing_mode == "subscription_included":
@ -390,6 +405,15 @@ def get_pricing_entry(
)
if route.provider == "openrouter":
return _openrouter_pricing_entry(route)
if route.base_url:
entry = _pricing_entry_from_metadata(
fetch_endpoint_model_metadata(route.base_url, api_key=api_key or ""),
route.model,
source_url=f"{route.base_url.rstrip('/')}/models",
pricing_version="openai-compatible-models-api",
)
if entry:
return entry
return _lookup_official_docs_pricing(route)
@ -460,6 +484,7 @@ def estimate_usage_cost(
*,
provider: Optional[str] = None,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> CostResult:
route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
if route.billing_mode == "subscription_included":
@ -471,7 +496,7 @@ def estimate_usage_cost(
pricing_version="included-route",
)
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
if not entry:
return CostResult(amount_usd=None, status="unknown", source="none", label="n/a")
@ -536,6 +561,7 @@ def has_known_pricing(
model_name: str,
provider: Optional[str] = None,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> bool:
"""Check whether we have pricing data for this model+route.
@ -545,7 +571,7 @@ def has_known_pricing(
route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
if route.billing_mode == "subscription_included":
return True
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
return entry is not None
@ -553,13 +579,14 @@ def get_pricing(
model_name: str,
provider: Optional[str] = None,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Dict[str, float]:
"""Backward-compatible thin wrapper for legacy callers.
Returns only non-cache input/output fields when a pricing entry exists.
Unknown routes return zeroes.
"""
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
if not entry:
return {"input": 0.0, "output": 0.0}
return {
@ -575,6 +602,7 @@ def estimate_cost_usd(
*,
provider: Optional[str] = None,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> float:
"""Backward-compatible helper for legacy callers.
@ -586,6 +614,7 @@ def estimate_cost_usd(
CanonicalUsage(input_tokens=input_tokens, output_tokens=output_tokens),
provider=provider,
base_url=base_url,
api_key=api_key,
)
return float(result.amount_usd or _ZERO)

18
cli.py
View file

@ -1217,6 +1217,9 @@ class HermesCLI:
self._voice_tts_done = threading.Event()
self._voice_tts_done.set()
# Status bar visibility (toggled via /statusbar)
self._status_bar_visible = True
# Background task tracking: {task_id: threading.Thread}
self._background_tasks: Dict[str, threading.Thread] = {}
self._background_task_counter = 0
@ -1324,6 +1327,8 @@ class HermesCLI:
return f"{self.model if getattr(self, 'model', None) else 'Hermes'}"
def _get_status_bar_fragments(self):
if not self._status_bar_visible:
return []
try:
snapshot = self._get_status_bar_snapshot()
width = shutil.get_terminal_size((80, 24)).columns
@ -3575,6 +3580,10 @@ class HermesCLI:
self._handle_skills_command(cmd_original)
elif canonical == "platforms":
self._show_gateway_status()
elif canonical == "statusbar":
self._status_bar_visible = not self._status_bar_visible
state = "visible" if self._status_bar_visible else "hidden"
self.console.print(f" Status bar {state}")
elif canonical == "verbose":
self._toggle_verbose()
elif canonical == "reasoning":
@ -6613,9 +6622,12 @@ class HermesCLI:
filter=Condition(lambda: cli_ref._voice_mode),
)
status_bar = Window(
content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()),
height=1,
status_bar = ConditionalContainer(
Window(
content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()),
height=1,
),
filter=Condition(lambda: cli_ref._status_bar_visible),
)
# Layout: interactive prompt widgets + ruled input at bottom.

View file

@ -34,6 +34,7 @@ HERMES_DIR = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
CRON_DIR = HERMES_DIR / "cron"
JOBS_FILE = CRON_DIR / "jobs.json"
OUTPUT_DIR = CRON_DIR / "output"
ONESHOT_GRACE_SECONDS = 120
def _normalize_skill_list(skill: Optional[str] = None, skills: Optional[Any] = None) -> List[str]:
@ -220,6 +221,33 @@ def _ensure_aware(dt: datetime) -> datetime:
return dt.astimezone(target_tz)
def _recoverable_oneshot_run_at(
schedule: Dict[str, Any],
now: datetime,
*,
last_run_at: Optional[str] = None,
) -> Optional[str]:
"""Return a one-shot run time if it is still eligible to fire.
One-shot jobs get a small grace window so jobs created a few seconds after
their requested minute still run on the next tick. Once a one-shot has
already run, it is never eligible again.
"""
if schedule.get("kind") != "once":
return None
if last_run_at:
return None
run_at = schedule.get("run_at")
if not run_at:
return None
run_at_dt = _ensure_aware(datetime.fromisoformat(run_at))
if run_at_dt >= now - timedelta(seconds=ONESHOT_GRACE_SECONDS):
return run_at
return None
def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None) -> Optional[str]:
"""
Compute the next run time for a schedule.
@ -229,9 +257,7 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
now = _hermes_now()
if schedule["kind"] == "once":
run_at = _ensure_aware(datetime.fromisoformat(schedule["run_at"]))
# If in the future, return it; if in the past, no more runs
return schedule["run_at"] if run_at > now else None
return _recoverable_oneshot_run_at(schedule, now, last_run_at=last_run_at)
elif schedule["kind"] == "interval":
minutes = schedule["minutes"]
@ -555,7 +581,26 @@ def get_due_jobs() -> List[Dict[str, Any]]:
next_run = job.get("next_run_at")
if not next_run:
continue
recovered_next = _recoverable_oneshot_run_at(
job.get("schedule", {}),
now,
last_run_at=job.get("last_run_at"),
)
if not recovered_next:
continue
job["next_run_at"] = recovered_next
next_run = recovered_next
logger.info(
"Job '%s' had no next_run_at; recovering one-shot run at %s",
job.get("name", job["id"]),
recovered_next,
)
for rj in raw_jobs:
if rj["id"] == job["id"]:
rj["next_run_at"] = recovered_next
needs_save = True
break
next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
if next_run_dt <= now:

View file

@ -32,6 +32,15 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
return bool(value)
def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
"""Normalize unauthorized DM behavior to a supported value."""
if isinstance(value, str):
normalized = value.strip().lower()
if normalized in {"pair", "ignore"}:
return normalized
return default
class Platform(Enum):
"""Supported messaging platforms."""
LOCAL = "local"
@ -215,6 +224,9 @@ class GatewayConfig:
# Session isolation in shared chats
group_sessions_per_user: bool = True # Isolate group/channel sessions per participant when user IDs are available
# Unauthorized DM policy
unauthorized_dm_behavior: str = "pair" # "pair" or "ignore"
# Streaming configuration
streaming: StreamingConfig = field(default_factory=StreamingConfig)
@ -289,6 +301,7 @@ class GatewayConfig:
"always_log_local": self.always_log_local,
"stt_enabled": self.stt_enabled,
"group_sessions_per_user": self.group_sessions_per_user,
"unauthorized_dm_behavior": self.unauthorized_dm_behavior,
"streaming": self.streaming.to_dict(),
}
@ -331,6 +344,10 @@ class GatewayConfig:
stt_enabled = data.get("stt", {}).get("enabled") if isinstance(data.get("stt"), dict) else None
group_sessions_per_user = data.get("group_sessions_per_user")
unauthorized_dm_behavior = _normalize_unauthorized_dm_behavior(
data.get("unauthorized_dm_behavior"),
"pair",
)
return cls(
platforms=platforms,
@ -343,9 +360,21 @@ class GatewayConfig:
always_log_local=data.get("always_log_local", True),
stt_enabled=_coerce_bool(stt_enabled, True),
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
unauthorized_dm_behavior=unauthorized_dm_behavior,
streaming=StreamingConfig.from_dict(data.get("streaming", {})),
)
def get_unauthorized_dm_behavior(self, platform: Optional[Platform] = None) -> str:
"""Return the effective unauthorized-DM behavior for a platform."""
if platform:
platform_cfg = self.platforms.get(platform)
if platform_cfg and "unauthorized_dm_behavior" in platform_cfg.extra:
return _normalize_unauthorized_dm_behavior(
platform_cfg.extra.get("unauthorized_dm_behavior"),
self.unauthorized_dm_behavior,
)
return self.unauthorized_dm_behavior
def load_gateway_config() -> GatewayConfig:
"""
@ -416,6 +445,38 @@ def load_gateway_config() -> GatewayConfig:
if "always_log_local" in yaml_cfg:
gw_data["always_log_local"] = yaml_cfg["always_log_local"]
if "unauthorized_dm_behavior" in yaml_cfg:
gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
yaml_cfg.get("unauthorized_dm_behavior"),
"pair",
)
# Bridge per-platform unauthorized_dm_behavior from config.yaml
platforms_data = gw_data.setdefault("platforms", {})
if not isinstance(platforms_data, dict):
platforms_data = {}
gw_data["platforms"] = platforms_data
for plat in Platform:
if plat == Platform.LOCAL:
continue
platform_cfg = yaml_cfg.get(plat.value)
if not isinstance(platform_cfg, dict):
continue
if "unauthorized_dm_behavior" not in platform_cfg:
continue
plat_data = platforms_data.setdefault(plat.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[plat.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
platform_cfg.get("unauthorized_dm_behavior"),
gw_data.get("unauthorized_dm_behavior", "pair"),
)
# Discord settings → env vars (env vars take precedence)
discord_cfg = yaml_cfg.get("discord", {})
if isinstance(discord_cfg, dict):

View file

@ -635,7 +635,7 @@ class MatrixAdapter(BasePlatformAdapter):
source=source,
raw_message=getattr(event, "source", {}),
message_id=event.event_id,
reply_to=reply_to,
reply_to_message_id=reply_to,
)
await self.handle_message(msg_event)

View file

@ -434,6 +434,16 @@ class GatewayRunner:
for session_key in list(managers.keys()):
self._shutdown_gateway_honcho(session_key)
# -- Setup skill availability ----------------------------------------
def _has_setup_skill(self) -> bool:
"""Check if the hermes-agent-setup skill is installed."""
try:
from tools.skill_manager_tool import _find_skill
return _find_skill("hermes-agent-setup") is not None
except Exception:
return False
# -- Voice mode persistence ------------------------------------------
_VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json"
@ -1251,6 +1261,13 @@ class GatewayRunner:
if "@" in user_id:
check_ids.add(user_id.split("@")[0])
return bool(check_ids & allowed_ids)
def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str:
"""Return how unauthorized DMs should be handled for a platform."""
config = getattr(self, "config", None)
if config and hasattr(config, "get_unauthorized_dm_behavior"):
return config.get_unauthorized_dm_behavior(platform)
return "pair"
async def _handle_message(self, event: MessageEvent) -> Optional[str]:
"""
@ -1271,7 +1288,7 @@ class GatewayRunner:
if not self._is_user_authorized(source):
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
# In DMs: offer pairing code. In groups: silently ignore.
if source.chat_type == "dm":
if source.chat_type == "dm" and self._get_unauthorized_dm_behavior(source.platform) == "pair":
platform_name = source.platform.value if source.platform else "unknown"
code = self.pairing_store.generate_code(
platform_name, source.user_id, source.user_name or ""
@ -1874,6 +1891,37 @@ class GatewayRunner:
message_text = await self._enrich_message_with_transcription(
message_text, audio_paths
)
# If STT failed, send a direct message to the user so they
# know voice isn't configured — don't rely on the agent to
# relay the error clearly.
_stt_fail_markers = (
"No STT provider",
"STT is disabled",
"can't listen",
"VOICE_TOOLS_OPENAI_KEY",
)
if any(m in message_text for m in _stt_fail_markers):
_stt_adapter = self.adapters.get(source.platform)
_stt_meta = {"thread_id": source.thread_id} if source.thread_id else None
if _stt_adapter:
try:
_stt_msg = (
"🎤 I received your voice message but can't transcribe it — "
"no speech-to-text provider is configured.\n\n"
"To enable voice: install faster-whisper "
"(`pip install faster-whisper` in the Hermes venv) "
"and set `stt.enabled: true` in config.yaml, "
"then /restart the gateway."
)
# Point to setup skill if it's installed
if self._has_setup_skill():
_stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`"
await _stt_adapter.send(
source.chat_id, _stt_msg,
metadata=_stt_meta,
)
except Exception:
pass
# -----------------------------------------------------------------
# Enrich document messages with context notes for the agent
@ -3943,7 +3991,13 @@ class GatewayRunner:
The enriched message string with transcriptions prepended.
"""
if not getattr(self.config, "stt_enabled", True):
disabled_note = "[The user sent voice message(s), but transcription is disabled in config.]"
disabled_note = "[The user sent voice message(s), but transcription is disabled in config."
if self._has_setup_skill():
disabled_note += (
" You have a skill called hermes-agent-setup that can help "
"users configure Hermes features including voice, tools, and more."
)
disabled_note += "]"
if user_text:
return f"{disabled_note}\n\n{user_text}"
return disabled_note
@ -3970,11 +4024,20 @@ class GatewayRunner:
"No STT provider" in error
or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
):
enriched_parts.append(
_no_stt_note = (
"[The user sent a voice message but I can't listen "
"to it right now~ No STT provider is configured "
"(';w;') Let them know!]"
"to it right now — no STT provider is configured. "
"A direct message has already been sent to the user "
"with setup instructions."
)
if self._has_setup_skill():
_no_stt_note += (
" You have a skill called hermes-agent-setup "
"that can help users configure Hermes features "
"including voice, tools, and more."
)
_no_stt_note += "]"
enriched_parts.append(_no_stt_note)
else:
enriched_parts.append(
"[The user sent a voice message but I had trouble "

View file

@ -87,6 +87,7 @@ def _looks_like_gateway_process(pid: int) -> bool:
patterns = (
"hermes_cli.main gateway",
"hermes_cli/main.py gateway",
"hermes gateway",
"gateway/run.py",
)
@ -105,6 +106,7 @@ def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
cmdline = " ".join(str(part) for part in argv)
patterns = (
"hermes_cli.main gateway",
"hermes_cli/main.py gateway",
"hermes gateway",
"gateway/run.py",
)

View file

@ -102,27 +102,22 @@ COMPACT_BANNER = """
# =========================================================================
def get_available_skills() -> Dict[str, List[str]]:
"""Scan ~/.hermes/skills/ and return skills grouped by category."""
import os
"""Return skills grouped by category, filtered by platform and disabled state.
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
skills_dir = hermes_home / "skills"
skills_by_category = {}
if not skills_dir.exists():
return skills_by_category
for skill_file in skills_dir.rglob("SKILL.md"):
rel_path = skill_file.relative_to(skills_dir)
parts = rel_path.parts
if len(parts) >= 2:
category = parts[0]
skill_name = parts[-2]
else:
category = "general"
skill_name = skill_file.parent.name
skills_by_category.setdefault(category, []).append(skill_name)
Delegates to ``_find_all_skills()`` from ``tools/skills_tool`` which already
handles platform gating (``platforms:`` frontmatter) and respects the
user's ``skills.disabled`` config list.
"""
try:
from tools.skills_tool import _find_all_skills
all_skills = _find_all_skills() # already filtered
except Exception:
return {}
skills_by_category: Dict[str, List[str]] = {}
for skill in all_skills:
category = skill.get("category") or "general"
skills_by_category.setdefault(category, []).append(skill["name"])
return skills_by_category
@ -233,6 +228,17 @@ def _format_context_length(tokens: int) -> str:
return str(tokens)
def _display_toolset_name(toolset_name: str) -> str:
"""Normalize internal/legacy toolset identifiers for banner display."""
if not toolset_name:
return "unknown"
return (
toolset_name[:-6]
if toolset_name.endswith("_tools")
else toolset_name
)
def build_welcome_banner(console: Console, model: str, cwd: str,
tools: List[dict] = None,
enabled_toolsets: List[str] = None,
@ -297,12 +303,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
for tool in tools:
tool_name = tool["function"]["name"]
toolset = get_toolset_for_tool(tool_name) or "other"
toolset = _display_toolset_name(get_toolset_for_tool(tool_name) or "other")
toolsets_dict.setdefault(toolset, []).append(tool_name)
for item in unavailable_toolsets:
toolset_id = item.get("id", item.get("name", "unknown"))
display_name = f"{toolset_id}_tools" if not toolset_id.endswith("_tools") else toolset_id
display_name = _display_toolset_name(toolset_id)
if display_name not in toolsets_dict:
toolsets_dict[display_name] = []
for tool_name in item.get("tools", []):
@ -342,10 +348,10 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
colored_names.append(f"[{text}]{name}[/]")
tools_str = ", ".join(colored_names)
right_lines.append(f"[dim #B8860B]{toolset}:[/] {tools_str}")
right_lines.append(f"[dim {dim}]{toolset}:[/] {tools_str}")
if remaining_toolsets > 0:
right_lines.append(f"[dim #B8860B](and {remaining_toolsets} more toolsets...)[/]")
right_lines.append(f"[dim {dim}](and {remaining_toolsets} more toolsets...)[/]")
# MCP Servers section (only if configured)
try:
@ -356,12 +362,12 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
if mcp_status:
right_lines.append("")
right_lines.append("[bold #FFBF00]MCP Servers[/]")
right_lines.append(f"[bold {accent}]MCP Servers[/]")
for srv in mcp_status:
if srv["connected"]:
right_lines.append(
f"[dim #B8860B]{srv['name']}[/] [#FFF8DC]({srv['transport']})[/] "
f"[dim #B8860B]—[/] [#FFF8DC]{srv['tools']} tool(s)[/]"
f"[dim {dim}]{srv['name']}[/] [{text}]({srv['transport']})[/] "
f"[dim {dim}]—[/] [{text}]{srv['tools']} tool(s)[/]"
)
else:
right_lines.append(

View file

@ -81,6 +81,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
cli_only=True, args_hint="[text]", subcommands=("clear",)),
CommandDef("personality", "Set a predefined personality", "Configuration",
args_hint="[name]"),
CommandDef("statusbar", "Toggle the context/model status bar", "Configuration",
cli_only=True, aliases=("sb",)),
CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
"Configuration", cli_only=True),
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",

View file

@ -31,6 +31,7 @@ def find_gateway_pids() -> list:
pids = []
patterns = [
"hermes_cli.main gateway",
"hermes_cli/main.py gateway",
"hermes gateway",
"gateway/run.py",
]
@ -849,6 +850,46 @@ def launchd_stop():
subprocess.run(["launchctl", "stop", "ai.hermes.gateway"], check=True)
print("✓ Service stopped")
def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
"""Wait for the gateway process (by saved PID) to exit.
Uses the PID from the gateway.pid file not launchd labels so this
works correctly when multiple gateway instances run under separate
HERMES_HOME directories.
Args:
timeout: Total seconds to wait before giving up.
force_after: Seconds of graceful waiting before sending SIGKILL.
"""
import time
from gateway.status import get_running_pid
deadline = time.monotonic() + timeout
force_deadline = time.monotonic() + force_after
force_sent = False
while time.monotonic() < deadline:
pid = get_running_pid()
if pid is None:
return # Process exited cleanly.
if not force_sent and time.monotonic() >= force_deadline:
# Grace period expired — force-kill the specific PID.
try:
os.kill(pid, signal.SIGKILL)
print(f"⚠ Gateway PID {pid} did not exit gracefully; sent SIGKILL")
except (ProcessLookupError, PermissionError):
return # Already gone or we can't touch it.
force_sent = True
time.sleep(0.3)
# Timed out even after SIGKILL.
remaining_pid = get_running_pid()
if remaining_pid is not None:
print(f"⚠ Gateway PID {remaining_pid} still running after {timeout}s — restart may fail")
def launchd_restart():
try:
launchd_stop()
@ -856,6 +897,7 @@ def launchd_restart():
if e.returncode != 3:
raise
print("↻ launchd job was unloaded; skipping stop")
_wait_for_gateway_exit()
launchd_start()
def launchd_status(deep: bool = False):
@ -1753,10 +1795,9 @@ def gateway_command(args):
killed = kill_gateway_processes()
if killed:
print(f"✓ Stopped {killed} gateway process(es)")
import time
time.sleep(2)
_wait_for_gateway_exit(timeout=10.0, force_after=5.0)
# Start fresh
print("Starting gateway...")
run_gateway(verbose=False)

View file

@ -28,17 +28,26 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
OPENROUTER_MODELS: list[tuple[str, str]] = [
("anthropic/claude-opus-4.6", "recommended"),
("anthropic/claude-sonnet-4.5", ""),
("openai/gpt-5.4-pro", ""),
("anthropic/claude-haiku-4.5", ""),
("openai/gpt-5.4", ""),
("openai/gpt-5.4-mini", ""),
("openrouter/hunter-alpha", "free"),
("openrouter/healer-alpha", "free"),
("openai/gpt-5.3-codex", ""),
("google/gemini-3-pro-preview", ""),
("google/gemini-3-flash-preview", ""),
("qwen/qwen3.5-plus-02-15", ""),
("qwen/qwen3.5-35b-a3b", ""),
("stepfun/step-3.5-flash", ""),
("z-ai/glm-5", ""),
("moonshotai/kimi-k2.5", ""),
("minimax/minimax-m2.5", ""),
("z-ai/glm-5", ""),
("z-ai/glm-5-turbo", ""),
("moonshotai/kimi-k2.5", ""),
("x-ai/grok-4.20-beta", ""),
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
("arcee-ai/trinity-large-preview:free", "free"),
("openai/gpt-5.4-pro", ""),
("openai/gpt-5.4-nano", ""),
]
_PROVIDER_MODELS: dict[str, list[str]] = {
@ -90,11 +99,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"kimi-k2-0905-preview",
],
"minimax": [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
"MiniMax-M2.5",
"MiniMax-M2.5-highspeed",
"MiniMax-M2.1",
],
"minimax-cn": [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
"MiniMax-M2.5",
"MiniMax-M2.5-highspeed",
"MiniMax-M2.1",

View file

@ -76,8 +76,8 @@ _DEFAULT_PROVIDER_MODELS = {
],
"zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
}
@ -1917,7 +1917,7 @@ def _install_neutts_deps() -> bool:
return True
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
print_error(f"Failed to install neutts: {e}")
print_info("Try manually: pip install neutts[all]")
print_info("Try manually: python -m pip install -U neutts[all]")
return False

View file

@ -757,16 +757,14 @@ class SessionDB:
if not query:
return []
if source_filter is None:
source_filter = ["cli", "telegram", "discord", "whatsapp", "slack"]
# Build WHERE clauses dynamically
where_clauses = ["messages_fts MATCH ?"]
params: list = [query]
source_placeholders = ",".join("?" for _ in source_filter)
where_clauses.append(f"s.source IN ({source_placeholders})")
params.extend(source_filter)
if source_filter is not None:
source_placeholders = ",".join("?" for _ in source_filter)
where_clauses.append(f"s.source IN ({source_placeholders})")
params.extend(source_filter)
if role_filter:
role_placeholders = ",".join("?" for _ in role_filter)

View file

@ -276,6 +276,7 @@ def get_tool_definitions(
# The registry still holds their schemas; dispatch just returns a stub error
# so if something slips through, the LLM sees a sensible message.
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
def handle_function_call(
@ -305,7 +306,6 @@ def handle_function_call(
"""
# Notify the read-loop tracker when a non-read/search tool runs,
# so the *consecutive* counter resets (reads after other work are fine).
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
if function_name not in _READ_SEARCH_TOOLS:
try:
from tools.file_tools import notify_other_tool_call

View file

@ -203,6 +203,27 @@ class IterationBudget:
# When any of these appear in a batch, we fall back to sequential execution.
_NEVER_PARALLEL_TOOLS = frozenset({"clarify"})
# Read-only tools with no shared mutable session state.
_PARALLEL_SAFE_TOOLS = frozenset({
"ha_get_state",
"ha_list_entities",
"ha_list_services",
"honcho_context",
"honcho_profile",
"honcho_search",
"read_file",
"search_files",
"session_search",
"skill_view",
"skills_list",
"vision_analyze",
"web_extract",
"web_search",
})
# File tools can run concurrently when they target independent paths.
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
# Maximum number of concurrent worker threads for parallel tool execution.
_MAX_TOOL_WORKERS = 8
@ -234,6 +255,74 @@ def _is_destructive_command(cmd: str) -> bool:
return False
def _should_parallelize_tool_batch(tool_calls) -> bool:
"""Return True when a tool-call batch is safe to run concurrently."""
if len(tool_calls) <= 1:
return False
tool_names = [tc.function.name for tc in tool_calls]
if any(name in _NEVER_PARALLEL_TOOLS for name in tool_names):
return False
reserved_paths: list[Path] = []
for tool_call in tool_calls:
tool_name = tool_call.function.name
try:
function_args = json.loads(tool_call.function.arguments)
except Exception:
logging.debug(
"Could not parse args for %s — defaulting to sequential; raw=%s",
tool_name,
tool_call.function.arguments[:200],
)
return False
if not isinstance(function_args, dict):
logging.debug(
"Non-dict args for %s (%s) — defaulting to sequential",
tool_name,
type(function_args).__name__,
)
return False
if tool_name in _PATH_SCOPED_TOOLS:
scoped_path = _extract_parallel_scope_path(tool_name, function_args)
if scoped_path is None:
return False
if any(_paths_overlap(scoped_path, existing) for existing in reserved_paths):
return False
reserved_paths.append(scoped_path)
continue
if tool_name not in _PARALLEL_SAFE_TOOLS:
return False
return True
def _extract_parallel_scope_path(tool_name: str, function_args: dict) -> Path | None:
"""Return the normalized file target for path-scoped tools."""
if tool_name not in _PATH_SCOPED_TOOLS:
return None
raw_path = function_args.get("path")
if not isinstance(raw_path, str) or not raw_path.strip():
return None
# Avoid resolve(); the file may not exist yet.
return Path(raw_path).expanduser()
def _paths_overlap(left: Path, right: Path) -> bool:
"""Return True when two paths may refer to the same subtree."""
left_parts = left.parts
right_parts = right.parts
if not left_parts or not right_parts:
# Empty paths shouldn't reach here (guarded upstream), but be safe.
return bool(left_parts) == bool(right_parts) and bool(left_parts)
common_len = min(len(left_parts), len(right_parts))
return left_parts[:common_len] == right_parts[:common_len]
def _inject_honcho_turn_context(content, turn_context: str):
"""Append Honcho recall to the current-turn user message without mutating history.
@ -263,11 +352,20 @@ def _inject_honcho_turn_context(content, turn_context: str):
class AIAgent:
"""
AI Agent with tool calling capabilities.
This class manages the conversation flow, tool execution, and response handling
for AI models that support function calling.
"""
@property
def base_url(self) -> str:
return self._base_url
@base_url.setter
def base_url(self, value: str) -> None:
self._base_url = value
self._base_url_lower = value.lower() if value else ""
def __init__(
self,
base_url: str = None,
@ -389,10 +487,10 @@ class AIAgent:
self.api_mode = api_mode
elif self.provider == "openai-codex":
self.api_mode = "codex_responses"
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
self.api_mode = "codex_responses"
self.provider = "openai-codex"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()):
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
self.api_mode = "anthropic_messages"
self.provider = "anthropic"
else:
@ -401,7 +499,7 @@ class AIAgent:
# Pre-warm OpenRouter model metadata cache in a background thread.
# fetch_model_metadata() is cached for 1 hour; this avoids a blocking
# HTTP request on the first API response when pricing is estimated.
if self.provider == "openrouter" or "openrouter" in self.base_url.lower():
if self.provider == "openrouter" or "openrouter" in self._base_url_lower:
threading.Thread(
target=lambda: fetch_model_metadata(),
daemon=True,
@ -445,7 +543,7 @@ class AIAgent:
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
# Reduces input costs by ~75% on multi-turn conversations by caching the
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
is_openrouter = "openrouter" in self.base_url.lower()
is_openrouter = "openrouter" in self._base_url_lower
is_claude = "claude" in self.model.lower()
is_native_anthropic = self.api_mode == "anthropic_messages"
self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
@ -561,6 +659,7 @@ class AIAgent:
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
effective_key = api_key or resolve_anthropic_token() or ""
self.api_key = effective_key
self._anthropic_api_key = effective_key
self._anthropic_base_url = base_url
from agent.anthropic_adapter import _is_oauth_token as _is_oat
@ -622,6 +721,7 @@ class AIAgent:
}
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
self.api_key = client_kwargs.get("api_key", "")
try:
self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
if not self.quiet_mode:
@ -745,16 +845,24 @@ class AIAgent:
from tools.todo_tool import TodoStore
self._todo_store = TodoStore()
# Load config once for memory, skills, and compression sections
try:
from hermes_cli.config import load_config as _load_agent_config
_agent_cfg = _load_agent_config()
except Exception:
_agent_cfg = {}
# Persistent memory (MEMORY.md + USER.md) -- loaded from disk
self._memory_store = None
self._memory_enabled = False
self._user_profile_enabled = False
self._memory_nudge_interval = 10
self._memory_flush_min_turns = 6
self._turns_since_memory = 0
self._iters_since_skill = 0
if not skip_memory:
try:
from hermes_cli.config import load_config as _load_mem_config
mem_config = _load_mem_config().get("memory", {})
mem_config = _agent_cfg.get("memory", {})
self._memory_enabled = mem_config.get("memory_enabled", False)
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
@ -842,21 +950,16 @@ class AIAgent:
# Skills config: nudge interval for skill creation reminders
self._skill_nudge_interval = 10
try:
from hermes_cli.config import load_config as _load_skills_config
skills_config = _load_skills_config().get("skills", {})
skills_config = _agent_cfg.get("skills", {})
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
except Exception:
pass
# Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit
# Configuration via config.yaml (compression section)
try:
from hermes_cli.config import load_config as _load_compression_config
_compression_cfg = _load_compression_config().get("compression", {})
if not isinstance(_compression_cfg, dict):
_compression_cfg = {}
except ImportError:
_compression_cfg = _agent_cfg.get("compression", {})
if not isinstance(_compression_cfg, dict):
_compression_cfg = {}
compression_threshold = float(_compression_cfg.get("threshold", 0.50))
compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
@ -871,6 +974,7 @@ class AIAgent:
summary_model_override=compression_summary_model,
quiet_mode=self.quiet_mode,
base_url=self.base_url,
api_key=getattr(self, "api_key", ""),
)
self.compression_enabled = compression_enabled
self._user_turn_count = 0
@ -926,8 +1030,8 @@ class AIAgent:
OpenAI models use 'max_tokens'.
"""
_is_direct_openai = (
"api.openai.com" in self.base_url.lower()
and "openrouter" not in self.base_url.lower()
"api.openai.com" in self._base_url_lower
and "openrouter" not in self._base_url_lower
)
if _is_direct_openai:
return {"max_completion_tokens": value}
@ -2977,6 +3081,9 @@ class AIAgent:
return False
self._anthropic_api_key = new_token
# Update OAuth flag — token type may have changed (API key ↔ OAuth)
from agent.anthropic_adapter import _is_oauth_token
self._is_anthropic_oauth = _is_oauth_token(new_token)
return True
def _anthropic_messages_create(self, api_kwargs: dict):
@ -3368,11 +3475,12 @@ class AIAgent:
if fb_api_mode == "anthropic_messages":
# Build native Anthropic client instead of using OpenAI client
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
effective_key = fb_client.api_key or resolve_anthropic_token() or ""
self._anthropic_api_key = effective_key
self._anthropic_base_url = getattr(fb_client, "base_url", None)
self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url)
self._is_anthropic_oauth = _is_oauth_token(effective_key)
self.client = None
self._client_kwargs = {}
else:
@ -3678,10 +3786,10 @@ class AIAgent:
extra_body = {}
_is_openrouter = "openrouter" in self.base_url.lower()
_is_openrouter = "openrouter" in self._base_url_lower
_is_github_models = (
"models.github.ai" in self.base_url.lower()
or "api.githubcopilot.com" in self.base_url.lower()
"models.github.ai" in self._base_url_lower
or "api.githubcopilot.com" in self._base_url_lower
)
# Provider preferences (only, ignore, order, sort) are OpenRouter-
@ -3690,7 +3798,7 @@ class AIAgent:
# for _is_nous when their backend is updated.
if provider_preferences and _is_openrouter:
extra_body["provider"] = provider_preferences
_is_nous = "nousresearch" in self.base_url.lower()
_is_nous = "nousresearch" in self._base_url_lower
if self._supports_reasoning_extra_body():
if _is_github_models:
@ -3728,21 +3836,20 @@ class AIAgent:
Some providers/routes reject `reasoning` with 400s, so gate it to
known reasoning-capable model families and direct Nous Portal.
"""
base_url = (self.base_url or "").lower()
if "nousresearch" in base_url:
if "nousresearch" in self._base_url_lower:
return True
if "ai-gateway.vercel.sh" in base_url:
if "ai-gateway.vercel.sh" in self._base_url_lower:
return True
if "models.github.ai" in base_url or "api.githubcopilot.com" in base_url:
if "models.github.ai" in self._base_url_lower or "api.githubcopilot.com" in self._base_url_lower:
try:
from hermes_cli.models import github_model_reasoning_efforts
return bool(github_model_reasoning_efforts(self.model))
except Exception:
return False
if "openrouter" not in base_url:
if "openrouter" not in self._base_url_lower:
return False
if "api.mistral.ai" in base_url:
if "api.mistral.ai" in self._base_url_lower:
return False
model = (self.model or "").lower()
@ -3960,7 +4067,7 @@ class AIAgent:
try:
# Build API messages for the flush call
_is_strict_api = "api.mistral.ai" in self.base_url.lower()
_is_strict_api = "api.mistral.ai" in self._base_url_lower
api_messages = []
for msg in messages:
api_msg = msg.copy()
@ -4149,20 +4256,17 @@ class AIAgent:
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
"""Execute tool calls from the assistant message and append results to messages.
Dispatches to concurrent execution when multiple independent tool calls
are present, falling back to sequential execution for single calls or
when interactive tools (e.g. clarify) are in the batch.
Dispatches to concurrent execution only for batches that look
independent: read-only tools may always share the parallel path, while
file reads/writes may do so only when their target paths do not overlap.
"""
tool_calls = assistant_message.tool_calls
# Single tool call or interactive tool present → sequential
if (len(tool_calls) <= 1
or any(tc.function.name in _NEVER_PARALLEL_TOOLS for tc in tool_calls)):
if not _should_parallelize_tool_batch(tool_calls):
return self._execute_tool_calls_sequential(
assistant_message, messages, effective_task_id, api_call_count
)
# Multiple non-interactive tools → concurrent
return self._execute_tool_calls_concurrent(
assistant_message, messages, effective_task_id, api_call_count
)
@ -4736,7 +4840,7 @@ class AIAgent:
try:
# Build API messages, stripping internal-only fields
# (finish_reason, reasoning) that strict APIs like Mistral reject with 422
_is_strict_api = "api.mistral.ai" in self.base_url.lower()
_is_strict_api = "api.mistral.ai" in self._base_url_lower
api_messages = []
for msg in messages:
api_msg = msg.copy()
@ -4757,7 +4861,7 @@ class AIAgent:
api_messages.insert(sys_offset + idx, pfm.copy())
summary_extra_body = {}
_is_nous = "nousresearch" in self.base_url.lower()
_is_nous = "nousresearch" in self._base_url_lower
if self._supports_reasoning_extra_body():
if self.reasoning_config is not None:
summary_extra_body["reasoning"] = self.reasoning_config
@ -4920,8 +5024,9 @@ class AIAgent:
self._incomplete_scratchpad_retries = 0
self._codex_incomplete_retries = 0
self._last_content_with_tools = None
self._turns_since_memory = 0
self._iters_since_skill = 0
# NOTE: _turns_since_memory and _iters_since_skill are NOT reset here.
# They are initialized in __init__ and must persist across run_conversation
# calls so that nudge logic accumulates correctly in CLI mode.
self.iteration_budget = IterationBudget(self.max_iterations)
# Initialize conversation (copy to avoid mutating the caller's list)
@ -5174,7 +5279,7 @@ class AIAgent:
# strict providers like Mistral that reject unknown fields with 422.
# Uses new dicts so the internal messages list retains the fields
# for Codex Responses compatibility.
if "api.mistral.ai" in self.base_url.lower():
if "api.mistral.ai" in self._base_url_lower:
self._sanitize_tool_calls_for_strict_api(api_msg)
# Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
# The signature field helps maintain reasoning continuity
@ -5546,6 +5651,7 @@ class AIAgent:
canonical_usage,
provider=self.provider,
base_url=self.base_url,
api_key=getattr(self, "api_key", ""),
)
if cost_result.amount_usd is not None:
self.session_estimated_cost_usd += float(cost_result.amount_usd)
@ -5939,10 +6045,6 @@ class AIAgent:
self._client_log_context(),
api_error,
)
if retry_count >= max_retries:
self._vprint(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
# Sleep in small increments so we can respond to interrupts quickly
# instead of blocking the entire wait_time in one sleep() call
sleep_end = time.time() + wait_time

View file

@ -0,0 +1,300 @@
---
name: hermes-agent-setup
description: Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. Use when someone asks to enable features, configure settings, or needs help with Hermes itself.
version: 1.1.0
author: Hermes Agent
tags: [setup, configuration, tools, stt, tts, voice, hermes, cli, skills]
---
# Hermes Agent Setup & Configuration
Use this skill when a user asks about configuring Hermes, enabling features, setting up voice, managing tools/skills, or troubleshooting.
## Key Paths
- Config: `~/.hermes/config.yaml`
- API keys: `~/.hermes/.env`
- Skills: `~/.hermes/skills/`
- Hermes install: `~/.hermes/hermes-agent/`
- Venv: `~/.hermes/hermes-agent/.venv/` (or `venv/`)
## CLI Overview
Hermes is used via the `hermes` command (or `python -m hermes_cli.main` from the repo).
### Core commands:
```
hermes Interactive chat (default)
hermes chat -q "question" Single query, then exit
hermes chat -m MODEL Chat with a specific model
hermes -c Resume most recent session
hermes -c "project name" Resume session by name
hermes --resume SESSION_ID Resume by exact ID
hermes -w Isolated git worktree mode
hermes -s skill1,skill2 Preload skills for the session
hermes --yolo Skip dangerous command approval
```
### Configuration & setup:
```
hermes setup Interactive setup wizard (provider, API keys, model)
hermes model Interactive model/provider selection
hermes config View current configuration
hermes config edit Open config.yaml in $EDITOR
hermes config set KEY VALUE Set a config value directly
hermes login Authenticate with a provider
hermes logout Clear stored auth
hermes doctor Check configuration and dependencies
```
### Tools & skills:
```
hermes tools Interactive tool enable/disable per platform
hermes skills list List installed skills
hermes skills search QUERY Search the skills hub
hermes skills install NAME Install a skill from the hub
hermes skills config Enable/disable skills per platform
```
### Gateway (messaging platforms):
```
hermes gateway run Start the messaging gateway
hermes gateway install Install gateway as background service
hermes gateway status Check gateway status
```
### Session management:
```
hermes sessions list List past sessions
hermes sessions browse Interactive session picker
hermes sessions rename ID TITLE Rename a session
hermes sessions export ID Export session as markdown
hermes sessions prune Clean up old sessions
```
### Other:
```
hermes status Show status of all components
hermes cron list List cron jobs
hermes insights Usage analytics
hermes update Update to latest version
hermes pairing Manage DM authorization codes
```
## Setup Wizard (`hermes setup`)
The interactive setup wizard walks through:
1. **Provider selection** — OpenRouter, Anthropic, OpenAI, Google, DeepSeek, and many more
2. **API key entry** — stores securely in the env file
3. **Model selection** — picks from available models for the chosen provider
4. **Basic settings** — reasoning effort, tool preferences
Run it from terminal:
```bash
cd ~/.hermes/hermes-agent
source .venv/bin/activate
python -m hermes_cli.main setup
```
To change just the model/provider later: `hermes model`
## Skills Configuration (`hermes skills`)
Skills are reusable instruction sets that extend what Hermes can do.
### Managing skills:
```bash
hermes skills list # Show installed skills
hermes skills search "docker" # Search the hub
hermes skills install NAME # Install from hub
hermes skills config # Enable/disable per platform
```
### Per-platform skill control:
`hermes skills config` opens an interactive UI where you can enable or disable specific skills for each platform (cli, telegram, discord, etc.). Disabled skills won't appear in the agent's available skills list for that platform.
### Loading skills in a session:
- CLI: `hermes -s skill-name` or `hermes -s skill1,skill2`
- Chat: `/skill skill-name`
- Gateway: type `/skill skill-name` in any chat
## Voice Messages (STT)
Voice messages from Telegram/Discord/WhatsApp/Slack/Signal are auto-transcribed when an STT provider is available.
### Provider priority (auto-detected):
1. **Local faster-whisper** — free, no API key, runs on CPU/GPU
2. **Groq Whisper** — free tier, needs GROQ_API_KEY
3. **OpenAI Whisper** — paid, needs VOICE_TOOLS_OPENAI_KEY
### Setup local STT (recommended):
```bash
cd ~/.hermes/hermes-agent
source .venv/bin/activate # or: source venv/bin/activate
pip install faster-whisper
```
Add to config.yaml under the `stt:` section:
```yaml
stt:
enabled: true
provider: local
local:
model: base # Options: tiny, base, small, medium, large-v3
```
Model downloads automatically on first use (~150 MB for base).
### Setup Groq STT (free cloud):
1. Get free key from https://console.groq.com
2. Add GROQ_API_KEY to the env file
3. Set provider to groq in config.yaml stt section
### Verify STT:
After config changes, restart the gateway (send /restart in chat, or restart `hermes gateway run`). Then send a voice message.
## Voice Replies (TTS)
Hermes can reply with voice when users send voice messages.
### TTS providers (set API key in env file):
| Provider | Env var | Free? |
|----------|---------|-------|
| ElevenLabs | ELEVENLABS_API_KEY | Free tier |
| OpenAI | VOICE_TOOLS_OPENAI_KEY | Paid |
| Kokoro (local) | None needed | Free |
| Fish Audio | FISH_AUDIO_API_KEY | Free tier |
### Voice commands (in any chat):
- `/voice on` — voice reply to voice messages only
- `/voice tts` — voice reply to all messages
- `/voice off` — text only (default)
## Enabling/Disabling Tools (`hermes tools`)
### Interactive tool config:
```bash
cd ~/.hermes/hermes-agent
source .venv/bin/activate
python -m hermes_cli.main tools
```
This opens a curses UI to enable/disable toolsets per platform (cli, telegram, discord, slack, etc.).
### After changing tools:
Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching and avoids cost spikes).
### Common toolsets:
| Toolset | What it provides |
|---------|-----------------|
| terminal | Shell command execution |
| file | File read/write/search/patch |
| web | Web search and extraction |
| browser | Browser automation (needs Browserbase) |
| image_gen | AI image generation |
| mcp | MCP server connections |
| voice | Text-to-speech output |
| cronjob | Scheduled tasks |
## Installing Dependencies
Some tools need extra packages:
```bash
cd ~/.hermes/hermes-agent && source .venv/bin/activate
pip install faster-whisper # Local STT (voice transcription)
pip install browserbase # Browser automation
pip install mcp # MCP server connections
```
## Config File Reference
The main config file is `~/.hermes/config.yaml`. Key sections:
```yaml
# Model and provider
model:
default: anthropic/claude-opus-4.6
provider: openrouter
# Agent behavior
agent:
max_turns: 90
reasoning_effort: high # xhigh, high, medium, low, minimal, none
# Voice
stt:
enabled: true
provider: local # local, groq, openai
tts:
provider: elevenlabs # elevenlabs, openai, kokoro, fish
# Display
display:
skin: default # default, ares, mono, slate
tool_progress: full # full, compact, off
background_process_notifications: all # all, result, error, off
```
Edit with `hermes config edit` or `hermes config set KEY VALUE`.
## Gateway Commands (Messaging Platforms)
| Command | What it does |
|---------|-------------|
| /reset or /new | Fresh session (picks up new tool config) |
| /help | Show all commands |
| /model [name] | Show or change model |
| /compact | Compress conversation to save context |
| /voice [mode] | Configure voice replies |
| /reasoning [effort] | Set reasoning level |
| /sethome | Set home channel for cron/notifications |
| /restart | Restart the gateway (picks up config changes) |
| /status | Show session info |
| /retry | Retry last message |
| /undo | Remove last exchange |
| /personality [name] | Set agent personality |
| /skill [name] | Load a skill |
## Troubleshooting
### Voice messages not working
1. Check stt.enabled is true in config.yaml
2. Check a provider is available (faster-whisper installed, or API key set)
3. Restart gateway after config changes (/restart)
### Tool not available
1. Run `hermes tools` to check if the toolset is enabled for your platform
2. Some tools need env vars — check the env file
3. Use /reset after enabling tools
### Model/provider issues
1. Run `hermes doctor` to check configuration
2. Run `hermes login` to re-authenticate
3. Check the env file has the right API key
### Changes not taking effect
- Gateway: /reset for tool changes, /restart for config changes
- CLI: start a new session
### Skills not showing up
1. Check `hermes skills list` shows the skill
2. Check `hermes skills config` has it enabled for your platform
3. Load explicitly with `/skill name` or `hermes -s name`

View file

@ -0,0 +1,80 @@
---
name: huggingface-hub
description: Hugging Face Hub CLI (hf) — download/upload models and datasets, manage repos, run SQL on datasets, deploy inference endpoints, manage Spaces, and more. Use when working with HuggingFace models, datasets, or infrastructure.
version: 1.0.0
author: Hugging Face
license: MIT
tags: [huggingface, hf, models, datasets, hub, mlops]
---
# Hugging Face CLI (`hf`) Reference Guide
The `hf` command is the modern command-line interface for interacting with the Hugging Face Hub, providing tools to manage repositories, models, datasets, and Spaces.
> **IMPORTANT:** The `hf` command replaces the now deprecated `huggingface-cli` command.
## Quick Start
* **Installation:** `curl -LsSf https://hf.co/cli/install.sh | bash -s`
* **Help:** Use `hf --help` to view all available functions and real-world examples.
* **Authentication:** Recommended via `HF_TOKEN` environment variable or the `--token` flag.
---
## Core Commands
### General Operations
* `hf download REPO_ID`: Download files from the Hub.
* `hf upload REPO_ID`: Upload files/folders (recommended for single-commit).
* `hf upload-large-folder REPO_ID LOCAL_PATH`: Recommended for resumable uploads of large directories.
* `hf sync`: Sync files between a local directory and a bucket.
* `hf env` / `hf version`: View environment and version details.
### Authentication (`hf auth`)
* `login` / `logout`: Manage sessions using tokens from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
* `list` / `switch`: Manage and toggle between multiple stored access tokens.
* `whoami`: Identify the currently logged-in account.
### Repository Management (`hf repos`)
* `create` / `delete`: Create or permanently remove repositories.
* `duplicate`: Clone a model, dataset, or Space to a new ID.
* `move`: Transfer a repository between namespaces.
* `branch` / `tag`: Manage Git-like references.
* `delete-files`: Remove specific files using patterns.
---
## Specialized Hub Interactions
### Datasets & Models
* **Datasets:** `hf datasets list`, `info`, and `parquet` (list parquet URLs).
* **SQL Queries:** `hf datasets sql SQL` — Execute raw SQL via DuckDB against dataset parquet URLs.
* **Models:** `hf models list` and `info`.
* **Papers:** `hf papers list` — View daily papers.
### Discussions & Pull Requests (`hf discussions`)
* Manage the lifecycle of Hub contributions: `list`, `create`, `info`, `comment`, `close`, `reopen`, and `rename`.
* `diff`: View changes in a PR.
* `merge`: Finalize pull requests.
### Infrastructure & Compute
* **Endpoints:** Deploy and manage Inference Endpoints (`deploy`, `pause`, `resume`, `scale-to-zero`, `catalog`).
* **Jobs:** Run compute tasks on HF infrastructure. Includes `hf jobs uv` for running Python scripts with inline dependencies and `stats` for resource monitoring.
* **Spaces:** Manage interactive apps. Includes `dev-mode` and `hot-reload` for Python files without full restarts.
### Storage & Automation
* **Buckets:** Full S3-like bucket management (`create`, `cp`, `mv`, `rm`, `sync`).
* **Cache:** Manage local storage with `list`, `prune` (remove detached revisions), and `verify` (checksum checks).
* **Webhooks:** Automate workflows by managing Hub webhooks (`create`, `watch`, `enable`/`disable`).
* **Collections:** Organize Hub items into collections (`add-item`, `update`, `list`).
---
## Advanced Usage & Tips
### Global Flags
* `--format json`: Produces machine-readable output for automation.
* `-q` / `--quiet`: Limits output to IDs only.
### Extensions & Skills
* **Extensions:** Extend CLI functionality via GitHub repositories using `hf extensions install REPO_ID`.
* **Skills:** Manage AI assistant skills with `hf skills add`.

View file

@ -188,6 +188,36 @@ class TestGetModelContextLength:
result = get_model_context_length("custom/model")
assert result == CONTEXT_PROBE_TIERS[0]
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
def test_custom_endpoint_metadata_beats_fuzzy_default(self, mock_endpoint_fetch, mock_fetch):
mock_fetch.return_value = {}
mock_endpoint_fetch.return_value = {
"zai-org/GLM-5-TEE": {"context_length": 65536}
}
result = get_model_context_length(
"zai-org/GLM-5-TEE",
base_url="https://llm.chutes.ai/v1",
api_key="test-key",
)
assert result == 65536
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch):
mock_fetch.return_value = {}
mock_endpoint_fetch.return_value = {}
result = get_model_context_length(
"zai-org/GLM-5-TEE",
base_url="https://llm.chutes.ai/v1",
api_key="test-key",
)
assert result == CONTEXT_PROBE_TIERS[0]
# =========================================================================
# fetch_model_metadata — caching, TTL, slugs, failures
@ -258,6 +288,25 @@ class TestFetchModelMetadata:
assert "anthropic/claude-3.5-sonnet" in result
assert result["anthropic/claude-3.5-sonnet"]["context_length"] == 200000
@patch("agent.model_metadata.requests.get")
def test_provider_prefixed_models_get_bare_aliases(self, mock_get):
self._reset_cache()
mock_response = MagicMock()
mock_response.json.return_value = {
"data": [{
"id": "provider/test-model",
"context_length": 123456,
"name": "Provider: Test Model",
}]
}
mock_response.raise_for_status = MagicMock()
mock_get.return_value = mock_response
result = fetch_model_metadata(force_refresh=True)
assert result["provider/test-model"]["context_length"] == 123456
assert result["test-model"]["context_length"] == 123456
@patch("agent.model_metadata.requests.get")
def test_ttl_expiry_triggers_refetch(self, mock_get):
"""Cache expires after _MODEL_CACHE_TTL seconds."""

View file

@ -309,6 +309,35 @@ class TestBuildSkillsSystemPrompt:
assert "imessage" in result
assert "Send iMessages" in result
def test_excludes_disabled_skills(self, monkeypatch, tmp_path):
"""Skills in the user's disabled list should not appear in the system prompt."""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
skills_dir = tmp_path / "skills" / "tools"
skills_dir.mkdir(parents=True)
enabled_skill = skills_dir / "web-search"
enabled_skill.mkdir()
(enabled_skill / "SKILL.md").write_text(
"---\nname: web-search\ndescription: Search the web\n---\n"
)
disabled_skill = skills_dir / "old-tool"
disabled_skill.mkdir()
(disabled_skill / "SKILL.md").write_text(
"---\nname: old-tool\ndescription: Deprecated tool\n---\n"
)
from unittest.mock import patch
with patch(
"tools.skills_tool._get_disabled_skill_names",
return_value={"old-tool"},
):
result = build_skills_system_prompt()
assert "web-search" in result
assert "old-tool" not in result
def test_includes_setup_needed_skills(self, monkeypatch, tmp_path):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
monkeypatch.delenv("MISSING_API_KEY_XYZ", raising=False)

View file

@ -85,6 +85,21 @@ class TestScanSkillCommands:
result = scan_skill_commands()
assert "/generic-tool" in result
def test_excludes_disabled_skills(self, tmp_path):
"""Disabled skills should not register slash commands."""
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"tools.skills_tool._get_disabled_skill_names",
return_value={"disabled-skill"},
),
):
_make_skill(tmp_path, "enabled-skill")
_make_skill(tmp_path, "disabled-skill")
result = scan_skill_commands()
assert "/enabled-skill" in result
assert "/disabled-skill" not in result
class TestBuildPreloadedSkillsPrompt:
def test_builds_prompt_for_multiple_named_skills(self, tmp_path):

View file

@ -99,3 +99,27 @@ def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(m
)
assert result.status == "unknown"
def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
monkeypatch.setattr(
"agent.usage_pricing.fetch_endpoint_model_metadata",
lambda base_url, api_key=None: {
"zai-org/GLM-5-TEE": {
"pricing": {
"prompt": "0.0000005",
"completion": "0.000002",
}
}
},
)
entry = get_pricing_entry(
"zai-org/GLM-5-TEE",
provider="custom",
base_url="https://llm.chutes.ai/v1",
api_key="test-key",
)
assert float(entry.input_cost_per_million) == 0.5
assert float(entry.output_cost_per_million) == 2.0

View file

@ -2,7 +2,7 @@
import json
import pytest
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import patch
@ -122,11 +122,29 @@ class TestComputeNextRun:
schedule = {"kind": "once", "run_at": future}
assert compute_next_run(schedule) == future
def test_once_recent_past_within_grace_returns_time(self, monkeypatch):
now = datetime(2026, 3, 18, 4, 22, 3, tzinfo=timezone.utc)
run_at = "2026-03-18T04:22:00+00:00"
monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
schedule = {"kind": "once", "run_at": run_at}
assert compute_next_run(schedule) == run_at
def test_once_past_returns_none(self):
past = (datetime.now() - timedelta(hours=1)).isoformat()
schedule = {"kind": "once", "run_at": past}
assert compute_next_run(schedule) is None
def test_once_with_last_run_returns_none_even_within_grace(self, monkeypatch):
now = datetime(2026, 3, 18, 4, 22, 3, tzinfo=timezone.utc)
run_at = "2026-03-18T04:22:00+00:00"
monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
schedule = {"kind": "once", "run_at": run_at}
assert compute_next_run(schedule, last_run_at=now.isoformat()) is None
def test_interval_first_run(self):
schedule = {"kind": "interval", "minutes": 60}
result = compute_next_run(schedule)
@ -347,6 +365,67 @@ class TestGetDueJobs:
due = get_due_jobs()
assert len(due) == 0
def test_broken_recent_one_shot_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch):
now = datetime(2026, 3, 18, 4, 22, 30, tzinfo=timezone.utc)
monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
run_at = "2026-03-18T04:22:00+00:00"
save_jobs(
[{
"id": "oneshot-recover",
"name": "Recover me",
"prompt": "Word of the day",
"schedule": {"kind": "once", "run_at": run_at, "display": "once at 2026-03-18 04:22"},
"schedule_display": "once at 2026-03-18 04:22",
"repeat": {"times": 1, "completed": 0},
"enabled": True,
"state": "scheduled",
"paused_at": None,
"paused_reason": None,
"created_at": "2026-03-18T04:21:00+00:00",
"next_run_at": None,
"last_run_at": None,
"last_status": None,
"last_error": None,
"deliver": "local",
"origin": None,
}]
)
due = get_due_jobs()
assert [job["id"] for job in due] == ["oneshot-recover"]
assert get_job("oneshot-recover")["next_run_at"] == run_at
def test_broken_stale_one_shot_without_next_run_is_not_recovered(self, tmp_cron_dir, monkeypatch):
now = datetime(2026, 3, 18, 4, 30, 0, tzinfo=timezone.utc)
monkeypatch.setattr("cron.jobs._hermes_now", lambda: now)
save_jobs(
[{
"id": "oneshot-stale",
"name": "Too old",
"prompt": "Word of the day",
"schedule": {"kind": "once", "run_at": "2026-03-18T04:22:00+00:00", "display": "once at 2026-03-18 04:22"},
"schedule_display": "once at 2026-03-18 04:22",
"repeat": {"times": 1, "completed": 0},
"enabled": True,
"state": "scheduled",
"paused_at": None,
"paused_reason": None,
"created_at": "2026-03-18T04:21:00+00:00",
"next_run_at": None,
"last_run_at": None,
"last_status": None,
"last_error": None,
"deliver": "local",
"origin": None,
}]
)
assert get_due_jobs() == []
assert get_job("oneshot-stale")["next_run_at"] is None
class TestSaveJobOutput:
def test_creates_output_file(self, tmp_cron_dir):

View file

@ -115,6 +115,22 @@ class TestGatewayConfigRoundtrip:
assert restored.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}
assert restored.group_sessions_per_user is False
def test_roundtrip_preserves_unauthorized_dm_behavior(self):
config = GatewayConfig(
unauthorized_dm_behavior="ignore",
platforms={
Platform.WHATSAPP: PlatformConfig(
enabled=True,
extra={"unauthorized_dm_behavior": "pair"},
),
},
)
restored = GatewayConfig.from_dict(config.to_dict())
assert restored.unauthorized_dm_behavior == "ignore"
assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
class TestLoadGatewayConfig:
def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch):
@ -158,3 +174,21 @@ class TestLoadGatewayConfig:
config = load_gateway_config()
assert config.quick_commands == {}
def test_bridges_unauthorized_dm_behavior_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"unauthorized_dm_behavior: ignore\n"
"whatsapp:\n"
" unauthorized_dm_behavior: pair\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
config = load_gateway_config()
assert config.unauthorized_dm_behavior == "ignore"
assert config.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"

View file

@ -42,6 +42,26 @@ class TestGatewayPidState:
assert status.get_running_pid() == os.getpid()
def test_get_running_pid_accepts_script_style_gateway_cmdline(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
pid_path = tmp_path / "gateway.pid"
pid_path.write_text(json.dumps({
"pid": os.getpid(),
"kind": "hermes-gateway",
"argv": ["/venv/bin/python", "/repo/hermes_cli/main.py", "gateway", "run", "--replace"],
"start_time": 123,
}))
monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
monkeypatch.setattr(
status,
"_read_process_cmdline",
lambda pid: "/venv/bin/python /repo/hermes_cli/main.py gateway run --replace",
)
assert status.get_running_pid() == os.getpid()
class TestGatewayRuntimeStatus:
def test_write_runtime_status_overwrites_stale_pid_on_restart(self, tmp_path, monkeypatch):

View file

@ -0,0 +1,137 @@
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.session import SessionSource
def _clear_auth_env(monkeypatch) -> None:
for key in (
"TELEGRAM_ALLOWED_USERS",
"DISCORD_ALLOWED_USERS",
"WHATSAPP_ALLOWED_USERS",
"SLACK_ALLOWED_USERS",
"SIGNAL_ALLOWED_USERS",
"EMAIL_ALLOWED_USERS",
"SMS_ALLOWED_USERS",
"MATTERMOST_ALLOWED_USERS",
"MATRIX_ALLOWED_USERS",
"DINGTALK_ALLOWED_USERS",
"GATEWAY_ALLOWED_USERS",
"TELEGRAM_ALLOW_ALL_USERS",
"DISCORD_ALLOW_ALL_USERS",
"WHATSAPP_ALLOW_ALL_USERS",
"SLACK_ALLOW_ALL_USERS",
"SIGNAL_ALLOW_ALL_USERS",
"EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS",
"MATTERMOST_ALLOW_ALL_USERS",
"MATRIX_ALLOW_ALL_USERS",
"DINGTALK_ALLOW_ALL_USERS",
"GATEWAY_ALLOW_ALL_USERS",
):
monkeypatch.delenv(key, raising=False)
def _make_event(platform: Platform, user_id: str, chat_id: str) -> MessageEvent:
return MessageEvent(
text="hello",
message_id="m1",
source=SessionSource(
platform=platform,
user_id=user_id,
chat_id=chat_id,
user_name="tester",
chat_type="dm",
),
)
def _make_runner(platform: Platform, config: GatewayConfig):
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
runner.config = config
adapter = SimpleNamespace(send=AsyncMock())
runner.adapters = {platform: adapter}
runner.pairing_store = MagicMock()
runner.pairing_store.is_approved.return_value = False
return runner, adapter
@pytest.mark.asyncio
async def test_unauthorized_dm_pairs_by_default(monkeypatch):
_clear_auth_env(monkeypatch)
config = GatewayConfig(
platforms={Platform.WHATSAPP: PlatformConfig(enabled=True)},
)
runner, adapter = _make_runner(Platform.WHATSAPP, config)
runner.pairing_store.generate_code.return_value = "ABC12DEF"
result = await runner._handle_message(
_make_event(
Platform.WHATSAPP,
"15551234567@s.whatsapp.net",
"15551234567@s.whatsapp.net",
)
)
assert result is None
runner.pairing_store.generate_code.assert_called_once_with(
"whatsapp",
"15551234567@s.whatsapp.net",
"tester",
)
adapter.send.assert_awaited_once()
assert "ABC12DEF" in adapter.send.await_args.args[1]
@pytest.mark.asyncio
async def test_unauthorized_whatsapp_dm_can_be_ignored(monkeypatch):
_clear_auth_env(monkeypatch)
config = GatewayConfig(
platforms={
Platform.WHATSAPP: PlatformConfig(
enabled=True,
extra={"unauthorized_dm_behavior": "ignore"},
),
},
)
runner, adapter = _make_runner(Platform.WHATSAPP, config)
result = await runner._handle_message(
_make_event(
Platform.WHATSAPP,
"15551234567@s.whatsapp.net",
"15551234567@s.whatsapp.net",
)
)
assert result is None
runner.pairing_store.generate_code.assert_not_called()
adapter.send.assert_not_awaited()
@pytest.mark.asyncio
async def test_global_ignore_suppresses_pairing_reply(monkeypatch):
_clear_auth_env(monkeypatch)
config = GatewayConfig(
unauthorized_dm_behavior="ignore",
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")},
)
runner, adapter = _make_runner(Platform.TELEGRAM, config)
result = await runner._handle_message(
_make_event(
Platform.TELEGRAM,
"12345",
"12345",
)
)
assert result is None
runner.pairing_store.generate_code.assert_not_called()
adapter.send.assert_not_awaited()

View file

@ -0,0 +1,70 @@
"""Tests for banner toolset name normalization and skin color usage."""
from unittest.mock import patch
from rich.console import Console
import hermes_cli.banner as banner
import model_tools
import tools.mcp_tool
def test_display_toolset_name_strips_legacy_suffix():
assert banner._display_toolset_name("homeassistant_tools") == "homeassistant"
assert banner._display_toolset_name("honcho_tools") == "honcho"
assert banner._display_toolset_name("web_tools") == "web"
def test_display_toolset_name_preserves_clean_names():
assert banner._display_toolset_name("browser") == "browser"
assert banner._display_toolset_name("file") == "file"
assert banner._display_toolset_name("terminal") == "terminal"
def test_display_toolset_name_handles_empty():
assert banner._display_toolset_name("") == "unknown"
assert banner._display_toolset_name(None) == "unknown"
def test_build_welcome_banner_uses_normalized_toolset_names():
"""Unavailable toolsets should not have '_tools' appended in banner output."""
with (
patch.object(
model_tools,
"check_tool_availability",
return_value=(
["web"],
[
{"name": "homeassistant", "tools": ["ha_call_service"]},
{"name": "honcho", "tools": ["honcho_conclude"]},
],
),
),
patch.object(banner, "get_available_skills", return_value={}),
patch.object(banner, "get_update_result", return_value=None),
patch.object(tools.mcp_tool, "get_mcp_status", return_value=[]),
):
console = Console(
record=True, force_terminal=False, color_system=None, width=160
)
banner.build_welcome_banner(
console=console,
model="anthropic/test-model",
cwd="/tmp/project",
tools=[
{"function": {"name": "web_search"}},
{"function": {"name": "read_file"}},
],
get_toolset_for_tool=lambda name: {
"web_search": "web_tools",
"read_file": "file",
}.get(name),
)
output = console.export_text()
assert "homeassistant:" in output
assert "honcho:" in output
assert "web:" in output
assert "homeassistant_tools:" not in output
assert "honcho_tools:" not in output
assert "web_tools:" not in output

View file

@ -0,0 +1,68 @@
"""Tests for banner get_available_skills() — disabled and platform filtering."""
from unittest.mock import patch
import pytest
_MOCK_SKILLS = [
{"name": "skill-a", "description": "A skill", "category": "tools"},
{"name": "skill-b", "description": "B skill", "category": "tools"},
{"name": "skill-c", "description": "C skill", "category": "creative"},
]
def test_get_available_skills_delegates_to_find_all_skills():
"""get_available_skills should call _find_all_skills (which handles filtering)."""
with patch("tools.skills_tool._find_all_skills", return_value=list(_MOCK_SKILLS)):
from hermes_cli.banner import get_available_skills
result = get_available_skills()
assert "tools" in result
assert "creative" in result
assert sorted(result["tools"]) == ["skill-a", "skill-b"]
assert result["creative"] == ["skill-c"]
def test_get_available_skills_excludes_disabled():
"""Disabled skills should not appear in the banner count."""
# _find_all_skills already filters disabled skills, so if we give it
# a filtered list, get_available_skills should reflect that.
filtered = [s for s in _MOCK_SKILLS if s["name"] != "skill-b"]
with patch("tools.skills_tool._find_all_skills", return_value=filtered):
from hermes_cli.banner import get_available_skills
result = get_available_skills()
all_names = [n for names in result.values() for n in names]
assert "skill-b" not in all_names
assert "skill-a" in all_names
assert len(all_names) == 2
def test_get_available_skills_empty_when_no_skills():
"""No skills installed returns empty dict."""
with patch("tools.skills_tool._find_all_skills", return_value=[]):
from hermes_cli.banner import get_available_skills
result = get_available_skills()
assert result == {}
def test_get_available_skills_handles_import_failure():
"""If _find_all_skills import fails, return empty dict gracefully."""
with patch("tools.skills_tool._find_all_skills", side_effect=ImportError("boom")):
from hermes_cli.banner import get_available_skills
result = get_available_skills()
assert result == {}
def test_get_available_skills_null_category_becomes_general():
"""Skills with None category should be grouped under 'general'."""
skills = [{"name": "orphan-skill", "description": "No cat", "category": None}]
with patch("tools.skills_tool._find_all_skills", return_value=skills):
from hermes_cli.banner import get_available_skills
result = get_available_skills()
assert "general" in result
assert result["general"] == ["orphan-skill"]

View file

@ -1,6 +1,8 @@
"""Tests for hermes_cli.gateway."""
import signal
from types import SimpleNamespace
from unittest.mock import patch, call
import hermes_cli.gateway as gateway
@ -169,3 +171,84 @@ def test_install_linux_gateway_from_setup_system_choice_as_root_installs(monkeyp
assert (scope, did_install) == ("system", True)
assert calls == [(True, True, "alice")]
# ---------------------------------------------------------------------------
# _wait_for_gateway_exit
# ---------------------------------------------------------------------------
class TestWaitForGatewayExit:
"""PID-based wait with force-kill on timeout."""
def test_returns_immediately_when_no_pid(self, monkeypatch):
"""If get_running_pid returns None, exit instantly."""
monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
# Should return without sleeping at all.
gateway._wait_for_gateway_exit(timeout=1.0, force_after=0.5)
def test_returns_when_process_exits_gracefully(self, monkeypatch):
"""Process exits after a couple of polls — no SIGKILL needed."""
poll_count = 0
def mock_get_running_pid():
nonlocal poll_count
poll_count += 1
return 12345 if poll_count <= 2 else None
monkeypatch.setattr("gateway.status.get_running_pid", mock_get_running_pid)
monkeypatch.setattr("time.sleep", lambda _: None)
gateway._wait_for_gateway_exit(timeout=10.0, force_after=999.0)
# Should have polled until None was returned.
assert poll_count == 3
def test_force_kills_after_grace_period(self, monkeypatch):
"""When the process doesn't exit, SIGKILL the saved PID."""
import time as _time
# Simulate monotonic time advancing past force_after
call_num = 0
def fake_monotonic():
nonlocal call_num
call_num += 1
# First two calls: initial deadline + force_deadline setup (time 0)
# Then each loop iteration advances time
return call_num * 2.0 # 2, 4, 6, 8, ...
kills = []
def mock_kill(pid, sig):
kills.append((pid, sig))
# get_running_pid returns the PID until kill is sent, then None
def mock_get_running_pid():
return None if kills else 42
monkeypatch.setattr("time.monotonic", fake_monotonic)
monkeypatch.setattr("time.sleep", lambda _: None)
monkeypatch.setattr("gateway.status.get_running_pid", mock_get_running_pid)
monkeypatch.setattr("os.kill", mock_kill)
gateway._wait_for_gateway_exit(timeout=10.0, force_after=5.0)
assert (42, signal.SIGKILL) in kills
def test_handles_process_already_gone_on_kill(self, monkeypatch):
"""ProcessLookupError during SIGKILL is not fatal."""
import time as _time
call_num = 0
def fake_monotonic():
nonlocal call_num
call_num += 1
return call_num * 3.0 # Jump past force_after quickly
def mock_kill(pid, sig):
raise ProcessLookupError
monkeypatch.setattr("time.monotonic", fake_monotonic)
monkeypatch.setattr("time.sleep", lambda _: None)
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 99)
monkeypatch.setattr("os.kill", mock_kill)
# Should not raise — ProcessLookupError means it's already gone.
gateway._wait_for_gateway_exit(timeout=10.0, force_after=2.0)

View file

@ -131,7 +131,7 @@ class TestTryActivateFallback:
def test_activates_minimax_fallback(self):
agent = _make_agent(
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
)
mock_client = _mock_resolve(
api_key="sk-mm-key",
@ -139,10 +139,10 @@ class TestTryActivateFallback:
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "MiniMax-M2.5"),
return_value=(mock_client, "MiniMax-M2.7"),
):
assert agent._try_activate_fallback() is True
assert agent.model == "MiniMax-M2.5"
assert agent.model == "MiniMax-M2.7"
assert agent.provider == "minimax"
assert agent.client is mock_client
@ -165,7 +165,7 @@ class TestTryActivateFallback:
def test_returns_false_when_no_api_key(self):
"""Fallback should fail gracefully when the API key env var is unset."""
agent = _make_agent(
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
fallback_model={"provider": "minimax", "model": "MiniMax-M2.7"},
)
with patch(
"agent.auxiliary_client.resolve_provider_client",

View file

@ -210,6 +210,25 @@ class TestFTS5Search:
sources = [r["source"] for r in results]
assert all(s == "telegram" for s in sources)
def test_search_default_sources_include_acp(self, db):
db.create_session(session_id="s1", source="acp")
db.append_message("s1", role="user", content="ACP question about Python")
results = db.search_messages("Python")
sources = [r["source"] for r in results]
assert "acp" in sources
def test_search_default_includes_all_platforms(self, db):
"""Default search (no source_filter) should find sessions from any platform."""
for src in ("cli", "telegram", "signal", "homeassistant", "acp", "matrix"):
sid = f"s-{src}"
db.create_session(session_id=sid, source=src)
db.append_message(sid, role="user", content=f"universal search test from {src}")
results = db.search_messages("universal search test")
found_sources = {r["source"] for r in results}
assert found_sources == {"cli", "telegram", "signal", "homeassistant", "acp", "matrix"}
def test_search_with_role_filter(self, db):
db.create_session(session_id="s1", source="cli")
db.append_message("s1", role="user", content="What is FastAPI?")

View file

@ -828,7 +828,7 @@ class TestConcurrentToolExecution:
mock_con.assert_not_called()
def test_multiple_tools_uses_concurrent_path(self, agent):
"""Multiple non-interactive tools should use concurrent path."""
"""Multiple read-only tools should use concurrent path."""
tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
tc2 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c2")
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
@ -839,6 +839,94 @@ class TestConcurrentToolExecution:
mock_con.assert_called_once()
mock_seq.assert_not_called()
def test_terminal_batch_forces_sequential(self, agent):
"""Stateful tools should not share the concurrent execution path."""
tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
tc2 = _mock_tool_call(name="terminal", arguments='{"command":"pwd"}', call_id="c2")
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
messages = []
with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
agent._execute_tool_calls(mock_msg, messages, "task-1")
mock_seq.assert_called_once()
mock_con.assert_not_called()
def test_write_batch_forces_sequential(self, agent):
"""File mutations should stay ordered within a turn."""
tc1 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c1")
tc2 = _mock_tool_call(name="write_file", arguments='{"path":"x.py","content":"print(1)"}', call_id="c2")
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
messages = []
with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
agent._execute_tool_calls(mock_msg, messages, "task-1")
mock_seq.assert_called_once()
mock_con.assert_not_called()
def test_disjoint_write_batch_uses_concurrent_path(self, agent):
"""Independent file writes should still run concurrently."""
tc1 = _mock_tool_call(
name="write_file",
arguments='{"path":"src/a.py","content":"print(1)"}',
call_id="c1",
)
tc2 = _mock_tool_call(
name="write_file",
arguments='{"path":"src/b.py","content":"print(2)"}',
call_id="c2",
)
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
messages = []
with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
agent._execute_tool_calls(mock_msg, messages, "task-1")
mock_con.assert_called_once()
mock_seq.assert_not_called()
def test_overlapping_write_batch_forces_sequential(self, agent):
"""Writes to the same file must stay ordered."""
tc1 = _mock_tool_call(
name="write_file",
arguments='{"path":"src/a.py","content":"print(1)"}',
call_id="c1",
)
tc2 = _mock_tool_call(
name="patch",
arguments='{"path":"src/a.py","old_string":"1","new_string":"2"}',
call_id="c2",
)
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
messages = []
with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
agent._execute_tool_calls(mock_msg, messages, "task-1")
mock_seq.assert_called_once()
mock_con.assert_not_called()
def test_malformed_json_args_forces_sequential(self, agent):
"""Unparseable tool arguments should fall back to sequential."""
tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
tc2 = _mock_tool_call(name="web_search", arguments="NOT JSON {{{", call_id="c2")
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
messages = []
with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
agent._execute_tool_calls(mock_msg, messages, "task-1")
mock_seq.assert_called_once()
mock_con.assert_not_called()
def test_non_dict_args_forces_sequential(self, agent):
"""Tool arguments that parse to a non-dict type should fall back to sequential."""
tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
tc2 = _mock_tool_call(name="web_search", arguments='"just a string"', call_id="c2")
mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
messages = []
with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
agent._execute_tool_calls(mock_msg, messages, "task-1")
mock_seq.assert_called_once()
mock_con.assert_not_called()
def test_concurrent_executes_all_tools(self, agent):
"""Concurrent path should execute all tools and append results in order."""
tc1 = _mock_tool_call(name="web_search", arguments='{"q":"alpha"}', call_id="c1")
@ -965,6 +1053,39 @@ class TestConcurrentToolExecution:
assert "ok" in result
class TestPathsOverlap:
"""Unit tests for the _paths_overlap helper."""
def test_same_path_overlaps(self):
from run_agent import _paths_overlap
assert _paths_overlap(Path("src/a.py"), Path("src/a.py"))
def test_siblings_do_not_overlap(self):
from run_agent import _paths_overlap
assert not _paths_overlap(Path("src/a.py"), Path("src/b.py"))
def test_parent_child_overlap(self):
from run_agent import _paths_overlap
assert _paths_overlap(Path("src"), Path("src/sub/a.py"))
def test_different_roots_do_not_overlap(self):
from run_agent import _paths_overlap
assert not _paths_overlap(Path("src/a.py"), Path("other/a.py"))
def test_nested_vs_flat_do_not_overlap(self):
from run_agent import _paths_overlap
assert not _paths_overlap(Path("src/sub/a.py"), Path("src/a.py"))
def test_empty_paths_do_not_overlap(self):
from run_agent import _paths_overlap
assert not _paths_overlap(Path(""), Path(""))
def test_one_empty_path_does_not_overlap(self):
from run_agent import _paths_overlap
assert not _paths_overlap(Path(""), Path("src/a.py"))
assert not _paths_overlap(Path("src/a.py"), Path(""))
class TestHandleMaxIterations:
def test_returns_summary(self, agent):
resp = _mock_response(content="Here is a summary of what I did.")
@ -2774,3 +2895,135 @@ class TestNormalizeCodexDictArguments:
msg, _ = agent._normalize_codex_response(response)
tc = msg.tool_calls[0]
assert tc.function.arguments == args_str
# ---------------------------------------------------------------------------
# OAuth flag and nudge counter fixes (salvaged from PR #1797)
# ---------------------------------------------------------------------------
class TestOAuthFlagAfterCredentialRefresh:
"""_is_anthropic_oauth must update when token type changes during refresh."""
def test_oauth_flag_updates_api_key_to_oauth(self, agent):
"""Refreshing from API key to OAuth token must set flag to True."""
agent.api_mode = "anthropic_messages"
agent._anthropic_api_key = "sk-ant-api-old"
agent._anthropic_client = MagicMock()
agent._is_anthropic_oauth = False
with (
patch("agent.anthropic_adapter.resolve_anthropic_token",
return_value="sk-ant-setup-oauth-token"),
patch("agent.anthropic_adapter.build_anthropic_client",
return_value=MagicMock()),
):
result = agent._try_refresh_anthropic_client_credentials()
assert result is True
assert agent._is_anthropic_oauth is True
def test_oauth_flag_updates_oauth_to_api_key(self, agent):
"""Refreshing from OAuth to API key must set flag to False."""
agent.api_mode = "anthropic_messages"
agent._anthropic_api_key = "sk-ant-setup-old"
agent._anthropic_client = MagicMock()
agent._is_anthropic_oauth = True
with (
patch("agent.anthropic_adapter.resolve_anthropic_token",
return_value="sk-ant-api03-new-key"),
patch("agent.anthropic_adapter.build_anthropic_client",
return_value=MagicMock()),
):
result = agent._try_refresh_anthropic_client_credentials()
assert result is True
assert agent._is_anthropic_oauth is False
class TestFallbackSetsOAuthFlag:
"""_try_activate_fallback must set _is_anthropic_oauth for Anthropic fallbacks."""
def test_fallback_to_anthropic_oauth_sets_flag(self, agent):
agent._fallback_activated = False
agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
mock_client = MagicMock()
mock_client.base_url = "https://api.anthropic.com/v1"
mock_client.api_key = "sk-ant-setup-oauth-token"
with (
patch("agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, None)),
patch("agent.anthropic_adapter.build_anthropic_client",
return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token",
return_value=None),
):
result = agent._try_activate_fallback()
assert result is True
assert agent._is_anthropic_oauth is True
def test_fallback_to_anthropic_api_key_clears_flag(self, agent):
agent._fallback_activated = False
agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
mock_client = MagicMock()
mock_client.base_url = "https://api.anthropic.com/v1"
mock_client.api_key = "sk-ant-api03-regular-key"
with (
patch("agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, None)),
patch("agent.anthropic_adapter.build_anthropic_client",
return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token",
return_value=None),
):
result = agent._try_activate_fallback()
assert result is True
assert agent._is_anthropic_oauth is False
class TestMemoryNudgeCounterPersistence:
"""_turns_since_memory must persist across run_conversation calls."""
def test_counters_initialized_in_init(self):
"""Counters must exist on the agent after __init__."""
with patch("run_agent.get_tool_definitions", return_value=[]):
a = AIAgent(
model="test", api_key="test-key", provider="openrouter",
skip_context_files=True, skip_memory=True,
)
assert hasattr(a, "_turns_since_memory")
assert hasattr(a, "_iters_since_skill")
assert a._turns_since_memory == 0
assert a._iters_since_skill == 0
def test_counters_not_reset_in_preamble(self):
"""The run_conversation preamble must not zero the nudge counters."""
import inspect
src = inspect.getsource(AIAgent.run_conversation)
# The preamble resets many fields (retry counts, budget, etc.)
# before the main loop. Find that reset block and verify our
# counters aren't in it. The reset block ends at iteration_budget.
preamble_end = src.index("self.iteration_budget = IterationBudget")
preamble = src[:preamble_end]
assert "self._turns_since_memory = 0" not in preamble
assert "self._iters_since_skill = 0" not in preamble
class TestDeadRetryCode:
"""Unreachable retry_count >= max_retries after raise must not exist."""
def test_no_unreachable_max_retries_after_backoff(self):
import inspect
source = inspect.getsource(AIAgent.run_conversation)
occurrences = source.count("if retry_count >= max_retries:")
assert occurrences == 2, (
f"Expected 2 occurrences of 'if retry_count >= max_retries:' "
f"but found {occurrences}"
)

View file

@ -505,6 +505,42 @@ class TestToolsetInjection:
assert "mcp_fs_list_files" not in fake_toolsets["non-hermes"]["tools"]
# Original tools preserved
assert "terminal" in fake_toolsets["hermes-cli"]["tools"]
# Server name becomes a standalone toolset
assert "fs" in fake_toolsets
assert "mcp_fs_list_files" in fake_toolsets["fs"]["tools"]
assert fake_toolsets["fs"]["description"].startswith("MCP server '")
def test_server_toolset_skips_builtin_collision(self):
"""MCP server named after a built-in toolset shouldn't overwrite it."""
from tools.mcp_tool import MCPServerTask
mock_tools = [_make_mcp_tool("run", "Run command")]
mock_session = MagicMock()
fresh_servers = {}
async def fake_connect(name, config):
server = MCPServerTask(name)
server.session = mock_session
server._tools = mock_tools
return server
fake_toolsets = {
"hermes-cli": {"tools": ["terminal"], "description": "CLI", "includes": []},
# Built-in toolset named "terminal" — must not be overwritten
"terminal": {"tools": ["terminal"], "description": "Terminal tools", "includes": []},
}
fake_config = {"terminal": {"command": "npx", "args": []}}
with patch("tools.mcp_tool._MCP_AVAILABLE", True), \
patch("tools.mcp_tool._servers", fresh_servers), \
patch("tools.mcp_tool._load_mcp_config", return_value=fake_config), \
patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
patch("toolsets.TOOLSETS", fake_toolsets):
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
# Built-in toolset preserved — description unchanged
assert fake_toolsets["terminal"]["description"] == "Terminal tools"
def test_server_connection_failure_skipped(self):
"""If one server fails to connect, others still proceed."""

View file

@ -374,6 +374,35 @@ class TestSkillView:
result = json.loads(raw)
assert result["success"] is False
def test_view_disabled_skill_blocked(self, tmp_path):
"""Disabled skills should not be viewable via skill_view."""
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"tools.skills_tool._is_skill_disabled",
return_value=True,
),
):
_make_skill(tmp_path, "hidden-skill")
raw = skill_view("hidden-skill")
result = json.loads(raw)
assert result["success"] is False
assert "disabled" in result["error"].lower()
def test_view_enabled_skill_allowed(self, tmp_path):
"""Non-disabled skills should be viewable normally."""
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"tools.skills_tool._is_skill_disabled",
return_value=False,
),
):
_make_skill(tmp_path, "active-skill")
raw = skill_view("active-skill")
result = json.loads(raw)
assert result["success"] is True
class TestSkillViewSecureSetupOnLoad:
def test_requests_missing_required_env_and_continues(self, tmp_path, monkeypatch):

View file

@ -173,10 +173,6 @@ def _build_child_agent(
from run_agent import AIAgent
import model_tools
# Save the parent's resolved tool names before the child agent can
# overwrite the process-global via get_tool_definitions().
_saved_tool_names = list(model_tools._last_resolved_tool_names)
# When no explicit toolsets given, inherit from parent's enabled toolsets
# so disabled tools (e.g. web) don't leak to subagents.
if toolsets:
@ -268,6 +264,13 @@ def _run_single_child(
# Get the progress callback from the child agent
child_progress_cb = getattr(child, 'tool_progress_callback', None)
# Save the parent's resolved tool names before the child agent can
# overwrite the process-global via get_tool_definitions().
# This must be in _run_single_child (not _build_child_agent) so the
# save/restore happens in the same scope as the try/finally.
import model_tools
_saved_tool_names = list(model_tools._last_resolved_tool_names)
try:
result = child.run_conversation(user_message=goal)

View file

@ -1238,6 +1238,57 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict:
}
def _sync_mcp_toolsets(server_names: Optional[List[str]] = None) -> None:
"""Expose each MCP server as a standalone toolset and inject into hermes-* sets.
Creates a real toolset entry in TOOLSETS for each server name (e.g.
TOOLSETS["github"] = {"tools": ["mcp_github_list_files", ...]}). This
makes raw server names resolvable in platform_toolsets overrides.
Also injects all MCP tools into hermes-* umbrella toolsets for the
default behavior.
Skips server names that collide with built-in toolsets.
"""
from toolsets import TOOLSETS
if server_names is None:
server_names = list(_load_mcp_config().keys())
existing = _existing_tool_names()
all_mcp_tools: List[str] = []
for server_name in server_names:
safe_prefix = f"mcp_{server_name.replace('-', '_').replace('.', '_')}_"
server_tools = sorted(
t for t in existing if t.startswith(safe_prefix)
)
all_mcp_tools.extend(server_tools)
# Don't overwrite a built-in toolset that happens to share the name.
existing_ts = TOOLSETS.get(server_name)
if existing_ts and not str(existing_ts.get("description", "")).startswith("MCP server '"):
logger.warning(
"Skipping MCP toolset alias '%s' — a built-in toolset already uses that name",
server_name,
)
continue
TOOLSETS[server_name] = {
"description": f"MCP server '{server_name}' tools",
"tools": server_tools,
"includes": [],
}
# Also inject into hermes-* umbrella toolsets for default behavior.
for ts_name, ts in TOOLSETS.items():
if not ts_name.startswith("hermes-"):
continue
for tool_name in all_mcp_tools:
if tool_name not in ts["tools"]:
ts["tools"].append(tool_name)
def _build_utility_schemas(server_name: str) -> List[dict]:
"""Build schemas for the MCP utility tools (resources & prompts).
@ -1523,6 +1574,7 @@ def discover_mcp_tools() -> List[str]:
}
if not new_servers:
_sync_mcp_toolsets(list(servers.keys()))
return _existing_tool_names()
# Start the background event loop for MCP connections
@ -1562,14 +1614,7 @@ def discover_mcp_tools() -> List[str]:
# The outer timeout is generous: 120s total for parallel discovery.
_run_on_mcp_loop(_discover_all(), timeout=120)
if all_tools:
# Dynamically inject into all hermes-* platform toolsets
from toolsets import TOOLSETS
for ts_name, ts in TOOLSETS.items():
if ts_name.startswith("hermes-"):
for tool_name in all_tools:
if tool_name not in ts["tools"]:
ts["tools"].append(tool_name)
_sync_mcp_toolsets(list(servers.keys()))
# Print summary
total_servers = len(new_servers)

View file

@ -8,7 +8,7 @@ Usage:
python -m tools.neutts_synth --text "Hello" --out output.wav \
--ref-audio samples/jo.wav --ref-text samples/jo.txt
Requires: pip install neutts[all]
Requires: python -m pip install -U neutts[all]
System: apt install espeak-ng (or brew install espeak-ng)
"""
@ -75,7 +75,7 @@ def main():
try:
from neutts import NeuTTS
except ImportError:
print("Error: neutts not installed. Run: pip install neutts[all]", file=sys.stderr)
print("Error: neutts not installed. Run: python -m pip install -U neutts[all]", file=sys.stderr)
sys.exit(1)
tts = NeuTTS(

View file

@ -1009,7 +1009,7 @@ async def rl_list_runs() -> str:
TEST_MODELS = [
{"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
{"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
{"id": "minimax/minimax-m2.5", "name": "MiniMax M2.5", "scale": "large"},
{"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"},
]
# Default test parameters - quick but representative
@ -1370,7 +1370,7 @@ RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status
RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}}
RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}}
RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}}
RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.5"}}, "required": []}}
RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}}
_rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"]

View file

@ -920,6 +920,20 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
ensure_ascii=False,
)
# Check if the skill is disabled by the user
resolved_name = parsed_frontmatter.get("name", skill_md.parent.name)
if _is_skill_disabled(resolved_name):
return json.dumps(
{
"success": False,
"error": (
f"Skill '{resolved_name}' is disabled. "
"Enable it with `hermes skills` or inspect the files directly on disk."
),
},
ensure_ascii=False,
)
# If a specific file path is requested, read that instead
if file_path and skill_dir:
# Security: Prevent path traversal attacks

View file

@ -423,8 +423,8 @@ def text_to_speech_tool(
if not _check_neutts_available():
return json.dumps({
"success": False,
"error": "NeuTTS provider selected but neutts_cli is not installed. "
"Install the NeuTTS skill and run the bootstrap helper first."
"error": "NeuTTS provider selected but neutts is not installed. "
"Run hermes setup and choose NeuTTS, or install espeak-ng and run python -m pip install -U neutts[all]."
}, ensure_ascii=False)
logger.info("Generating speech with NeuTTS (local)...")
_generate_neutts(text, file_str, tts_config)

View file

@ -37,8 +37,13 @@ That last part matters. Good MCP usage is not just “connect everything.” It
## Step 1: install MCP support
If you installed Hermes with the standard install script, MCP support is already included (the installer runs `uv pip install -e ".[all]"`).
If you installed without extras and need to add MCP separately:
```bash
pip install hermes-agent[mcp]
cd ~/.hermes/hermes-agent
uv pip install -e ".[mcp]"
```
For npm-based servers, make sure Node.js and `npx` are available.

View file

@ -72,6 +72,12 @@ pip install hermes-agent[messaging]
pip install hermes-agent[tts-premium]
```
### Local NeuTTS (optional)
```bash
python -m pip install -U neutts[all]
```
### Everything
```bash
@ -84,18 +90,21 @@ pip install hermes-agent[all]
```bash
brew install portaudio ffmpeg opus
brew install espeak-ng
```
### Ubuntu / Debian
```bash
sudo apt install portaudio19-dev ffmpeg libopus0
sudo apt install espeak-ng
```
Why these matter:
- `portaudio` → microphone input / playback for CLI voice mode
- `ffmpeg` → audio conversion for TTS and messaging delivery
- `opus` → Discord voice codec support
- `espeak-ng` → phonemizer backend for NeuTTS
## Step 4: choose STT and TTS providers
@ -133,9 +142,20 @@ ELEVENLABS_API_KEY=***
#### Text-to-speech
- `edge` → free and good enough for most users
- `neutts` → free local/on-device TTS
- `elevenlabs` → best quality
- `openai` → good middle ground
### If you use `hermes setup`
If you choose NeuTTS in the setup wizard, Hermes checks whether `neutts` is already installed. If it is missing, the wizard tells you NeuTTS needs the Python package `neutts` and the system package `espeak-ng`, offers to install them for you, installs `espeak-ng` with your platform package manager, and then runs:
```bash
python -m pip install -U neutts[all]
```
If you skip that install or it fails, the wizard falls back to Edge TTS.
## Step 5: recommended config
```yaml
@ -159,6 +179,18 @@ tts:
This is a good conservative default for most people.
If you want local TTS instead, switch the `tts` block to:
```yaml
tts:
provider: "neutts"
neutts:
ref_audio: ''
ref_text: ''
model: neuphonic/neutts-air-q4-gguf
device: cpu
```
## Use case 1: CLI voice mode
## Turn it on

View file

@ -372,8 +372,8 @@ hermes chat --continue
**Solution:**
```bash
# Ensure MCP dependencies are installed
pip install hermes-agent[mcp]
# Ensure MCP dependencies are installed (already included in standard install)
cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]"
# For npm-based servers, ensure Node.js is available
node --version

View file

@ -186,11 +186,11 @@ hermes chat --provider kimi-coding --model moonshot-v1-auto
# Requires: KIMI_API_KEY in ~/.hermes/.env
# MiniMax (global endpoint)
hermes chat --provider minimax --model MiniMax-Text-01
hermes chat --provider minimax --model MiniMax-M2.7
# Requires: MINIMAX_API_KEY in ~/.hermes/.env
# MiniMax (China endpoint)
hermes chat --provider minimax-cn --model MiniMax-Text-01
hermes chat --provider minimax-cn --model MiniMax-M2.7
# Requires: MINIMAX_CN_API_KEY in ~/.hermes/.env
# Alibaba Cloud / DashScope (Qwen models)
@ -984,7 +984,7 @@ You can also change the reasoning effort at runtime with the `/reasoning` comman
```yaml
tts:
provider: "edge" # "edge" | "elevenlabs" | "openai"
provider: "edge" # "edge" | "elevenlabs" | "openai" | "neutts"
edge:
voice: "en-US-AriaNeural" # 322 voices, 74 languages
elevenlabs:
@ -993,6 +993,11 @@ tts:
openai:
model: "gpt-4o-mini-tts"
voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer
neutts:
ref_audio: ''
ref_text: ''
model: neuphonic/neutts-air-q4-gguf
device: cpu
```
This controls both the `text_to_speech` tool and spoken replies in voice mode (`/voice tts` in the CLI or messaging gateway).
@ -1140,6 +1145,21 @@ group_sessions_per_user: true # true = per-user isolation in groups/channels, f
For the behavior details and examples, see [Sessions](/docs/user-guide/sessions) and the [Discord guide](/docs/user-guide/messaging/discord).
## Unauthorized DM Behavior
Control what Hermes does when an unknown user sends a direct message:
```yaml
unauthorized_dm_behavior: pair
whatsapp:
unauthorized_dm_behavior: ignore
```
- `pair` is the default. Hermes denies access, but replies with a one-time pairing code in DMs.
- `ignore` silently drops unauthorized DMs.
- Platform sections override the global default, so you can keep pairing enabled broadly while making one platform quieter.
## Quick Commands
Define custom commands that run shell commands without invoking the LLM — zero token usage, instant execution. Especially useful from messaging platforms (Telegram, Discord, etc.) for quick server checks or utility scripts.

View file

@ -20,10 +20,11 @@ If you have ever wanted Hermes to use a tool that already exists somewhere else,
## Quick start
1. Install MCP support:
1. Install MCP support (already included if you used the standard install script):
```bash
pip install hermes-agent[mcp]
cd ~/.hermes/hermes-agent
uv pip install -e ".[mcp]"
```
2. Add an MCP server to `~/.hermes/config.yaml`:
@ -374,7 +375,9 @@ Inspect the project root and explain the directory layout.
Check:
```bash
pip install hermes-agent[mcp]
# Verify MCP deps are installed (already included in standard install)
cd ~/.hermes/hermes-agent && uv pip install -e ".[mcp]"
node --version
npx --version
```

View file

@ -147,7 +147,7 @@ Default configuration:
- Tests 3 models at different scales for robustness:
- `qwen/qwen3-8b` (small)
- `z-ai/glm-4.7-flash` (medium)
- `minimax/minimax-m2.5` (large)
- `minimax/minimax-m2.7` (large)
- Total: ~144 rollouts
This validates:

View file

@ -10,13 +10,14 @@ Hermes Agent supports both text-to-speech output and voice message transcription
## Text-to-Speech
Convert text to speech with three providers:
Convert text to speech with four providers:
| Provider | Quality | Cost | API Key |
|----------|---------|------|---------|
| **Edge TTS** (default) | Good | Free | None needed |
| **ElevenLabs** | Excellent | Paid | `ELEVENLABS_API_KEY` |
| **OpenAI TTS** | Good | Paid | `VOICE_TOOLS_OPENAI_KEY` |
| **NeuTTS** | Good | Free | None needed |
### Platform Delivery
@ -32,7 +33,7 @@ Convert text to speech with three providers:
```yaml
# In ~/.hermes/config.yaml
tts:
provider: "edge" # "edge" | "elevenlabs" | "openai"
provider: "edge" # "edge" | "elevenlabs" | "openai" | "neutts"
edge:
voice: "en-US-AriaNeural" # 322 voices, 74 languages
elevenlabs:
@ -41,6 +42,11 @@ tts:
openai:
model: "gpt-4o-mini-tts"
voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer
neutts:
ref_audio: ''
ref_text: ''
model: neuphonic/neutts-air-q4-gguf
device: cpu
```
### Telegram Voice Bubbles & ffmpeg
@ -49,6 +55,7 @@ Telegram voice bubbles require Opus/OGG audio format:
- **OpenAI and ElevenLabs** produce Opus natively — no extra setup
- **Edge TTS** (default) outputs MP3 and needs **ffmpeg** to convert:
- **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles
```bash
# Ubuntu/Debian
@ -61,7 +68,7 @@ brew install ffmpeg
sudo dnf install ffmpeg
```
Without ffmpeg, Edge TTS audio is sent as a regular audio file (playable, but shows as a rectangular player instead of a voice bubble).
Without ffmpeg, Edge TTS and NeuTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble).
:::tip
If you want voice bubbles without installing ffmpeg, switch to the OpenAI or ElevenLabs provider.

View file

@ -44,6 +44,9 @@ pip install hermes-agent[messaging]
# Premium TTS (ElevenLabs)
pip install hermes-agent[tts-premium]
# Local TTS (NeuTTS, optional)
python -m pip install -U neutts[all]
# Everything at once
pip install hermes-agent[all]
```
@ -54,6 +57,8 @@ pip install hermes-agent[all]
| `messaging` | `discord.py[voice]`, `python-telegram-bot`, `aiohttp` | Discord & Telegram bots |
| `tts-premium` | `elevenlabs` | ElevenLabs TTS provider |
Optional local TTS provider: install `neutts` separately with `python -m pip install -U neutts[all]`. On first use it downloads the model automatically.
:::info
`discord.py[voice]` installs **PyNaCl** (for voice encryption) and **opus bindings** automatically. This is required for Discord voice channel support.
:::
@ -63,9 +68,11 @@ pip install hermes-agent[all]
```bash
# macOS
brew install portaudio ffmpeg opus
brew install espeak-ng # for NeuTTS
# Ubuntu/Debian
sudo apt install portaudio19-dev ffmpeg libopus0
sudo apt install espeak-ng # for NeuTTS
```
| Dependency | Purpose | Required For |
@ -73,6 +80,7 @@ sudo apt install portaudio19-dev ffmpeg libopus0
| **PortAudio** | Microphone input and audio playback | CLI voice mode |
| **ffmpeg** | Audio format conversion (MP3 → Opus, PCM → WAV) | All platforms |
| **Opus** | Discord voice codec | Discord voice channels |
| **espeak-ng** | Phonemizer backend | Local NeuTTS provider |
### API Keys
@ -84,8 +92,9 @@ Add to `~/.hermes/.env`:
GROQ_API_KEY=your-key # Groq Whisper — fast, free tier (cloud)
VOICE_TOOLS_OPENAI_KEY=your-key # OpenAI Whisper — paid (cloud)
# Text-to-Speech (optional — Edge TTS works without any key)
ELEVENLABS_API_KEY=your-key # ElevenLabs — premium quality
# Text-to-Speech (optional — Edge TTS and NeuTTS work without any key)
ELEVENLABS_API_KEY=*** # ElevenLabs — premium quality
# VOICE_TOOLS_OPENAI_KEY above also enables OpenAI TTS
```
:::tip
@ -303,8 +312,9 @@ DISCORD_ALLOWED_USERS=your-user-id
# STT — local provider needs no key (pip install faster-whisper)
# GROQ_API_KEY=your-key # Alternative: cloud-based, fast, free tier
# TTS — optional, Edge TTS (free) is the default
# ELEVENLABS_API_KEY=your-key # Premium quality
# TTS — optional. Edge TTS and NeuTTS need no key.
# ELEVENLABS_API_KEY=*** # Premium quality
# VOICE_TOOLS_OPENAI_KEY=*** # OpenAI TTS / Whisper
```
### Start the Gateway
@ -385,7 +395,7 @@ stt:
# Text-to-Speech
tts:
provider: "edge" # "edge" (free) | "elevenlabs" | "openai"
provider: "edge" # "edge" (free) | "elevenlabs" | "openai" | "neutts"
edge:
voice: "en-US-AriaNeural" # 322 voices, 74 languages
elevenlabs:
@ -394,6 +404,11 @@ tts:
openai:
model: "gpt-4o-mini-tts"
voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer
neutts:
ref_audio: ''
ref_text: ''
model: neuphonic/neutts-air-q4-gguf
device: cpu
```
### Environment Variables
@ -410,9 +425,9 @@ STT_OPENAI_MODEL=whisper-1 # Override default OpenAI STT model
GROQ_BASE_URL=https://api.groq.com/openai/v1 # Custom Groq endpoint
STT_OPENAI_BASE_URL=https://api.openai.com/v1 # Custom OpenAI STT endpoint
# Text-to-Speech providers (Edge TTS needs no key)
ELEVENLABS_API_KEY=... # ElevenLabs (premium quality)
# OpenAI TTS uses VOICE_TOOLS_OPENAI_KEY
# Text-to-Speech providers (Edge TTS and NeuTTS need no key)
ELEVENLABS_API_KEY=*** # ElevenLabs (premium quality)
# VOICE_TOOLS_OPENAI_KEY above also enables OpenAI TTS
# Discord voice channel
DISCORD_BOT_TOKEN=...
@ -440,6 +455,9 @@ Provider priority (automatic fallback): **local** > **groq** > **openai**
| **Edge TTS** | Good | Free | ~1s | No |
| **ElevenLabs** | Excellent | Paid | ~2s | Yes |
| **OpenAI TTS** | Good | Paid | ~1.5s | Yes |
| **NeuTTS** | Good | Free | Depends on CPU/GPU | No |
NeuTTS uses the `tts.neutts` config block above.
---

View file

@ -97,6 +97,18 @@ WHATSAPP_MODE=bot # "bot" or "self-chat"
WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers (with country code, no +)
```
Optional behavior settings in `~/.hermes/config.yaml`:
```yaml
unauthorized_dm_behavior: pair
whatsapp:
unauthorized_dm_behavior: ignore
```
- `unauthorized_dm_behavior: pair` is the global default. Unknown DM senders get a pairing code.
- `whatsapp.unauthorized_dm_behavior: ignore` makes WhatsApp stay silent for unauthorized DMs, which is usually the better choice for a private number.
Then start the gateway:
```bash
@ -162,6 +174,7 @@ whatsapp:
| **Bridge crashes or reconnect loops** | Restart the gateway, update Hermes, and re-pair if the session was invalidated by a WhatsApp protocol change. |
| **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. |
| **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces). |
| **Bot replies to strangers with a pairing code** | Set `whatsapp.unauthorized_dm_behavior: ignore` in `~/.hermes/config.yaml` if you want unauthorized DMs to be silently ignored instead. |
---
@ -173,6 +186,13 @@ of authorized users. Without this setting, the gateway will **deny all incoming
safety measure.
:::
By default, unauthorized DMs still receive a pairing code reply. If you want a private WhatsApp number to stay completely silent to strangers, set:
```yaml
whatsapp:
unauthorized_dm_behavior: ignore
```
- The `~/.hermes/whatsapp/session` directory contains full session credentials — protect it like a password
- Set file permissions: `chmod 700 ~/.hermes/whatsapp/session`
- Use a **dedicated phone number** for the bot to isolate risk from your personal account

View file

@ -151,6 +151,19 @@ For more flexible authorization, Hermes includes a code-based pairing system. In
3. The bot owner runs `hermes pairing approve <platform> <code>` on the CLI
4. The user is permanently approved for that platform
Control how unauthorized direct messages are handled in `~/.hermes/config.yaml`:
```yaml
unauthorized_dm_behavior: pair
whatsapp:
unauthorized_dm_behavior: ignore
```
- `pair` is the default. Unauthorized DMs get a pairing code reply.
- `ignore` silently drops unauthorized DMs.
- Platform sections override the global default, so you can keep pairing on Telegram while keeping WhatsApp silent.
**Security features** (based on OWASP + NIST SP 800-63-4 guidance):
| Feature | Details |