diff --git a/AGENTS.md b/AGENTS.md index 8bd979b058..0f5ce15f28 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4 Worker count above 4 will surface test-ordering flakes that CI never sees. Always run the full suite before pushing changes. + +### Don't write change-detector tests + +A test is a **change-detector** if it fails whenever data that is **expected +to change** gets updated — model catalogs, config version numbers, +enumeration counts, hardcoded lists of provider models. These tests add no +behavioral coverage; they just guarantee that routine source updates break +CI and cost engineering time to "fix." + +**Do not write:** + +```python +# catalog snapshot — breaks every model release +assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"] +assert "MiniMax-M2.7" in models + +# config version literal — breaks every schema bump +assert DEFAULT_CONFIG["_config_version"] == 21 + +# enumeration count — breaks every time a skill/provider is added +assert len(_PROVIDER_MODELS["huggingface"]) == 8 +``` + +**Do write:** + +```python +# behavior: does the catalog plumbing work at all? +assert "gemini" in _PROVIDER_MODELS +assert len(_PROVIDER_MODELS["gemini"]) >= 1 + +# behavior: does migration bump the user's version to current latest? +assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] + +# invariant: no plan-only model leaks into the legacy list +assert not (set(moonshot_models) & coding_plan_only_models) + +# invariant: every model in the catalog has a context-length entry +for m in _PROVIDER_MODELS["huggingface"]: + assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER +``` + +The rule: if the test reads like a snapshot of current data, delete it. If +it reads like a contract about how two pieces of data must relate, keep it. +When a PR adds a new provider/model and you want a test, make the test +assert the relationship (e.g. "catalog entries all have context lengths"), +not the specific names. 
+ +Reviewers should reject new change-detector tests; authors should convert +them into invariants before re-requesting review. diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py index 68f61e340a..c2e1a59826 100644 --- a/acp_adapter/permissions.py +++ b/acp_adapter/permissions.py @@ -63,6 +63,9 @@ def make_approval_callback( logger.warning("Permission request timed out or failed: %s", exc) return "deny" + if response is None: + return "deny" + outcome = response.outcome if isinstance(outcome, AllowedOutcome): option_id = outcome.option_id diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 119a08685a..d73c71157a 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -4,6 +4,7 @@ from __future__ import annotations import asyncio import logging +import os from collections import defaultdict, deque from concurrent.futures import ThreadPoolExecutor from typing import Any, Deque, Optional @@ -51,7 +52,7 @@ try: except ImportError: from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined] -from acp_adapter.auth import detect_provider, has_provider +from acp_adapter.auth import detect_provider from acp_adapter.events import ( make_message_cb, make_step_cb, @@ -71,6 +72,11 @@ except Exception: # Thread pool for running AIAgent (synchronous) in parallel. _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent") +# Server-side page size for list_sessions. The ACP ListSessionsRequest schema +# does not expose a client-side limit, so this is a fixed cap that clients +# paginate against using `cursor` / `next_cursor`. 
+_LIST_SESSIONS_PAGE_SIZE = 50 + def _extract_text( prompt: list[ @@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent): ) async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None: - if has_provider(): - return AuthenticateResponse() - return None + # Only accept authenticate() calls whose method_id matches the + # provider we advertised in initialize(). Without this check, + # authenticate() would acknowledge any method_id as long as the + # server has provider credentials configured — harmless under + # Hermes' threat model (ACP is stdio-only, local-trust), but poor + # API hygiene and confusing if ACP ever grows multi-method auth. + provider = detect_provider() + if not provider: + return None + if not isinstance(method_id, str) or method_id.strip().lower() != provider: + return None + return AuthenticateResponse() # ---- Session management ------------------------------------------------- @@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent): cwd: str | None = None, **kwargs: Any, ) -> ListSessionsResponse: + """List ACP sessions with optional ``cwd`` filtering and cursor pagination. + + ``cwd`` is passed through to ``SessionManager.list_sessions`` which already + normalizes and filters by working directory. ``cursor`` is a ``session_id`` + previously returned as ``next_cursor``; results resume after that entry. + Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more + results remain, ``next_cursor`` is set to the last returned ``session_id``. + """ infos = self.session_manager.list_sessions(cwd=cwd) + + if cursor: + for idx, s in enumerate(infos): + if s["session_id"] == cursor: + infos = infos[idx + 1:] + break + else: + # Unknown cursor -> empty page (do not fall back to full list). 
+ infos = [] + + has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE + infos = infos[:_LIST_SESSIONS_PAGE_SIZE] + sessions = [] for s in infos: updated_at = s.get("updated_at") @@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent): updated_at=updated_at, ) ) - return ListSessionsResponse(sessions=sessions) + + next_cursor = sessions[-1].session_id if has_more and sessions else None + return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor) # ---- Prompt (core) ------------------------------------------------------ @@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent): agent.step_callback = step_cb agent.message_callback = message_cb - if approval_cb: - try: - from tools import terminal_tool as _terminal_tool - previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None) - _terminal_tool.set_approval_callback(approval_cb) - except Exception: - logger.debug("Could not set ACP approval callback", exc_info=True) + # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr). + # Set it INSIDE _run_agent so the TLS write happens in the executor + # thread — setting it here would write to the event-loop thread's TLS, + # not the executor's. Also set HERMES_INTERACTIVE so approval.py + # takes the CLI-interactive path (which calls the registered + # callback via prompt_dangerous_approval) instead of the + # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff). + # ACP's conn.request_permission maps cleanly to the interactive + # callback shape — not the gateway-queue HERMES_EXEC_ASK path, + # which requires a notify_cb registered in _gateway_notify_cbs. 
+ previous_approval_cb = None + previous_interactive = None def _run_agent() -> dict: + nonlocal previous_approval_cb, previous_interactive + if approval_cb: + try: + from tools import terminal_tool as _terminal_tool + previous_approval_cb = _terminal_tool._get_approval_callback() + _terminal_tool.set_approval_callback(approval_cb) + except Exception: + logger.debug("Could not set ACP approval callback", exc_info=True) + # Signal to tools.approval that we have an interactive callback + # and the non-interactive auto-approve path must not fire. + previous_interactive = os.environ.get("HERMES_INTERACTIVE") + os.environ["HERMES_INTERACTIVE"] = "1" try: result = agent.run_conversation( user_message=user_text, @@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent): logger.exception("Agent error in session %s", session_id) return {"final_response": f"Error: {e}", "messages": state.history} finally: + # Restore HERMES_INTERACTIVE. + if previous_interactive is None: + os.environ.pop("HERMES_INTERACTIVE", None) + else: + os.environ["HERMES_INTERACTIVE"] = previous_interactive if approval_cb: try: from tools import terminal_tool as _terminal_tool diff --git a/agent/account_usage.py b/agent/account_usage.py new file mode 100644 index 0000000000..0e9562dcc9 --- /dev/null +++ b/agent/account_usage.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Optional + +import httpx + +from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token +from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials +from hermes_cli.runtime_provider import resolve_runtime_provider + + +def _utc_now() -> datetime: + return datetime.now(timezone.utc) + + +@dataclass(frozen=True) +class AccountUsageWindow: + label: str + used_percent: Optional[float] = None + reset_at: Optional[datetime] = None + detail: Optional[str] = None + + +@dataclass(frozen=True) +class 
AccountUsageSnapshot: + provider: str + source: str + fetched_at: datetime + title: str = "Account limits" + plan: Optional[str] = None + windows: tuple[AccountUsageWindow, ...] = () + details: tuple[str, ...] = () + unavailable_reason: Optional[str] = None + + @property + def available(self) -> bool: + return bool(self.windows or self.details) and not self.unavailable_reason + + +def _title_case_slug(value: Optional[str]) -> Optional[str]: + cleaned = str(value or "").strip() + if not cleaned: + return None + return cleaned.replace("_", " ").replace("-", " ").title() + + +def _parse_dt(value: Any) -> Optional[datetime]: + if value in (None, ""): + return None + if isinstance(value, (int, float)): + return datetime.fromtimestamp(float(value), tz=timezone.utc) + if isinstance(value, str): + text = value.strip() + if not text: + return None + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + dt = datetime.fromisoformat(text) + return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc) + except ValueError: + return None + return None + + +def _format_reset(dt: Optional[datetime]) -> str: + if not dt: + return "unknown" + local_dt = dt.astimezone() + delta = dt - _utc_now() + total_seconds = int(delta.total_seconds()) + if total_seconds <= 0: + return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})" + hours, rem = divmod(total_seconds, 3600) + minutes = rem // 60 + if hours >= 24: + days, hours = divmod(hours, 24) + rel = f"in {days}d {hours}h" + elif hours > 0: + rel = f"in {hours}h {minutes}m" + else: + rel = f"in {minutes}m" + return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})" + + +def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]: + if not snapshot: + return [] + header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}" + lines = [header] + if snapshot.plan: + lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})") + else: + 
lines.append(f"Provider: {snapshot.provider}") + for window in snapshot.windows: + if window.used_percent is None: + base = f"{window.label}: unavailable" + else: + remaining = max(0, round(100 - float(window.used_percent))) + used = max(0, round(float(window.used_percent))) + base = f"{window.label}: {remaining}% remaining ({used}% used)" + if window.reset_at: + base += f" • resets {_format_reset(window.reset_at)}" + elif window.detail: + base += f" • {window.detail}" + lines.append(base) + for detail in snapshot.details: + lines.append(detail) + if snapshot.unavailable_reason: + lines.append(f"Unavailable: {snapshot.unavailable_reason}") + return lines + + +def _resolve_codex_usage_url(base_url: str) -> str: + normalized = (base_url or "").strip().rstrip("/") + if not normalized: + normalized = "https://chatgpt.com/backend-api/codex" + if normalized.endswith("/codex"): + normalized = normalized[: -len("/codex")] + if "/backend-api" in normalized: + return normalized + "/wham/usage" + return normalized + "/api/codex/usage" + + +def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]: + creds = resolve_codex_runtime_credentials(refresh_if_expiring=True) + token_data = _read_codex_tokens() + tokens = token_data.get("tokens") or {} + account_id = str(tokens.get("account_id", "") or "").strip() or None + headers = { + "Authorization": f"Bearer {creds['api_key']}", + "Accept": "application/json", + "User-Agent": "codex-cli", + } + if account_id: + headers["ChatGPT-Account-Id"] = account_id + with httpx.Client(timeout=15.0) as client: + response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers) + response.raise_for_status() + payload = response.json() or {} + rate_limit = payload.get("rate_limit") or {} + windows: list[AccountUsageWindow] = [] + for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")): + window = rate_limit.get(key) or {} + used = window.get("used_percent") + if used is None: + 
continue + windows.append( + AccountUsageWindow( + label=label, + used_percent=float(used), + reset_at=_parse_dt(window.get("reset_at")), + ) + ) + details: list[str] = [] + credits = payload.get("credits") or {} + if credits.get("has_credits"): + balance = credits.get("balance") + if isinstance(balance, (int, float)): + details.append(f"Credits balance: ${float(balance):.2f}") + elif credits.get("unlimited"): + details.append("Credits balance: unlimited") + return AccountUsageSnapshot( + provider="openai-codex", + source="usage_api", + fetched_at=_utc_now(), + plan=_title_case_slug(payload.get("plan_type")), + windows=tuple(windows), + details=tuple(details), + ) + + +def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]: + token = (resolve_anthropic_token() or "").strip() + if not token: + return None + if not _is_oauth_token(token): + return AccountUsageSnapshot( + provider="anthropic", + source="oauth_usage_api", + fetched_at=_utc_now(), + unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.", + ) + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + "Content-Type": "application/json", + "anthropic-beta": "oauth-2025-04-20", + "User-Agent": "claude-code/2.1.0", + } + with httpx.Client(timeout=15.0) as client: + response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers) + response.raise_for_status() + payload = response.json() or {} + windows: list[AccountUsageWindow] = [] + mapping = ( + ("five_hour", "Current session"), + ("seven_day", "Current week"), + ("seven_day_opus", "Opus week"), + ("seven_day_sonnet", "Sonnet week"), + ) + for key, label in mapping: + window = payload.get(key) or {} + util = window.get("utilization") + if util is None: + continue + used = float(util) * 100 if float(util) <= 1 else float(util) + windows.append( + AccountUsageWindow( + label=label, + used_percent=used, + reset_at=_parse_dt(window.get("resets_at")), + ) + 
) + details: list[str] = [] + extra = payload.get("extra_usage") or {} + if extra.get("is_enabled"): + used_credits = extra.get("used_credits") + monthly_limit = extra.get("monthly_limit") + currency = extra.get("currency") or "USD" + if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)): + details.append( + f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}" + ) + return AccountUsageSnapshot( + provider="anthropic", + source="oauth_usage_api", + fetched_at=_utc_now(), + windows=tuple(windows), + details=tuple(details), + ) + + +def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]: + runtime = resolve_runtime_provider( + requested="openrouter", + explicit_base_url=base_url, + explicit_api_key=api_key, + ) + token = str(runtime.get("api_key", "") or "").strip() + if not token: + return None + normalized = str(runtime.get("base_url", "") or "").rstrip("/") + credits_url = f"{normalized}/credits" + key_url = f"{normalized}/key" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + } + with httpx.Client(timeout=10.0) as client: + credits_resp = client.get(credits_url, headers=headers) + credits_resp.raise_for_status() + credits = (credits_resp.json() or {}).get("data") or {} + try: + key_resp = client.get(key_url, headers=headers) + key_resp.raise_for_status() + key_data = (key_resp.json() or {}).get("data") or {} + except Exception: + key_data = {} + total_credits = float(credits.get("total_credits") or 0.0) + total_usage = float(credits.get("total_usage") or 0.0) + details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"] + windows: list[AccountUsageWindow] = [] + limit = key_data.get("limit") + limit_remaining = key_data.get("limit_remaining") + limit_reset = str(key_data.get("limit_reset") or "").strip() + usage = key_data.get("usage") + if ( + isinstance(limit, (int, float)) + and float(limit) > 0 + 
and isinstance(limit_remaining, (int, float)) + and 0 <= float(limit_remaining) <= float(limit) + ): + limit_value = float(limit) + remaining_value = float(limit_remaining) + used_percent = ((limit_value - remaining_value) / limit_value) * 100 + detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"] + if limit_reset: + detail_parts.append(f"resets {limit_reset}") + windows.append( + AccountUsageWindow( + label="API key quota", + used_percent=used_percent, + detail=" • ".join(detail_parts), + ) + ) + if isinstance(usage, (int, float)): + usage_parts = [f"API key usage: ${float(usage):.2f} total"] + for value, label in ( + (key_data.get("usage_daily"), "today"), + (key_data.get("usage_weekly"), "this week"), + (key_data.get("usage_monthly"), "this month"), + ): + if isinstance(value, (int, float)) and float(value) > 0: + usage_parts.append(f"${float(value):.2f} {label}") + details.append(" • ".join(usage_parts)) + return AccountUsageSnapshot( + provider="openrouter", + source="credits_api", + fetched_at=_utc_now(), + windows=tuple(windows), + details=tuple(details), + ) + + +def fetch_account_usage( + provider: Optional[str], + *, + base_url: Optional[str] = None, + api_key: Optional[str] = None, +) -> Optional[AccountUsageSnapshot]: + normalized = str(provider or "").strip().lower() + if normalized in {"", "auto", "custom"}: + return None + try: + if normalized == "openai-codex": + return _fetch_codex_account_usage() + if normalized == "anthropic": + return _fetch_anthropic_account_usage() + if normalized == "openrouter": + return _fetch_openrouter_account_usage(base_url, api_key) + except Exception: + return None + return None diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 3ca7492667..46afe67f3b 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -19,6 +19,7 @@ from pathlib import Path from hermes_constants import get_hermes_home from types import SimpleNamespace from typing import Any, Dict, 
List, Optional, Tuple +from utils import normalize_proxy_env_vars try: import anthropic as _anthropic_sdk @@ -308,6 +309,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional "The 'anthropic' package is required for the Anthropic provider. " "Install it with: pip install 'anthropic>=0.39.0'" ) + + normalize_proxy_env_vars() + from httpx import Timeout normalized_base_url = _normalize_base_url_text(base_url) @@ -1525,3 +1529,42 @@ def normalize_anthropic_response( ), finish_reason, ) + + +def normalize_anthropic_response_v2( + response, + strip_tool_prefix: bool = False, +) -> "NormalizedResponse": + """Normalize Anthropic response to NormalizedResponse. + + Wraps the existing normalize_anthropic_response() and maps its output + to the shared transport types. This allows incremental migration — + one call site at a time — without changing the original function. + """ + from agent.transports.types import NormalizedResponse, build_tool_call + + assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix) + + tool_calls = None + if assistant_msg.tool_calls: + tool_calls = [ + build_tool_call( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in assistant_msg.tool_calls + ] + + provider_data = {} + if getattr(assistant_msg, "reasoning_details", None): + provider_data["reasoning_details"] = assistant_msg.reasoning_details + + return NormalizedResponse( + content=assistant_msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=getattr(assistant_msg, "reasoning", None), + usage=None, # Anthropic usage is on the raw response, not the normaliser + provider_data=provider_data or None, + ) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c513a88a52..e4223771fd 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -51,7 +51,7 @@ if TYPE_CHECKING: from agent.credential_pool import load_pool from hermes_cli.config 
import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL -from utils import base_url_host_matches, base_url_hostname +from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars logger = logging.getLogger(__name__) @@ -1036,6 +1036,8 @@ def _validate_proxy_env_urls() -> None: """ from urllib.parse import urlparse + normalize_proxy_env_vars() + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = str(os.environ.get(key) or "").strip() diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 7a0d3dfd65..783f949567 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -21,6 +21,9 @@ from pathlib import Path from types import SimpleNamespace from typing import Any +from agent.file_safety import get_read_block_error, is_write_denied +from agent.redact import redact_sensitive_text + ACP_MARKER_BASE_URL = "acp://copilot" _DEFAULT_TIMEOUT_SECONDS = 900.0 @@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: } +def _permission_denied(message_id: Any) -> dict[str, Any]: + return { + "jsonrpc": "2.0", + "id": message_id, + "result": { + "outcome": { + "outcome": "cancelled", + } + }, + } + + def _format_messages_as_prompt( messages: list[dict[str, Any]], model: str | None = None, @@ -535,18 +550,13 @@ class CopilotACPClient: params = msg.get("params") or {} if method == "session/request_permission": - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "outcome": { - "outcome": "allow_once", - } - }, - } + response = _permission_denied(message_id) elif method == "fs/read_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + block_error = get_read_block_error(str(path)) + if block_error: + raise PermissionError(block_error) content = path.read_text() if path.exists() else "" line = params.get("line") limit = params.get("limit") @@ -555,6 +565,8 @@ 
class CopilotACPClient: start = line - 1 end = start + limit if isinstance(limit, int) and limit > 0 else None content = "".join(lines[start:end]) + if content: + content = redact_sensitive_text(content) response = { "jsonrpc": "2.0", "id": message_id, @@ -567,6 +579,10 @@ class CopilotACPClient: elif method == "fs/write_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + if is_write_denied(str(path)): + raise PermissionError( + f"Write denied: '{path}' is a protected system/credential file." + ) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(str(params.get("content") or "")) response = { diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 39f5838acb..7185cc8ff7 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -998,6 +998,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup active_sources: Set[str] = set() auth_store = _load_auth_store() + # Shared suppression gate — used at every upsert site so + # `hermes auth remove ` is stable across all source types. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + if provider == "anthropic": # Only auto-discover external credentials (Claude Code, Hermes PKCE) # when the user has explicitly configured anthropic as their provider. 
@@ -1017,13 +1025,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup ("claude_code", read_claude_code_credentials()), ): if creds and creds.get("accessToken"): - # Check if user explicitly removed this source - try: - from hermes_cli.auth import is_source_suppressed - if is_source_suppressed(provider, source_name): - continue - except ImportError: - pass + if _is_suppressed(provider, source_name): + continue active_sources.add(source_name) changed |= _upsert_entry( entries, @@ -1041,7 +1044,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup elif provider == "nous": state = _load_provider_state(auth_store, "nous") - if state: + if state and not _is_suppressed(provider, "device_code"): active_sources.add("device_code") # Prefer a user-supplied label embedded in the singleton state # (set by persist_nous_credentials(label=...) when the user ran @@ -1082,20 +1085,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token, source = resolve_copilot_token() if token: source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" - active_sources.add(source_name) - pconfig = PROVIDER_REGISTRY.get(provider) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, - "base_url": pconfig.inference_base_url if pconfig else "", - "label": source, - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + pconfig = PROVIDER_REGISTRY.get(provider) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": token, + "base_url": pconfig.inference_base_url if pconfig else "", + "label": source, + }, + ) except Exception as exc: logger.debug("Copilot token seed failed: %s", exc) @@ -1111,20 +1115,21 @@ def _seed_from_singletons(provider: str, entries: 
List[PooledCredential]) -> Tup token = creds.get("api_key", "") if token: source_name = creds.get("source", "qwen-cli") - active_sources.add(source_name) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_OAUTH, - "access_token": token, - "expires_at_ms": creds.get("expires_at_ms"), - "base_url": creds.get("base_url", ""), - "label": creds.get("auth_file", source_name), - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_OAUTH, + "access_token": token, + "expires_at_ms": creds.get("expires_at_ms"), + "base_url": creds.get("base_url", ""), + "label": creds.get("auth_file", source_name), + }, + ) except Exception as exc: logger.debug("Qwen OAuth token seed failed: %s", exc) @@ -1133,13 +1138,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # the device_code source as suppressed so it won't be re-seeded from # the Hermes auth store. Without this gate the removal is instantly # undone on the next load_pool() call. - codex_suppressed = False - try: - from hermes_cli.auth import is_source_suppressed - codex_suppressed = is_source_suppressed(provider, "device_code") - except ImportError: - pass - if codex_suppressed: + if _is_suppressed(provider, "device_code"): return changed, active_sources state = _load_provider_state(auth_store, "openai-codex") @@ -1173,10 +1172,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: changed = False active_sources: Set[str] = set() + # Honour user suppression — `hermes auth remove ` for an + # env-seeded credential marks the env: source as suppressed so it + # won't be re-seeded from the user's shell environment or ~/.hermes/.env. 
+ # Without this gate the removal is silently undone on the next + # load_pool() call whenever the var is still exported by the shell. + try: + from hermes_cli.auth import is_source_suppressed as _is_source_suppressed + except ImportError: + def _is_source_suppressed(_p, _s): # type: ignore[misc] + return False if provider == "openrouter": token = os.getenv("OPENROUTER_API_KEY", "").strip() if token: source = "env:OPENROUTER_API_KEY" + if _is_source_suppressed(provider, source): + return changed, active_sources active_sources.add(source) changed |= _upsert_entry( entries, @@ -1213,6 +1224,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool if not token: continue source = f"env:{env_var}" + if _is_source_suppressed(provider, source): + continue active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url @@ -1257,6 +1270,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b changed = False active_sources: Set[str] = set() + # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons. 
+ try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + # Seed from the custom_providers config entry's api_key field cp_config = _get_custom_provider_config(pool_key) if cp_config: @@ -1265,19 +1285,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b name = str(cp_config.get("name") or "").strip() if api_key: source = f"config:{name}" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": api_key, - "base_url": base_url, - "label": name or source, - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": api_key, + "base_url": base_url, + "label": name or source, + }, + ) # Seed from model.api_key if model.provider=='custom' and model.base_url matches try: @@ -1297,19 +1318,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b matched_key = get_custom_provider_pool_key(model_base_url) if matched_key == pool_key: source = "model_config" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": model_api_key, - "base_url": model_base_url, - "label": "model_config", - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": model_api_key, + "base_url": model_base_url, + "label": "model_config", + }, + ) except Exception: pass diff --git a/agent/credential_sources.py b/agent/credential_sources.py new file mode 100644 index 0000000000..8ad2fade0b --- /dev/null +++ 
b/agent/credential_sources.py @@ -0,0 +1,401 @@ +"""Unified removal contract for every credential source Hermes reads from. + +Hermes seeds its credential pool from many places: + + env: — os.environ / ~/.hermes/.env + claude_code — ~/.claude/.credentials.json + hermes_pkce — ~/.hermes/.anthropic_oauth.json + device_code — auth.json providers. (nous, openai-codex, ...) + qwen-cli — ~/.qwen/oauth_creds.json + gh_cli — gh auth token + config: — custom_providers config entry + model_config — model.api_key when model.provider == "custom" + manual — user ran `hermes auth add` + +Each source has its own reader inside ``agent.credential_pool._seed_from_*`` +(which keep their existing shape — we haven't restructured them). What we +unify here is **removal**: + + ``hermes auth remove `` must make the pool entry stay gone. + +Before this module, every source had an ad-hoc removal branch in +``auth_remove_command``, and several sources had no branch at all — so +``auth remove`` silently reverted on the next ``load_pool()`` call for +qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and +custom-config sources. + +Now every source registers a ``RemovalStep`` that does exactly three things +in the same shape: + + 1. Clean up whatever externally-readable state the source reads from + (.env line, auth.json block, OAuth file, etc.) + 2. Suppress the ``(provider, source_id)`` in auth.json so the + corresponding ``_seed_from_*`` branch skips the upsert on re-load + 3. Return ``RemovalResult`` describing what was cleaned and any + diagnostic hints the user should see (shell-exported env vars, + external credential files we deliberately don't delete, etc.) + +Adding a new credential source is: + - wire up a reader branch in ``_seed_from_*`` (existing pattern) + - gate that reader behind ``is_source_suppressed(provider, source_id)`` + - register a ``RemovalStep`` here + +No more per-source if/elif chain in ``auth_remove_command``. 
+""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, List, Optional + + +@dataclass +class RemovalResult: + """Outcome of removing a credential source. + + Attributes: + cleaned: Short strings describing external state that was actually + mutated (``"Cleared XAI_API_KEY from .env"``, + ``"Cleared openai-codex OAuth tokens from auth store"``). + Printed as plain lines to the user. + hints: Diagnostic lines ABOUT state the user may need to clean up + themselves or is deliberately left intact (shell-exported env + var, Claude Code credential file we don't delete, etc.). + Printed as plain lines to the user. Always non-destructive. + suppress: Whether to call ``suppress_credential_source`` after + cleanup so future ``load_pool`` calls skip this source. + Default True — almost every source needs this to stay sticky. + The only legitimate False is ``manual`` entries, which aren't + seeded from anywhere external. + """ + + cleaned: List[str] = field(default_factory=list) + hints: List[str] = field(default_factory=list) + suppress: bool = True + + +@dataclass +class RemovalStep: + """How to remove one specific credential source cleanly. + + Attributes: + provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...). + Special value ``"*"`` means "matches any provider" — used for + sources like ``manual`` that aren't provider-specific. + source_id: Source identifier as it appears in + ``PooledCredential.source``. May be a literal (``"claude_code"``) + or a prefix pattern matched via ``match_fn``. + match_fn: Optional predicate overriding literal ``source_id`` + matching. Gets the removed entry's source string. Used for + ``env:*`` (any env-seeded key), ``config:*`` (any custom + pool), and ``manual:*`` (any manual-source variant). + remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the + actual cleanup and returns what happened for the user. 
+ description: One-line human-readable description for docs / tests. + """ + + provider: str + source_id: str + remove_fn: Callable[..., RemovalResult] + match_fn: Optional[Callable[[str], bool]] = None + description: str = "" + + def matches(self, provider: str, source: str) -> bool: + if self.provider != "*" and self.provider != provider: + return False + if self.match_fn is not None: + return self.match_fn(source) + return source == self.source_id + + +_REGISTRY: List[RemovalStep] = [] + + +def register(step: RemovalStep) -> RemovalStep: + _REGISTRY.append(step) + return step + + +def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]: + """Return the first matching RemovalStep, or None if unregistered. + + Unregistered sources fall through to the default remove path in + ``auth_remove_command``: the pool entry is already gone (that happens + before dispatch), no external cleanup, no suppression. This is the + correct behaviour for ``manual`` entries — they were only ever stored + in the pool, nothing external to clean up. + """ + for step in _REGISTRY: + if step.matches(provider, source): + return step + return None + + +# --------------------------------------------------------------------------- +# Individual RemovalStep implementations — one per source. +# --------------------------------------------------------------------------- +# Each remove_fn is intentionally small and single-purpose. Adding a new +# credential source means adding ONE entry here — no other changes to +# auth_remove_command. + + +def _remove_env_source(provider: str, removed) -> RemovalResult: + """env: — the most common case. + + Handles three user situations: + 1. Var lives only in ~/.hermes/.env → clear it + 2. Var lives only in the user's shell (shell profile, systemd + EnvironmentFile, launchd plist) → hint them where to unset it + 3. 
Var lives in both → clear from .env, hint about shell + """ + from hermes_cli.config import get_env_path, remove_env_value + + result = RemovalResult() + env_var = removed.source[len("env:"):] + if not env_var: + return result + + # Detect shell vs .env BEFORE remove_env_value pops os.environ. + env_in_process = bool(os.getenv(env_var)) + env_in_dotenv = False + try: + env_path = get_env_path() + if env_path.exists(): + env_in_dotenv = any( + line.strip().startswith(f"{env_var}=") + for line in env_path.read_text(errors="replace").splitlines() + ) + except OSError: + pass + shell_exported = env_in_process and not env_in_dotenv + + cleared = remove_env_value(env_var) + if cleared: + result.cleaned.append(f"Cleared {env_var} from .env") + + if shell_exported: + result.hints.extend([ + f"Note: {env_var} is still set in your shell environment " + f"(not in ~/.hermes/.env).", + " Unset it there (shell profile, systemd EnvironmentFile, " + "launchd plist, etc.) or it will keep being visible to Hermes.", + f" The pool entry is now suppressed — Hermes will ignore " + f"{env_var} until you run `hermes auth add {provider}`.", + ]) + else: + result.hints.append( + f"Suppressed env:{env_var} — it will not be re-seeded even " + f"if the variable is re-exported later." + ) + return result + + +def _remove_claude_code(provider: str, removed) -> RemovalResult: + """~/.claude/.credentials.json is owned by Claude Code itself. + + We don't delete it — the user's Claude Code install still needs to + work. We just suppress it so Hermes stops reading it. 
+ """ + return RemovalResult(hints=[ + "Suppressed claude_code credential — it will not be re-seeded.", + "Note: Claude Code credentials still live in ~/.claude/.credentials.json", + "Run `hermes auth add anthropic` to re-enable if needed.", + ]) + + +def _remove_hermes_pkce(provider: str, removed) -> RemovalResult: + """~/.hermes/.anthropic_oauth.json is ours — delete it outright.""" + from hermes_constants import get_hermes_home + + result = RemovalResult() + oauth_file = get_hermes_home() / ".anthropic_oauth.json" + if oauth_file.exists(): + try: + oauth_file.unlink() + result.cleaned.append("Cleared Hermes Anthropic OAuth credentials") + except OSError as exc: + result.hints.append(f"Could not delete {oauth_file}: {exc}") + return result + + +def _clear_auth_store_provider(provider: str) -> bool: + """Delete auth_store.providers[provider]. Returns True if deleted.""" + from hermes_cli.auth import ( + _auth_store_lock, + _load_auth_store, + _save_auth_store, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + return True + return False + + +def _remove_nous_device_code(provider: str, removed) -> RemovalResult: + """Nous OAuth lives in auth.json providers.nous — clear it and suppress. + + We suppress in addition to clearing because nothing else stops the + user's next `hermes login` run from writing providers.nous again + before they decide to. Suppression forces them to go through + `hermes auth add nous` to re-engage, which is the documented re-add + path and clears the suppression atomically. 
+ """ + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + return result + + +def _remove_codex_device_code(provider: str, removed) -> RemovalResult: + """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json. + + refresh_codex_oauth_pure() writes both every time, so clearing only + the Hermes auth store is not enough — _seed_from_singletons() would + re-import from ~/.codex/auth.json on the next load_pool() call and + the removal would be instantly undone. We suppress instead of + deleting Codex CLI's file, so the Codex CLI itself keeps working. + + The canonical source name in ``_seed_from_singletons`` is + ``"device_code"`` (no prefix). Entries may show up in the pool as + either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added + via ``hermes auth add openai-codex``), but in both cases the re-seed + gate lives at the ``"device_code"`` suppression key. We suppress + that canonical key here; the central dispatcher also suppresses + ``removed.source`` which is fine — belt-and-suspenders, idempotent. + """ + from hermes_cli.auth import suppress_credential_source + + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + # Suppress the canonical re-seed source, not just whatever source the + # removed entry had. Otherwise `manual:device_code` removals wouldn't + # block the `device_code` re-seed path. + suppress_credential_source(provider, "device_code") + result.hints.extend([ + "Suppressed openai-codex device_code source — it will not be re-seeded.", + "Note: Codex CLI credentials still live in ~/.codex/auth.json", + "Run `hermes auth add openai-codex` to re-enable if needed.", + ]) + return result + + +def _remove_qwen_cli(provider: str, removed) -> RemovalResult: + """~/.qwen/oauth_creds.json is owned by the Qwen CLI. 
+ + Same pattern as claude_code — suppress, don't delete. The user's + Qwen CLI install still reads from that file. + """ + return RemovalResult(hints=[ + "Suppressed qwen-cli credential — it will not be re-seeded.", + "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json", + "Run `hermes auth add qwen-oauth` to re-enable if needed.", + ]) + + +def _remove_copilot_gh(provider: str, removed) -> RemovalResult: + """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. + + Copilot is special: the same token can be seeded as multiple source + entries (gh_cli from ``_seed_from_singletons`` plus env: from + ``_seed_from_env``), so removing one entry without suppressing the + others lets the duplicates resurrect. We suppress ALL known copilot + sources here so removal is stable regardless of which entry the + user clicked. + + We don't touch the user's gh CLI or shell state — just suppress so + Hermes stops picking the token up. + """ + # Suppress ALL copilot source variants up-front so no path resurrects + # the pool entry. The central dispatcher in auth_remove_command will + # ALSO suppress removed.source, but it's idempotent so double-calling + # is harmless. + from hermes_cli.auth import suppress_credential_source + suppress_credential_source(provider, "gh_cli") + for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + suppress_credential_source(provider, f"env:{env_var}") + + return RemovalResult(hints=[ + "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.", + "Note: Your gh CLI / shell environment is unchanged.", + "Run `hermes auth add copilot` to re-enable if needed.", + ]) + + +def _remove_custom_config(provider: str, removed) -> RemovalResult: + """Custom provider pools are seeded from custom_providers config or + model.api_key. Both are in config.yaml — modifying that from here + is more invasive than suppression. 
We suppress; the user can edit + config.yaml if they want to remove the key from disk entirely. + """ + source_label = removed.source + return RemovalResult(hints=[ + f"Suppressed {source_label} — it will not be re-seeded.", + "Note: The underlying value in config.yaml is unchanged. Edit it " + "directly if you want to remove the credential from disk.", + ]) + + +def _register_all_sources() -> None: + """Called once on module import. + + ORDER MATTERS — ``find_removal_step`` returns the first match. Put + provider-specific steps before the generic ``env:*`` step so that e.g. + copilot's ``env:GH_TOKEN`` goes through the copilot removal (which + doesn't touch the user's shell), not the generic env-var removal + (which would try to clear .env). + """ + register(RemovalStep( + provider="copilot", source_id="gh_cli", + match_fn=lambda src: src == "gh_cli" or src.startswith("env:"), + remove_fn=_remove_copilot_gh, + description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + )) + register(RemovalStep( + provider="*", source_id="env:", + match_fn=lambda src: src.startswith("env:"), + remove_fn=_remove_env_source, + description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + )) + register(RemovalStep( + provider="anthropic", source_id="claude_code", + remove_fn=_remove_claude_code, + description="~/.claude/.credentials.json", + )) + register(RemovalStep( + provider="anthropic", source_id="hermes_pkce", + remove_fn=_remove_hermes_pkce, + description="~/.hermes/.anthropic_oauth.json", + )) + register(RemovalStep( + provider="nous", source_id="device_code", + remove_fn=_remove_nous_device_code, + description="auth.json providers.nous", + )) + register(RemovalStep( + provider="openai-codex", source_id="device_code", + match_fn=lambda src: src == "device_code" or src.endswith(":device_code"), + remove_fn=_remove_codex_device_code, + description="auth.json providers.openai-codex + ~/.codex/auth.json", + )) + register(RemovalStep( + 
provider="qwen-oauth", source_id="qwen-cli", + remove_fn=_remove_qwen_cli, + description="~/.qwen/oauth_creds.json", + )) + register(RemovalStep( + provider="*", source_id="config:", + match_fn=lambda src: src.startswith("config:") or src == "model_config", + remove_fn=_remove_custom_config, + description="Custom provider config.yaml api_key field", + )) + + +_register_all_sources() diff --git a/agent/file_safety.py b/agent/file_safety.py new file mode 100644 index 0000000000..09da46cafd --- /dev/null +++ b/agent/file_safety.py @@ -0,0 +1,111 @@ +"""Shared file safety rules used by both tools and ACP shims.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + + +def _hermes_home_path() -> Path: + """Resolve the active HERMES_HOME (profile-aware) without circular imports.""" + try: + from hermes_constants import get_hermes_home # local import to avoid cycles + return get_hermes_home() + except Exception: + return Path(os.path.expanduser("~/.hermes")) + + +def build_write_denied_paths(home: str) -> set[str]: + """Return exact sensitive paths that must never be written.""" + hermes_home = _hermes_home_path() + return { + os.path.realpath(p) + for p in [ + os.path.join(home, ".ssh", "authorized_keys"), + os.path.join(home, ".ssh", "id_rsa"), + os.path.join(home, ".ssh", "id_ed25519"), + os.path.join(home, ".ssh", "config"), + str(hermes_home / ".env"), + os.path.join(home, ".bashrc"), + os.path.join(home, ".zshrc"), + os.path.join(home, ".profile"), + os.path.join(home, ".bash_profile"), + os.path.join(home, ".zprofile"), + os.path.join(home, ".netrc"), + os.path.join(home, ".pgpass"), + os.path.join(home, ".npmrc"), + os.path.join(home, ".pypirc"), + "/etc/sudoers", + "/etc/passwd", + "/etc/shadow", + ] + } + + +def build_write_denied_prefixes(home: str) -> list[str]: + """Return sensitive directory prefixes that must never be written.""" + return [ + os.path.realpath(p) + os.sep + for p in [ + 
os.path.join(home, ".ssh"), + os.path.join(home, ".aws"), + os.path.join(home, ".gnupg"), + os.path.join(home, ".kube"), + "/etc/sudoers.d", + "/etc/systemd", + os.path.join(home, ".docker"), + os.path.join(home, ".azure"), + os.path.join(home, ".config", "gh"), + ] + ] + + +def get_safe_write_root() -> Optional[str]: + """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.""" + root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") + if not root: + return None + try: + return os.path.realpath(os.path.expanduser(root)) + except Exception: + return None + + +def is_write_denied(path: str) -> bool: + """Return True if path is blocked by the write denylist or safe root.""" + home = os.path.realpath(os.path.expanduser("~")) + resolved = os.path.realpath(os.path.expanduser(str(path))) + + if resolved in build_write_denied_paths(home): + return True + for prefix in build_write_denied_prefixes(home): + if resolved.startswith(prefix): + return True + + safe_root = get_safe_write_root() + if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): + return True + + return False + + +def get_read_block_error(path: str) -> Optional[str]: + """Return an error message when a read targets internal Hermes cache files.""" + resolved = Path(path).expanduser().resolve() + hermes_home = _hermes_home_path().resolve() + blocked_dirs = [ + hermes_home / "skills" / ".hub" / "index-cache", + hermes_home / "skills" / ".hub", + ] + for blocked in blocked_dirs: + try: + resolved.relative_to(blocked) + except ValueError: + continue + return ( + f"Access denied: {path} is an internal Hermes cache file " + "and cannot be read directly to prevent prompt injection. " + "Use the skills_list or skill_view tools instead." 
+ ) + return None diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 47f9bba94f..6506bffe6d 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -170,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = { "Qwen/Qwen3.5-35B-A3B": 131072, "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, + "moonshotai/Kimi-K2.6": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, "XiaomiMiMo/MiMo-V2-Flash": 256000, diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 280105daca..a4345ca8c4 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like import json import logging import re +import subprocess from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional @@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only — no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. 
+_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def _load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def _substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. + + Only substitutes tokens for which a concrete value is available — + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). + + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. 
+ """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return f"[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]" + return output + + +def _expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. + """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return _run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + def build_plan_path( user_instruction: str = "", @@ -133,14 +238,36 @@ def _build_skill_message( activation_note: str, user_instruction: str = "", runtime_note: str = "", + session_id: str | None = None, ) -> str: """Format a loaded skill into a user/system message payload.""" from tools.skills_tool import SKILLS_DIR content = str(loaded_skill.get("content") or "") + # ── Template substitution and inline-shell expansion ── + # Done before anything else so downstream blocks (setup notes, + # supporting-file hints) see the expanded content. 
+ skills_cfg = _load_skills_config() + if skills_cfg.get("template_vars", True): + content = _substitute_template_vars(content, skill_dir, session_id) + if skills_cfg.get("inline_shell", False): + timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10) + content = _expand_inline_shell(content, skill_dir, timeout) + parts = [activation_note, "", content.strip()] + # ── Inject the absolute skill directory so the agent can reference + # bundled scripts without an extra skill_view() round-trip. ── + if skill_dir: + parts.append("") + parts.append(f"[Skill directory: {skill_dir}]") + parts.append( + "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, " + "`templates/config.yaml`) against that directory, then run them " + "with the terminal tool using the absolute path." + ) + # ── Inject resolved skill config values ── _inject_skill_config(loaded_skill, parts) @@ -188,11 +315,13 @@ def _build_skill_message( # Skill is from an external dir — use the skill name instead skill_view_target = skill_dir.name parts.append("") - parts.append("[This skill has supporting files you can load with the skill_view tool:]") + parts.append("[This skill has supporting files:]") for sf in supporting: - parts.append(f"- {sf}") + parts.append(f"- {sf} -> {skill_dir / sf}") parts.append( - f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="")' + f'\nLoad any of these with skill_view(name="{skill_view_target}", ' + f'file_path=""), or run scripts directly by absolute path ' + f"(e.g. `node {skill_dir}/scripts/foo.js`)." 
) if user_instruction: @@ -332,6 +461,7 @@ def build_skill_invocation_message( activation_note, user_instruction=user_instruction, runtime_note=runtime_note, + session_id=task_id, ) @@ -370,6 +500,7 @@ def build_preloaded_skills_prompt( loaded_skill, skill_dir, activation_note, + session_id=task_id, ) ) loaded_names.append(skill_name) diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py new file mode 100644 index 0000000000..6cd3a277a1 --- /dev/null +++ b/agent/transports/__init__.py @@ -0,0 +1,39 @@ +"""Transport layer types and registry for provider response normalization. + +Usage: + from agent.transports import get_transport + transport = get_transport("anthropic_messages") + result = transport.normalize_response(raw_response) +""" + +from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401 + +_REGISTRY: dict = {} + + +def register_transport(api_mode: str, transport_cls: type) -> None: + """Register a transport class for an api_mode string.""" + _REGISTRY[api_mode] = transport_cls + + +def get_transport(api_mode: str): + """Get a transport instance for the given api_mode. + + Returns None if no transport is registered for this api_mode. + This allows gradual migration — call sites can check for None + and fall back to the legacy code path. + """ + if not _REGISTRY: + _discover_transports() + cls = _REGISTRY.get(api_mode) + if cls is None: + return None + return cls() + + +def _discover_transports() -> None: + """Import all transport modules to trigger auto-registration.""" + try: + import agent.transports.anthropic # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py new file mode 100644 index 0000000000..7ffa71a6f9 --- /dev/null +++ b/agent/transports/anthropic.py @@ -0,0 +1,129 @@ +"""Anthropic Messages API transport. + +Delegates to the existing adapter functions in agent/anthropic_adapter.py. 
+This transport owns format conversion and normalization — NOT client lifecycle. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse + + +class AnthropicTransport(ProviderTransport): + """Transport for api_mode='anthropic_messages'. + + Wraps the existing functions in anthropic_adapter.py behind the + ProviderTransport ABC. Each method delegates — no logic is duplicated. + """ + + @property + def api_mode(self) -> str: + return "anthropic_messages" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Anthropic (system, messages) tuple. + + kwargs: + base_url: Optional[str] — affects thinking signature handling. + """ + from agent.anthropic_adapter import convert_messages_to_anthropic + + base_url = kwargs.get("base_url") + return convert_messages_to_anthropic(messages, base_url=base_url) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Anthropic input_schema format.""" + from agent.anthropic_adapter import convert_tools_to_anthropic + + return convert_tools_to_anthropic(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Anthropic messages.create() kwargs. + + Calls convert_messages and convert_tools internally. 
+ + params (all optional): + max_tokens: int + reasoning_config: dict | None + tool_choice: str | None + is_oauth: bool + preserve_dots: bool + context_length: int | None + base_url: str | None + fast_mode: bool + """ + from agent.anthropic_adapter import build_anthropic_kwargs + + return build_anthropic_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 16384), + reasoning_config=params.get("reasoning_config"), + tool_choice=params.get("tool_choice"), + is_oauth=params.get("is_oauth", False), + preserve_dots=params.get("preserve_dots", False), + context_length=params.get("context_length"), + base_url=params.get("base_url"), + fast_mode=params.get("fast_mode", False), + ) + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Anthropic response to NormalizedResponse. + + kwargs: + strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names. + """ + from agent.anthropic_adapter import normalize_anthropic_response_v2 + + strip_tool_prefix = kwargs.get("strip_tool_prefix", False) + return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix) + + def validate_response(self, response: Any) -> bool: + """Check Anthropic response structure is valid.""" + if response is None: + return False + content_blocks = getattr(response, "content", None) + if not isinstance(content_blocks, list): + return False + if not content_blocks: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract Anthropic cache_read and cache_creation token counts.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + cached = getattr(usage, "cache_read_input_tokens", 0) or 0 + written = getattr(usage, "cache_creation_input_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + # Promote the adapter's canonical mapping to module 
level so it's shared + _STOP_REASON_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + "model_context_window_exceeded": "length", + } + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Anthropic stop_reason to OpenAI finish_reason.""" + return self._STOP_REASON_MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("anthropic_messages", AnthropicTransport) diff --git a/agent/transports/base.py b/agent/transports/base.py new file mode 100644 index 0000000000..b516967b6a --- /dev/null +++ b/agent/transports/base.py @@ -0,0 +1,89 @@ +"""Abstract base for provider transports. + +A transport owns the data path for one api_mode: + convert_messages → convert_tools → build_kwargs → normalize_response + +It does NOT own: client construction, streaming, credential refresh, +prompt caching, interrupt handling, or retry logic. Those stay on AIAgent. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from agent.transports.types import NormalizedResponse + + +class ProviderTransport(ABC): + """Base class for provider-specific format conversion and normalization.""" + + @property + @abstractmethod + def api_mode(self) -> str: + """The api_mode string this transport handles (e.g. 'anthropic_messages').""" + ... + + @abstractmethod + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI-format messages to provider-native format. + + Returns provider-specific structure (e.g. (system, messages) for Anthropic, + or the messages list unchanged for chat_completions). + """ + ... + + @abstractmethod + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI-format tool definitions to provider-native format. + + Returns provider-specific tool list (e.g. Anthropic input_schema format). 
+ """ + ... + + @abstractmethod + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build the complete API call kwargs dict. + + This is the primary entry point — it typically calls convert_messages() + and convert_tools() internally, then adds model-specific config. + + Returns a dict ready to be passed to the provider's SDK client. + """ + ... + + @abstractmethod + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize a raw provider response to the shared NormalizedResponse type. + + This is the only method that returns a transport-layer type. + """ + ... + + def validate_response(self, response: Any) -> bool: + """Optional: check if the raw response is structurally valid. + + Returns True if valid, False if the response should be treated as invalid. + Default implementation always returns True. + """ + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Optional: extract provider-specific cache hit/creation stats. + + Returns dict with 'cached_tokens' and 'creation_tokens', or None. + Default returns None. + """ + return None + + def map_finish_reason(self, raw_reason: str) -> str: + """Optional: map provider-specific stop reason to OpenAI equivalent. + + Default returns the raw reason unchanged. Override for providers + with different stop reason vocabularies. + """ + return raw_reason diff --git a/agent/transports/types.py b/agent/transports/types.py new file mode 100644 index 0000000000..2b048fcaa4 --- /dev/null +++ b/agent/transports/types.py @@ -0,0 +1,100 @@ +"""Shared types for normalized provider responses. + +These dataclasses define the canonical shape that all provider adapters +normalize responses to. The shared surface is intentionally minimal — +only fields that every downstream consumer reads are top-level. 
+Protocol-specific state goes in ``provider_data`` dicts (response-level +and per-tool-call) so that protocol-aware code paths can access it +without polluting the shared type. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class ToolCall: + """A normalized tool call from any provider. + + ``id`` is the protocol's canonical identifier — what gets used in + ``tool_call_id`` / ``tool_use_id`` when constructing tool result + messages. May be ``None`` when the provider omits it; the agent + fills it via ``_deterministic_call_id()`` before storing in history. + + ``provider_data`` carries per-tool-call protocol metadata that only + protocol-aware code reads: + + * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}`` + * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}`` + * Others: ``None`` + """ + + id: Optional[str] + name: str + arguments: str # JSON string + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +@dataclass +class Usage: + """Token usage from an API response.""" + + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cached_tokens: int = 0 + + +@dataclass +class NormalizedResponse: + """Normalized API response from any provider. + + Shared fields are truly cross-provider — every caller can rely on + them without branching on api_mode. Protocol-specific state goes in + ``provider_data`` so that only protocol-aware code paths read it. 
+ + Response-level ``provider_data`` examples: + + * Anthropic: ``{"reasoning_details": [...]}`` + * Codex: ``{"codex_reasoning_items": [...]}`` + * Others: ``None`` + """ + + content: Optional[str] + tool_calls: Optional[List[ToolCall]] + finish_reason: str # "stop", "tool_calls", "length", "content_filter" + reasoning: Optional[str] = None + usage: Optional[Usage] = None + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +# --------------------------------------------------------------------------- +# Factory helpers +# --------------------------------------------------------------------------- + +def build_tool_call( + id: Optional[str], + name: str, + arguments: Any, + **provider_fields: Any, +) -> ToolCall: + """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict. + + Any extra keyword arguments are collected into ``provider_data``. + """ + args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments) + pd = dict(provider_fields) if provider_fields else None + return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd) + + +def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str: + """Translate a provider-specific stop reason to the normalised set. + + Falls back to ``"stop"`` for unknown or ``None`` reasons. 
+ """ + if reason is None: + return "stop" + return mapping.get(reason, "stop") diff --git a/cli.py b/cli.py index 8cdd1e2b4e..3851aea295 100644 --- a/cli.py +++ b/cli.py @@ -19,6 +19,7 @@ import shutil import sys import json import re +import concurrent.futures import base64 import atexit import tempfile @@ -65,6 +66,7 @@ from agent.usage_pricing import ( format_duration_compact, format_token_count_compact, ) +from agent.account_usage import fetch_account_usage, render_account_usage_lines from hermes_cli.banner import _format_context_length, format_banner_version_label _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏") @@ -5271,6 +5273,30 @@ class HermesCLI: except Exception: return False + def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool: + """Return True when /steer should be dispatched immediately while the agent is running. + + /steer MUST bypass the normal _pending_input → process_loop path when + the agent is active, because process_loop is blocked inside + self.chat() for the duration of the run. By the time the queued + command is pulled from _pending_input, _agent_running has already + flipped back to False, and process_command() takes the idle + fallback — delivering the steer as a next-turn message instead of + injecting it mid-run. Dispatching inline on the UI thread calls + agent.steer() directly, which is thread-safe (uses _pending_steer_lock). + """ + if not text or has_images or not _looks_like_slash_command(text): + return False + if not getattr(self, "_agent_running", False): + return False + try: + from hermes_cli.commands import resolve_command + base = text.split(None, 1)[0].lower().lstrip('/') + cmd = resolve_command(base) + return bool(cmd and cmd.name == "steer") + except Exception: + return False + def _show_model_and_providers(self): """Show current model + provider and list all authenticated providers. 
@@ -7022,6 +7048,27 @@ class HermesCLI: if cost_result.status == "unknown": print(f" Note: Pricing unknown for {agent.model}") + # Account limits -- fetched off-thread with a hard timeout so slow + # provider APIs don't hang the prompt. + provider = getattr(agent, "provider", None) or getattr(self, "provider", None) + base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None) + api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None) + account_snapshot = None + if provider: + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool: + try: + account_snapshot = _pool.submit( + fetch_account_usage, provider, + base_url=base_url, api_key=api_key, + ).result(timeout=10.0) + except (concurrent.futures.TimeoutError, Exception): + account_snapshot = None + account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)] + if account_lines: + print() + for line in account_lines: + print(line) + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): @@ -7398,11 +7445,12 @@ class HermesCLI: self._voice_stop_and_transcribe() # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=880, count=1) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=880, count=1) + except Exception: + pass try: self._voice_recorder.start(on_silence_stop=_on_silence) @@ -7450,11 +7498,12 @@ class HermesCLI: wav_path = self._voice_recorder.stop() # Audio cue: double beep after stream stopped (no CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=660, count=2) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=660, count=2) + except 
Exception: + pass if wav_path is None: _cprint(f"{_DIM}No speech detected.{_RST}") @@ -7604,6 +7653,17 @@ class HermesCLI: _cprint(f"Unknown voice subcommand: {subcommand}") _cprint("Usage: /voice [on|off|tts|status]") + def _voice_beeps_enabled(self) -> bool: + """Return whether CLI voice mode should play record start/stop beeps.""" + try: + from hermes_cli.config import load_config + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + def _enable_voice_mode(self): """Enable voice mode after checking requirements.""" if self._voice_mode: @@ -8007,8 +8067,18 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] for i, choice in enumerate(choices): label = choice_labels.get(choice, choice) - prefix = '❯ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' # No number for items beyond 10th + if i == selected: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Budget vertical space so HSplit never clips the command or choices. @@ -9075,6 +9145,17 @@ class HermesCLI: event.app.current_buffer.reset(append_to_history=True) return + # Handle /steer while the agent is running immediately on the + # UI thread. Queuing through _pending_input would deadlock the + # steer until after the agent loop finishes (process_loop is + # blocked inside self.chat()), which turns /steer into a + # post-run next-turn message — defeating mid-run injection. + # agent.steer() is thread-safe (holds _pending_steer_lock). 
+ if self._should_handle_steer_command_inline(text, has_images=has_images): + self.process_command(text) + event.app.current_buffer.reset(append_to_history=True) + return + # Snapshot and clear attached images images = list(self._attached_images) self._attached_images.clear() @@ -9172,6 +9253,29 @@ class HermesCLI: self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1) event.app.invalidate() + # Number keys for quick clarify selection (1-9, 0 for 10th item) + def _make_clarify_number_handler(idx): + def handler(event): + if self._clarify_state and not self._clarify_freetext: + choices = self._clarify_state.get("choices") or [] + # Map index to choice (treating "Other" as the last option) + if idx < len(choices): + # Select a numbered choice + self._clarify_state["response_queue"].put(choices[idx]) + self._clarify_state = None + self._clarify_freetext = False + event.app.invalidate() + elif idx == len(choices): + # Select "Other" option + self._clarify_freetext = True + event.app.invalidate() + return handler + + for _num in range(10): + # 1-9 select items 0-8, 0 selects item 9 (10thitem) + _idx = 9 if _num == 0 else _num - 1 + kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx)) + # --- Dangerous command approval: arrow-key navigation --- @kb.add('up', filter=Condition(lambda: bool(self._approval_state))) @@ -9213,6 +9317,20 @@ class HermesCLI: event.app.current_buffer.reset() event.app.invalidate() + # Number keys for quick approval selection (1-9, 0 for 10th item) + def _make_approval_number_handler(idx): + def handler(event): + if self._approval_state and idx < len(self._approval_state["choices"]): + self._approval_state["selected"] = idx + self._handle_approval_selection() + event.app.invalidate() + return handler + + for _num in range(10): + # 1-9 select items 0-8, 0 selects item 9 (10th item) + _idx = 9 if _num == 0 else _num - 1 + 
kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx)) + # --- History navigation: up/down browse history in normal input mode --- # The TextArea is multiline, so by default up/down only move the cursor. # Buffer.auto_up/auto_down handle both: cursor movement when multi-line, @@ -9781,14 +9899,32 @@ class HermesCLI: selected = state.get("selected", 0) preview_lines = _wrap_panel_text(question, 60) for i, choice in enumerate(choices): - prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else " " - preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f"❯ {num_prefix}. " + else: + prefix = f" {num_prefix}. " + preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # "Other" option in preview + other_num = len(choices) + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' other_label = ( - "❯ Other (type below)" if cli_ref._clarify_freetext - else "❯ Other (type your answer)" if selected == len(choices) - else " Other (type your answer)" + f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext + else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices) + else f" {other_num_prefix}. 
Other (type your answer)" ) - preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) + preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) box_width = _panel_box_width("Hermes needs your input", preview_lines) inner_text_width = max(8, box_width - 2) @@ -9796,18 +9932,35 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] if choices: for i, choice in enumerate(choices): - prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Trailing Other row(s) other_idx = len(choices) - if selected == other_idx and not cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type your answer)' - elif cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type below)' + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' else: - other_label_mand = ' Other (type your answer)' - other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)' + elif cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type below)' + else: + other_label_mand = f' {other_num_prefix}. 
Other (type your answer)' + other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") elif cli_ref._clarify_freetext: # Freetext-only mode: the guidance line takes the place of choices. other_wrapped = _wrap_panel_text( @@ -9872,6 +10025,15 @@ class HermesCLI: # "Other" option (trailing row(s), only shown when choices exist) other_idx = len(choices) + # Calculate number prefix for "Other" option + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: other_style = 'class:clarify-selected' elif cli_ref._clarify_freetext: diff --git a/cron/scheduler.py b/cron/scheduler.py index 8c6fc7a134..fc05c60e6c 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) future = asyncio.run_coroutine_threadsafe(coro, loop) - result = future.result(timeout=30) + try: + result = future.result(timeout=30) + except TimeoutError: + future.cancel() + raise if result and not getattr(result, "success", True): logger.warning( "Job '%s': media send failed for %s: %s", @@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), loop, ) - send_result = future.result(timeout=60) + try: + send_result = future.result(timeout=60) + except TimeoutError: + future.cancel() + raise if send_result and not getattr(send_result, "success", True): err = getattr(send_result, "error", "unknown") logger.warning( diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 69cfbf9ce8..8caa07e1cf 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -19,6 +19,8 @@ import uuid from abc 
import ABC, abstractmethod from urllib.parse import urlsplit +from utils import normalize_proxy_url + logger = logging.getLogger(__name__) @@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: - return value + return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: - return value - return _detect_macos_system_proxy() + return normalize_proxy_url(value) + return normalize_proxy_url(_detect_macos_system_proxy()) def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index b8cb6dc1d1..5b86bad3e7 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -794,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter): # Telegram pushes updates to our HTTP endpoint. This # enables cloud platforms (Fly.io, Railway) to auto-wake # suspended machines on inbound HTTP traffic. + # + # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it, + # python-telegram-bot passes secret_token=None and the + # webhook endpoint accepts any HTTP POST — attackers can + # inject forged updates as if from Telegram. Refuse to + # start rather than silently run in fail-open mode. + # See GHSA-3vpc-7q5r-276h. webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443")) - webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None + webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() + if not webhook_secret: + raise RuntimeError( + "TELEGRAM_WEBHOOK_SECRET is required when " + "TELEGRAM_WEBHOOK_URL is set. 
Without it, the " + "webhook endpoint accepts forged updates from " + "anyone who can reach it — see " + "https://github.com/NousResearch/hermes-agent/" + "security/advisories/GHSA-3vpc-7q5r-276h.\n\n" + "Generate a secret and set it in your .env:\n" + " export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n" + "Then register it with Telegram when setting the " + "webhook via setWebhook's secret_token parameter." + ) from urllib.parse import urlparse webhook_path = urlparse(webhook_url).path or "/telegram" @@ -2333,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter): DMs remain unrestricted. Group/supergroup messages are accepted when: - the chat is explicitly allowlisted in ``free_response_chats`` - ``require_mention`` is disabled - - the message is a command - the message replies to the bot - the bot is @mentioned - the text/caption matches a configured regex wake-word pattern + + When ``require_mention`` is enabled, slash commands are not given + special treatment — they must pass the same mention/reply checks + as any other group message. Users can still trigger commands via + the Telegram bot menu (``/command@botname``) or by explicitly + mentioning the bot (``@botname /command``), both of which are + recognised as mentions by :meth:`_message_mentions_bot`. 
""" if not self._is_group_chat(message): return True @@ -2351,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter): return True if not self._telegram_require_mention(): return True - if is_command: - return True if self._is_reply_to_bot(message): return True if self._message_mentions_bot(message): diff --git a/gateway/run.py b/gateway/run.py index b64bd22d66..eaabdcd7e6 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -30,6 +30,8 @@ from pathlib import Path from datetime import datetime from typing import Dict, Optional, Any, List +from agent.account_usage import fetch_account_usage, render_account_usage_lines + # --- Agent cache tuning --------------------------------------------------- # Bounds the per-session AIAgent cache to prevent unbounded growth in # long-lived gateways (each AIAgent holds LLM clients, tool schemas, @@ -279,6 +281,7 @@ from gateway.session import ( build_session_context, build_session_context_prompt, build_session_key, + is_shared_multi_user_session, ) from gateway.delivery import DeliveryRouter from gateway.platforms.base import ( @@ -3791,12 +3794,12 @@ class GatewayRunner: history = history or [] message_text = event.text or "" - _is_shared_thread = ( - source.chat_type != "dm" - and source.thread_id - and not getattr(self.config, "thread_sessions_per_user", False) + _is_shared_multi_user = is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False), ) - if _is_shared_thread and source.user_name: + if _is_shared_multi_user and source.user_name: message_text = f"[{source.user_name}] {message_text}" if event.media_urls: @@ -7263,6 +7266,38 @@ class GatewayRunner: if cached: agent = cached[0] + # Resolve provider/base_url/api_key for the account-usage fetch. 
+ # Prefer the live agent; fall back to persisted billing data on the + # SessionDB row so `/usage` still returns account info between turns + # when no agent is resident. + provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + if not provider and getattr(self, "_session_db", None) is not None: + try: + _entry_for_billing = self.session_store.get_or_create_session(source) + persisted = self._session_db.get_session(_entry_for_billing.session_id) or {} + except Exception: + persisted = {} + provider = provider or persisted.get("billing_provider") + base_url = base_url or persisted.get("billing_base_url") + + # Fetch account usage off the event loop so slow provider APIs don't + # block the gateway. Failures are non-fatal -- account_lines stays []. 
+ account_lines: list[str] = [] + if provider: + try: + account_snapshot = await asyncio.to_thread( + fetch_account_usage, + provider, + base_url=base_url, + api_key=api_key, + ) + except Exception: + account_snapshot = None + if account_snapshot: + account_lines = render_account_usage_lines(account_snapshot, markdown=True) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: lines = [] @@ -7320,6 +7355,10 @@ class GatewayRunner: if ctx.compression_count: lines.append(f"Compressions: {ctx.compression_count}") + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) # No agent at all -- check session history for a rough count @@ -7329,12 +7368,18 @@ class GatewayRunner: from agent.model_metadata import estimate_messages_tokens_rough msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] approx = estimate_messages_tokens_rough(msgs) - return ( - f"📊 **Session Info**\n" - f"Messages: {len(msgs)}\n" - f"Estimated context: ~{approx:,} tokens\n" - f"_(Detailed usage available after the first agent response)_" - ) + lines = [ + "📊 **Session Info**", + f"Messages: {len(msgs)}", + f"Estimated context: ~{approx:,} tokens", + "_(Detailed usage available after the first agent response)_", + ] + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) + if account_lines: + return "\n".join(account_lines) return "No usage data available for this session." async def _handle_insights_command(self, event: MessageEvent) -> str: @@ -10774,6 +10819,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # remove_pid_file() is a no-op when the PID doesn't match. + # Force-unlink to cover the old-process-crashed case. 
+ try: + (get_hermes_home() / "gateway.pid").unlink(missing_ok=True) + except Exception: + pass # Clean up any takeover marker the old process didn't consume # (e.g. SIGKILL'd before its shutdown handler could read it). try: @@ -10912,6 +10963,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = else: logger.info("Skipping signal handlers (not running in main thread).") + # Claim the PID file BEFORE bringing up any platform adapters. + # This closes the --replace race window: two concurrent `gateway run + # --replace` invocations both pass the termination-wait above, but + # only the winner of the O_CREAT|O_EXCL race below will ever open + # Telegram polling, Discord gateway sockets, etc. The loser exits + # cleanly before touching any external service. + import atexit + from gateway.status import write_pid_file, remove_pid_file, get_running_pid + _current_pid = get_running_pid() + if _current_pid is not None and _current_pid != os.getpid(): + logger.error( + "Another gateway instance (PID %d) started during our startup. " + "Exiting to avoid double-running.", _current_pid + ) + return False + try: + write_pid_file() + except FileExistsError: + logger.error( + "PID file race lost to another gateway instance. Exiting." + ) + return False + atexit.register(remove_pid_file) + # Start the gateway success = await runner.start() if not success: @@ -10921,12 +10996,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = logger.error("Gateway exiting cleanly: %s", runner.exit_reason) return True - # Write PID file so CLI can detect gateway is running - import atexit - from gateway.status import write_pid_file, remove_pid_file - write_pid_file() - atexit.register(remove_pid_file) - # Start background cron ticker so scheduled jobs fire automatically. # Pass the event loop so cron delivery can use live adapters (E2EE support). 
cron_stop = threading.Event() diff --git a/gateway/session.py b/gateway/session.py index 81278e8521..7fc83b0811 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -152,6 +152,7 @@ class SessionContext: source: SessionSource connected_platforms: List[Platform] home_channels: Dict[Platform, HomeChannel] + shared_multi_user_session: bool = False # Session metadata session_key: str = "" @@ -166,6 +167,7 @@ class SessionContext: "home_channels": { p.value: hc.to_dict() for p, hc in self.home_channels.items() }, + "shared_multi_user_session": self.shared_multi_user_session, "session_key": self.session_key, "session_id": self.session_id, "created_at": self.created_at.isoformat() if self.created_at else None, @@ -240,18 +242,16 @@ def build_session_context_prompt( lines.append(f"**Channel Topic:** {context.source.chat_topic}") # User identity. - # In shared thread sessions (non-DM with thread_id), multiple users - # contribute to the same conversation. Don't pin a single user name - # in the system prompt — it changes per-turn and would bust the prompt - # cache. Instead, note that this is a multi-user thread; individual - # sender names are prefixed on each user message by the gateway. - _is_shared_thread = ( - context.source.chat_type != "dm" - and context.source.thread_id - ) - if _is_shared_thread: + # In shared multi-user sessions (shared threads OR shared non-thread groups + # when group_sessions_per_user=False), multiple users contribute to the same + # conversation. Don't pin a single user name in the system prompt — it + # changes per-turn and would bust the prompt cache. Instead, note that + # this is a multi-user session; individual sender names are prefixed on + # each user message by the gateway. 
+ if context.shared_multi_user_session: + session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session" lines.append( - "**Session type:** Multi-user thread — messages are prefixed " + f"**Session type:** {session_label} — messages are prefixed " "with [sender name]. Multiple users may participate." ) elif context.source.user_name: @@ -467,6 +467,27 @@ class SessionEntry: ) +def is_shared_multi_user_session( + source: SessionSource, + *, + group_sessions_per_user: bool = True, + thread_sessions_per_user: bool = False, +) -> bool: + """Return True when a non-DM session is shared across participants. + + Mirrors the isolation rules in :func:`build_session_key`: + - DMs are never shared. + - Threads are shared unless ``thread_sessions_per_user`` is True. + - Non-thread group/channel sessions are shared unless + ``group_sessions_per_user`` is True (default: True = isolated). + """ + if source.chat_type == "dm": + return False + if source.thread_id: + return not thread_sessions_per_user + return not group_sessions_per_user + + def build_session_key( source: SessionSource, group_sessions_per_user: bool = True, @@ -1238,6 +1259,11 @@ def build_session_context( source=source, connected_platforms=connected, home_channels=home_channels, + shared_multi_user_session=is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False), + ), ) if session_entry: diff --git a/gateway/status.py b/gateway/status.py index e1598e1797..74763332c8 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None: def write_pid_file() -> None: - """Write the current process PID and metadata to the gateway PID file.""" - _write_json_file(_get_pid_path(), _build_pid_record()) + """Write the current process PID and metadata to the gateway PID file. 
+ + Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace + invocations race: exactly one process wins and the rest get + FileExistsError. + """ + path = _get_pid_path() + path.parent.mkdir(parents=True, exist_ok=True) + record = json.dumps(_build_pid_record()) + try: + fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) + except FileExistsError: + raise # Let caller decide: another gateway is racing us + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(record) + except Exception: + try: + path.unlink(missing_ok=True) + except OSError: + pass + raise def write_runtime_status( diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 30e5182949..9c33200107 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -152,6 +152,23 @@ def auth_add_command(args) -> None: pool = load_pool(provider) + # Clear ALL suppressions for this provider — re-adding a credential is + # a strong signal the user wants auth re-enabled. This covers env:* + # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli, + # device_code (codex), etc. One consistent re-engagement pattern. + # Matches the Codex device_code re-link pattern that predates this. 
+ if not provider.startswith(CUSTOM_POOL_PREFIX): + try: + from hermes_cli.auth import ( + _load_auth_store, + unsuppress_credential_source, + ) + suppressed = _load_auth_store().get("suppressed_sources", {}) + for src in list(suppressed.get(provider, []) or []): + unsuppress_credential_source(provider, src) + except Exception: + pass + if requested_type == AUTH_TYPE_API_KEY: token = (getattr(args, "api_key", None) or "").strip() if not token: @@ -338,71 +355,28 @@ def auth_remove_command(args) -> None: raise SystemExit(f'No credential matching "{target}" for provider {provider}.') print(f"Removed {provider} credential #{index} ({removed.label})") - # If this was an env-seeded credential, also clear the env var from .env - # so it doesn't get re-seeded on the next load_pool() call. - if removed.source.startswith("env:"): - env_var = removed.source[len("env:"):] - if env_var: - from hermes_cli.config import remove_env_value - cleared = remove_env_value(env_var) - if cleared: - print(f"Cleared {env_var} from .env") + # Unified removal dispatch. Every credential source Hermes reads from + # (env vars, external OAuth files, auth.json blocks, custom config) + # has a RemovalStep registered in agent.credential_sources. The step + # handles its source-specific cleanup and we centralise suppression + + # user-facing output here so every source behaves identically from + # the user's perspective. + from agent.credential_sources import find_removal_step + from hermes_cli.auth import suppress_credential_source - # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), - # clear the underlying auth store / credential file so it doesn't get - # re-seeded on the next load_pool() call. - elif provider == "openai-codex" and ( - removed.source == "device_code" or removed.source.endswith(":device_code") - ): - # Codex tokens live in TWO places: the Hermes auth store and - # ~/.codex/auth.json (the Codex CLI shared file). 
On every refresh, - # refresh_codex_oauth_pure() writes to both. So clearing only the - # Hermes auth store is not enough — _seed_from_singletons() will - # auto-import from ~/.codex/auth.json on the next load_pool() and - # the removal is instantly undone. Mark the source as suppressed - # so auto-import is skipped; leave ~/.codex/auth.json untouched so - # the Codex CLI itself keeps working. - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - suppress_credential_source, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - suppress_credential_source(provider, "device_code") - print("Suppressed openai-codex device_code source — it will not be re-seeded.") - print("Note: Codex CLI credentials still live in ~/.codex/auth.json") - print("Run `hermes auth add openai-codex` to re-enable if needed.") + step = find_removal_step(provider, removed.source) + if step is None: + # Unregistered source — e.g. "manual", which has nothing external + # to clean up. The pool entry is already gone; we're done. 
+ return - elif removed.source == "device_code" and provider == "nous": - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - - elif removed.source == "hermes_pkce" and provider == "anthropic": - from hermes_constants import get_hermes_home - oauth_file = get_hermes_home() / ".anthropic_oauth.json" - if oauth_file.exists(): - oauth_file.unlink() - print("Cleared Hermes Anthropic OAuth credentials") - - elif removed.source == "claude_code" and provider == "anthropic": - from hermes_cli.auth import suppress_credential_source - suppress_credential_source(provider, "claude_code") - print("Suppressed claude_code credential — it will not be re-seeded.") - print("Note: Claude Code credentials still live in ~/.claude/.credentials.json") - print("Run `hermes auth add anthropic` to re-enable if needed.") + result = step.remove_fn(provider, removed) + for line in result.cleaned: + print(line) + if result.suppress: + suppress_credential_source(provider, removed.source) + for line in result.hints: + print(line) def auth_reset_command(args) -> None: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 2a1a0f2008..74c27bca94 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -738,6 +738,26 @@ DEFAULT_CONFIG: _DefaultConfig = { # (terminal and execute_code). Skill-declared required_environment_variables # are passed through automatically; this list is for non-skill use cases. "env_passthrough": [], + # Extra files to source in the login shell when building the + # per-session environment snapshot. 
Use this when tools like nvm, + # pyenv, asdf, or custom PATH entries are registered by files that + # a bash login shell would skip — most commonly ``~/.bashrc`` + # (bash doesn't source bashrc in non-interactive login mode) or + # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``. + # Paths support ``~`` / ``${VAR}``. Missing files are silently + # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the + # snapshot shell is bash (this is the ``auto_source_bashrc`` + # behaviour — disable with that key if you want strict login-only + # semantics). + "shell_init_files": [], + # When true (default), Hermes sources ``~/.bashrc`` in the login + # shell used to build the environment snapshot. This captures + # PATH additions, shell functions, and aliases defined in the + # user's bashrc — which a plain ``bash -l -c`` would otherwise + # miss because bash skips bashrc in non-interactive login mode. + # Turn this off if you have a bashrc that misbehaves when sourced + # non-interactively (e.g. one that hard-exits on TTY checks). + "auto_source_bashrc": True, "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_forward_env": [], # Explicit environment variables to set inside Docker containers. @@ -996,6 +1016,7 @@ DEFAULT_CONFIG: _DefaultConfig = { "record_key": "ctrl+b", "max_recording_seconds": 120, "auto_tts": False, + "beep_enabled": True, # Play record start/stop beeps in CLI voice mode "silence_threshold": 200, # RMS below this = silence (0-32767) "silence_duration": 3.0, # Seconds of silence before auto-stop }, @@ -1054,6 +1075,20 @@ DEFAULT_CONFIG: _DefaultConfig = { # always goes to ~/.hermes/skills/. "skills": { "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] + # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md + # content with the absolute skill directory and the active session id + # before the agent sees it. 
Lets skill authors reference bundled + # scripts without the agent having to join paths. + "template_vars": True, + # Pre-execute inline shell snippets written as !`cmd` in SKILL.md + # body. Their stdout is inlined into the skill message before the + # agent reads it, so skills can inject dynamic context (dates, git + # state, detected tool versions, …). Off by default because any + # content from the skill author runs on the host without approval; + # only enable for skill sources you trust. + "inline_shell": False, + # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. + "inline_shell_timeout": 10, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. @@ -1200,7 +1235,7 @@ DEFAULT_CONFIG: _DefaultConfig = { }, # Config schema version - bump this when adding new required fields - "_config_version": 21, + "_config_version": 22, } # ============================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 82b6e9a4be..e4c52cb8aa 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1327,7 +1327,6 @@ def cmd_whatsapp(args): except (EOFError, KeyboardInterrupt): response = "n" if response.lower() in ("y", "yes"): - shutil.rmtree(session_dir, ignore_errors=True) session_dir.mkdir(parents=True, exist_ok=True) print(" ✓ Session cleared") @@ -5213,7 +5212,9 @@ def _install_hangup_protection(gateway_mode: bool = False): # (2) Mirror output to update.log and wrap stdio for broken-pipe # tolerance. Any failure here is non-fatal; we just skip the wrap. 
try: - logs_dir = get_hermes_home() / "logs" + from hermes_cli.config import get_hermes_home as _get_hermes_home + + logs_dir = _get_hermes_home() / "logs" logs_dir.mkdir(parents=True, exist_ok=True) log_path = logs_dir / "update.log" log_file = open(log_path, "a", buffering=1, encoding="utf-8") diff --git a/hermes_cli/models.py b/hermes_cli/models.py index e8772d246d..33614d4263 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -292,6 +292,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "big-pickle", ], "opencode-go": [ + "kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", @@ -299,6 +300,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", + "qwen3.6-plus", + "qwen3.5-plus", ], "kilocode": [ "anthropic/claude-opus-4.6", @@ -685,6 +688,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool: return False +def _openrouter_model_supports_tools(item: Any) -> bool: + """Return True when the model's ``supported_parameters`` advertise tool calling. + + hermes-agent is tool-calling-first — every provider path assumes the model + can invoke tools. Models that don't advertise ``tools`` in their + ``supported_parameters`` (e.g. image-only or completion-only models) cannot + be driven by the agent loop and would fail at the first tool call. + + **Permissive when the field is missing.** Some OpenRouter-compatible gateways + (Nous Portal, private mirrors, older catalog snapshots) don't populate + ``supported_parameters`` at all. Treat that as "unknown capability → allow" + so the picker doesn't silently empty for those users. Only hide models + whose ``supported_parameters`` is an explicit list that omits ``tools``. + + Ported from Kilo-Org/kilocode#9068. + """ + if not isinstance(item, dict): + return True + params = item.get("supported_parameters") + if not isinstance(params, list): + # Field absent / malformed / None — be permissive. 
+ return True + return "tools" in params + + def fetch_openrouter_models( timeout: float = 8.0, *, @@ -727,6 +755,11 @@ def fetch_openrouter_models( live_item = live_by_id.get(preferred_id) if live_item is None: continue + # Hide models that don't advertise tool-calling support — hermes-agent + # requires it and surfacing them leads to immediate runtime failures + # when the user selects them. Ported from Kilo-Org/kilocode#9068. + if not _openrouter_model_supports_tools(live_item): + continue desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else "" curated.append((preferred_id, desc)) @@ -2393,13 +2426,70 @@ def validate_requested_model( except Exception: pass # Fall through to generic warning + # Static-catalog fallback: when the /models probe was unreachable, + # validate against the curated list from provider_model_ids() — same + # pattern as the openai-codex and minimax branches above. This fixes + # /model switches in the gateway for providers like opencode-go and + # opencode-zen whose /models endpoint returns 404 against the HTML + # marketing site. Without this block, validate_requested_model would + # reject every model on such providers, switch_model() would return + # success=False, and the gateway would never write to + # _session_model_overrides. 
provider_label = _PROVIDER_LABELS.get(normalized, normalized) + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + + if catalog_models: + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9 + ) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5 + ) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join( + f"`{catalog_lower[s]}`" for s in suggestions + ) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the {provider_label} curated catalog " + f"and the /models endpoint was unreachable.{suggestion_text}" + f"\n The model may still work if it exists on the provider." + ), + } + + # No catalog available — accept with a warning, matching the comment's + # stated intent ("Accept and persist, but warn"). return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Could not reach the {provider_label} API to validate `{requested}`. " + f"Note: could not reach the {provider_label} API to validate `{requested}`. " f"If the service isn't down, this model may not be valid." 
), } diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index a4883b056b..78181aab2b 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status from hermes_cli.config import get_env_value, load_config from tools.managed_tool_gateway import is_managed_tool_gateway_ready from tools.tool_backend_helpers import ( + fal_key_is_configured, has_direct_modal_credentials, managed_nous_tools_enabled, normalize_browser_cloud_provider, @@ -271,7 +272,7 @@ def get_nous_subscription_features( direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) direct_tavily = bool(get_env_value("TAVILY_API_KEY")) - direct_fal = bool(get_env_value("FAL_KEY")) + direct_fal = fal_key_is_configured() direct_openai_tts = bool(resolve_openai_audio_api_key()) direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) direct_camofox = bool(get_env_value("CAMOFOX_URL")) @@ -520,7 +521,7 @@ def apply_nous_managed_defaults( browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") - if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): + if "image_gen" in selected_toolsets and not fal_key_is_configured(): changed.add("image_gen") return changed @@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]: or get_env_value("TAVILY_API_KEY") or get_env_value("EXA_API_KEY") ), - "image_gen": bool(get_env_value("FAL_KEY")), + "image_gen": fal_key_is_configured(), "tts": bool( resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY") diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index fd28f51368..62f1407cc7 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -492,8 +492,12 @@ def _resolve_openrouter_runtime( else: # Custom endpoint: use api_key from config when using config 
base_url (#1760). # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's - # the canonical env var for ollama.com authentication. - _is_ollama_url = "ollama.com" in base_url.lower() + # the canonical env var for ollama.com authentication. Match on + # HOST, not substring — a custom base_url whose path contains + # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose + # hostname is a look-alike (ollama.com.attacker.test) must not + # receive the Ollama credential. See GHSA-76xc-57q6-vm5m. + _is_ollama_url = base_url_host_matches(base_url, "ollama.com") api_key_candidates = [ explicit_api_key, (cfg_api_key if use_config_base_url else ""), diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 3c00fa4f0f..d7eb7b734a 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -102,7 +102,7 @@ _DEFAULT_PROVIDER_MODELS = { "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", @@ -441,6 +441,16 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (NeuTTS local)", True, None)) else: tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'")) + elif tts_provider == "kittentts": + try: + import importlib.util + 
kittentts_ok = importlib.util.find_spec("kittentts") is not None + except Exception: + kittentts_ok = False + if kittentts_ok: + tool_status.append(("Text-to-Speech (KittenTTS local)", True, None)) + else: + tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'")) else: tool_status.append(("Text-to-Speech (Edge TTS)", True, None)) @@ -901,6 +911,31 @@ def _install_neutts_deps() -> bool: return False +def _install_kittentts_deps() -> bool: + """Install KittenTTS dependencies with user approval. Returns True on success.""" + import subprocess + import sys + + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + print() + print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...") + print() + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + check=True, timeout=300, + ) + print_success("kittentts installed successfully") + return True + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + print_error(f"Failed to install kittentts: {e}") + print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile") + return False + + def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) @@ -916,6 +951,7 @@ def _setup_tts_provider(config: dict): "mistral": "Mistral Voxtral TTS", "gemini": "Google Gemini TTS", "neutts": "NeuTTS", + "kittentts": "KittenTTS", } current_label = provider_labels.get(current_provider, current_provider) @@ -939,9 +975,10 @@ def _setup_tts_provider(config: dict): "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", "NeuTTS (local on-device, free, ~300MB model download)", + "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)", ] ) - 
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"]) + providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"]) choices.append(f"Keep current ({current_label})") keep_current_idx = len(choices) - 1 idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) @@ -1060,6 +1097,29 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. Falling back to Edge TTS.") selected = "edge" + elif selected == "kittentts": + # Check if already installed + try: + import importlib.util + already_installed = importlib.util.find_spec("kittentts") is not None + except Exception: + already_installed = False + + if already_installed: + print_success("KittenTTS is already installed") + else: + print() + print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).") + print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + print() + if prompt_yes_no("Install KittenTTS now?", True): + if not _install_kittentts_deps(): + print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.") + selected = "edge" + else: + print_info("Skipping install. 
Set tts.provider to 'kittentts' after installing manually.") + selected = "edge" + # Save the selection if "tts" not in config: config["tts"] = {} diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 632c710121..f91a0e037b 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -24,7 +24,7 @@ from hermes_cli.nous_subscription import ( apply_nous_managed_defaults, get_nous_subscription_features, ) -from tools.tool_backend_helpers import managed_nous_tools_enabled +from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -182,6 +182,14 @@ TOOL_CATEGORIES = { ], "tts_provider": "gemini", }, + { + "name": "KittenTTS", + "badge": "local · free", + "tag": "Lightweight local ONNX TTS (~25MB), no API key", + "env_vars": [], + "tts_provider": "kittentts", + "post_setup": "kittentts", + }, ], }, "web": { @@ -423,6 +431,36 @@ def _run_post_setup(post_setup_key: str): _print_warning(" Node.js not found. 
Install Camofox via Docker:") _print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") + elif post_setup_key == "kittentts": + try: + __import__("kittentts") + _print_success(" kittentts is already installed") + return + except ImportError: + pass + import subprocess + _print_info(" Installing kittentts (~25-80MB model, CPU-only)...") + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + capture_output=True, text=True, timeout=300, + ) + if result.returncode == 0: + _print_success(" kittentts installed") + _print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + _print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)") + else: + _print_warning(" kittentts install failed:") + _print_info(f" {result.stderr.strip()[:300]}") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + except subprocess.TimeoutExpired: + _print_warning(" kittentts install timed out (>5min)") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") @@ -833,7 +871,7 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not get_env_value("FAL_KEY") + return not fal_key_is_configured() return not _toolset_has_keys(ts_key, config) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index fe6b979e44..6cf1199253 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -114,6 +114,91 @@ def _require_token(request: Request) -> None: raise HTTPException(status_code=401, detail="Unauthorized") +# Accepted 
Host header values for loopback binds. DNS rebinding attacks +# point a victim browser at an attacker-controlled hostname (evil.test) +# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin +# checks because the browser now considers evil.test and our dashboard +# "same origin". Validating the Host header at the app layer rejects any +# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7. +_LOOPBACK_HOST_VALUES: frozenset = frozenset({ + "localhost", "127.0.0.1", "::1", +}) + + +def _is_accepted_host(host_header: str, bound_host: str) -> bool: + """True if the Host header targets the interface we bound to. + + Accepts: + - Exact bound host (with or without port suffix) + - Loopback aliases when bound to loopback + - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback, + no protection possible at this layer) + """ + if not host_header: + return False + # Strip port suffix. IPv6 addresses use bracket notation: + # [::1] — no port + # [::1]:9119 — with port + # Plain hosts/v4: + # localhost:9119 + # 127.0.0.1:9119 + h = host_header.strip() + if h.startswith("["): + # IPv6 bracketed — port (if any) follows "]:" + close = h.find("]") + if close != -1: + host_only = h[1:close] # strip brackets + else: + host_only = h.strip("[]") + else: + host_only = h.rsplit(":", 1)[0] if ":" in h else h + host_only = host_only.lower() + + # 0.0.0.0 bind means operator explicitly opted into all-interfaces + # (requires --insecure per web_server.start_server). No Host-layer + # defence can protect that mode; rely on operator network controls. 
+ if bound_host in ("0.0.0.0", "::"): + return True + + # Loopback bind: accept the loopback names + bound_lc = bound_host.lower() + if bound_lc in _LOOPBACK_HOST_VALUES: + return host_only in _LOOPBACK_HOST_VALUES + + # Explicit non-loopback bind: require exact host match + return host_only == bound_lc + + +@app.middleware("http") +async def host_header_middleware(request: Request, call_next): + """Reject requests whose Host header doesn't match the bound interface. + + Defends against DNS rebinding: a victim browser on a localhost + dashboard is tricked into fetching from an attacker hostname that + TTL-flips to 127.0.0.1. CORS and same-origin checks don't help — + the browser now treats the attacker origin as same-origin with the + dashboard. Host-header validation at the app layer catches it. + + See GHSA-ppp5-vxwm-4cf7. + """ + # Store the bound host on app.state so this middleware can read it — + # set by start_server() at listen time. + bound_host = getattr(app.state, "bound_host", None) + if bound_host: + host_header = request.headers.get("host", "") + if not _is_accepted_host(host_header, bound_host): + return JSONResponse( + status_code=400, + content={ + "detail": ( + "Invalid Host header. Dashboard requests must use " + "the hostname the server was bound to." + ), + }, + ) + return await call_next(request) + + @app.middleware("http") async def auth_middleware(request: Request, call_next): """Require the session token on all /api/ routes except the public list.""" @@ -2323,6 +2408,10 @@ def start_server( "authentication. Only use on trusted networks.", host, ) + # Record the bound host so host_header_middleware can validate incoming + # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). 
+ app.state.bound_host = host + if open_browser: import webbrowser diff --git a/optional-skills/dogfood/DESCRIPTION.md b/optional-skills/dogfood/DESCRIPTION.md new file mode 100644 index 0000000000..f083fd72bd --- /dev/null +++ b/optional-skills/dogfood/DESCRIPTION.md @@ -0,0 +1,3 @@ +# Dogfood — Advanced QA & Testing Skills + +Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses. diff --git a/optional-skills/dogfood/adversarial-ux-test/SKILL.md b/optional-skills/dogfood/adversarial-ux-test/SKILL.md new file mode 100644 index 0000000000..1777e083d1 --- /dev/null +++ b/optional-skills/dogfood/adversarial-ux-test/SKILL.md @@ -0,0 +1,190 @@ +--- +name: adversarial-ux-test +description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only. +version: 1.0.0 +author: Omni @ Comelse +license: MIT +metadata: + hermes: + tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing] + related_skills: [dogfood] +--- + +# Adversarial UX Test + +Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise. + +Think of it as an automated "mom test" — but angry. + +## Why This Works + +Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. 
The adversarial persona catches:
+- Confusing terminology that makes sense to developers but not users
+- Too many steps to accomplish basic tasks
+- Missing onboarding or "aha moments"
+- Accessibility issues (font size, contrast, click targets)
+- Cold-start problems (empty states, no demo content)
+- Paywall/signup friction that kills conversion
+
+The **pragmatism filter** (Step 4) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs.
+
+## How to Use
+
+Tell the agent:
+```
+"Run an adversarial UX test on [URL]"
+"Be a grumpy [persona type] and test [app name]"
+"Do an asshole user test on my staging site"
+```
+
+You can provide a persona or let the agent generate one based on your product's target audience.
+
+## Step 1: Define the Persona
+
+If no persona is provided, generate one by answering:
+
+1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way")
+2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email)
+3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list)
+4. **What would make them give up?** (too many clicks, jargon, slow, confusing)
+5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing)
+
+### Good Persona Example
+> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords.
+
+### Bad Persona Example
+> "A user who doesn't like the app" — too vague, no constraints, no voice.
+
+The persona must be **specific enough to stay in character** for 20 minutes of testing.
+
+## Step 2: Become the Asshole (Browse as the Persona)
+
+1. 
Read any available project docs for app context and URLs +2. **Fully inhabit the persona** — their frustrations, limitations, goals +3. Navigate to the app using browser tools +4. **Attempt the persona's ACTUAL TASKS** (not a feature tour): + - Can they do what they came to do? + - How many clicks/screens to accomplish it? + - What confuses them? + - What makes them angry? + - Where do they get lost? + - What would make them give up and go back to their old way? + +5. Test these friction categories: + - **First impression** — would they even bother past the landing page? + - **Core workflow** — the ONE thing they need to do most often + - **Error recovery** — what happens when they do something wrong? + - **Readability** — text size, contrast, information density + - **Speed** — does it feel faster than their current method? + - **Terminology** — any jargon they wouldn't understand? + - **Navigation** — can they find their way back? do they know where they are? + +6. Take screenshots of every pain point +7. Check browser console for JS errors on every page + +## Step 3: The Rant (Write Feedback in Character) + +Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting. + +``` +[PERSONA NAME]'s Review of [PRODUCT] + +Overall: [Would they keep using it? Yes/No/Maybe with conditions] + +THE GOOD (grudging admission): +- [things even they have to admit work] + +THE BAD (legitimate UX issues): +- [real problems that would stop them from using the product] + +THE UGLY (showstoppers): +- [things that would make them uninstall/cancel immediately] + +SPECIFIC COMPLAINTS: +1. [Page/feature]: "[quote in persona voice]" — [what happened, expected] +2. ... + +VERDICT: "[one-line persona quote summarizing their experience]" +``` + +## Step 4: The Pragmatism Filter (Critical — Do Not Skip) + +Step OUT of the persona. 
Evaluate each complaint as a product person: + +- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it. +- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it. +- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it. +- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it. + +### Filter Criteria +1. Would a 35-year-old competent-but-busy user have the same complaint? → RED +2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED +3. Is this "I want it to work like paper" resistance to digital? → WHITE +4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED +5. Would fixing this add complexity for the 80% who are fine? → WHITE +6. Does the complaint reveal a missing onboarding moment? → GREEN + +**This filter is MANDATORY.** Never ship raw persona complaints as tickets. + +## Step 5: Create Tickets + +For **RED** and **GREEN** items only: +- Clear, actionable title +- Include the persona's verbatim quote (entertaining + memorable) +- The real UX issue underneath (objective) +- A suggested fix (actionable) +- Tag/label: "ux-review" + +For **YELLOW** items: one catch-all ticket with all notes. + +**WHITE** items appear in the report only. No tickets. + +**Max 10 tickets per session** — focus on the worst issues. + +## Step 6: Report + +Deliver: +1. The persona rant (Step 3) — entertaining and visceral +2. The filtered assessment (Step 4) — pragmatic and actionable +3. Tickets created (Step 5) — with links +4. Screenshots of key issues + +## Tips + +- **One persona per session.** Don't mix perspectives. +- **Stay in character during Steps 2-3.** Break character only at Step 4. +- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages. +- **Empty states are gold.** New user experience reveals the most friction. 
+- **The best findings are RED items the persona found accidentally** while trying to do something else. +- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways. +- **Run this before demos, launches, or after shipping a batch of features.** +- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives. +- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona. +- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain. +- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage. +- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level. 
+ +## Example Personas by Industry + +These are starting points — customize for your specific product: + +| Product Type | Persona | Age | Key Trait | +|-------------|---------|-----|-----------| +| CRM | Retirement home director | 68 | Filing cabinet is the current CRM | +| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper | +| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups | +| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes | +| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions | +| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls | +| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer | +| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders | + +## Rules + +- Stay in character during Steps 2-3 +- Be genuinely mean but fair — find real problems, not manufactured ones +- The pragmatism filter (Step 4) is **MANDATORY** +- Screenshots required for every complaint +- Max 10 tickets per session +- Test on staging/deployed app, not local dev +- One persona, one session, one report diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 96f48e77f5..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -# NOTE: This file is maintained for convenience only. -# The canonical dependency list is in pyproject.toml. 
-# Preferred install: pip install -e ".[all]" - -# Core dependencies -openai -python-dotenv -fire -httpx -rich -tenacity -prompt_toolkit -pyyaml -requests -jinja2 -pydantic>=2.0 -PyJWT[crypto] -debugpy - -# Web tools -firecrawl-py -parallel-web>=0.4.2 - -# Image generation -fal-client - -# Text-to-speech (Edge TTS is free, no API key needed) -edge-tts - -# Optional: For cron expression parsing (cronjob scheduling) -croniter - -# Optional: For messaging platform integrations (gateway) -python-telegram-bot[webhooks]>=22.6 -discord.py>=2.0 -aiohttp>=3.9.0 diff --git a/run_agent.py b/run_agent.py index 77b0b6c84e..a5d22f75c4 100644 --- a/run_agent.py +++ b/run_agent.py @@ -127,7 +127,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled +from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url @@ -190,7 +190,7 @@ def _get_proxy_from_env() -> Optional[str]: "https_proxy", "http_proxy", "all_proxy"): value = os.environ.get(key, "").strip() if value: - return value + return normalize_proxy_url(value) return None @@ -2358,6 +2358,13 @@ class AIAgent: cost reduction as direct Anthropic callers, provided their gateway implements the Anthropic cache_control contract (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + + Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct + Alibaba (DashScope) also honour Anthropic-style ``cache_control`` + markers on OpenAI-wire chat completions. Upstream pi-mono #3392 / + pi #3393 documented this for opencode-go Qwen. Without markers + these providers serve zero cache hits, re-billing the full prompt + on every turn. 
""" eff_provider = (provider if provider is not None else self.provider) or "" eff_base_url = base_url if base_url is not None else (self.base_url or "") @@ -2365,7 +2372,9 @@ class AIAgent: eff_model = (model if model is not None else self.model) or "" base_lower = eff_base_url.lower() - is_claude = "claude" in eff_model.lower() + model_lower = eff_model.lower() + provider_lower = eff_provider.lower() + is_claude = "claude" in model_lower is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai") is_anthropic_wire = eff_api_mode == "anthropic_messages" is_native_anthropic = ( @@ -2380,6 +2389,22 @@ class AIAgent: if is_anthropic_wire and is_claude: # Third-party Anthropic-compatible gateway. return True, True + + # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire + # transport that accepts Anthropic-style cache_control markers and + # rewards them with real cache hits. Without this branch + # qwen3.6-plus on opencode-go reports 0% cached tokens and burns + # through the subscription on every turn. + model_is_qwen = "qwen" in model_lower + provider_is_alibaba_family = provider_lower in { + "opencode", "opencode-zen", "opencode-go", "alibaba", + } + if provider_is_alibaba_family and model_is_qwen: + # Envelope layout (native_anthropic=False): markers on inner + # content parts, not top-level tool messages. Matches + # pi-mono's "alibaba" cacheControlFormat. + return True, False + return False, False @staticmethod @@ -6126,8 +6151,9 @@ class AIAgent: fb_base_url_hint = (fb.get("base_url") or "").strip() or None fb_api_key_hint = (fb.get("api_key") or "").strip() or None # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env - # when no explicit key is in the fallback config. - if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint: + # when no explicit key is in the fallback config. Host match + # (not substring) — see GHSA-76xc-57q6-vm5m. 
+ if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint: fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None fb_client, _resolved_fb_model = resolve_provider_client( fb_provider, model=fb_model, raw_codex=True, @@ -6548,6 +6574,15 @@ class AIAgent: return suffix return "[A multimodal message was converted to text for Anthropic compatibility.]" + def _get_anthropic_transport(self): + """Return the cached AnthropicTransport instance (lazy singleton).""" + t = getattr(self, "_anthropic_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("anthropic_messages") + self._anthropic_transport = t + return t + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6664,20 +6699,14 @@ class AIAgent: def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs + _transport = self._get_anthropic_transport() anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) - # Pass context_length (total input+output window) so the adapter can - # clamp max_tokens (output cap) when the user configured a smaller - # context window than the model's native output limit. ctx_len = getattr(self, "context_compressor", None) ctx_len = ctx_len.context_length if ctx_len else None - # _ephemeral_max_output_tokens is set for one call when the API - # returns "max_tokens too large given prompt" — it caps output to - # the available window space without touching context_length. 
ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) if ephemeral_out is not None: self._ephemeral_max_output_tokens = None # consume immediately - return build_anthropic_kwargs( + return _transport.build_kwargs( model=self.model, messages=anthropic_messages, tools=self.tools, @@ -6909,6 +6938,34 @@ class AIAgent: # (the documented max output for qwen3-coder models) so the # model has adequate output budget for tool calls. api_kwargs.update(self._max_tokens_param(65536)) + elif ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ): + # Kimi/Moonshot defaults to a low max_tokens when omitted. + # Reasoning tokens share the output budget — without an explicit + # value the model can exhaust it on thinking alone, causing + # "Response truncated due to output length limit". 32000 matches + # Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()). + api_kwargs.update(self._max_tokens_param(32000)) + # Kimi requires reasoning_effort as a top-level chat completions + # parameter (not inside extra_body). Mirror Kimi CLI's + # with_generation_kwargs(reasoning_effort=...) / with_thinking(): + # when thinking is disabled, Kimi CLI omits reasoning_effort + # entirely (maps to None). 
+ _kimi_thinking_off = bool( + self.reasoning_config + and isinstance(self.reasoning_config, dict) + and self.reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if self.reasoning_config and isinstance(self.reasoning_config, dict): + _e = (self.reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): # OpenRouter and Nous Portal translate requests to Anthropic's # Messages API, which requires max_tokens as a mandatory field. @@ -6940,6 +6997,24 @@ class AIAgent: extra_body["provider"] = provider_preferences _is_nous = "nousresearch" in self._base_url_lower + # Kimi/Moonshot API uses extra_body.thinking (separate from the + # top-level reasoning_effort) to enable/disable reasoning mode. + # Mirror Kimi CLI's with_thinking() behavior exactly — see + # MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py + _is_kimi = ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) + if _is_kimi: + _kimi_thinking_enabled = True + if self.reasoning_config and isinstance(self.reasoning_config, dict): + if self.reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + "type": "enabled" if _kimi_thinking_enabled else "disabled", + } + if self._supports_reasoning_extra_body(): if _is_github_models: github_reasoning = self._github_models_reasoning_extra_body() @@ -7362,9 +7437,9 @@ class AIAgent: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) elif not _aux_available and self.api_mode == "anthropic_messages": - # Native Anthropic — use the Anthropic client directly - from agent.anthropic_adapter 
import build_anthropic_kwargs as _build_ant_kwargs - ant_kwargs = _build_ant_kwargs( + # Native Anthropic — use the transport for kwargs + _tflush = self._get_anthropic_transport() + ant_kwargs = _tflush.build_kwargs( model=self.model, messages=api_messages, tools=[memory_tool_def], max_tokens=5120, reasoning_config=None, @@ -7392,10 +7467,15 @@ class AIAgent: if assistant_msg and assistant_msg.tool_calls: tool_calls = assistant_msg.tool_calls elif self.api_mode == "anthropic_messages" and not _aux_available: - from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush - _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth) - if _flush_msg and _flush_msg.tool_calls: - tool_calls = _flush_msg.tool_calls + _tfn = self._get_anthropic_transport() + _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) + if _flush_nr and _flush_nr.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _flush_nr.tool_calls + ] elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: @@ -8455,14 +8535,14 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar - _ant_kw = _bak(model=self.model, messages=api_messages, tools=None, + _tsum = self._get_anthropic_transport() + _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots()) summary_response = self._anthropic_messages_create(_ant_kw) - _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_msg.content or 
"").strip() + _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_sum_nr.content or "").strip() else: summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs) @@ -8487,14 +8567,14 @@ class AIAgent: retry_msg, _ = self._normalize_codex_response(retry_response) final_response = (retry_msg.content or "").strip() if retry_msg else "" elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2 - _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None, + _tretry = self._get_anthropic_transport() + _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, is_oauth=self._is_anthropic_oauth, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, preserve_dots=self._anthropic_preserve_dots()) retry_response = self._anthropic_messages_create(_ant_kw2) - _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_retry_msg.content or "").strip() + _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_retry_nr.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -9363,16 +9443,13 @@ class AIAgent: response_invalid = True error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": - content_blocks = getattr(response, "content", None) if response is not None else None - if response is None: + _tv = self._get_anthropic_transport() + if not _tv.validate_response(response): response_invalid = True - error_details.append("response is None") - elif not isinstance(content_blocks, list): - response_invalid = True - error_details.append("response.content is not a list") - elif not content_blocks: - response_invalid = True - 
error_details.append("response.content is empty") + if response is None: + error_details.append("response is None") + else: + error_details.append("response.content invalid (not a non-empty list)") else: if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: response_invalid = True @@ -9533,8 +9610,8 @@ class AIAgent: else: finish_reason = "stop" elif self.api_mode == "anthropic_messages": - stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"} - finish_reason = stop_reason_map.get(response.stop_reason, "stop") + _tfr = self._get_anthropic_transport() + finish_reason = _tfr.map_finish_reason(response.stop_reason) else: finish_reason = response.choices[0].finish_reason assistant_message = response.choices[0].message @@ -9563,10 +9640,24 @@ class AIAgent: if self.api_mode in ("chat_completions", "bedrock_converse"): _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - _trunc_msg, _ = normalize_anthropic_response( + _trunc_nr = self._get_anthropic_transport().normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) + _trunc_msg = SimpleNamespace( + content=_trunc_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in (_trunc_nr.tool_calls or []) + ] or None, + reasoning=_trunc_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _trunc_nr.provider_data.get("reasoning_details") + if _trunc_nr.provider_data else None + ), + ) _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False @@ -9822,21 +9913,27 @@ class AIAgent: if self.verbose_logging: 
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") - # Log cache hit stats when prompt caching is active - if self._use_prompt_caching: - if self.api_mode == "anthropic_messages": - # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens - cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 - written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 - else: - # OpenRouter uses prompt_tokens_details.cached_tokens - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 - prompt = usage_dict["prompt_tokens"] + # Surface cache hit stats for any provider that reports + # them — not just those where we inject cache_control + # markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic + # server-side prefix caching and return + # ``prompt_tokens_details.cached_tokens``; users + # previously could not see their cache % because this + # line was gated on ``_use_prompt_caching``, which is + # only True for Anthropic-style marker injection. + # ``canonical_usage`` is already normalised from all + # three API shapes (Anthropic / Codex / OpenAI-chat) + # so we can rely on its values directly. 
+ cached = canonical_usage.cache_read_tokens + written = canonical_usage.cache_write_tokens + prompt = usage_dict["prompt_tokens"] + if (cached or written) and not self.quiet_mode: hit_pct = (cached / prompt * 100) if prompt > 0 else 0 - if not self.quiet_mode: - self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + self._vprint( + f"{self.log_prefix} 💾 Cache: " + f"{cached:,}/{prompt:,} tokens " + f"({hit_pct:.0f}% hit, {written:,} written)" + ) has_retried_429 = False # Reset on success # Clear Nous rate limit state on successful request — @@ -10772,10 +10869,31 @@ class AIAgent: if self.api_mode == "codex_responses": assistant_message, finish_reason = self._normalize_codex_response(response) elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - assistant_message, finish_reason = normalize_anthropic_response( + _transport = self._get_anthropic_transport() + _nr = _transport.normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) + # Back-compat shim: downstream code expects SimpleNamespace with + # .content, .tool_calls, .reasoning, .reasoning_content, + # .reasoning_details attributes. 
+ assistant_message = SimpleNamespace( + content=_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, + type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + for tc in (_nr.tool_calls or []) + ] or None, + reasoning=_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _nr.provider_data.get("reasoning_details") + if _nr.provider_data else None + ), + ) + finish_reason = _nr.finish_reason else: assistant_message = response.choices[0].message diff --git a/scripts/release.py b/scripts/release.py index 68768992d8..f9b247e077 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -56,6 +56,8 @@ AUTHOR_MAP = { "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", "valdi.jorge@gmail.com": "jvcl", + "francip@gmail.com": "francip", + "omni@comelse.com": "omnissiah-comelse", "oussama.redcode@gmail.com": "mavrickdeveloper", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", @@ -95,20 +97,24 @@ AUTHOR_MAP = { "i@troy-y.org": "TroyMitchell911", "mygamez@163.com": "zhongyueming1121", "hansnow@users.noreply.github.com": "hansnow", + "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", "dmayhem93@gmail.com": "dmahan93", + "fr@tecompanytea.com": "ifrederico", "cdanis@gmail.com": "cdanis", "samherring99@gmail.com": "samherring99", "desaiaum08@gmail.com": "Aum08Desai", "shannon.sands.1979@gmail.com": "shannonsands", "shannon@nousresearch.com": "shannonsands", + "abdi.moya@gmail.com": "AxDSan", "eri@plasticlabs.ai": "Erosika", "hjcpuro@gmail.com": "hjc-puro", "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "xjtumj@gmail.com": "mengjian-github", 
"kevinskysunny@gmail.com": "kevinskysunny", "xiewenxuan462@gmail.com": "yule975", "yiweimeng.dlut@hotmail.com": "meng93", @@ -308,6 +314,7 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "zhujianxyz@gmail.com": "opriz", "asurla@nvidia.com": "anniesurla", "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", @@ -323,6 +330,8 @@ AUTHOR_MAP = { "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", "zheng.jerilyn@gmail.com": "jerilynzheng", "asslaenn5@gmail.com": "Aslaaen", + "shalompmc0505@naver.com": "pinion05", + "105142614+VTRiot@users.noreply.github.com": "VTRiot", } diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 401651c8a8..d1aeb73722 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -372,6 +372,37 @@ async function startSocket() { const app = express(); app.use(express.json()); +// Host-header validation — defends against DNS rebinding. +// The bridge binds loopback-only (127.0.0.1) but a victim browser on +// the same machine could be tricked into fetching from an attacker +// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host +// header doesn't resolve to a loopback alias. +// See GHSA-ppp5-vxwm-4cf7. +const _ACCEPTED_HOST_VALUES = new Set([ + 'localhost', + '127.0.0.1', + '[::1]', + '::1', +]); + +app.use((req, res, next) => { + const raw = (req.headers.host || '').trim(); + if (!raw) { + return res.status(400).json({ error: 'Missing Host header' }); + } + // Strip port suffix: "localhost:3000" → "localhost" + const hostOnly = (raw.includes(':') + ? raw.substring(0, raw.lastIndexOf(':')) + : raw + ).replace(/^\[|\]$/g, '').toLowerCase(); + if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) { + return res.status(400).json({ + error: 'Invalid Host header. 
Bridge accepts loopback hosts only.', + }); + } + next(); +}); + // Poll for new messages (long-poll style) app.get('/messages', (req, res) => { const msgs = messageQueue.splice(0, messageQueue.length); diff --git a/skills/productivity/maps/SKILL.md b/skills/productivity/maps/SKILL.md index 9eded20866..d93692a4a6 100644 --- a/skills/productivity/maps/SKILL.md +++ b/skills/productivity/maps/SKILL.md @@ -2,7 +2,7 @@ name: maps description: > Location intelligence — geocode a place, reverse-geocode coordinates, - find nearby places (44 POI categories), driving/walking/cycling + find nearby places (46 POI categories), driving/walking/cycling distance + time, turn-by-turn directions, timezone lookup, bounding box + area for a named place, and POI search within a rectangle. Uses OpenStreetMap + Overpass + OSRM. Free, no API key. @@ -83,12 +83,13 @@ python3 $MAPS nearby --near "90210" --category pharmacy python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10 ``` -44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket, -atm, gas_station, parking, museum, park, school, university, bank, police, -fire_station, library, airport, train_station, bus_stop, church, mosque, -synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office, -convenience_store, bakery, bookshop, laundry, car_wash, car_rental, -bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub. +46 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, guest_house, +camp_site, supermarket, atm, gas_station, parking, museum, park, school, +university, bank, police, fire_station, library, airport, train_station, +bus_stop, church, mosque, synagogue, dentist, doctor, cinema, theatre, gym, +swimming_pool, post_office, convenience_store, bakery, bookshop, laundry, +car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground, +stadium, nightclub. 
Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`, `maps_url` (clickable Google Maps link), `directions_url` (Google Maps diff --git a/skills/productivity/maps/scripts/maps_client.py b/skills/productivity/maps/scripts/maps_client.py index db0de82d6d..06d775e824 100644 --- a/skills/productivity/maps/scripts/maps_client.py +++ b/skills/productivity/maps/scripts/maps_client.py @@ -58,7 +58,9 @@ CATEGORY_TAGS = { "restaurant": ("amenity", "restaurant"), "cafe": ("amenity", "cafe"), "bar": ("amenity", "bar"), - "bakery": ("shop", "bakery"), + # bakery is tagged as shop=bakery in the OSM wiki, but some mappers use + # amenity=bakery. Search both so small indie bakeries aren't missed. + "bakery": [("shop", "bakery"), ("amenity", "bakery")], "convenience_store": ("shop", "convenience"), # Health "hospital": ("amenity", "hospital"), @@ -68,6 +70,8 @@ CATEGORY_TAGS = { "veterinary": ("amenity", "veterinary"), # Accommodation "hotel": ("tourism", "hotel"), + "guest_house": ("tourism", "guest_house"), + "camp_site": ("tourism", "camp_site"), # Shopping & Services "supermarket": ("shop", "supermarket"), "bookshop": ("shop", "books"), @@ -120,6 +124,19 @@ RELIGION_FILTER = { VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys()) + +def _tags_for(category): + """Return the CATEGORY_TAGS entry as a list of (key, value) pairs. + + Most categories map to a single (tag_key, tag_val) tuple, but some + (e.g. ``bakery``) are tagged under more than one OSM key and are + represented as a list of tuples. Normalise both forms to a list. 
+ """ + entry = CATEGORY_TAGS[category] + if isinstance(entry, list): + return list(entry) + return [entry] + OSRM_PROFILES = { "driving": "driving", "walking": "foot", @@ -338,36 +355,63 @@ def geocode_single(query): # --------------------------------------------------------------------------- def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit, - religion=None): - """Build an Overpass QL query for nearby POIs around a point.""" + religion=None, tag_pairs=None): + """Build an Overpass QL query for nearby POIs around a point. + + If ``tag_pairs`` is provided, the query unions across every + ``(key, value)`` pair (used for categories like ``bakery`` that are + tagged under more than one OSM key). Otherwise falls back to the + single ``tag_key``/``tag_val`` pair for back-compat. + """ + pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)] religion_filter = "" if religion: religion_filter = f'["religion"="{religion}"]' + body_lines = [] + for k, v in pairs: + body_lines.append( + f' node["{k}"="{v}"]{religion_filter}' + f'(around:{radius},{lat},{lon});' + ) + body_lines.append( + f' way["{k}"="{v}"]{religion_filter}' + f'(around:{radius},{lat},{lon});' + ) + body = "\n".join(body_lines) return ( f'[out:json][timeout:25];\n' f'(\n' - f' node["{tag_key}"="{tag_val}"]{religion_filter}' - f'(around:{radius},{lat},{lon});\n' - f' way["{tag_key}"="{tag_val}"]{religion_filter}' - f'(around:{radius},{lat},{lon});\n' + f'{body}\n' f');\n' f'out center {limit};\n' ) def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit, - religion=None): - """Build an Overpass QL query for POIs within a bounding box.""" + religion=None, tag_pairs=None): + """Build an Overpass QL query for POIs within a bounding box. + + See ``build_overpass_nearby`` for ``tag_pairs`` semantics. 
+ """ + pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)] religion_filter = "" if religion: religion_filter = f'["religion"="{religion}"]' + body_lines = [] + for k, v in pairs: + body_lines.append( + f' node["{k}"="{v}"]{religion_filter}' + f'({south},{west},{north},{east});' + ) + body_lines.append( + f' way["{k}"="{v}"]{religion_filter}' + f'({south},{west},{north},{east});' + ) + body = "\n".join(body_lines) return ( f'[out:json][timeout:25];\n' f'(\n' - f' node["{tag_key}"="{tag_val}"]{religion_filter}' - f'({south},{west},{north},{east});\n' - f' way["{tag_key}"="{tag_val}"]{religion_filter}' - f'({south},{west},{north},{east});\n' + f'{body}\n' f');\n' f'out center {limit};\n' ) @@ -605,10 +649,10 @@ def cmd_nearby(args): # appear twice. merged = {} for category in categories: - tag_key, tag_val = CATEGORY_TAGS[category] + tag_pairs = _tags_for(category) religion = RELIGION_FILTER.get(category) - query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit, - religion=religion) + query = build_overpass_nearby(None, None, lat, lon, radius, limit, + religion=religion, tag_pairs=tag_pairs) raw = overpass_query(query) elements = raw.get("elements", []) for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon): @@ -945,10 +989,10 @@ def cmd_bbox(args): if limit <= 0: error_exit("Limit must be a positive integer.") - tag_key, tag_val = CATEGORY_TAGS[category] + tag_pairs = _tags_for(category) religion = RELIGION_FILTER.get(category) - query = build_overpass_bbox(tag_key, tag_val, south, west, north, east, - limit, religion=religion) + query = build_overpass_bbox(None, None, south, west, north, east, + limit, religion=religion, tag_pairs=tag_pairs) raw = overpass_query(query) diff --git a/skills/social-media/xurl/SKILL.md b/skills/social-media/xurl/SKILL.md index 3ce1908084..1f47b2e6a0 100644 --- a/skills/social-media/xurl/SKILL.md +++ b/skills/social-media/xurl/SKILL.md @@ -1,7 +1,7 @@ --- name: xurl description: Interact with 
X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access. -version: 1.1.0 +version: 1.1.1 author: xdevplatform + openclaw + Hermes Agent license: MIT platforms: [linux, macos] @@ -95,6 +95,12 @@ These steps must be performed by the user directly, NOT by the agent, because th xurl auth oauth2 --app my-app ``` (This opens a browser for the OAuth 2.0 PKCE flow.) + + If X returns a `UsernameNotFound` error or 403 on the post-OAuth `/2/users/me` lookup, pass your handle explicitly (xurl v1.1.0+): + ```bash + xurl auth oauth2 --app my-app YOUR_USERNAME + ``` + This binds the token to your handle and skips the broken `/2/users/me` call. 6. Set the app as default so all commands use it: ```bash xurl auth default my-app @@ -380,6 +386,7 @@ xurl --app staging /2/users/me # one-off against staging | --- | --- | --- | | Auth errors after successful OAuth flow | Token saved to `default` app (no client-id/secret) instead of your named app | `xurl auth oauth2 --app my-app` then `xurl auth default my-app` | | `unauthorized_client` during OAuth | App type set to "Native App" in X dashboard | Change to "Web app, automated app or bot" in User Authentication Settings | +| `UsernameNotFound` or 403 on `/2/users/me` right after OAuth | X not returning username reliably from `/2/users/me` | Re-run `xurl auth oauth2 --app my-app YOUR_USERNAME` (xurl v1.1.0+) to pass the handle explicitly | | 401 on every request | Token expired or wrong default app | Check `xurl auth status` — verify `▸` points to an app with oauth2 tokens | | `client-forbidden` / `client-not-enrolled` | X platform enrollment issue | Dashboard → Apps → Manage → Move to "Pay-per-use" package → Production environment | | `CreditsDepleted` | $0 balance on X API | Buy credits (min $5) in Developer Console → Billing | diff --git a/tests/acp/test_approval_isolation.py 
b/tests/acp/test_approval_isolation.py new file mode 100644 index 0000000000..90ea4e063e --- /dev/null +++ b/tests/acp/test_approval_isolation.py @@ -0,0 +1,170 @@ +"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr. + +Two related ACP approval-flow issues: +- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards` + took the non-interactive auto-approve path and never consulted the + ACP-supplied callback. +- qg5c: `_approval_callback` was a module-global in terminal_tool; + overlapping ACP sessions overwrote each other's callback slot. + +Both fixed together by: +1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call). +2. Storing the callback in thread-local state so concurrent executor + threads don't collide. +""" + +import os +import threading +from unittest.mock import MagicMock + +import pytest + + +class TestThreadLocalApprovalCallback: + """GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so + concurrent ACP sessions don't stomp on each other's handlers.""" + + def test_set_and_get_in_same_thread(self): + from tools.terminal_tool import ( + set_approval_callback, + _get_approval_callback, + ) + + cb1 = lambda cmd, desc: "once" # noqa: E731 + set_approval_callback(cb1) + assert _get_approval_callback() is cb1 + + def test_callback_not_visible_in_different_thread(self): + """Thread A's callback is NOT visible to Thread B.""" + from tools.terminal_tool import ( + set_approval_callback, + _get_approval_callback, + ) + + cb_a = lambda cmd, desc: "thread_a" # noqa: E731 + cb_b = lambda cmd, desc: "thread_b" # noqa: E731 + + seen_in_a = [] + seen_in_b = [] + + def thread_a(): + set_approval_callback(cb_a) + # Pause so thread B has time to set its own callback + import time + time.sleep(0.05) + seen_in_a.append(_get_approval_callback()) + + def thread_b(): + set_approval_callback(cb_b) + import time + time.sleep(0.05) + seen_in_b.append(_get_approval_callback()) + + ta = threading.Thread(target=thread_a) + tb = 
threading.Thread(target=thread_b) + ta.start() + tb.start() + ta.join() + tb.join() + + # Each thread must see ONLY its own callback — not the other's + assert seen_in_a == [cb_a] + assert seen_in_b == [cb_b] + + def test_main_thread_callback_not_leaked_to_worker(self): + """A callback set in the main thread does NOT leak into a + freshly-spawned worker thread.""" + from tools.terminal_tool import ( + set_approval_callback, + _get_approval_callback, + ) + + cb_main = lambda cmd, desc: "main" # noqa: E731 + set_approval_callback(cb_main) + + worker_saw = [] + + def worker(): + worker_saw.append(_get_approval_callback()) + + t = threading.Thread(target=worker) + t.start() + t.join() + + # Worker thread has no callback set — TLS is empty for it + assert worker_saw == [None] + # Main thread still has its callback + assert _get_approval_callback() is cb_main + + def test_sudo_password_callback_also_thread_local(self): + """Same protection applies to the sudo password callback.""" + from tools.terminal_tool import ( + set_sudo_password_callback, + _get_sudo_password_callback, + ) + + cb_main = lambda: "main-password" # noqa: E731 + set_sudo_password_callback(cb_main) + + worker_saw = [] + + def worker(): + worker_saw.append(_get_sudo_password_callback()) + + t = threading.Thread(target=worker) + t.start() + t.join() + + assert worker_saw == [None] + assert _get_sudo_password_callback() is cb_main + + +class TestAcpExecAskGate: + """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so + that tools.approval.check_all_command_guards takes the CLI-interactive + path (consults the registered callback via prompt_dangerous_approval) + instead of the non-interactive auto-approve shortcut. 
+ + (HERMES_EXEC_ASK takes the gateway-queue path which requires a + notify_cb registered in _gateway_notify_cbs — not applicable to ACP, + which uses a direct callback shape.)""" + + def test_interactive_env_var_routes_to_callback(self, monkeypatch): + """When HERMES_INTERACTIVE is set and an approval callback is + registered, a dangerous command must route through the callback.""" + # Clean env + monkeypatch.delenv("HERMES_INTERACTIVE", raising=False) + monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False) + monkeypatch.delenv("HERMES_EXEC_ASK", raising=False) + monkeypatch.delenv("HERMES_YOLO_MODE", raising=False) + + from tools.approval import check_all_command_guards + + called_with = [] + + def fake_cb(command, description, *, allow_permanent=True): + called_with.append((command, description)) + return "once" + + # Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called + result = check_all_command_guards( + "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb, + ) + assert result["approved"] is True + assert called_with == [], ( + "without HERMES_INTERACTIVE the non-interactive auto-approve " + "path should fire without consulting the callback" + ) + + # With HERMES_INTERACTIVE: callback IS called, approval flows through it + monkeypatch.setenv("HERMES_INTERACTIVE", "1") + called_with.clear() + result = check_all_command_guards( + "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb, + ) + assert called_with, ( + "with HERMES_INTERACTIVE the approval path should consult the " + "registered callback — this was the ACP bypass in " + "GHSA-96vc-wcxf-jjff" + ) + assert result["approved"] is True diff --git a/tests/acp/test_permissions.py b/tests/acp/test_permissions.py index de83ebeffd..57e2bd4e5b 100644 --- a/tests/acp/test_permissions.py +++ b/tests/acp/test_permissions.py @@ -73,3 +73,17 @@ class TestApprovalMapping: result = cb("rm -rf /", "dangerous") assert result == "deny" + + def 
test_approval_none_response_returns_deny(self): + """When request_permission resolves to None, the callback should return 'deny'.""" + loop = MagicMock(spec=asyncio.AbstractEventLoop) + mock_rp = MagicMock(name="request_permission") + + future = MagicMock(spec=Future) + future.result.return_value = None + + with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future): + cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0) + result = cb("echo hi", "demo") + + assert result == "deny" diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 5893d79071..faa4c18a70 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -95,19 +95,37 @@ class TestInitialize: class TestAuthenticate: @pytest.mark.asyncio - async def test_authenticate_with_provider_configured(self, agent, monkeypatch): + async def test_authenticate_with_matching_method_id(self, agent, monkeypatch): monkeypatch.setattr( - "acp_adapter.server.has_provider", - lambda: True, + "acp_adapter.server.detect_provider", + lambda: "openrouter", ) resp = await agent.authenticate(method_id="openrouter") assert isinstance(resp, AuthenticateResponse) + @pytest.mark.asyncio + async def test_authenticate_is_case_insensitive(self, agent, monkeypatch): + monkeypatch.setattr( + "acp_adapter.server.detect_provider", + lambda: "openrouter", + ) + resp = await agent.authenticate(method_id="OpenRouter") + assert isinstance(resp, AuthenticateResponse) + + @pytest.mark.asyncio + async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch): + monkeypatch.setattr( + "acp_adapter.server.detect_provider", + lambda: "openrouter", + ) + resp = await agent.authenticate(method_id="totally-invalid-method") + assert resp is None + @pytest.mark.asyncio async def test_authenticate_without_provider(self, agent, monkeypatch): monkeypatch.setattr( - "acp_adapter.server.has_provider", - lambda: False, + "acp_adapter.server.detect_provider", + 
lambda: None, ) resp = await agent.authenticate(method_id="openrouter") assert resp is None @@ -252,6 +270,57 @@ class TestListAndFork: mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3") + @pytest.mark.asyncio + async def test_list_sessions_pagination_first_page(self, agent): + from acp_adapter import server as acp_server + + infos = [ + {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0} + for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5) + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions() + + assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE + assert resp.next_cursor == resp.sessions[-1].session_id + + @pytest.mark.asyncio + async def test_list_sessions_pagination_no_more(self, agent): + infos = [ + {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0} + for i in range(3) + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions() + + assert len(resp.sessions) == 3 + assert resp.next_cursor is None + + @pytest.mark.asyncio + async def test_list_sessions_cursor_resumes_after_match(self, agent): + infos = [ + {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + {"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions(cursor="s1") + + assert [s.session_id for s in resp.sessions] == ["s2", "s3"] + assert resp.next_cursor is None + + @pytest.mark.asyncio + async def test_list_sessions_unknown_cursor_returns_empty(self, agent): + infos = [ + {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + ] + with patch.object(agent.session_manager, 
"list_sessions", return_value=infos): + resp = await agent.list_sessions(cursor="does-not-exist") + + assert resp.sessions == [] + assert resp.next_cursor is None + # --------------------------------------------------------------------------- # session configuration / model routing # --------------------------------------------------------------------------- diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 737db01a35..b947a2df85 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -414,7 +414,11 @@ class TestRunOauthSetupToken: token = run_oauth_setup_token() assert token == "from-cred-file" - mock_run.assert_called_once() + # Don't assert exact call count — the contract is "credentials flow + # through", not "exactly one subprocess call". xdist cross-test + # pollution (other tests shimming subprocess via plugins) has flaked + # assert_called_once() in CI. + assert mock_run.called def test_returns_token_from_env_var(self, monkeypatch, tmp_path): """Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files.""" diff --git a/tests/agent/test_anthropic_normalize_v2.py b/tests/agent/test_anthropic_normalize_v2.py new file mode 100644 index 0000000000..9d5c16139a --- /dev/null +++ b/tests/agent/test_anthropic_normalize_v2.py @@ -0,0 +1,238 @@ +"""Regression tests: normalize_anthropic_response_v2 vs v1. + +Constructs mock Anthropic responses and asserts that the v2 function +(returning NormalizedResponse) produces identical field values to the +original v1 function (returning SimpleNamespace + finish_reason). 
+""" + +import json +import pytest +from types import SimpleNamespace + +from agent.anthropic_adapter import ( + normalize_anthropic_response, + normalize_anthropic_response_v2, +) +from agent.transports.types import NormalizedResponse, ToolCall + + +# --------------------------------------------------------------------------- +# Helpers to build mock Anthropic SDK responses +# --------------------------------------------------------------------------- + +def _text_block(text: str): + return SimpleNamespace(type="text", text=text) + + +def _thinking_block(thinking: str, signature: str = "sig_abc"): + return SimpleNamespace(type="thinking", thinking=thinking, signature=signature) + + +def _tool_use_block(id: str, name: str, input: dict): + return SimpleNamespace(type="tool_use", id=id, name=name, input=input) + + +def _response(content_blocks, stop_reason="end_turn"): + return SimpleNamespace( + content=content_blocks, + stop_reason=stop_reason, + usage=SimpleNamespace( + input_tokens=10, + output_tokens=5, + ), + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestTextOnly: + """Text-only response — no tools, no thinking.""" + + def setup_method(self): + self.resp = _response([_text_block("Hello world")]) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_type(self): + assert isinstance(self.v2, NormalizedResponse) + + def test_content_matches(self): + assert self.v2.content == self.v1_msg.content + + def test_finish_reason_matches(self): + assert self.v2.finish_reason == self.v1_finish + + def test_no_tool_calls(self): + assert self.v2.tool_calls is None + assert self.v1_msg.tool_calls is None + + def test_no_reasoning(self): + assert self.v2.reasoning is None + assert self.v1_msg.reasoning is None + + +class TestWithToolCalls: + """Response 
with tool calls.""" + + def setup_method(self): + self.resp = _response( + [ + _text_block("I'll check that"), + _tool_use_block("toolu_abc", "terminal", {"command": "ls"}), + _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}), + ], + stop_reason="tool_use", + ) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_finish_reason(self): + assert self.v2.finish_reason == "tool_calls" + assert self.v1_finish == "tool_calls" + + def test_tool_call_count(self): + assert len(self.v2.tool_calls) == 2 + assert len(self.v1_msg.tool_calls) == 2 + + def test_tool_call_ids_match(self): + for i in range(2): + assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id + + def test_tool_call_names_match(self): + assert self.v2.tool_calls[0].name == "terminal" + assert self.v2.tool_calls[1].name == "read_file" + for i in range(2): + assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name + + def test_tool_call_arguments_match(self): + for i in range(2): + assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments + + def test_content_preserved(self): + assert self.v2.content == self.v1_msg.content + assert "check that" in self.v2.content + + +class TestWithThinking: + """Response with thinking blocks (Claude 3.5+ extended thinking).""" + + def setup_method(self): + self.resp = _response([ + _thinking_block("Let me think about this carefully..."), + _text_block("The answer is 42."), + ]) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_reasoning_matches(self): + assert self.v2.reasoning == self.v1_msg.reasoning + assert "think about this" in self.v2.reasoning + + def test_reasoning_details_in_provider_data(self): + v1_details = self.v1_msg.reasoning_details + v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data 
else None + assert v1_details is not None + assert v2_details is not None + assert len(v2_details) == len(v1_details) + + def test_content_excludes_thinking(self): + assert self.v2.content == "The answer is 42." + + +class TestMixed: + """Response with thinking + text + tool calls.""" + + def setup_method(self): + self.resp = _response( + [ + _thinking_block("Planning my approach..."), + _text_block("I'll run the command"), + _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}), + ], + stop_reason="tool_use", + ) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_all_fields_present(self): + assert self.v2.content is not None + assert self.v2.tool_calls is not None + assert self.v2.reasoning is not None + assert self.v2.finish_reason == "tool_calls" + + def test_content_matches(self): + assert self.v2.content == self.v1_msg.content + + def test_reasoning_matches(self): + assert self.v2.reasoning == self.v1_msg.reasoning + + def test_tool_call_matches(self): + assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id + assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name + + +class TestStopReasons: + """Verify finish_reason mapping matches between v1 and v2.""" + + @pytest.mark.parametrize("stop_reason,expected", [ + ("end_turn", "stop"), + ("tool_use", "tool_calls"), + ("max_tokens", "length"), + ("stop_sequence", "stop"), + ("refusal", "content_filter"), + ("model_context_window_exceeded", "length"), + ("unknown_future_reason", "stop"), + ]) + def test_stop_reason_mapping(self, stop_reason, expected): + resp = _response([_text_block("x")], stop_reason=stop_reason) + v1_msg, v1_finish = normalize_anthropic_response(resp) + v2 = normalize_anthropic_response_v2(resp) + assert v2.finish_reason == v1_finish == expected + + +class TestStripToolPrefix: + """Verify mcp_ prefix stripping works identically.""" + + def test_prefix_stripped(self): + 
resp = _response( + [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})], + stop_reason="tool_use", + ) + v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True) + v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True) + assert v1_msg.tool_calls[0].function.name == "terminal" + assert v2.tool_calls[0].name == "terminal" + + def test_prefix_kept(self): + resp = _response( + [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})], + stop_reason="tool_use", + ) + v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False) + v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False) + assert v1_msg.tool_calls[0].function.name == "mcp_terminal" + assert v2.tool_calls[0].name == "mcp_terminal" + + +class TestEdgeCases: + """Edge cases: empty content, no blocks, etc.""" + + def test_empty_content_blocks(self): + resp = _response([]) + v1_msg, v1_finish = normalize_anthropic_response(resp) + v2 = normalize_anthropic_response_v2(resp) + assert v2.content == v1_msg.content + assert v2.content is None + + def test_no_reasoning_details_means_none_provider_data(self): + resp = _response([_text_block("hi")]) + v2 = normalize_anthropic_response_v2(resp) + assert v2.provider_data is None + + def test_v2_returns_dataclass_not_namespace(self): + resp = _response([_text_block("hi")]) + v2 = normalize_anthropic_response_v2(resp) + assert isinstance(v2, NormalizedResponse) + assert not isinstance(v2, SimpleNamespace) diff --git a/tests/agent/test_copilot_acp_client.py b/tests/agent/test_copilot_acp_client.py new file mode 100644 index 0000000000..52ad20a350 --- /dev/null +++ b/tests/agent/test_copilot_acp_client.py @@ -0,0 +1,146 @@ +"""Focused regressions for the Copilot ACP shim safety layer.""" + +from __future__ import annotations + +import io +import json +import os +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from agent.copilot_acp_client import CopilotACPClient + + 
+class _FakeProcess: + def __init__(self) -> None: + self.stdin = io.StringIO() + + +class CopilotACPClientSafetyTests(unittest.TestCase): + def setUp(self) -> None: + self.client = CopilotACPClient(acp_cwd="/tmp") + + def _dispatch(self, message: dict, *, cwd: str) -> dict: + process = _FakeProcess() + handled = self.client._handle_server_message( + message, + process=process, + cwd=cwd, + text_parts=[], + reasoning_parts=[], + ) + self.assertTrue(handled) + payload = process.stdin.getvalue().strip() + self.assertTrue(payload) + return json.loads(payload) + + def test_request_permission_is_not_auto_allowed(self) -> None: + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 1, + "method": "session/request_permission", + "params": {}, + }, + cwd="/tmp", + ) + + outcome = (((response.get("result") or {}).get("outcome") or {}).get("outcome")) + self.assertEqual(outcome, "cancelled") + + def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) / "home" + blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json" + blocked.parent.mkdir(parents=True, exist_ok=True) + blocked.write_text('{"token":"sk-test-secret-1234567890"}') + + with patch.dict( + os.environ, + {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")}, + clear=False, + ): + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 2, + "method": "fs/read_text_file", + "params": {"path": str(blocked)}, + }, + cwd=str(home), + ) + + self.assertIn("error", response) + + def test_read_text_file_redacts_sensitive_content(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + secret_file = root / "config.env" + secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012") + + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 3, + "method": "fs/read_text_file", + "params": {"path": str(secret_file)}, + }, + cwd=str(root), + ) + + content = 
((response.get("result") or {}).get("content") or "") + self.assertNotIn("abc123def456", content) + self.assertIn("OPENAI_API_KEY=", content) + + def test_write_text_file_reuses_write_denylist(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) / "home" + target = home / ".ssh" / "id_rsa" + target.parent.mkdir(parents=True, exist_ok=True) + + with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True): + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 4, + "method": "fs/write_text_file", + "params": { + "path": str(target), + "content": "fake-private-key", + }, + }, + cwd=str(home), + ) + + self.assertIn("error", response) + self.assertFalse(target.exists()) + + def test_write_text_file_respects_safe_root(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + safe_root = root / "workspace" + safe_root.mkdir() + outside = root / "outside.txt" + + with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False): + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 5, + "method": "fs/write_text_file", + "params": { + "path": str(outside), + "content": "should-not-write", + }, + }, + cwd=str(root), + ) + + self.assertIn("error", response) + self.assertFalse(outside.exists()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/agent/test_insights.py b/tests/agent/test_insights.py index 4067c92157..2740daf096 100644 --- a/tests/agent/test_insights.py +++ b/tests/agent/test_insights.py @@ -516,13 +516,12 @@ class TestGatewayFormatting: assert "**" in text # Markdown bold def test_gateway_format_hides_cost(self, populated_db): + """Gateway format omits dollar figures and internal cache details.""" engine = InsightsEngine(populated_db) report = engine.generate(days=30) text = engine.format_gateway(report) - assert "$" in text - assert "Top Skills" in text - assert "Est. 
cost" in text + assert "$" not in text assert "cache" not in text.lower() def test_gateway_format_shows_models(self, populated_db): diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py index 85c9c95206..4356b61c5a 100644 --- a/tests/agent/test_minimax_provider.py +++ b/tests/agent/test_minimax_provider.py @@ -84,38 +84,6 @@ class TestMinimaxAuxModel: assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] -class TestMinimaxModelCatalog: - """Verify the model catalog matches official Anthropic-compat endpoint models. - - Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api - """ - - def test_catalog_includes_current_models(self): - from hermes_cli.models import _PROVIDER_MODELS - for provider in ("minimax", "minimax-cn"): - models = _PROVIDER_MODELS[provider] - assert "MiniMax-M2.7" in models - assert "MiniMax-M2.5" in models - assert "MiniMax-M2.1" in models - assert "MiniMax-M2" in models - - def test_catalog_excludes_m1_family(self): - """M1 models are not available on the /anthropic endpoint.""" - from hermes_cli.models import _PROVIDER_MODELS - for provider in ("minimax", "minimax-cn"): - models = _PROVIDER_MODELS[provider] - assert "MiniMax-M1" not in models - - def test_catalog_excludes_highspeed(self): - """Highspeed variants are available but not shown in default catalog - (users can still specify them manually).""" - from hermes_cli.models import _PROVIDER_MODELS - for provider in ("minimax", "minimax-cn"): - models = _PROVIDER_MODELS[provider] - assert "MiniMax-M2.7-highspeed" not in models - assert "MiniMax-M2.5-highspeed" not in models - - class TestMinimaxBetaHeaders: """MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta. 
diff --git a/tests/agent/test_proxy_and_url_validation.py b/tests/agent/test_proxy_and_url_validation.py index 4fd6138a4d..7d7268ed1f 100644 --- a/tests/agent/test_proxy_and_url_validation.py +++ b/tests/agent/test_proxy_and_url_validation.py @@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed. """ from __future__ import annotations +import os + import pytest from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls @@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch): _validate_proxy_env_urls() # should not raise +def test_proxy_env_normalizes_socks_alias(monkeypatch): + monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") + _validate_proxy_env_urls() + assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/" + + @pytest.mark.parametrize("key", [ "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy", diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index 57ac7d6b58..e399db619e 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -405,3 +405,191 @@ class TestPlanSkillHelpers: assert "Add a /plan command" in msg assert ".hermes/plans/plan.md" in msg assert "Runtime note:" in msg + + +class TestSkillDirectoryHeader: + """The activation message must expose the absolute skill directory and + explain how to resolve relative paths, so skills with bundled scripts + don't force the agent into a second ``skill_view()`` round-trip.""" + + def test_header_contains_absolute_skill_dir(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + skill_dir = _make_skill(tmp_path, "abs-dir-skill") + scan_skill_commands() + msg = build_skill_invocation_message("/abs-dir-skill", "go") + + assert msg is not None + assert f"[Skill directory: {skill_dir}]" in msg + assert "Resolve any relative paths" in msg + + def test_supporting_files_shown_with_absolute_paths(self, tmp_path): + with 
patch("tools.skills_tool.SKILLS_DIR", tmp_path): + skill_dir = _make_skill(tmp_path, "scripted-skill") + (skill_dir / "scripts").mkdir() + (skill_dir / "scripts" / "run.js").write_text("console.log('hi')") + scan_skill_commands() + msg = build_skill_invocation_message("/scripted-skill") + + assert msg is not None + # The supporting-files block must emit both the relative form (so the + # agent can call skill_view on it) and the absolute form (so it can + # run the script directly via terminal). + assert "scripts/run.js" in msg + assert str(skill_dir / "scripts" / "run.js") in msg + assert f"node {skill_dir}/scripts/foo.js" in msg + + +class TestTemplateVarSubstitution: + """``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body + are replaced before the agent sees the content.""" + + def test_substitutes_skill_dir(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + skill_dir = _make_skill( + tmp_path, + "templated", + body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/templated") + + assert msg is not None + assert f"node {skill_dir}/scripts/foo.js" in msg + # The literal template token must not leak through. 
+ assert "${HERMES_SKILL_DIR}" not in msg.split("[Skill directory:")[0] + + def test_substitutes_session_id_when_available(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "sess-templated", + body="Session: ${HERMES_SESSION_ID}", + ) + scan_skill_commands() + msg = build_skill_invocation_message( + "/sess-templated", task_id="abc-123" + ) + + assert msg is not None + assert "Session: abc-123" in msg + + def test_leaves_session_id_token_when_missing(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "sess-missing", + body="Session: ${HERMES_SESSION_ID}", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/sess-missing", task_id=None) + + assert msg is not None + # No session — token left intact so the author can spot it. + assert "Session: ${HERMES_SESSION_ID}" in msg + + def test_disable_template_vars_via_config(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": False}, + ), + ): + _make_skill( + tmp_path, + "no-sub", + body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/no-sub") + + assert msg is not None + # Template token must survive when substitution is disabled. 
+ assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg + + +class TestInlineShellExpansion: + """Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the + content — but only when the user has opted in via config.""" + + def test_inline_shell_is_off_by_default(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "dyn-default-off", + body="Today is !`echo INLINE_RAN`.", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-default-off") + + assert msg is not None + # Default config has inline_shell=False — snippet must stay literal. + assert "!`echo INLINE_RAN`" in msg + assert "Today is INLINE_RAN." not in msg + + def test_inline_shell_runs_when_enabled(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": True, "inline_shell": True, + "inline_shell_timeout": 5}, + ), + ): + _make_skill( + tmp_path, + "dyn-on", + body="Marker: !`echo INLINE_RAN`.", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-on") + + assert msg is not None + assert "Marker: INLINE_RAN." 
in msg + assert "!`echo INLINE_RAN`" not in msg + + def test_inline_shell_runs_in_skill_directory(self, tmp_path): + """Inline snippets get the skill dir as CWD so relative paths work.""" + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": True, "inline_shell": True, + "inline_shell_timeout": 5}, + ), + ): + skill_dir = _make_skill( + tmp_path, + "dyn-cwd", + body="Here: !`pwd`", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-cwd") + + assert msg is not None + assert f"Here: {skill_dir}" in msg + + def test_inline_shell_timeout_does_not_break_message(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": True, "inline_shell": True, + "inline_shell_timeout": 1}, + ), + ): + _make_skill( + tmp_path, + "dyn-slow", + body="Slow: !`sleep 5 && printf DYN_MARKER`", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-slow") + + assert msg is not None + # Timeout is surfaced as a marker instead of propagating as an error, + # and the rest of the skill message still renders. + assert "inline-shell timeout" in msg + # The command's intended stdout never made it through — only the + # timeout marker (which echoes the command text) survives. 
+ assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "") diff --git a/tests/agent/transports/__init__.py b/tests/agent/transports/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/agent/transports/test_transport.py b/tests/agent/transports/test_transport.py new file mode 100644 index 0000000000..b51336d962 --- /dev/null +++ b/tests/agent/transports/test_transport.py @@ -0,0 +1,220 @@ +"""Tests for the transport ABC, registry, and AnthropicTransport.""" + +import pytest +from types import SimpleNamespace +from unittest.mock import MagicMock + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage +from agent.transports import get_transport, register_transport, _REGISTRY + + +# ── ABC contract tests ────────────────────────────────────────────────── + +class TestProviderTransportABC: + """Verify the ABC contract is enforceable.""" + + def test_cannot_instantiate_abc(self): + with pytest.raises(TypeError): + ProviderTransport() + + def test_concrete_must_implement_all_abstract(self): + class Incomplete(ProviderTransport): + @property + def api_mode(self): + return "test" + with pytest.raises(TypeError): + Incomplete() + + def test_minimal_concrete(self): + class Minimal(ProviderTransport): + @property + def api_mode(self): + return "test_minimal" + def convert_messages(self, messages, **kw): + return messages + def convert_tools(self, tools): + return tools + def build_kwargs(self, model, messages, tools=None, **params): + return {"model": model, "messages": messages} + def normalize_response(self, response, **kw): + return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop") + + t = Minimal() + assert t.api_mode == "test_minimal" + assert t.validate_response(None) is True # default + assert t.extract_cache_stats(None) is None # default + assert t.map_finish_reason("end_turn") == "end_turn" # default passthrough + + +# ── Registry 
tests ─────────────────────────────────────────────────────── + +class TestTransportRegistry: + + def test_get_unregistered_returns_none(self): + assert get_transport("nonexistent_mode") is None + + def test_anthropic_registered_on_import(self): + import agent.transports.anthropic # noqa: F401 + t = get_transport("anthropic_messages") + assert t is not None + assert t.api_mode == "anthropic_messages" + + def test_register_and_get(self): + class DummyTransport(ProviderTransport): + @property + def api_mode(self): + return "dummy_test" + def convert_messages(self, messages, **kw): + return messages + def convert_tools(self, tools): + return tools + def build_kwargs(self, model, messages, tools=None, **params): + return {} + def normalize_response(self, response, **kw): + return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop") + + register_transport("dummy_test", DummyTransport) + t = get_transport("dummy_test") + assert t.api_mode == "dummy_test" + # Cleanup + _REGISTRY.pop("dummy_test", None) + + +# ── AnthropicTransport tests ──────────────────────────────────────────── + +class TestAnthropicTransport: + + @pytest.fixture + def transport(self): + import agent.transports.anthropic # noqa: F401 + return get_transport("anthropic_messages") + + def test_api_mode(self, transport): + assert transport.api_mode == "anthropic_messages" + + def test_convert_tools_simple(self, transport): + tools = [{ + "type": "function", + "function": { + "name": "test_tool", + "description": "A test", + "parameters": {"type": "object", "properties": {}}, + } + }] + result = transport.convert_tools(tools) + assert len(result) == 1 + assert result[0]["name"] == "test_tool" + assert "input_schema" in result[0] + + def test_validate_response_none(self, transport): + assert transport.validate_response(None) is False + + def test_validate_response_empty_content(self, transport): + r = SimpleNamespace(content=[]) + assert transport.validate_response(r) is False + + def 
test_validate_response_valid(self, transport): + r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")]) + assert transport.validate_response(r) is True + + def test_map_finish_reason(self, transport): + assert transport.map_finish_reason("end_turn") == "stop" + assert transport.map_finish_reason("tool_use") == "tool_calls" + assert transport.map_finish_reason("max_tokens") == "length" + assert transport.map_finish_reason("stop_sequence") == "stop" + assert transport.map_finish_reason("refusal") == "content_filter" + assert transport.map_finish_reason("model_context_window_exceeded") == "length" + assert transport.map_finish_reason("unknown") == "stop" + + def test_extract_cache_stats_none_usage(self, transport): + r = SimpleNamespace(usage=None) + assert transport.extract_cache_stats(r) is None + + def test_extract_cache_stats_with_cache(self, transport): + usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50) + r = SimpleNamespace(usage=usage) + result = transport.extract_cache_stats(r) + assert result == {"cached_tokens": 100, "creation_tokens": 50} + + def test_extract_cache_stats_zero(self, transport): + usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0) + r = SimpleNamespace(usage=usage) + assert transport.extract_cache_stats(r) is None + + def test_normalize_response_text(self, transport): + """Test normalization of a simple text response.""" + r = SimpleNamespace( + content=[SimpleNamespace(type="text", text="Hello world")], + stop_reason="end_turn", + usage=SimpleNamespace(input_tokens=10, output_tokens=5), + model="claude-sonnet-4-6", + ) + nr = transport.normalize_response(r) + assert isinstance(nr, NormalizedResponse) + assert nr.content == "Hello world" + assert nr.tool_calls is None or nr.tool_calls == [] + assert nr.finish_reason == "stop" + + def test_normalize_response_tool_calls(self, transport): + """Test normalization of a tool-use response.""" + r = SimpleNamespace( 
+ content=[ + SimpleNamespace( + type="tool_use", + id="toolu_123", + name="terminal", + input={"command": "ls"}, + ), + ], + stop_reason="tool_use", + usage=SimpleNamespace(input_tokens=10, output_tokens=20), + model="claude-sonnet-4-6", + ) + nr = transport.normalize_response(r) + assert nr.finish_reason == "tool_calls" + assert len(nr.tool_calls) == 1 + tc = nr.tool_calls[0] + assert tc.name == "terminal" + assert tc.id == "toolu_123" + assert '"command"' in tc.arguments + + def test_normalize_response_thinking(self, transport): + """Test normalization preserves thinking content.""" + r = SimpleNamespace( + content=[ + SimpleNamespace(type="thinking", thinking="Let me think..."), + SimpleNamespace(type="text", text="The answer is 42"), + ], + stop_reason="end_turn", + usage=SimpleNamespace(input_tokens=10, output_tokens=15), + model="claude-sonnet-4-6", + ) + nr = transport.normalize_response(r) + assert nr.content == "The answer is 42" + assert nr.reasoning == "Let me think..." + + def test_build_kwargs_returns_dict(self, transport): + """Test build_kwargs produces a usable kwargs dict.""" + messages = [{"role": "user", "content": "Hello"}] + kw = transport.build_kwargs( + model="claude-sonnet-4-6", + messages=messages, + max_tokens=1024, + ) + assert isinstance(kw, dict) + assert "model" in kw + assert "max_tokens" in kw + assert "messages" in kw + + def test_convert_messages_extracts_system(self, transport): + """Test convert_messages separates system from messages.""" + messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hi"}, + ] + system, msgs = transport.convert_messages(messages) + # System should be extracted + assert system is not None + # Messages should only have user + assert len(msgs) >= 1 diff --git a/tests/agent/transports/test_types.py b/tests/agent/transports/test_types.py new file mode 100644 index 0000000000..0be18c688c --- /dev/null +++ b/tests/agent/transports/test_types.py @@ -0,0 +1,151 @@ 
+"""Tests for agent/transports/types.py — dataclass construction + helpers.""" + +import json +import pytest + +from agent.transports.types import ( + NormalizedResponse, + ToolCall, + Usage, + build_tool_call, + map_finish_reason, +) + + +# --------------------------------------------------------------------------- +# ToolCall +# --------------------------------------------------------------------------- + +class TestToolCall: + def test_basic_construction(self): + tc = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}') + assert tc.id == "call_abc" + assert tc.name == "terminal" + assert tc.arguments == '{"cmd": "ls"}' + assert tc.provider_data is None + + def test_none_id(self): + tc = ToolCall(id=None, name="read_file", arguments="{}") + assert tc.id is None + + def test_provider_data(self): + tc = ToolCall( + id="call_x", + name="t", + arguments="{}", + provider_data={"call_id": "call_x", "response_item_id": "fc_x"}, + ) + assert tc.provider_data["call_id"] == "call_x" + assert tc.provider_data["response_item_id"] == "fc_x" + + +# --------------------------------------------------------------------------- +# Usage +# --------------------------------------------------------------------------- + +class TestUsage: + def test_defaults(self): + u = Usage() + assert u.prompt_tokens == 0 + assert u.completion_tokens == 0 + assert u.total_tokens == 0 + assert u.cached_tokens == 0 + + def test_explicit(self): + u = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80) + assert u.total_tokens == 150 + + +# --------------------------------------------------------------------------- +# NormalizedResponse +# --------------------------------------------------------------------------- + +class TestNormalizedResponse: + def test_text_only(self): + r = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop") + assert r.content == "hello" + assert r.tool_calls is None + assert r.finish_reason == "stop" + assert 
r.reasoning is None + assert r.usage is None + assert r.provider_data is None + + def test_with_tool_calls(self): + tcs = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')] + r = NormalizedResponse(content=None, tool_calls=tcs, finish_reason="tool_calls") + assert r.finish_reason == "tool_calls" + assert len(r.tool_calls) == 1 + assert r.tool_calls[0].name == "terminal" + + def test_with_reasoning(self): + r = NormalizedResponse( + content="answer", + tool_calls=None, + finish_reason="stop", + reasoning="I thought about it", + ) + assert r.reasoning == "I thought about it" + + def test_with_provider_data(self): + r = NormalizedResponse( + content=None, + tool_calls=None, + finish_reason="stop", + provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]}, + ) + assert r.provider_data["reasoning_details"][0]["type"] == "thinking" + + +# --------------------------------------------------------------------------- +# build_tool_call +# --------------------------------------------------------------------------- + +class TestBuildToolCall: + def test_dict_arguments_serialized(self): + tc = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"}) + assert tc.arguments == json.dumps({"cmd": "ls"}) + assert tc.provider_data is None + + def test_string_arguments_passthrough(self): + tc = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}') + assert tc.arguments == '{"path": "/tmp"}' + + def test_provider_fields(self): + tc = build_tool_call( + id="call_3", + name="terminal", + arguments="{}", + call_id="call_3", + response_item_id="fc_3", + ) + assert tc.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"} + + def test_none_id(self): + tc = build_tool_call(id=None, name="t", arguments="{}") + assert tc.id is None + + +# --------------------------------------------------------------------------- +# map_finish_reason +# 
--------------------------------------------------------------------------- + +class TestMapFinishReason: + ANTHROPIC_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + } + + def test_known_reason(self): + assert map_finish_reason("end_turn", self.ANTHROPIC_MAP) == "stop" + assert map_finish_reason("tool_use", self.ANTHROPIC_MAP) == "tool_calls" + assert map_finish_reason("max_tokens", self.ANTHROPIC_MAP) == "length" + assert map_finish_reason("refusal", self.ANTHROPIC_MAP) == "content_filter" + + def test_unknown_reason_defaults_to_stop(self): + assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop" + + def test_none_reason(self): + assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop" diff --git a/tests/cli/test_cli_steer_busy_path.py b/tests/cli/test_cli_steer_busy_path.py new file mode 100644 index 0000000000..071c741fbe --- /dev/null +++ b/tests/cli/test_cli_steer_busy_path.py @@ -0,0 +1,146 @@ +"""Regression tests for classic-CLI mid-run /steer dispatch. + +Background +---------- +/steer sent while the agent is running used to be queued through +``self._pending_input`` alongside ordinary user input. ``process_loop`` +pulls from that queue and calls ``process_command()`` — but while the +agent is running, ``process_loop`` is blocked inside ``self.chat()``. +By the time the queued /steer was pulled, ``_agent_running`` had +already flipped back to False, so ``process_command()`` took the idle +fallback (``"No agent running; queued as next turn"``) and delivered +the steer as an ordinary next-turn message. + +The fix dispatches /steer inline on the UI thread when the agent is +running — matching the existing pattern for /model — so the steer +reaches ``agent.steer()`` (thread-safe) without touching the queue. + +These tests exercise the detector + inline dispatch without starting a +prompt_toolkit app. 
+""" + +from __future__ import annotations + +import importlib +import sys +from unittest.mock import MagicMock, patch + + +def _make_cli(): + """Create a HermesCLI instance with prompt_toolkit stubbed out.""" + _clean_config = { + "model": { + "default": "anthropic/claude-opus-4.6", + "base_url": "https://openrouter.ai/api/v1", + "provider": "auto", + }, + "display": {"compact": False, "tool_progress": "all"}, + "agent": {}, + "terminal": {"env_type": "local"}, + } + clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""} + prompt_toolkit_stubs = { + "prompt_toolkit": MagicMock(), + "prompt_toolkit.history": MagicMock(), + "prompt_toolkit.styles": MagicMock(), + "prompt_toolkit.patch_stdout": MagicMock(), + "prompt_toolkit.application": MagicMock(), + "prompt_toolkit.layout": MagicMock(), + "prompt_toolkit.layout.processors": MagicMock(), + "prompt_toolkit.filters": MagicMock(), + "prompt_toolkit.layout.dimension": MagicMock(), + "prompt_toolkit.layout.menus": MagicMock(), + "prompt_toolkit.widgets": MagicMock(), + "prompt_toolkit.key_binding": MagicMock(), + "prompt_toolkit.completion": MagicMock(), + "prompt_toolkit.formatted_text": MagicMock(), + "prompt_toolkit.auto_suggest": MagicMock(), + } + with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict( + "os.environ", clean_env, clear=False + ): + import cli as _cli_mod + + _cli_mod = importlib.reload(_cli_mod) + with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict( + _cli_mod.__dict__, {"CLI_CONFIG": _clean_config} + ): + return _cli_mod.HermesCLI() + + +class TestSteerInlineDetector: + """_should_handle_steer_command_inline gates the busy-path fast dispatch.""" + + def test_detects_steer_when_agent_running(self): + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("/steer focus on error handling") is True + + def test_ignores_steer_when_agent_idle(self): + """Idle-path /steer should fall through to the normal process_loop + 
dispatch so the queue-style fallback message is emitted.""" + cli = _make_cli() + cli._agent_running = False + assert cli._should_handle_steer_command_inline("/steer do something") is False + + def test_ignores_non_slash_input(self): + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("steer without slash") is False + assert cli._should_handle_steer_command_inline("") is False + + def test_ignores_other_slash_commands(self): + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("/queue hello") is False + assert cli._should_handle_steer_command_inline("/stop") is False + assert cli._should_handle_steer_command_inline("/help") is False + + def test_ignores_steer_with_attached_images(self): + """Image payloads take the normal path; steer doesn't accept images.""" + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("/steer text", has_images=True) is False + + +class TestSteerBusyPathDispatch: + """When the detector fires, process_command('/steer ...') must call + agent.steer() directly rather than the idle-path fallback.""" + + def test_process_command_routes_to_agent_steer(self): + """With _agent_running=True and agent.steer present, /steer reaches + agent.steer(payload), NOT _pending_input.""" + cli = _make_cli() + cli._agent_running = True + cli.agent = MagicMock() + cli.agent.steer = MagicMock(return_value=True) + # Make sure the idle-path fallback would be observable if taken + cli._pending_input = MagicMock() + + cli.process_command("/steer focus on errors") + + cli.agent.steer.assert_called_once_with("focus on errors") + cli._pending_input.put.assert_not_called() + + def test_idle_path_queues_as_next_turn(self): + """Control — when the agent is NOT running, /steer correctly falls + back to next-turn queue semantics. 
Demonstrates why the fix was + needed: the queue path only works when you can actually drain it.""" + cli = _make_cli() + cli._agent_running = False + cli.agent = MagicMock() + cli.agent.steer = MagicMock(return_value=True) + cli._pending_input = MagicMock() + + cli.process_command("/steer would-be-next-turn") + + # Idle path does NOT call agent.steer + cli.agent.steer.assert_not_called() + # It puts the payload in the queue as a normal next-turn message + cli._pending_input.put.assert_called_once_with("would-be-next-turn") + + +if __name__ == "__main__": # pragma: no cover + import pytest + + pytest.main([__file__, "-v"]) diff --git a/tests/conftest.py b/tests/conftest.py index ca4a9a9709..0258e034f9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({ "HERMES_HOME_MODE", "BROWSER_CDP_URL", "CAMOFOX_URL", + # Platform allowlists — not credentials, but if set from any source + # (user shell, earlier leaky test, CI env), they change gateway auth + # behavior and flake button-authorization tests. + "TELEGRAM_ALLOWED_USERS", + "DISCORD_ALLOWED_USERS", + "WHATSAPP_ALLOWED_USERS", + "SLACK_ALLOWED_USERS", + "SIGNAL_ALLOWED_USERS", + "SIGNAL_GROUP_ALLOWED_USERS", + "EMAIL_ALLOWED_USERS", + "SMS_ALLOWED_USERS", + "MATTERMOST_ALLOWED_USERS", + "MATRIX_ALLOWED_USERS", + "DINGTALK_ALLOWED_USERS", + "FEISHU_ALLOWED_USERS", + "WECOM_ALLOWED_USERS", + "GATEWAY_ALLOWED_USERS", + "GATEWAY_ALLOW_ALL_USERS", + "TELEGRAM_ALLOW_ALL_USERS", + "DISCORD_ALLOW_ALL_USERS", + "WHATSAPP_ALLOW_ALL_USERS", + "SLACK_ALLOW_ALL_USERS", + "SIGNAL_ALLOW_ALL_USERS", + "EMAIL_ALLOW_ALL_USERS", + "SMS_ALLOW_ALL_USERS", }) @@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment): return None +# ── Module-level state reset ─────────────────────────────────────────────── +# +# Python modules are singletons per process, and pytest-xdist workers are +# long-lived. 
Module-level dicts/sets (tool registries, approval state, +# interrupt flags) and ContextVars persist across tests in the same worker, +# causing tests that pass alone to fail when run with siblings. +# +# Each entry in this fixture clears state that belongs to a specific module. +# New state buckets go here too — this is the single gate that prevents +# "works alone, flakes in CI" bugs from state leakage. +# +# The skill `test-suite-cascade-diagnosis` documents the concrete patterns +# this closes; the running example was `test_command_guards` failing 12/15 +# CI runs because ``tools.approval._session_approved`` carried approvals +# from one test's session into another's. + +@pytest.fixture(autouse=True) +def _reset_module_state(): + """Clear module-level mutable state and ContextVars between tests. + + Keeps state from leaking across tests on the same xdist worker. Modules + that don't exist yet (test collection before production import) are + skipped silently — production import later creates fresh empty state. + """ + # --- tools.approval — the single biggest source of cross-test pollution --- + try: + from tools import approval as _approval_mod + _approval_mod._session_approved.clear() + _approval_mod._session_yolo.clear() + _approval_mod._permanent_approved.clear() + _approval_mod._pending.clear() + _approval_mod._gateway_queues.clear() + _approval_mod._gateway_notify_cbs.clear() + # ContextVar: reset to empty string so get_current_session_key() + # falls through to the env var / default path, matching a fresh + # process. + _approval_mod._approval_session_key.set("") + except Exception: + pass + + # --- tools.interrupt — per-thread interrupt flag set --- + try: + from tools import interrupt as _interrupt_mod + with _interrupt_mod._lock: + _interrupt_mod._interrupted_threads.clear() + except Exception: + pass + + # --- gateway.session_context — 9 ContextVars that represent + # the active gateway session. 
If set in one test and not reset, + # the next test's get_session_env() reads stale values. + try: + from gateway import session_context as _sc_mod + for _cv in ( + _sc_mod._SESSION_PLATFORM, + _sc_mod._SESSION_CHAT_ID, + _sc_mod._SESSION_CHAT_NAME, + _sc_mod._SESSION_THREAD_ID, + _sc_mod._SESSION_USER_ID, + _sc_mod._SESSION_USER_NAME, + _sc_mod._SESSION_KEY, + _sc_mod._CRON_AUTO_DELIVER_PLATFORM, + _sc_mod._CRON_AUTO_DELIVER_CHAT_ID, + _sc_mod._CRON_AUTO_DELIVER_THREAD_ID, + ): + _cv.set(_sc_mod._UNSET) + except Exception: + pass + + # --- tools.env_passthrough — ContextVar with no default --- + # LookupError is normal if the test never set it. Setting it to an + # empty set unconditionally normalizes the starting state. + try: + from tools import env_passthrough as _envp_mod + _envp_mod._allowed_env_vars_var.set(set()) + except Exception: + pass + + # --- tools.credential_files — ContextVar --- + try: + from tools import credential_files as _credf_mod + _credf_mod._registered_files_var.set({}) + except Exception: + pass + + # --- tools.file_tools — per-task read history + file-ops cache --- + # _read_tracker accumulates per-task_id read history for loop detection, + # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the + # cap is hit faster than expected and capacity-related tests flake. 
+ try: + from tools import file_tools as _ft_mod + with _ft_mod._read_tracker_lock: + _ft_mod._read_tracker.clear() + with _ft_mod._file_ops_lock: + _ft_mod._file_ops_cache.clear() + except Exception: + pass + + yield + + @pytest.fixture() def tmp_dir(tmp_path): """Provide a temporary directory that is cleaned up automatically.""" diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index e862638eee..524490eb09 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1580,3 +1580,128 @@ class TestParallelTick: end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0] start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0] assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1" + + +class TestDeliverResultTimeoutCancelsFuture: + """When future.result(timeout=60) raises TimeoutError in the live + adapter delivery path, _deliver_result must cancel the orphan + coroutine so it cannot duplicate-send after the standalone fallback. + """ + + def test_live_adapter_timeout_cancels_future_and_falls_back(self): + """End-to-end: live adapter hangs past the 60s budget, _deliver_result + patches the timeout down to a fast value, confirms future.cancel() fires, + and verifies the standalone fallback path still delivers.""" + from gateway.config import Platform + from concurrent.futures import Future + + # Live adapter whose send() coroutine never resolves within the budget + adapter = AsyncMock() + adapter.send.return_value = MagicMock(success=True) + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + loop = MagicMock() + loop.is_running.return_value = True + + # A real concurrent.futures.Future so .cancel() has real semantics, + # but we override .result() to raise TimeoutError exactly like the + # 60s wait firing in production. 
+ captured_future = Future() + cancel_calls = [] + original_cancel = captured_future.cancel + + def tracking_cancel(): + cancel_calls.append(True) + return original_cancel() + + captured_future.cancel = tracking_cancel + captured_future.result = MagicMock(side_effect=TimeoutError("timed out")) + + def fake_run_coro(coro, _loop): + coro.close() + return captured_future + + job = { + "id": "timeout-job", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "123"}, + } + + standalone_send = AsyncMock(return_value={"success": True}) + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \ + patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \ + patch("tools.send_message_tool._send_to_platform", new=standalone_send): + result = _deliver_result( + job, + "Hello world", + adapters={Platform.TELEGRAM: adapter}, + loop=loop, + ) + + # 1. The orphan future was cancelled on timeout (the bug fix) + assert cancel_calls == [True], "future.cancel() must fire on TimeoutError" + # 2. The standalone fallback delivered — no double send, no silent drop + assert result is None, f"expected successful delivery, got error: {result!r}" + standalone_send.assert_awaited_once() + + +class TestSendMediaTimeoutCancelsFuture: + """Same orphan-coroutine guarantee for _send_media_via_adapter's + future.result(timeout=30) call. If this times out mid-batch, the + in-flight coroutine must be cancelled before the next file is tried. + """ + + def test_media_send_timeout_cancels_future_and_continues(self): + """End-to-end: _send_media_via_adapter with a future whose .result() + raises TimeoutError. 
Assert cancel() fires and the loop proceeds + to the next file rather than hanging or crashing.""" + from concurrent.futures import Future + + adapter = MagicMock() + adapter.send_image_file = AsyncMock() + adapter.send_video = AsyncMock() + + # First file: future that times out. Second file: future that resolves OK. + timeout_future = Future() + timeout_cancel_calls = [] + original_cancel = timeout_future.cancel + + def tracking_cancel(): + timeout_cancel_calls.append(True) + return original_cancel() + + timeout_future.cancel = tracking_cancel + timeout_future.result = MagicMock(side_effect=TimeoutError("timed out")) + + ok_future = Future() + ok_future.set_result(MagicMock(success=True)) + + futures_iter = iter([timeout_future, ok_future]) + + def fake_run_coro(coro, _loop): + coro.close() + return next(futures_iter) + + media_files = [ + ("/tmp/slow.png", False), # times out + ("/tmp/fast.mp4", False), # succeeds + ] + + loop = MagicMock() + job = {"id": "media-timeout"} + + with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): + # Should not raise — the except Exception clause swallows the timeout + _send_media_via_adapter(adapter, "chat-1", media_files, None, loop, job) + + # 1. The timed-out future was cancelled (the bug fix) + assert timeout_cancel_calls == [True], "future.cancel() must fire on TimeoutError" + # 2. 
Second file still got dispatched — one timeout doesn't abort the batch + adapter.send_video.assert_called_once() + assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4" diff --git a/tests/gateway/test_internal_event_bypass_pairing.py b/tests/gateway/test_internal_event_bypass_pairing.py index d10195b2d5..8878842538 100644 --- a/tests/gateway/test_internal_event_bypass_pairing.py +++ b/tests/gateway/test_internal_event_bypass_pairing.py @@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path): """Verify the normal (non-internal) path still triggers pairing for unknown users.""" import gateway.run as gateway_run + import gateway.pairing as pairing_mod monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + # gateway.pairing.PAIRING_DIR is a module-level constant captured at + # import time from whichever HERMES_HOME was set then. Per-test + # HERMES_HOME redirection in conftest doesn't retroactively move it. + # Override directly so pairing rate-limit state lives in this test's + # tmp_path (and so stale state from prior xdist workers can't leak in). 
+ pairing_dir = tmp_path / "pairing" + pairing_dir.mkdir() + monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir) (tmp_path / "config.yaml").write_text("", encoding="utf-8") # Clear env vars that could let all users through (loaded by diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py index 11180639e8..e25f226ee9 100644 --- a/tests/gateway/test_proxy_mode.py +++ b/tests/gateway/test_proxy_mode.py @@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from gateway.config import Platform, StreamingConfig +from gateway.platforms.base import resolve_proxy_url from gateway.run import GatewayRunner from gateway.session import SessionSource @@ -133,6 +134,15 @@ class TestGetProxyUrl: assert runner._get_proxy_url() is None +class TestResolveProxyUrl: + def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") + assert resolve_proxy_url() == "socks5://127.0.0.1:1080/" + + class TestRunAgentProxyDispatch: """Test that _run_agent() delegates to proxy when configured.""" diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index 96d5d4627b..83ffc0d4d0 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p async def stop(self): return None - monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42) - monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None) + # get_running_pid returns 42 before we kill the old gateway, then None + # after remove_pid_file() clears the record (reflects real behavior). 
+ _pid_state = {"alive": True} + def _mock_get_running_pid(): + return 42 if _pid_state["alive"] else None + def _mock_remove_pid_file(): + _pid_state["alive"] = False + monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid) + monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file) monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0) monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force))) monkeypatch.setattr("gateway.run.os.getpid", lambda: 100) @@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm( async def stop(self): return None - monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42) - monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None) + _pid_state = {"alive": True} + def _mock_get_running_pid(): + return 42 if _pid_state["alive"] else None + def _mock_remove_pid_file(): + _pid_state["alive"] = False + monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid) + monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file) monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0) monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker) monkeypatch.setattr("gateway.status.terminate_pid", record_terminate) diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 39e4aad3d6..bf1eba51df 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt: assert "**User:** Alice" in prompt assert "Multi-user thread" not in prompt + def test_shared_non_thread_group_prompt_hides_single_user(self): + """Shared non-thread group sessions should avoid pinning one user.""" + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"), + }, + group_sessions_per_user=False, + ) + source = 
SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1002285219667", + chat_name="Test Group", + chat_type="group", + user_name="Alice", + ) + ctx = build_session_context(source, config) + prompt = build_session_context_prompt(ctx) + + assert "Multi-user session" in prompt + assert "[sender name]" in prompt + assert "**User:** Alice" not in prompt + def test_dm_thread_shows_user_not_multi(self): """DM threads are single-user and should show User, not multi-user note.""" config = GatewayConfig( diff --git a/tests/gateway/test_shared_group_sender_prefix.py b/tests/gateway/test_shared_group_sender_prefix.py new file mode 100644 index 0000000000..9f0e525f64 --- /dev/null +++ b/tests/gateway/test_shared_group_sender_prefix.py @@ -0,0 +1,70 @@ +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.run import GatewayRunner +from gateway.session import SessionSource + + +def _make_runner(config: GatewayConfig) -> GatewayRunner: + runner = object.__new__(GatewayRunner) + runner.config = config + runner.adapters = {} + runner._model = "openai/gpt-4.1-mini" + runner._base_url = None + return runner + + +@pytest.mark.asyncio +async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session(): + runner = _make_runner( + GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"), + }, + group_sessions_per_user=False, + ) + ) + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1002285219667", + chat_name="Test Group", + chat_type="group", + user_name="Alice", + ) + event = MessageEvent(text="hello", source=source) + + result = await runner._prepare_inbound_message_text( + event=event, + source=source, + history=[], + ) + + assert result == "[Alice] hello" + + +@pytest.mark.asyncio +async def test_preprocess_keeps_plain_text_for_default_group_sessions(): + runner = _make_runner( + GatewayConfig( + platforms={ + 
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"), + }, + ) + ) + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1002285219667", + chat_name="Test Group", + chat_type="group", + user_name="Alice", + ) + event = MessageEvent(text="hello", source=source) + + result = await runner._prepare_inbound_message_text( + event=event, + source=source, + history=[], + ) + + assert result == "hello" diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py index d7943b7f92..b51ec713f2 100644 --- a/tests/gateway/test_signal.py +++ b/tests/gateway/test_signal.py @@ -306,7 +306,13 @@ class TestSignalSessionSource: class TestSignalPhoneRedaction: @pytest.fixture(autouse=True) def _ensure_redaction_enabled(self, monkeypatch): + # agent.redact snapshots _REDACT_ENABLED at import time from the + # HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late — + # the module was already imported during test collection with + # whatever value was in the env then. Force the flag directly. + # See skill: xdist-cross-test-pollution Pattern 5. monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + monkeypatch.setattr("agent.redact._REDACT_ENABLED", True) def test_us_number(self): from agent.redact import redact_sensitive_text diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py index 04a0856f60..6c371cfbea 100644 --- a/tests/gateway/test_status.py +++ b/tests/gateway/test_status.py @@ -19,6 +19,30 @@ class TestGatewayPidState: assert isinstance(payload["argv"], list) assert payload["argv"] + def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch): + """Regression: two concurrent --replace invocations must not both win. + + Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s + termination-wait would both write to gateway.pid, silently overwriting + each other and leaving multiple gateway instances alive (#11718). 
+ """ + import pytest + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + # First write wins. + status.write_pid_file() + assert (tmp_path / "gateway.pid").exists() + + # Second write (simulating a racing --replace that missed the earlier + # guards) must raise FileExistsError rather than clobber the record. + with pytest.raises(FileExistsError): + status.write_pid_file() + + # Original record is preserved. + payload = json.loads((tmp_path / "gateway.pid").read_text()) + assert payload["pid"] == os.getpid() + def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) pid_path = tmp_path / "gateway.pid" diff --git a/tests/gateway/test_telegram_group_gating.py b/tests/gateway/test_telegram_group_gating.py index 15ffca9ec3..0381cf6f46 100644 --- a/tests/gateway/test_telegram_group_gating.py +++ b/tests/gateway/test_telegram_group_gating.py @@ -71,7 +71,17 @@ def test_group_messages_can_require_direct_trigger_via_config(): assert adapter._should_process_message(_group_message("hello everyone")) is False assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True - assert adapter._should_process_message(_group_message("/status"), is_command=True) is True + # Commands must also respect require_mention when it is enabled + assert adapter._should_process_message(_group_message("/status"), is_command=True) is False + # But commands with @mention still pass (Telegram emits a MENTION entity + # for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler + # rely on this same mechanism) + assert adapter._should_process_message( + _group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")]) + ) is True + # And commands still pass unconditionally when require_mention is disabled + adapter_no_mention = 
_make_adapter(require_mention=False) + assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True def test_free_response_chats_bypass_mention_requirement(): diff --git a/tests/gateway/test_telegram_webhook_secret.py b/tests/gateway/test_telegram_webhook_secret.py new file mode 100644 index 0000000000..0f1e786367 --- /dev/null +++ b/tests/gateway/test_telegram_webhook_secret.py @@ -0,0 +1,100 @@ +"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required. + +Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET +was not, python-telegram-bot received secret_token=None and the webhook +endpoint accepted any HTTP POST. + +The fix refuses to start the adapter in webhook mode without the secret. +""" + +from __future__ import annotations + +import re +import sys +from pathlib import Path + +import pytest + +_repo = str(Path(__file__).resolve().parents[2]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +class TestTelegramWebhookSecretRequired: + """Direct source-level check of the webhook-secret guard. + + The guard is embedded in TelegramAdapter.connect() and hard to isolate + via mocks (requires a full python-telegram-bot ApplicationBuilder + chain). These tests exercise it via source inspection — verifying the + check exists, raises RuntimeError with the advisory link, and only + fires in webhook mode. End-to-end validation is covered by CI + + manual deployment tests. 
+ """ + + def _get_source(self) -> str: + path = Path(_repo) / "gateway" / "platforms" / "telegram.py" + return path.read_text(encoding="utf-8") + + def test_webhook_branch_checks_secret(self): + """The webhook-mode branch of connect() must read + TELEGRAM_WEBHOOK_SECRET and refuse when empty.""" + src = self._get_source() + # The guard must appear after TELEGRAM_WEBHOOK_URL is set + assert re.search( + r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:', + src, re.DOTALL, + ), ( + "TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET " + "and raise when the secret is empty — see GHSA-3vpc-7q5r-276h" + ) + + def test_guard_raises_runtime_error(self): + """The guard raises RuntimeError (not a silent log) so operators + see the failure at startup.""" + src = self._get_source() + # Between the "if not webhook_secret:" line and the next blank + # line block, we should see a RuntimeError being raised + guard_match = re.search( + r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(', + src, + ) + assert guard_match, ( + "Missing webhook secret must raise RuntimeError — silent " + "fall-through was the original GHSA-3vpc-7q5r-276h bypass" + ) + + def test_guard_message_includes_advisory_link(self): + """The RuntimeError message should reference the advisory so + operators can read the full context.""" + src = self._get_source() + assert "GHSA-3vpc-7q5r-276h" in src, ( + "Guard error message must cite the advisory for operator context" + ) + + def test_guard_message_explains_remediation(self): + """The error should tell the operator how to fix it.""" + src = self._get_source() + # Should mention how to generate a secret + assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, ( + "Guard error message should show operators how to set " + "TELEGRAM_WEBHOOK_SECRET" + ) + + def test_polling_branch_has_no_secret_guard(self): + """Polling mode (else-branch) must NOT require the webhook secret — + polling authenticates via the bot token, 
not a webhook secret.""" + src = self._get_source() + # The guard should appear inside the `if webhook_url:` branch, + # not the `else:` polling branch. Rough check: the raise is + # followed (within ~60 lines) by an `else:` that starts the + # polling branch, and there's no secret-check in that polling + # branch. + webhook_block = re.search( + r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n', + src, re.DOTALL, + ) + if webhook_block: + webhook_body = webhook_block.group(1) + polling_body = webhook_block.group(2) + assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body + assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body diff --git a/tests/gateway/test_usage_command.py b/tests/gateway/test_usage_command.py index 2915810891..feced75b25 100644 --- a/tests/gateway/test_usage_command.py +++ b/tests/gateway/test_usage_command.py @@ -175,3 +175,79 @@ class TestUsageCachedAgent: result = await runner._handle_usage_command(event) assert "Cost: included" in result + + +class TestUsageAccountSection: + """Account-limits section appended to /usage output (PR #2486).""" + + @pytest.mark.asyncio + async def test_usage_command_includes_account_section(self, monkeypatch): + agent = _make_mock_agent(provider="openai-codex") + agent.base_url = "https://chatgpt.com/backend-api/codex" + agent.api_key = "unused" + runner = _make_runner(SK, cached_agent=agent) + event = MagicMock() + + monkeypatch.setattr( + "gateway.run.fetch_account_usage", + lambda provider, base_url=None, api_key=None: object(), + ) + monkeypatch.setattr( + "gateway.run.render_account_usage_lines", + lambda snapshot, markdown=False: [ + "📈 **Account limits**", + "Provider: openai-codex (Pro)", + "Session: 85% remaining (15% used)", + ], + ) + with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \ + patch("agent.usage_pricing.estimate_usage_cost") as mock_cost: + mock_cost.return_value = MagicMock(amount_usd=None, status="included") + result = await 
runner._handle_usage_command(event) + + assert "📊 **Session Token Usage**" in result + assert "📈 **Account limits**" in result + assert "Provider: openai-codex (Pro)" in result + + @pytest.mark.asyncio + async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch): + runner = _make_runner(SK) + runner._session_db = MagicMock() + runner._session_db.get_session.return_value = { + "billing_provider": "openai-codex", + "billing_base_url": "https://chatgpt.com/backend-api/codex", + } + session_entry = MagicMock() + session_entry.session_id = "sess-1" + runner.session_store.get_or_create_session.return_value = session_entry + runner.session_store.load_transcript.return_value = [ + {"role": "user", "content": "earlier"}, + ] + + calls = {} + + async def _fake_to_thread(fn, *args, **kwargs): + calls["args"] = args + calls["kwargs"] = kwargs + return fn(*args, **kwargs) + + monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread) + monkeypatch.setattr( + "gateway.run.fetch_account_usage", + lambda provider, base_url=None, api_key=None: object(), + ) + monkeypatch.setattr( + "gateway.run.render_account_usage_lines", + lambda snapshot, markdown=False: [ + "📈 **Account limits**", + "Provider: openai-codex (Pro)", + ], + ) + + event = MagicMock() + result = await runner._handle_usage_command(event) + + assert calls["args"] == ("openai-codex",) + assert calls["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex" + assert "📊 **Session Info**" in result + assert "📈 **Account limits**" in result diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py index c56edc4bb2..2af003ea08 100644 --- a/tests/hermes_cli/test_api_key_providers.py +++ b/tests/hermes_cli/test_api_key_providers.py @@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation: leaked = set(moonshot_models) & coding_plan_only assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}" - def 
test_moonshot_list_contains_shared_models(self): + def test_moonshot_list_non_empty(self): from hermes_cli.main import _PROVIDER_MODELS - moonshot_models = _PROVIDER_MODELS["moonshot"] - assert "kimi-k2.5" in moonshot_models - assert "kimi-k2-thinking" in moonshot_models + assert len(_PROVIDER_MODELS["moonshot"]) >= 1 - def test_coding_plan_list_contains_plan_specific_models(self): + def test_coding_plan_list_non_empty(self): from hermes_cli.main import _PROVIDER_MODELS - coding_models = _PROVIDER_MODELS["kimi-coding"] - assert "kimi-for-coding" in coding_models - assert "kimi-k2-thinking-turbo" in coding_models + assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1 # ============================================================================= @@ -944,14 +940,12 @@ class TestHuggingFaceModels: def test_main_provider_models_has_huggingface(self): from hermes_cli.main import _PROVIDER_MODELS assert "huggingface" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["huggingface"] - assert len(models) >= 6, "Expected at least 6 curated HF models" + assert len(_PROVIDER_MODELS["huggingface"]) >= 1 def test_models_py_has_huggingface(self): from hermes_cli.models import _PROVIDER_MODELS assert "huggingface" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["huggingface"] - assert len(models) >= 6 + assert len(_PROVIDER_MODELS["huggingface"]) >= 1 def test_model_lists_match(self): """Model lists in main.py and models.py should be identical.""" diff --git a/tests/hermes_cli/test_arcee_provider.py b/tests/hermes_cli/test_arcee_provider.py index a3dd6e1e67..c1437b6d60 100644 --- a/tests/hermes_cli/test_arcee_provider.py +++ b/tests/hermes_cli/test_arcee_provider.py @@ -115,12 +115,12 @@ class TestArceeCredentials: class TestArceeModelCatalog: def test_static_model_list(self): + """Arcee has a static _PROVIDER_MODELS catalog entry. Specific model + names change with releases and don't belong in tests. 
+ """ from hermes_cli.models import _PROVIDER_MODELS assert "arcee" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["arcee"] - assert "trinity-large-thinking" in models - assert "trinity-large-preview" in models - assert "trinity-mini" in models + assert len(_PROVIDER_MODELS["arcee"]) >= 1 def test_canonical_provider_entry(self): from hermes_cli.models import CANONICAL_PROVIDERS diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py index 5b0d9062b9..fb749b6ae7 100644 --- a/tests/hermes_cli/test_auth_commands.py +++ b/tests/hermes_cli/test_auth_commands.py @@ -1011,3 +1011,466 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch): # Verify the auth store was NOT modified (no auto-import happened) after = json.loads((hermes_home / "auth.json").read_text()) assert "openai-codex" not in after.get("providers", {}) + + +def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys): + """`hermes auth remove xai 1` must stick even when the env var is exported + by the shell (not written into ~/.hermes/.env). Before PR for #13371 the + removal silently restored on next load_pool() because _seed_from_env() + re-read os.environ. Now env: is suppressed in auth.json. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Simulate shell export (NOT written to .env) + monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export") + (hermes_home / ".env").write_text("") + + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "xai": [{ + "id": "env-1", + "label": "XAI_API_KEY", + "auth_type": "api_key", + "priority": 0, + "source": "env:XAI_API_KEY", + "access_token": "sk-xai-shell-export", + "base_url": "https://api.x.ai/v1", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth_commands import auth_remove_command + auth_remove_command(SimpleNamespace(provider="xai", target="1")) + + # Suppression marker written + after = json.loads((hermes_home / "auth.json").read_text()) + assert "env:XAI_API_KEY" in after.get("suppressed_sources", {}).get("xai", []) + + # Diagnostic printed pointing at the shell + out = capsys.readouterr().out + assert "still set in your shell environment" in out + assert "Cleared XAI_API_KEY from .env" not in out # wasn't in .env + + # Fresh simulation: shell re-exports, reload pool + monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export") + from agent.credential_pool import load_pool + pool = load_pool("xai") + assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed" + + +def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys): + """When the env var lives only in ~/.hermes/.env (not the shell), the + shell-hint should NOT be printed — avoid scaring the user about a + non-existent shell export. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Key ONLY in .env, shell must not have it + monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False) + (hermes_home / ".env").write_text("DEEPSEEK_API_KEY=sk-ds-only\n") + # Mimic load_env() populating os.environ + monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only") + + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "deepseek": [{ + "id": "env-1", + "label": "DEEPSEEK_API_KEY", + "auth_type": "api_key", + "priority": 0, + "source": "env:DEEPSEEK_API_KEY", + "access_token": "sk-ds-only", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth_commands import auth_remove_command + auth_remove_command(SimpleNamespace(provider="deepseek", target="1")) + + out = capsys.readouterr().out + assert "Cleared DEEPSEEK_API_KEY from .env" in out + assert "still set in your shell environment" not in out + assert (hermes_home / ".env").read_text().strip() == "" + + +def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch): + """Re-adding a credential via `hermes auth add ` clears any + env: suppression marker — strong signal the user wants auth back. + Matches the Codex device_code re-link behaviour. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("XAI_API_KEY", raising=False) + + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": {}, + "suppressed_sources": {"xai": ["env:XAI_API_KEY"]}, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_add_command + + assert is_source_suppressed("xai", "env:XAI_API_KEY") is True + auth_add_command(SimpleNamespace( + provider="xai", auth_type="api_key", + api_key="sk-xai-manual", label="manual", + )) + assert is_source_suppressed("xai", "env:XAI_API_KEY") is False + + +def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch): + """_seed_from_env() must skip env: sources that the user suppressed + via `hermes auth remove`. This is the gate that prevents shell-exported + keys from resurrecting removed credentials. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export") + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"xai": ["env:XAI_API_KEY"]}, + })) + + from agent.credential_pool import _seed_from_env + + entries = [] + changed, active = _seed_from_env("xai", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch): + """OpenRouter is the special-case branch in _seed_from_env; verify it + honours suppression too. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export") + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]}, + })) + + from agent.credential_pool import _seed_from_env + + entries = [] + changed, active = _seed_from_env("openrouter", entries) + assert changed is False + assert entries == [] + assert active == set() + + +# ============================================================================= +# Unified credential-source stickiness — every source Hermes reads from has a +# registered RemovalStep in agent.credential_sources, and every seeding path +# gates on is_source_suppressed. Below: one test per source proving remove +# sticks across a fresh load_pool() call. +# ============================================================================= + + +def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch): + """nous device_code must not re-seed from auth.json when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {"nous": {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}}, + "suppressed_sources": {"nous": ["device_code"]}, + })) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("nous", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch): + """copilot gh_cli must not re-seed when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + 
monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"copilot": ["gh_cli"]}, + })) + + # Stub resolve_copilot_token to return a live token + import hermes_cli.copilot_auth as ca + monkeypatch.setattr(ca, "resolve_copilot_token", lambda: ("ghp_fake", "gh auth token")) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("copilot", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch): + """qwen-oauth qwen-cli must not re-seed from ~/.qwen/oauth_creds.json when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"qwen-oauth": ["qwen-cli"]}, + })) + + import hermes_cli.auth as ha + monkeypatch.setattr(ha, "resolve_qwen_runtime_credentials", lambda **kw: { + "api_key": "tok", "source": "qwen-cli", "base_url": "https://q", + }) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("qwen-oauth", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch): + """anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + import yaml + (hermes_home / "config.yaml").write_text(yaml.dump({"model": {"provider": "anthropic", "model": "claude"}})) + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + 
"providers": {}, + "suppressed_sources": {"anthropic": ["hermes_pkce"]}, + })) + + # Stub the readers so only hermes_pkce is "available"; claude_code returns None + import agent.anthropic_adapter as aa + monkeypatch.setattr(aa, "read_hermes_oauth_credentials", lambda: { + "accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000, + }) + monkeypatch.setattr(aa, "read_claude_code_credentials", lambda: None) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("anthropic", entries) + # hermes_pkce suppressed, claude_code returns None → nothing should be seeded + assert entries == [] + assert "hermes_pkce" not in active + + +def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch): + """Custom provider config: source must not re-seed when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + import yaml + (hermes_home / "config.yaml").write_text(yaml.dump({ + "model": {}, + "custom_providers": [ + {"name": "my", "base_url": "https://c.example.com", "api_key": "sk-custom"}, + ], + })) + + from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key + pool_key = get_custom_provider_pool_key("https://c.example.com") + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {pool_key: ["config:my"]}, + })) + + entries = [] + changed, active = _seed_custom_pool(pool_key, entries) + assert changed is False + assert entries == [] + assert "config:my" not in active + + +def test_credential_sources_registry_has_expected_steps(): + """Sanity check — the registry contains the expected RemovalSteps. + + Guards against accidentally dropping a step during future refactors. + If you add a new credential source, add it to the expected set below. 
+ """ + from agent.credential_sources import _REGISTRY + + descriptions = {step.description for step in _REGISTRY} + expected = { + "gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + "Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + "~/.claude/.credentials.json", + "~/.hermes/.anthropic_oauth.json", + "auth.json providers.nous", + "auth.json providers.openai-codex + ~/.codex/auth.json", + "~/.qwen/oauth_creds.json", + "Custom provider config.yaml api_key field", + } + assert descriptions == expected, f"Registry mismatch. Got: {descriptions}" + + +def test_credential_sources_find_step_returns_none_for_manual(): + """Manual entries have nothing external to clean up — no step registered.""" + from agent.credential_sources import find_removal_step + assert find_removal_step("openrouter", "manual") is None + assert find_removal_step("xai", "manual") is None + + +def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch): + """copilot env:GH_TOKEN must dispatch to the copilot step, not the + generic env-var step. The copilot step handles the duplicate-source + problem (same token seeded as both gh_cli and env:); the generic + env step would only suppress one of the variants. + """ + from agent.credential_sources import find_removal_step + + step = find_removal_step("copilot", "env:GH_TOKEN") + assert step is not None + assert "copilot" in step.description.lower() or "gh" in step.description.lower() + + # Generic step still matches any other provider's env var + step = find_removal_step("xai", "env:XAI_API_KEY") + assert step is not None + assert "env-seeded" in step.description.lower() + + +def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch): + """Removing any copilot source must suppress gh_cli + all env:* variants + so the duplicate-seed paths don't resurrect the credential. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "copilot": [{ + "id": "c1", + "label": "gh auth token", + "auth_type": "api_key", + "priority": 0, + "source": "gh_cli", + "access_token": "ghp_fake", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_remove_command + + auth_remove_command(SimpleNamespace(provider="copilot", target="1")) + + assert is_source_suppressed("copilot", "gh_cli") + assert is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN") + assert is_source_suppressed("copilot", "env:GH_TOKEN") + assert is_source_suppressed("copilot", "env:GITHUB_TOKEN") + + +def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch): + """Re-adding a credential via `hermes auth add ` clears ALL + suppression markers for the provider, not just env:*. This matches + the single "re-engage" semantic — the user wants auth back, period. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": {}, + "suppressed_sources": { + "copilot": ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"], + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_add_command + + auth_add_command(SimpleNamespace( + provider="copilot", auth_type="api_key", + api_key="ghp-manual", label="m", + )) + + assert not is_source_suppressed("copilot", "gh_cli") + assert not is_source_suppressed("copilot", "env:GH_TOKEN") + assert not is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN") + + +def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch): + """Removing a manual:device_code entry (from `hermes auth add openai-codex`) + must suppress the canonical ``device_code`` key, not ``manual:device_code``. + The re-seed gate in _seed_from_singletons checks ``device_code``. 
+ """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}}, + "credential_pool": { + "openai-codex": [{ + "id": "cdx", + "label": "manual-codex", + "auth_type": "oauth", + "priority": 0, + "source": "manual:device_code", + "access_token": "t", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_remove_command + + auth_remove_command(SimpleNamespace(provider="openai-codex", target="1")) + assert is_source_suppressed("openai-codex", "device_code") diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index 8c94902e68..5c719cbc21 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -459,7 +459,8 @@ class TestCustomProviderCompatibility: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 21 + from hermes_cli.config import DEFAULT_CONFIG + assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] assert raw["providers"]["openai-direct"] == { "api": "https://api.openai.com/v1", "api_key": "test-key", @@ -501,7 +502,8 @@ class TestCustomProviderCompatibility: assert compatible[0]["provider_key"] == "openai-direct" assert compatible[0]["api_mode"] == "codex_responses" - def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path): + def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path): + """URL field precedence is base_url > url > api (PR #9332).""" config_path = tmp_path / "config.yaml" config_path.write_text( yaml.safe_dump( @@ -526,7 +528,7 @@ class TestCustomProviderCompatibility: assert compatible == [ { "name": "My Provider", 
- "base_url": "https://api.example.com/v1", + "base_url": "https://base.example.com/v1", "provider_key": "my-provider", } ] @@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 21 + from hermes_cli.config import DEFAULT_CONFIG + assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] assert raw["display"]["tool_progress"] == "off" assert raw["display"]["interim_assistant_messages"] is True @@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 21 + from hermes_cli.config import DEFAULT_CONFIG + assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] assert raw["discord"]["auto_thread"] is True assert raw["discord"]["channel_prompts"] == {} diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py index 7f9348be43..1daeb281f0 100644 --- a/tests/hermes_cli/test_gemini_provider.py +++ b/tests/hermes_cli/test_gemini_provider.py @@ -125,18 +125,12 @@ class TestGeminiCredentials: # ── Model Catalog ── class TestGeminiModelCatalog: - def test_provider_models_exist(self): + def test_provider_entry_exists(self): + """Gemini provider has a model catalog entry. Specific model names + are data that changes with Google releases and don't belong in tests. 
+ """ assert "gemini" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["gemini"] - assert "gemini-2.5-pro" in models - assert "gemini-2.5-flash" in models - assert "gemma-4-31b-it" not in models - - def test_provider_models_has_3x(self): - models = _PROVIDER_MODELS["gemini"] - assert "gemini-3.1-pro-preview" in models - assert "gemini-3-flash-preview" in models - assert "gemini-3.1-flash-lite-preview" in models + assert len(_PROVIDER_MODELS["gemini"]) >= 1 def test_provider_label(self): assert "gemini" in _PROVIDER_LABELS diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py index 65405d909f..72ffc5216d 100644 --- a/tests/hermes_cli/test_model_validation.py +++ b/tests/hermes_cli/test_model_validation.py @@ -457,29 +457,62 @@ class TestValidateApiNotFound: assert "not found" in result["message"] -# -- validate — API unreachable — reject with guidance ---------------- +# -- validate — API unreachable — soft-accept via catalog or warning -------- class TestValidateApiFallback: - def test_any_model_rejected_when_api_down(self): - result = _validate("anthropic/claude-opus-4.6", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + """When /models is unreachable, the validator must accept the model (with + a warning) rather than reject it outright — otherwise provider switches + fail in the gateway for any provider whose /models endpoint is down or + doesn't exist (e.g. opencode-go returns 404 HTML). - def test_unknown_model_also_rejected_when_api_down(self): - result = _validate("anthropic/claude-next-gen", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False - assert "could not reach" in result["message"].lower() + Two paths: + 1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch): + validate against it (recognized=True for known models, + recognized=False with 'Note:' for unknown). + 2. 
Provider has no catalog: accept with a generic 'Note:' warning. - def test_zai_model_rejected_when_api_down(self): + In both cases ``accepted`` and ``persist`` must be True so the gateway can + write the ``_session_model_overrides`` entry. + """ + + def test_known_model_accepted_via_catalog_when_api_down(self): + # Force the openrouter catalog lookup to return a deterministic list. + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"], + ): + result = _validate("anthropic/claude-opus-4.6", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True + + def test_unknown_model_accepted_with_note_when_api_down(self): + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"], + ): + result = _validate("anthropic/claude-next-gen", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + # Message flags it as unverified against the catalog. + assert "not found" in result["message"].lower() or "note" in result["message"].lower() + + def test_zai_known_model_accepted_via_catalog_when_api_down(self): + # glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]). result = _validate("glm-5", provider="zai", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True - def test_unknown_provider_rejected_when_api_down(self): - result = _validate("some-model", provider="totally-unknown", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + def test_unknown_provider_soft_accepted_when_api_down(self): + # No catalog for unknown providers — soft-accept with a Note. 
+ with patch("hermes_cli.models.provider_model_ids", return_value=[]): + result = _validate("some-model", provider="totally-unknown", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "note" in result["message"].lower() def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self): with patch( diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py index fc86caeeb5..ea2f3057f4 100644 --- a/tests/hermes_cli/test_models.py +++ b/tests/hermes_cli/test_models.py @@ -88,6 +88,131 @@ class TestFetchOpenRouterModels: assert models == OPENROUTER_MODELS + def test_filters_out_models_without_tool_support(self, monkeypatch): + """Models whose supported_parameters omits 'tools' must not appear in the picker. + + hermes-agent is tool-calling-first — surfacing a non-tool model leads to + immediate runtime failures when the user selects it. Ported from + Kilo-Org/kilocode#9068. + """ + class _Resp: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self): + # opus-4.6 advertises tools → kept + # nano-image has explicit supported_parameters that OMITS tools → dropped + # qwen3.6-plus advertises tools → kept + return ( + b'{"data":[' + b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},' + b'"supported_parameters":["temperature","tools","tool_choice"]},' + b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},' + b'"supported_parameters":["temperature","response_format"]},' + b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},' + b'"supported_parameters":["tools","temperature"]}' + b']}' + ) + + # Include the image-only id in the curated list so it has a chance to be surfaced. 
+ monkeypatch.setattr( + _models_mod, + "OPENROUTER_MODELS", + [ + ("anthropic/claude-opus-4.6", ""), + ("google/gemini-3-pro-image-preview", ""), + ("qwen/qwen3.6-plus", ""), + ], + ) + monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None) + with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()): + models = fetch_openrouter_models(force_refresh=True) + + ids = [mid for mid, _ in models] + assert "anthropic/claude-opus-4.6" in ids + assert "qwen/qwen3.6-plus" in ids + # Image-only model advertised supported_parameters WITHOUT tools → must be dropped. + assert "google/gemini-3-pro-image-preview" not in ids + + def test_permissive_when_supported_parameters_missing(self, monkeypatch): + """Models missing the supported_parameters field keep appearing in the picker. + + Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older + catalog snapshots) don't populate supported_parameters. Treating missing + as 'unknown → allow' prevents the picker from silently emptying on + those gateways. + """ + class _Resp: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self): + # No supported_parameters field at all on either entry. 
+ return ( + b'{"data":[' + b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},' + b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}' + b']}' + ) + + monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None) + with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()): + models = fetch_openrouter_models(force_refresh=True) + + ids = [mid for mid, _ in models] + assert "anthropic/claude-opus-4.6" in ids + assert "qwen/qwen3.6-plus" in ids + + +class TestOpenRouterToolSupportHelper: + """Unit tests for _openrouter_model_supports_tools (Kilo port #9068).""" + + def test_tools_in_supported_parameters(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": ["temperature", "tools"]} + ) is True + + def test_tools_missing_from_supported_parameters(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": ["temperature", "response_format"]} + ) is False + + def test_supported_parameters_absent_is_permissive(self): + """Missing field → allow (so older / non-OR gateways still work).""" + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools({"id": "x"}) is True + + def test_supported_parameters_none_is_permissive(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools({"id": "x", "supported_parameters": None}) is True + + def test_supported_parameters_malformed_is_permissive(self): + """Malformed (non-list) value → allow rather than silently drop.""" + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": "tools,temperature"} + ) is True + + def 
test_non_dict_item_is_permissive(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools(None) is True + assert _openrouter_model_supports_tools("anthropic/claude-opus-4.6") is True + + def test_empty_supported_parameters_list_drops_model(self): + """Explicit empty list → no tools → drop.""" + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": []} + ) is False + class TestFindOpenrouterSlug: def test_exact_match(self): diff --git a/tests/hermes_cli/test_opencode_go_in_model_list.py b/tests/hermes_cli/test_opencode_go_in_model_list.py index a84701f09c..647ee2bee8 100644 --- a/tests/hermes_cli/test_opencode_go_in_model_list.py +++ b/tests/hermes_cli/test_opencode_go_in_model_list.py @@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set(): opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None) assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set" - assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"] + assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"] # opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when # models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when # the API is unavailable, e.g. in CI). diff --git a/tests/hermes_cli/test_opencode_go_validation_fallback.py b/tests/hermes_cli/test_opencode_go_validation_fallback.py new file mode 100644 index 0000000000..f0ae76098e --- /dev/null +++ b/tests/hermes_cli/test_opencode_go_validation_fallback.py @@ -0,0 +1,133 @@ +"""Tests for the static-catalog fallback in validate_requested_model. 
+ +OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do +NOT expose ``/models`` (the path returns the marketing site's HTML 404). This +caused ``validate_requested_model`` to return ``accepted=False`` for every +model on those providers, which in turn made ``switch_model()`` fail and the +gateway's ``/model --provider opencode-go`` command never write to +``_session_model_overrides``. + +These tests cover the catalog-fallback path: when ``fetch_api_models`` returns +``None``, the validator must consult ``provider_model_ids()`` for the provider +(populated from ``_PROVIDER_MODELS``) rather than rejecting outright. +""" + +from unittest.mock import patch + +from hermes_cli.models import validate_requested_model + + +_UNREACHABLE_PROBE = { + "models": None, + "probed_url": "https://opencode.ai/zen/go/v1/models", + "resolved_base_url": "https://opencode.ai/zen/go/v1", + "suggested_base_url": None, + "used_fallback": False, +} + + +def _patched(func): + """Decorator: force fetch_api_models / probe_api_models to simulate an + unreachable /models endpoint, proving the catalog path is used.""" + def wrapper(*args, **kwargs): + with patch("hermes_cli.models.fetch_api_models", return_value=None), \ + patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE): + return func(*args, **kwargs) + wrapper.__name__ = func.__name__ + return wrapper + + +# --------------------------------------------------------------------------- +# opencode-go: curated catalog in _PROVIDER_MODELS +# --------------------------------------------------------------------------- + + +@_patched +def test_opencode_go_known_model_accepted(): + """A model present in the opencode-go curated catalog must be accepted + even when /models is unreachable.""" + result = validate_requested_model("kimi-k2.6", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True + assert result["message"] is None + + 
+@_patched +def test_opencode_go_known_model_case_insensitive(): + """Catalog lookup is case-insensitive.""" + result = validate_requested_model("KIMI-K2.6", "opencode-go") + assert result["accepted"] is True + assert result["recognized"] is True + + +@_patched +def test_opencode_go_typo_auto_corrected(): + """A close typo (>= 0.9 similarity) is auto-corrected to the catalog + entry.""" + # 'kimi-k2.55' vs 'kimi-k2.5' ratio ≈ 0.95 — within the 0.9 cutoff. + result = validate_requested_model("kimi-k2.55", "opencode-go") + assert result["accepted"] is True + assert result["recognized"] is True + assert result.get("corrected_model") == "kimi-k2.5" + + +@_patched +def test_opencode_go_unknown_model_accepted_with_suggestion(): + """An unknown model that has a medium-similarity match (>= 0.5 but < 0.9) + is accepted with recognized=False and a 'similar models' hint. The key + invariant: the gateway MUST be able to persist this override, so + accepted/persist must both be True.""" + # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest, not to auto-correct. 
+ result = validate_requested_model("kimi-k3-preview", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "kimi-k3-preview" in result["message"] + assert "curated catalog" in result["message"] + + +@_patched +def test_opencode_go_totally_unknown_model_still_accepted(): + """A model with zero similarity to the catalog is still accepted (no + suggestion line) so the user can try a model that hasn't made it into the + curated list yet.""" + result = validate_requested_model("some-brand-new-model", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + # No suggestion text (no close matches) + assert "Similar models" not in result["message"] + assert "opencode" in result["message"].lower() or "opencode go" in result["message"].lower() + + +# --------------------------------------------------------------------------- +# opencode-zen: same pattern as opencode-go +# --------------------------------------------------------------------------- + + +@_patched +def test_opencode_zen_known_model_accepted(): + """opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog.""" + result = validate_requested_model("kimi-k2", "opencode-zen") + assert result["accepted"] is True + assert result["recognized"] is True + + +# --------------------------------------------------------------------------- +# Unknown provider with no catalog: soft-accept (honors the comment's intent) +# --------------------------------------------------------------------------- + + +@_patched +def test_provider_without_catalog_accepts_with_warning(): + """When a provider has no entry in _PROVIDER_MODELS and /models is + unreachable, accept the model with a 'Note:' warning rather than reject. 
+ This matches the in-code comment: 'Accept and persist, but warn so typos + don't silently break things.'""" + # Use a made-up provider name that won't resolve to any catalog. + result = validate_requested_model("some-model", "provider-that-does-not-exist") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "Note:" in result["message"] diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index c7510a55b8..9d2232f39c 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch): resolved = rp.resolve_runtime_provider(requested="my-server") assert "model" not in resolved + + +# --------------------------------------------------------------------------- +# GHSA-76xc-57q6-vm5m — Ollama URL substring leak +# +# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter). +# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY +# when the base_url "looks like" ollama.com. Previous implementation used +# raw substring match; a custom base_url whose PATH or look-alike host +# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint. +# Fix: use base_url_host_matches (same helper as the OpenRouter sweep). +# --------------------------------------------------------------------------- + +class TestOllamaUrlSubstringLeak: + """Call-site regression tests for the fix in _resolve_openrouter_runtime.""" + + def _make_cfg(self, base_url): + return {"base_url": base_url, "api_key": "", "provider": "custom"} + + def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch): + """http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with + ollama.com in PATH. 
Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "http://127.0.0.1:9000/ollama.com/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert "ol-SECRET" not in resolved["api_key"], ( + "OLLAMA_API_KEY must not be sent to an endpoint whose " + "hostname is not ollama.com (GHSA-76xc-57q6-vm5m)" + ) + assert resolved["api_key"] == "oa-secret" + + def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch): + """ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY + must not be sent.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "http://ollama.com.attacker.test:9000/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert "ol-SECRET" not in resolved["api_key"] + assert resolved["api_key"] == "oa-secret" + + def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch): + """https://ollama.com/v1 — legit Ollama Cloud. 
OLLAMA_API_KEY + should be used.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "https://ollama.com/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "ol-legit-key" + + def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch): + """https://api.ollama.com/v1 — legit subdomain.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "https://api.ollama.com/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "ol-legit-key" diff --git a/tests/hermes_cli/test_web_server_host_header.py b/tests/hermes_cli/test_web_server_host_header.py new file mode 100644 index 0000000000..966127b05c --- /dev/null +++ b/tests/hermes_cli/test_web_server_host_header.py @@ -0,0 +1,148 @@ +"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation. + +DNS rebinding defence: a victim browser that has the dashboard open +could be tricked into fetching from an attacker-controlled hostname +that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help — +the browser now treats the attacker origin as same-origin. Validating +the Host header at the application layer rejects the attack. 
+""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_repo = str(Path(__file__).resolve().parents[1]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +class TestHostHeaderValidator: + """Unit test the _is_accepted_host helper directly — cheaper and + more thorough than spinning up the full FastAPI app.""" + + def test_loopback_bind_accepts_loopback_names(self): + from hermes_cli.web_server import _is_accepted_host + + for bound in ("127.0.0.1", "localhost", "::1"): + for host_header in ( + "127.0.0.1", "127.0.0.1:9119", + "localhost", "localhost:9119", + "[::1]", "[::1]:9119", + ): + assert _is_accepted_host(host_header, bound), ( + f"bound={bound} must accept host={host_header}" + ) + + def test_loopback_bind_rejects_attacker_hostnames(self): + """The core rebinding defence: attacker-controlled hosts that + TTL-flip to 127.0.0.1 must be rejected.""" + from hermes_cli.web_server import _is_accepted_host + + for bound in ("127.0.0.1", "localhost"): + for attacker in ( + "evil.example", + "evil.example:9119", + "rebind.attacker.test:80", + "localhost.attacker.test", # subdomain trick + "127.0.0.1.evil.test", # lookalike IP prefix + "", # missing Host + ): + assert not _is_accepted_host(attacker, bound), ( + f"bound={bound} must reject attacker host={attacker!r}" + ) + + def test_zero_zero_bind_accepts_anything(self): + """0.0.0.0 means operator explicitly opted into all-interfaces + (requires --insecure). 
No Host-layer defence is possible — rely + on operator network controls.""" + from hermes_cli.web_server import _is_accepted_host + + for host in ("10.0.0.5", "evil.example", "my-server.corp.net"): + assert _is_accepted_host(host, "0.0.0.0") + assert _is_accepted_host(host + ":9119", "0.0.0.0") + + def test_explicit_non_loopback_bind_requires_exact_match(self): + """If the operator bound to a specific non-loopback hostname, + the Host header must match exactly.""" + from hermes_cli.web_server import _is_accepted_host + + assert _is_accepted_host("my-server.corp.net", "my-server.corp.net") + assert _is_accepted_host("my-server.corp.net:9119", "my-server.corp.net") + # Different host — reject + assert not _is_accepted_host("evil.example", "my-server.corp.net") + # Loopback — reject (we bound to a specific non-loopback name) + assert not _is_accepted_host("localhost", "my-server.corp.net") + + def test_case_insensitive_comparison(self): + """Host headers are case-insensitive per RFC — accept variations.""" + from hermes_cli.web_server import _is_accepted_host + + assert _is_accepted_host("LOCALHOST", "127.0.0.1") + assert _is_accepted_host("LocalHost:9119", "127.0.0.1") + + +class TestHostHeaderMiddleware: + """End-to-end test via the FastAPI app — verify the middleware + rejects bad Host headers with 400.""" + + def test_rebinding_request_rejected(self): + from fastapi.testclient import TestClient + from hermes_cli.web_server import app + + # Simulate start_server having set the bound_host + app.state.bound_host = "127.0.0.1" + try: + client = TestClient(app) + # The TestClient sends Host: testserver by default — which is + # NOT a loopback alias, so the middleware must reject it. 
+ resp = client.get( + "/api/status", + headers={"Host": "evil.example"}, + ) + assert resp.status_code == 400 + assert "Invalid Host header" in resp.json()["detail"] + finally: + # Clean up so other tests don't inherit the bound_host + if hasattr(app.state, "bound_host"): + del app.state.bound_host + + def test_legit_loopback_request_accepted(self): + from fastapi.testclient import TestClient + from hermes_cli.web_server import app + + app.state.bound_host = "127.0.0.1" + try: + client = TestClient(app) + # /api/status is in _PUBLIC_API_PATHS — passes auth — so the + # only thing that can reject is the host header middleware + resp = client.get( + "/api/status", + headers={"Host": "localhost:9119"}, + ) + # Either 200 (endpoint served) or some other non-400 — + # just not the host-rejection 400 + assert resp.status_code != 400 or ( + "Invalid Host header" not in resp.json().get("detail", "") + ) + finally: + if hasattr(app.state, "bound_host"): + del app.state.bound_host + + def test_no_bound_host_skips_validation(self): + """If app.state.bound_host isn't set (e.g. 
running under test + infra without calling start_server), middleware must pass through + rather than crash.""" + from fastapi.testclient import TestClient + from hermes_cli.web_server import app + + # Make sure bound_host isn't set + if hasattr(app.state, "bound_host"): + del app.state.bound_host + + client = TestClient(app) + resp = client.get("/api/status") + # Should get through to the status endpoint, not a 400 + assert resp.status_code != 400 diff --git a/tests/hermes_cli/test_xiaomi_provider.py b/tests/hermes_cli/test_xiaomi_provider.py index 57e5bdda85..f26740483c 100644 --- a/tests/hermes_cli/test_xiaomi_provider.py +++ b/tests/hermes_cli/test_xiaomi_provider.py @@ -136,13 +136,15 @@ class TestXiaomiModelCatalog: assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi" def test_static_model_list_fallback(self): - """Static _PROVIDER_MODELS fallback must exist for model picker.""" + """Static _PROVIDER_MODELS fallback must exist for model picker. + + We only assert the provider key is present — the specific model + names are data that changes with upstream releases and doesn't + belong in tests. 
+ """ from hermes_cli.models import _PROVIDER_MODELS assert "xiaomi" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["xiaomi"] - assert "mimo-v2-pro" in models - assert "mimo-v2-omni" in models - assert "mimo-v2-flash" in models + assert len(_PROVIDER_MODELS["xiaomi"]) >= 1 def test_list_agentic_models_mock(self, monkeypatch): """When models.dev returns Xiaomi data, list_agentic_models should return models.""" diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py index 7d5a166544..7a85022a5c 100644 --- a/tests/run_agent/test_anthropic_prompt_cache_policy.py +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider: assert agent._anthropic_prompt_cache_policy() == (False, False) +class TestQwenAlibabaFamily: + """Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire. + + Upstream pi-mono #3392 / #3393 documented that these providers serve + zero cache hits without Anthropic-style markers. Regression reported + by community user (Qwen3.6 on opencode-go burning through + subscription with no cache). Envelope layout, not native, because the + wire format is OpenAI chat.completions. 
+ """ + + def test_qwen_on_opencode_go_caches_with_envelope_layout(self): + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="qwen3.6-plus", + ) + should, native = agent._anthropic_prompt_cache_policy() + assert should is True, "Qwen on opencode-go must cache" + assert native is False, "opencode-go is OpenAI-wire; envelope layout" + + def test_qwen35_plus_on_opencode_go(self): + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="qwen3.5-plus", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_qwen_on_opencode_zen_caches(self): + agent = _make_agent( + provider="opencode", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="qwen3-coder-plus", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_qwen_on_direct_alibaba_caches(self): + agent = _make_agent( + provider="alibaba", + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", + api_mode="chat_completions", + model="qwen3-coder", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_non_qwen_on_opencode_go_does_not_cache(self): + # GLM / Kimi on opencode-go don't need markers (they have automatic + # server-side caching or none at all). 
+ agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="glm-5", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + def test_kimi_on_opencode_go_does_not_cache(self): + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="kimi-k2.5", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + def test_qwen_on_openrouter_not_affected(self): + # Qwen via OpenRouter falls through — OpenRouter has its own + # upstream caching arrangement for Qwen (provider-dependent). + agent = _make_agent( + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + model="qwen/qwen3-coder", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + class TestExplicitOverrides: """Policy accepts keyword overrides for switch_model / fallback activation.""" diff --git a/tests/run_agent/test_create_openai_client_proxy_env.py b/tests/run_agent/test_create_openai_client_proxy_env.py index 7ac9b7e16e..9ef8e3dcd1 100644 --- a/tests/run_agent/test_create_openai_client_proxy_env.py +++ b/tests/run_agent/test_create_openai_client_proxy_env.py @@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch): assert _get_proxy_from_env() == "http://real-proxy:8080" +def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") + assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/" + + @patch("run_agent.OpenAI") def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch): """With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool. 
diff --git a/tests/run_agent/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py index ed1f21bfa1..9dd8ce327e 100644 --- a/tests/run_agent/test_interrupt_propagation.py +++ b/tests/run_agent/test_interrupt_propagation.py @@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase): agent._active_children = [] agent._active_children_lock = threading.Lock() agent.quiet_mode = True + # Provider/model/base_url are read by stale-timeout resolution paths; + # the specific values don't matter for interrupt tests. + agent.provider = "openrouter" + agent.model = "test/model" + agent._base_url = "http://localhost:1234" return agent def test_parent_interrupt_sets_child_flag(self): diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 9f3341101a..e7a96e5dee 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -952,6 +952,84 @@ class TestBuildApiKwargs: assert "temperature" not in kwargs + def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent): + """Kimi endpoint should send max_tokens=32000 and reasoning_effort as + top-level params, matching Kimi CLI's default behavior.""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["max_tokens"] == 32000 + assert kwargs["reasoning_effort"] == "medium" + + def test_kimi_coding_endpoint_respects_custom_effort(self, agent): + """reasoning_effort should reflect reasoning_config.effort when set.""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + agent.reasoning_config = {"enabled": True, "effort": "high"} + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert 
kwargs["reasoning_effort"] == "high" + + def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent): + """Kimi endpoint should send extra_body.thinking={"type":"enabled"} + to activate reasoning mode, mirroring Kimi CLI's with_thinking().""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + + def test_kimi_coding_endpoint_disables_thinking(self, agent): + """When reasoning_config.enabled=False, thinking should be disabled + and reasoning_effort should be omitted entirely — mirroring Kimi + CLI's with_thinking("off") which maps to reasoning_effort=None.""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + agent.reasoning_config = {"enabled": False} + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["extra_body"]["thinking"] == {"type": "disabled"} + assert "reasoning_effort" not in kwargs + + def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent): + """api.moonshot.ai should get the same Kimi-compatible params.""" + agent.base_url = "https://api.moonshot.ai/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-k2.5" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["max_tokens"] == 32000 + assert kwargs["reasoning_effort"] == "medium" + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + + def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent): + """api.moonshot.cn (China endpoint) should get the same params.""" + agent.base_url = "https://api.moonshot.cn/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = 
"kimi-k2.5" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["max_tokens"] == 32000 + assert kwargs["reasoning_effort"] == "medium" + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + def test_provider_preferences_injected(self, agent): agent.base_url = "https://openrouter.ai/api/v1" agent.providers_allowed = ["Anthropic"] diff --git a/tests/test_account_usage.py b/tests/test_account_usage.py new file mode 100644 index 0000000000..072dc21c6f --- /dev/null +++ b/tests/test_account_usage.py @@ -0,0 +1,203 @@ +from datetime import datetime, timezone + +from agent.account_usage import ( + AccountUsageSnapshot, + AccountUsageWindow, + fetch_account_usage, + render_account_usage_lines, +) + + +class _Response: + def __init__(self, payload, status_code=200): + self._payload = payload + self.status_code = status_code + + def raise_for_status(self): + if self.status_code >= 400: + raise RuntimeError(f"HTTP {self.status_code}") + + def json(self): + return self._payload + + +class _Client: + def __init__(self, payload): + self._payload = payload + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, url, headers=None): + return _Response(self._payload) + + +class _RoutingClient: + def __init__(self, payloads): + self._payloads = payloads + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, url, headers=None): + return _Response(self._payloads[url]) + + +def test_fetch_account_usage_codex(monkeypatch): + monkeypatch.setattr( + "agent.account_usage.resolve_codex_runtime_credentials", + lambda refresh_if_expiring=True: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "access-token", + }, + ) + monkeypatch.setattr( + "agent.account_usage._read_codex_tokens", + lambda: {"tokens": {"account_id": "acct_123"}}, + ) + 
monkeypatch.setattr( + "agent.account_usage.httpx.Client", + lambda timeout=15.0: _Client( + { + "plan_type": "pro", + "rate_limit": { + "primary_window": { + "used_percent": 15, + "reset_at": 1_900_000_000, + "limit_window_seconds": 18000, + }, + "secondary_window": { + "used_percent": 40, + "reset_at": 1_900_500_000, + "limit_window_seconds": 604800, + }, + }, + "credits": {"has_credits": True, "balance": 12.5}, + } + ), + ) + + snapshot = fetch_account_usage("openai-codex") + + assert snapshot is not None + assert snapshot.plan == "Pro" + assert len(snapshot.windows) == 2 + assert snapshot.windows[0].label == "Session" + assert snapshot.windows[0].used_percent == 15.0 + assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc) + assert "Credits balance: $12.50" in snapshot.details + + +def test_render_account_usage_lines_includes_reset_and_provider(): + snapshot = AccountUsageSnapshot( + provider="openai-codex", + source="usage_api", + fetched_at=datetime.now(timezone.utc), + plan="Pro", + windows=( + AccountUsageWindow( + label="Session", + used_percent=25, + reset_at=datetime.now(timezone.utc), + ), + ), + details=("Credits balance: $9.99",), + ) + lines = render_account_usage_lines(snapshot) + + assert lines[0] == "📈 Account limits" + assert "openai-codex (Pro)" in lines[1] + assert "Session: 75% remaining (25% used)" in lines[2] + assert "Credits balance: $9.99" in lines[3] + + +def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch): + monkeypatch.setattr( + "agent.account_usage.resolve_runtime_provider", + lambda requested, explicit_base_url=None, explicit_api_key=None: { + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-test", + }, + ) + monkeypatch.setattr( + "agent.account_usage.httpx.Client", + lambda timeout=10.0: _RoutingClient( + { + "https://openrouter.ai/api/v1/credits": { + "data": {"total_credits": 300.0, 
"total_usage": 10.92} + }, + "https://openrouter.ai/api/v1/key": { + "data": { + "limit": 100.0, + "limit_remaining": 70.0, + "limit_reset": "monthly", + "usage": 12.5, + "usage_daily": 0.5, + "usage_weekly": 2.0, + "usage_monthly": 8.0, + "rate_limit": {"requests": -1, "interval": "10s"}, + } + }, + } + ), + ) + + snapshot = fetch_account_usage("openrouter") + + assert snapshot is not None + assert snapshot.windows == ( + AccountUsageWindow( + label="API key quota", + used_percent=30.0, + detail="$70.00 of $100.00 remaining • resets monthly", + ), + ) + assert "Credits balance: $289.08" in snapshot.details + assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details + assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot)) + + +def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch): + monkeypatch.setattr( + "agent.account_usage.resolve_runtime_provider", + lambda requested, explicit_base_url=None, explicit_api_key=None: { + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-test", + }, + ) + monkeypatch.setattr( + "agent.account_usage.httpx.Client", + lambda timeout=10.0: _RoutingClient( + { + "https://openrouter.ai/api/v1/credits": { + "data": {"total_credits": 100.0, "total_usage": 25.5} + }, + "https://openrouter.ai/api/v1/key": { + "data": { + "limit": None, + "limit_remaining": None, + "usage": 25.5, + "usage_daily": 1.25, + "usage_weekly": 4.5, + "usage_monthly": 18.0, + } + }, + } + ), + ) + + snapshot = fetch_account_usage("openrouter") + + assert snapshot is not None + assert snapshot.windows == () + assert "Credits balance: $74.50" in snapshot.details + assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details diff --git a/tests/test_base_url_hostname.py b/tests/test_base_url_hostname.py index 54aca08c02..cdf8450a25 100644 --- 
a/tests/test_base_url_hostname.py +++ b/tests/test_base_url_hostname.py @@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases: def test_trailing_dot_on_domain_stripped(self): assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True + + +class TestOllamaUrlHostCheck: + """GHSA-76xc-57q6-vm5m — ollama.com was using a raw substring match for + credential selection (same bug class as GHSA-xf8p-v2cg-h7h5 for OpenRouter). + These tests lock in that the base_url_host_matches fix correctly rejects + the same attack vectors for Ollama. + """ + + def test_ollama_com_path_injection_rejected(self): + """http://evil.test/ollama.com/v1 — ollama.com appears in the path, + not the host. Must not be treated as Ollama Cloud.""" + assert base_url_host_matches( + "http://127.0.0.1:9000/ollama.com/v1", "ollama.com" + ) is False + + def test_ollama_com_subdomain_lookalike_rejected(self): + """ollama.com.attacker.test is a separate host, not ollama.com.""" + assert base_url_host_matches( + "http://ollama.com.attacker.test:9000/v1", "ollama.com" + ) is False + + def test_ollama_com_localtest_me_rejected(self): + """ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me + but its true hostname is localtest.me, not ollama.com.""" + assert base_url_host_matches( + "http://ollama.com.localtest.me:9000/v1", "ollama.com" + ) is False + + def test_ollama_ai_is_not_ollama_com(self): + """Different TLD. 
ollama.ai is not ollama.com.""" + assert base_url_host_matches( + "https://ollama.ai/v1", "ollama.com" + ) is False + + def test_localhost_ollama_port_is_not_ollama_com(self): + """http://localhost:11434/v1 is a local Ollama install, but its + hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret) + must not be sent.""" + assert base_url_host_matches( + "http://localhost:11434/v1", "ollama.com" + ) is False + + def test_genuine_ollama_com_matches(self): + assert base_url_host_matches( + "https://ollama.com/api/generate", "ollama.com" + ) is True + + def test_ollama_com_subdomain_matches(self): + assert base_url_host_matches( + "https://api.ollama.com/v1", "ollama.com" + ) is True diff --git a/tests/test_transform_tool_result_hook.py b/tests/test_transform_tool_result_hook.py index 159446fd57..508c0bdc0c 100644 --- a/tests/test_transform_tool_result_hook.py +++ b/tests/test_transform_tool_result_hook.py @@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch): def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path): """End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results.""" + import yaml + hermes_home = Path(os.environ["HERMES_HOME"]) plugins_dir = hermes_home / "plugins" plugin_dir = plugins_dir / "transform_result_canon" @@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat 'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n', encoding="utf-8", ) + # Plugins are opt-in — must be listed in plugins.enabled to load. + cfg_path = hermes_home / "config.yaml" + cfg_path.write_text( + yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}), + encoding="utf-8", + ) + # Force a fresh plugin manager so the new config is picked up. 
+ plugins_mod._plugin_manager = plugins_mod.PluginManager() plugins_mod.discover_plugins() out = _run_handle_function_call( diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py index f726dd777c..9ce3d13202 100644 --- a/tests/tools/test_browser_camofox_state.py +++ b/tests/tools/test_browser_camofox_state.py @@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults: browser_cfg = DEFAULT_CONFIG["browser"] assert browser_cfg["camofox"]["managed_persistence"] is False - - def test_config_version_matches_current_schema(self): - from hermes_cli.config import DEFAULT_CONFIG - - # The current schema version is tracked globally; unrelated default - # options may bump it after browser defaults are added. - assert DEFAULT_CONFIG["_config_version"] == 20 diff --git a/tests/tools/test_env_passthrough.py b/tests/tools/test_env_passthrough.py index 6e48ee5c30..eba84bdb2c 100644 --- a/tests/tools/test_env_passthrough.py +++ b/tests/tools/test_env_passthrough.py @@ -172,28 +172,60 @@ class TestTerminalIntegration: assert blocked_var not in result assert "PATH" in result - def test_passthrough_allows_blocklisted_var(self): - from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST + def test_passthrough_cannot_override_provider_blocklist(self): + """GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept + Hermes provider credentials — that was the bypass where a skill + could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and + defeat the execute_code sandbox scrubbing.""" + from tools.environments.local import ( + _sanitize_subprocess_env, + _HERMES_PROVIDER_ENV_BLOCKLIST, + ) blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST)) + # Attempt to register — must be silently refused (logged warning). 
register_env_passthrough([blocked_var]) + # is_env_passthrough must NOT report it as allowed + assert not is_env_passthrough(blocked_var) + + # Sanitizer still strips the var from subprocess env env = {blocked_var: "secret_value", "PATH": "/usr/bin"} result = _sanitize_subprocess_env(env) - assert blocked_var in result - assert result[blocked_var] == "secret_value" + assert blocked_var not in result + assert "PATH" in result - def test_make_run_env_passthrough(self, monkeypatch): - from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST + def test_make_run_env_blocklist_override_rejected(self): + """_make_run_env must NOT expose a blocklisted var to subprocess env + even after a skill attempts to register it via passthrough.""" + import os + from tools.environments.local import ( + _make_run_env, + _HERMES_PROVIDER_ENV_BLOCKLIST, + ) blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST)) - monkeypatch.setenv(blocked_var, "secret_value") + os.environ[blocked_var] = "secret_value" + try: + # Without passthrough — blocked + result_before = _make_run_env({}) + assert blocked_var not in result_before - # Without passthrough — blocked - result_before = _make_run_env({}) - assert blocked_var not in result_before + # Skill tries to register it — must be refused, so still blocked + register_env_passthrough([blocked_var]) + result_after = _make_run_env({}) + assert blocked_var not in result_after + finally: + os.environ.pop(blocked_var, None) - # With passthrough — allowed - register_env_passthrough([blocked_var]) - result_after = _make_run_env({}) - assert blocked_var in result_after + def test_non_hermes_api_key_still_registerable(self): + """Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) 
are NOT + Hermes provider credentials and must still pass through — skills + that legitimately wrap third-party APIs must keep working.""" + # TENOR_API_KEY is a real example — used by the gif-search skill + register_env_passthrough(["TENOR_API_KEY"]) + assert is_env_passthrough("TENOR_API_KEY") + + # Arbitrary skill-specific var + register_env_passthrough(["MY_SKILL_CUSTOM_CONFIG"]) + assert is_env_passthrough("MY_SKILL_CUSTOM_CONFIG") diff --git a/tests/tools/test_fuzzy_match.py b/tests/tools/test_fuzzy_match.py index 7a03065f4e..3f7d315820 100644 --- a/tests/tools/test_fuzzy_match.py +++ b/tests/tools/test_fuzzy_match.py @@ -230,3 +230,102 @@ class TestEscapeDriftGuard: new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string) assert err is None assert count == 1 + + +class TestFindClosestLines: + def setup_method(self): + from tools.fuzzy_match import find_closest_lines + self.find_closest_lines = find_closest_lines + + def test_finds_similar_line(self): + content = "def foo():\n pass\ndef bar():\n return 1\n" + result = self.find_closest_lines("def baz():", content) + assert "def foo" in result or "def bar" in result + + def test_returns_empty_for_no_match(self): + content = "completely different content here" + result = self.find_closest_lines("xyzzy_no_match_possible_!!!", content) + assert result == "" + + def test_returns_empty_for_empty_inputs(self): + assert self.find_closest_lines("", "some content") == "" + assert self.find_closest_lines("old string", "") == "" + + def test_includes_context_lines(self): + content = "line1\nline2\ndef target():\n pass\nline5\n" + result = self.find_closest_lines("def target():", content) + assert "target" in result + + def test_includes_line_numbers(self): + content = "line1\nline2\ndef foo():\n pass\n" + result = self.find_closest_lines("def foo():", content) + # Should include line numbers in format "N| content" + assert "|" in result + + +class TestFormatNoMatchHint: + """Gating tests for 
format_no_match_hint — the shared helper that decides + whether a 'Did you mean?' snippet should be appended to an error. + """ + + def setup_method(self): + from tools.fuzzy_match import format_no_match_hint + self.fmt = format_no_match_hint + + def test_fires_on_could_not_find_with_match(self): + """Classic no-match: similar content exists → hint fires.""" + content = "def foo():\n pass\ndef bar():\n pass\n" + result = self.fmt( + "Could not find a match for old_string in the file", + 0, "def baz():", content, + ) + assert "Did you mean" in result + assert "foo" in result or "bar" in result + + def test_silent_on_ambiguous_match_error(self): + """'Found N matches' is not a missing-match failure — no hint.""" + content = "aaa bbb aaa\n" + result = self.fmt( + "Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.", + 0, "aaa", content, + ) + assert result == "" + + def test_silent_on_escape_drift_error(self): + """Escape-drift errors are intentional blocks — hint would mislead.""" + content = "x = 1\n" + result = self.fmt( + "Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...", + 0, "x = \\'1\\'", content, + ) + assert result == "" + + def test_silent_on_identical_strings(self): + """old_string == new_string — hint irrelevant.""" + result = self.fmt( + "old_string and new_string are identical", + 0, "foo", "foo bar\n", + ) + assert result == "" + + def test_silent_when_match_count_nonzero(self): + """If match succeeded, we shouldn't be in the error path — defense in depth.""" + result = self.fmt( + "Could not find a match for old_string in the file", + 1, "foo", "foo bar\n", + ) + assert result == "" + + def test_silent_on_none_error(self): + """No error at all — no hint.""" + result = self.fmt(None, 0, "foo", "bar\n") + assert result == "" + + def test_silent_when_no_similar_content(self): + """Even for a valid no-match error, skip hint when nothing similar exists.""" + result = 
self.fmt( + "Could not find a match for old_string in the file", + 0, "totally_unique_xyzzy_qux", "abc\nxyz\n", + ) + assert result == "" + diff --git a/tests/tools/test_image_generation_env.py b/tests/tools/test_image_generation_env.py new file mode 100644 index 0000000000..fc4e655334 --- /dev/null +++ b/tests/tools/test_image_generation_env.py @@ -0,0 +1,39 @@ +"""FAL_KEY env var normalization (whitespace-only treated as unset).""" + + +def test_fal_key_whitespace_is_unset(monkeypatch): + # Whitespace-only FAL_KEY must NOT register as configured, and the managed + # gateway fallback must be disabled for this assertion to be meaningful. + monkeypatch.setenv("FAL_KEY", " ") + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + + assert image_generation_tool.check_fal_api_key() is False + + +def test_fal_key_valid(monkeypatch): + monkeypatch.setenv("FAL_KEY", "sk-test") + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + + assert image_generation_tool.check_fal_api_key() is True + + +def test_fal_key_empty_is_unset(monkeypatch): + monkeypatch.setenv("FAL_KEY", "") + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + + assert image_generation_tool.check_fal_api_key() is False diff --git a/tests/tools/test_local_shell_init.py b/tests/tools/test_local_shell_init.py new file mode 100644 index 0000000000..96e26e7357 --- /dev/null +++ b/tests/tools/test_local_shell_init.py @@ -0,0 +1,162 @@ +"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc. + +A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that +register themselves there (nvm, asdf, pyenv) stay invisible to the +environment snapshot built by ``LocalEnvironment.init_session``. 
These +tests verify the config-driven prelude that fixes that. +""" + +import os +from unittest.mock import patch + +import pytest + +from tools.environments.local import ( + LocalEnvironment, + _prepend_shell_init, + _read_terminal_shell_init_config, + _resolve_shell_init_files, +) + + +class TestResolveShellInitFiles: + def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch): + bashrc = tmp_path / ".bashrc" + bashrc.write_text('export MARKER=seen\n') + monkeypatch.setenv("HOME", str(tmp_path)) + + # Default config: auto_source_bashrc on, no explicit list. + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([], True), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [str(bashrc)] + + def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch): + # No bashrc written. + monkeypatch.setenv("HOME", str(tmp_path)) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([], True), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [] + + def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch): + bashrc = tmp_path / ".bashrc" + bashrc.write_text('export MARKER=seen\n') + monkeypatch.setenv("HOME", str(tmp_path)) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([], False), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [] + + def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch): + bashrc = tmp_path / ".bashrc" + bashrc.write_text('export FROM_BASHRC=1\n') + custom = tmp_path / "custom.sh" + custom.write_text('export FROM_CUSTOM=1\n') + monkeypatch.setenv("HOME", str(tmp_path)) + + # auto_source_bashrc stays True but the explicit list takes precedence. 
+ with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([str(custom)], True), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [str(custom)] + assert str(bashrc) not in resolved + + def test_expands_home_and_env_vars(self, tmp_path, monkeypatch): + target = tmp_path / "rc" / "custom.sh" + target.parent.mkdir() + target.write_text('export A=1\n') + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("CUSTOM_RC_DIR", str(tmp_path / "rc")) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=(["~/rc/custom.sh"], False), + ): + resolved_home = _resolve_shell_init_files() + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=(["${CUSTOM_RC_DIR}/custom.sh"], False), + ): + resolved_var = _resolve_shell_init_files() + + assert resolved_home == [str(target)] + assert resolved_var == [str(target)] + + def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch): + monkeypatch.setenv("HOME", str(tmp_path)) + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([str(tmp_path / "does-not-exist.sh")], False), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [] + + +class TestPrependShellInit: + def test_empty_list_returns_command_unchanged(self): + assert _prepend_shell_init("echo hi", []) == "echo hi" + + def test_prepends_guarded_source_lines(self): + wrapped = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"]) + assert "echo hi" in wrapped + # Each file is sourced through a guarded [ -r … ] && . '…' || true + # pattern so a missing/broken rc can't abort the bootstrap. 
+ assert "/tmp/a.sh" in wrapped + assert "/tmp/b.sh" in wrapped + assert "|| true" in wrapped + assert "set +e" in wrapped + + def test_escapes_single_quotes(self): + wrapped = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"]) + # The path must survive as the shell receives it; embedded single + # quote is escaped as '\'' rather than breaking the outer quoting. + assert "o'\\''malley" in wrapped + + +@pytest.mark.skipif( + os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"), + reason="Requires bash; CI sandbox may strip it.", +) +class TestSnapshotEndToEnd: + """Spin up a real LocalEnvironment and confirm the snapshot sources + extra init files.""" + + def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch): + init_file = tmp_path / "custom-init.sh" + init_file.write_text( + 'export HERMES_SHELL_INIT_PROBE="probe-ok"\n' + 'export PATH="/opt/shell-init-probe/bin:$PATH"\n' + ) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([str(init_file)], False), + ): + env = LocalEnvironment(cwd=str(tmp_path), timeout=15) + try: + result = env.execute( + 'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"' + ) + finally: + env.cleanup() + + output = result.get("output", "") + assert "PROBE=probe-ok" in output + assert "/opt/shell-init-probe/bin" in output diff --git a/tests/tools/test_mcp_circuit_breaker.py b/tests/tools/test_mcp_circuit_breaker.py new file mode 100644 index 0000000000..0173fa52af --- /dev/null +++ b/tests/tools/test_mcp_circuit_breaker.py @@ -0,0 +1,252 @@ +"""Tests for MCP tool-handler circuit-breaker recovery. + +The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit +calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD`` +consecutive times, then *transition back to a usable state* once the +server has had time to recover (or an explicit reconnect succeeds). 
+ +The original implementation only had two states — closed and open — with +no mechanism to transition back to closed, so a tripped breaker stayed +tripped for the lifetime of the process. These tests lock in the +half-open / cooldown / reconnect-resets-breaker behavior that fixes +that. +""" +import json +from unittest.mock import MagicMock + +import pytest + + +pytest.importorskip("mcp.client.auth.oauth2") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _install_stub_server(mcp_tool_module, name: str, call_tool_impl): + """Install a fake MCP server in the module's registry. + + ``call_tool_impl`` is an async function stored at ``session.call_tool`` + (it's what the tool handler invokes). + """ + server = MagicMock() + server.name = name + session = MagicMock() + session.call_tool = call_tool_impl + server.session = session + server._reconnect_event = MagicMock() + server._ready = MagicMock() + server._ready.is_set.return_value = True + + mcp_tool_module._servers[name] = server + mcp_tool_module._server_error_counts.pop(name, None) + if hasattr(mcp_tool_module, "_server_breaker_opened_at"): + mcp_tool_module._server_breaker_opened_at.pop(name, None) + return server + + +def _cleanup(mcp_tool_module, name: str) -> None: + mcp_tool_module._servers.pop(name, None) + mcp_tool_module._server_error_counts.pop(name, None) + if hasattr(mcp_tool_module, "_server_breaker_opened_at"): + mcp_tool_module._server_breaker_opened_at.pop(name, None) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path): + """After a tripped breaker's cooldown elapses, the *next* call must + actually execute against the session (half-open probe). 
When the + probe succeeds, the breaker resets to fully closed. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _make_tool_handler + + call_count = {"n": 0} + + async def _call_tool_success(*a, **kw): + call_count["n"] += 1 + result = MagicMock() + result.isError = False + block = MagicMock() + block.text = "ok" + result.content = [block] + result.structuredContent = None + return result + + _install_stub_server(mcp_tool, "srv", _call_tool_success) + mcp_tool._ensure_mcp_loop() + + try: + # Trip the breaker by setting the count at/above threshold and + # stamping the open-time to "now". + mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + fake_now = [1000.0] + + def _fake_monotonic(): + return fake_now[0] + + monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic) + # The breaker-open timestamp dict is introduced by the fix; on + # a pre-fix build it won't exist, which will cause the test to + # fail at the .get() inside the gate (correct — the fix is + # required for this state to be tracked at all). + if hasattr(mcp_tool, "_server_breaker_opened_at"): + mcp_tool._server_breaker_opened_at["srv"] = fake_now[0] + cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0) + + handler = _make_tool_handler("srv", "tool1", 10.0) + + # Before cooldown: must short-circuit (no session call). + result = handler({}) + parsed = json.loads(result) + assert "error" in parsed, parsed + assert "unreachable" in parsed["error"].lower() + assert call_count["n"] == 0, ( + "breaker should short-circuit before cooldown elapses" + ) + + # Advance past cooldown → next call is a half-open probe that + # actually hits the session. 
+ fake_now[0] += cooldown + 1.0 + + result = handler({}) + parsed = json.loads(result) + assert parsed.get("result") == "ok", parsed + assert call_count["n"] == 1, "half-open probe should invoke session" + + # On probe success the breaker must close (count reset to 0). + assert mcp_tool._server_error_counts.get("srv", 0) == 0 + finally: + _cleanup(mcp_tool, "srv") + + +def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path): + """If the half-open probe fails, the breaker must re-arm the + cooldown (not let every subsequent call through). + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _make_tool_handler + + call_count = {"n": 0} + + async def _call_tool_fails(*a, **kw): + call_count["n"] += 1 + raise RuntimeError("still broken") + + _install_stub_server(mcp_tool, "srv", _call_tool_fails) + mcp_tool._ensure_mcp_loop() + + try: + mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + fake_now = [1000.0] + + def _fake_monotonic(): + return fake_now[0] + + monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic) + if hasattr(mcp_tool, "_server_breaker_opened_at"): + mcp_tool._server_breaker_opened_at["srv"] = fake_now[0] + cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0) + + handler = _make_tool_handler("srv", "tool1", 10.0) + + # Advance past cooldown, run probe, expect failure. + fake_now[0] += cooldown + 1.0 + result = handler({}) + parsed = json.loads(result) + assert "error" in parsed + assert call_count["n"] == 1, "probe should invoke session once" + + # The probe failure must have re-armed the cooldown — another + # immediate call should short-circuit, not invoke session again. 
+ result = handler({}) + parsed = json.loads(result) + assert "unreachable" in parsed.get("error", "").lower() + assert call_count["n"] == 1, ( + "breaker should re-open and block further calls after probe failure" + ) + finally: + _cleanup(mcp_tool, "srv") + + +def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path): + """When the auth-recovery path successfully reconnects the server, + the breaker should be cleared so subsequent calls aren't gated on a + stale failure count — even if the post-reconnect retry itself fails. + + This locks in the fix-#2 contract: a successful reconnect is + sufficient evidence that the server is viable again. Under the old + implementation, reset only happened on retry *success*, so a + reconnect+retry-failure left the counter pinned above threshold + forever. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests + from mcp.client.auth import OAuthFlowError + + reset_manager_for_tests() + + async def _call_tool_unused(*a, **kw): # pragma: no cover + raise AssertionError("session.call_tool should not be reached in this test") + + _install_stub_server(mcp_tool, "srv", _call_tool_unused) + mcp_tool._ensure_mcp_loop() + + # Open the breaker well above threshold, with a recent open-time so + # it would short-circuit everything without a reset. + mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2 + if hasattr(mcp_tool, "_server_breaker_opened_at"): + import time as _time + mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic() + + # Force handle_401 to claim recovery succeeded. + mgr = get_manager() + + async def _h401(name, token=None): + return True + + monkeypatch.setattr(mgr, "handle_401", _h401) + + try: + # Retry fails *after* the successful reconnect. Under the old + # implementation this bumps an already-tripped counter even + # higher. 
Under fix #2 the reset happens on successful + # reconnect, and the post-retry bump only raises the fresh + # count to 1 — still below threshold. + def _retry_call(): + raise OAuthFlowError("still failing post-reconnect") + + result = mcp_tool._handle_auth_error_and_retry( + "srv", + OAuthFlowError("initial"), + _retry_call, + "tools/call test", + ) + # The call as a whole still surfaces needs_reauth because the + # retry itself didn't succeed, but the breaker state must + # reflect the successful reconnect. + assert result is not None + parsed = json.loads(result) + assert parsed.get("needs_reauth") is True, parsed + + # Post-reconnect count was reset to 0, then the failing retry + # bumped it to exactly 1 — well below threshold. + count = mcp_tool._server_error_counts.get("srv", 0) + assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, ( + f"successful reconnect must reset the breaker below threshold; " + f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}" + ) + finally: + _cleanup(mcp_tool, "srv") diff --git a/tests/tools/test_terminal_output_transform_hook.py b/tests/tools/test_terminal_output_transform_hook.py index bdbdcc0f5d..ccba7f77c1 100644 --- a/tests/tools/test_terminal_output_transform_hook.py +++ b/tests/tools/test_terminal_output_transform_hook.py @@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path): + import yaml + hermes_home = Path(os.environ["HERMES_HOME"]) plugins_dir = hermes_home / "plugins" plugin_dir = plugins_dir / "terminal_transform" @@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp 'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n', encoding="utf-8", ) + # Plugins are opt-in — must be listed in plugins.enabled to load. 
+ cfg_path = hermes_home / "config.yaml" + cfg_path.write_text( + yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}), + encoding="utf-8", + ) + # Force a fresh plugin manager so the new config is picked up. + plugins_mod._plugin_manager = plugins_mod.PluginManager() plugins_mod.discover_plugins() long_output = "X" * 60000 diff --git a/tests/tools/test_tts_kittentts.py b/tests/tools/test_tts_kittentts.py new file mode 100644 index 0000000000..ab841f59f4 --- /dev/null +++ b/tests/tools/test_tts_kittentts.py @@ -0,0 +1,198 @@ +"""Tests for the KittenTTS local provider in tools/tts_tool.py.""" + +import json +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + + +@pytest.fixture(autouse=True) +def clean_env(monkeypatch): + for key in ("HERMES_SESSION_PLATFORM",): + monkeypatch.delenv(key, raising=False) + + +@pytest.fixture(autouse=True) +def clear_kittentts_cache(): + """Reset the module-level model cache between tests.""" + from tools import tts_tool as _tt + _tt._kittentts_model_cache.clear() + yield + _tt._kittentts_model_cache.clear() + + +@pytest.fixture +def mock_kittentts_module(): + """Inject a fake kittentts + soundfile module that return stub objects.""" + fake_model = MagicMock() + # 24kHz float32 PCM at ~2s of silence + fake_model.generate.return_value = np.zeros(48000, dtype=np.float32) + fake_cls = MagicMock(return_value=fake_model) + fake_kittentts = MagicMock() + fake_kittentts.KittenTTS = fake_cls + + # Stub soundfile — the real package isn't installed in CI venv, and + # _generate_kittentts does `import soundfile as sf` at runtime. + fake_sf = MagicMock() + def _fake_write(path, audio, samplerate): + # Emulate writing a real file so downstream path checks succeed. 
+ import pathlib + pathlib.Path(path).write_bytes(b"RIFF\x00\x00\x00\x00WAVEfmt fake") + fake_sf.write = _fake_write + + with patch.dict( + "sys.modules", + {"kittentts": fake_kittentts, "soundfile": fake_sf}, + ): + yield fake_model, fake_cls + + +class TestGenerateKittenTts: + def test_successful_wav_generation(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + fake_model, fake_cls = mock_kittentts_module + output_path = str(tmp_path / "test.wav") + result = _generate_kittentts("Hello world", output_path, {}) + + assert result == output_path + assert (tmp_path / "test.wav").exists() + fake_cls.assert_called_once() + fake_model.generate.assert_called_once() + + def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + fake_model, _ = mock_kittentts_module + config = { + "kittentts": { + "model": "KittenML/kitten-tts-mini-0.8", + "voice": "Luna", + "speed": 1.25, + "clean_text": False, + } + } + _generate_kittentts("Hi there", str(tmp_path / "out.wav"), config) + + call_kwargs = fake_model.generate.call_args.kwargs + assert call_kwargs["voice"] == "Luna" + assert call_kwargs["speed"] == 1.25 + assert call_kwargs["clean_text"] is False + + def test_default_model_and_voice(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import ( + DEFAULT_KITTENTTS_MODEL, + DEFAULT_KITTENTTS_VOICE, + _generate_kittentts, + ) + + fake_model, fake_cls = mock_kittentts_module + _generate_kittentts("Hi", str(tmp_path / "out.wav"), {}) + + fake_cls.assert_called_once_with(DEFAULT_KITTENTTS_MODEL) + assert fake_model.generate.call_args.kwargs["voice"] == DEFAULT_KITTENTTS_VOICE + + def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + _, fake_cls = mock_kittentts_module + _generate_kittentts("One", str(tmp_path / "a.wav"), {}) + _generate_kittentts("Two", str(tmp_path / "b.wav"), 
{}) + + # Same model name → class instantiated exactly once + assert fake_cls.call_count == 1 + + def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + _, fake_cls = mock_kittentts_module + _generate_kittentts( + "A", str(tmp_path / "a.wav"), + {"kittentts": {"model": "KittenML/kitten-tts-nano-0.8-int8"}}, + ) + _generate_kittentts( + "B", str(tmp_path / "b.wav"), + {"kittentts": {"model": "KittenML/kitten-tts-mini-0.8"}}, + ) + + assert fake_cls.call_count == 2 + + def test_non_wav_extension_triggers_ffmpeg_conversion( + self, tmp_path, mock_kittentts_module, monkeypatch + ): + """Non-.wav output path causes WAV → target ffmpeg conversion.""" + from tools import tts_tool as _tt + + calls = [] + + def fake_shutil_which(cmd): + return "/usr/bin/ffmpeg" if cmd == "ffmpeg" else None + + def fake_run(cmd, check=False, timeout=None, **kw): + calls.append(cmd) + # Emulate ffmpeg writing the output file + import pathlib + out_path = cmd[-1] + pathlib.Path(out_path).write_bytes(b"fake-mp3-data") + return MagicMock(returncode=0) + + monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which) + monkeypatch.setattr(_tt.subprocess, "run", fake_run) + + output_path = str(tmp_path / "test.mp3") + result = _tt._generate_kittentts("Hi", output_path, {}) + + assert result == output_path + assert len(calls) == 1 + assert calls[0][0] == "/usr/bin/ffmpeg" + + def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch): + """When kittentts package is not installed, _import_kittentts raises.""" + import sys + monkeypatch.setitem(sys.modules, "kittentts", None) + from tools.tts_tool import _generate_kittentts + + with pytest.raises((ImportError, TypeError)): + _generate_kittentts("Hi", str(tmp_path / "out.wav"), {}) + + +class TestCheckKittenttsAvailable: + def test_reports_available_when_package_present(self, monkeypatch): + import importlib.util + from tools.tts_tool import 
_check_kittentts_available + + fake_spec = MagicMock() + monkeypatch.setattr( + importlib.util, "find_spec", + lambda name: fake_spec if name == "kittentts" else None, + ) + assert _check_kittentts_available() is True + + def test_reports_unavailable_when_package_missing(self, monkeypatch): + import importlib.util + from tools.tts_tool import _check_kittentts_available + + monkeypatch.setattr(importlib.util, "find_spec", lambda name: None) + assert _check_kittentts_available() is False + + +class TestDispatcherBranch: + def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path): + """When provider=kittentts but package missing, return JSON error with setup hint.""" + import sys + monkeypatch.setitem(sys.modules, "kittentts", None) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools.tts_tool import text_to_speech_tool + + # Write a config telling it to use kittentts + import yaml + (tmp_path / "config.yaml").write_text( + yaml.safe_dump({"tts": {"provider": "kittentts"}}) + ) + + result = json.loads(text_to_speech_tool(text="Hello")) + assert result["success"] is False + assert "kittentts" in result["error"].lower() + assert "hermes setup tts" in result["error"].lower() diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index da500996a1..e7d8811e02 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -933,6 +933,58 @@ class TestEnableVoiceModeReal: assert cli._voice_mode is True +class TestVoiceBeepConfigReal: + """Tests the CLI voice beep toggle.""" + + @patch("hermes_cli.config.load_config", return_value={"voice": {}}) + def test_beeps_enabled_by_default(self, _cfg): + cli = _make_voice_cli() + assert cli._voice_beeps_enabled() is True + + @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}}) + def test_beeps_can_be_disabled(self, _cfg): + cli = _make_voice_cli() + assert 
cli._voice_beeps_enabled() is False + + @patch("cli._cprint") + @patch("cli.threading.Thread") + @patch("tools.voice_mode.play_beep") + @patch("tools.voice_mode.create_audio_recorder") + @patch( + "tools.voice_mode.check_voice_requirements", + return_value={ + "available": True, + "audio_available": True, + "stt_available": True, + "details": "OK", + "missing_packages": [], + }, + ) + @patch( + "hermes_cli.config.load_config", + return_value={ + "voice": { + "beep_enabled": False, + "silence_threshold": 200, + "silence_duration": 3.0, + } + }, + ) + def test_start_recording_skips_beep_when_disabled( + self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp + ): + recorder = MagicMock() + recorder.supports_silence_autostop = True + mock_create.return_value = recorder + mock_thread.return_value = MagicMock(start=MagicMock()) + + cli = _make_voice_cli() + cli._voice_start_recording() + + recorder.start.assert_called_once() + mock_beep.assert_not_called() + + class TestDisableVoiceModeReal: """Tests _disable_voice_mode with real CLI instance.""" @@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal: cli._voice_stop_and_transcribe() assert cli._pending_input.empty() + @patch("cli._cprint") + @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}}) + @patch("tools.voice_mode.play_beep") + def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp): + recorder = MagicMock() + recorder.stop.return_value = None + cli = _make_voice_cli(_voice_recording=True, _voice_recorder=recorder) + cli._voice_stop_and_transcribe() + mock_beep.assert_not_called() + @patch("cli._cprint") @patch("cli.os.unlink") @patch("cli.os.path.isfile", return_value=True) diff --git a/tools/env_passthrough.py b/tools/env_passthrough.py index b4686cb13f..07bf333a60 100644 --- a/tools/env_passthrough.py +++ b/tools/env_passthrough.py @@ -44,16 +44,59 @@ def _get_allowed() -> set[str]: _config_passthrough: frozenset[str] | None = None +def 
_is_hermes_provider_credential(name: str) -> bool: + """True if ``name`` is a Hermes-managed provider credential (API key, + token, or similar) per ``_HERMES_PROVIDER_ENV_BLOCKLIST``. + + Skill-declared ``required_environment_variables`` frontmatter must + not be able to override this list — that was the bypass in + GHSA-rhgp-j443-p4rf where a malicious skill registered + ``ANTHROPIC_TOKEN`` / ``OPENAI_API_KEY`` as passthrough and received + the credential in the ``execute_code`` child process, defeating the + sandbox's scrubbing guarantee. + + Non-Hermes API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT + in the blocklist and remain legitimately registerable — skills that + wrap third-party APIs still work. + """ + try: + from tools.environments.local import _HERMES_PROVIDER_ENV_BLOCKLIST + except Exception: + return False + return name in _HERMES_PROVIDER_ENV_BLOCKLIST + + def register_env_passthrough(var_names: Iterable[str]) -> None: """Register environment variable names as allowed in sandboxed environments. Typically called when a skill declares ``required_environment_variables``. + + Variables that are Hermes-managed provider credentials (from + ``_HERMES_PROVIDER_ENV_BLOCKLIST``) are rejected here to preserve + the ``execute_code`` sandbox's credential-scrubbing guarantee per + GHSA-rhgp-j443-p4rf. A skill that needs to talk to a Hermes-managed + provider should do so via the agent's main-process tools (web_search, + web_extract, etc.) where the credential remains safely in the main + process. + + Non-Hermes third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) + pass through normally — they were never in the sandbox scrub list. 
""" for name in var_names: name = name.strip() - if name: - _get_allowed().add(name) - logger.debug("env passthrough: registered %s", name) + if not name: + continue + if _is_hermes_provider_credential(name): + logger.warning( + "env passthrough: refusing to register Hermes provider " + "credential %r (blocked by _HERMES_PROVIDER_ENV_BLOCKLIST). " + "Skills must not override the execute_code sandbox's " + "credential scrubbing; see GHSA-rhgp-j443-p4rf.", + name, + ) + continue + _get_allowed().add(name) + logger.debug("env passthrough: registered %s", name) def _load_config_passthrough() -> frozenset[str]: diff --git a/tools/environments/local.py b/tools/environments/local.py index a1ab676d30..06fd66a2d0 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -213,6 +213,77 @@ def _make_run_env(env: dict) -> dict: return run_env +def _read_terminal_shell_init_config() -> tuple[list[str], bool]: + """Return (shell_init_files, auto_source_bashrc) from config.yaml. + + Best-effort — returns sensible defaults on any failure so terminal + execution never breaks because the config file is unreadable. + """ + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + terminal_cfg = cfg.get("terminal") or {} + files = terminal_cfg.get("shell_init_files") or [] + if not isinstance(files, list): + files = [] + auto_bashrc = bool(terminal_cfg.get("auto_source_bashrc", True)) + return [str(f) for f in files if f], auto_bashrc + except Exception: + return [], True + + +def _resolve_shell_init_files() -> list[str]: + """Resolve the list of files to source before the login-shell snapshot. + + Expands ``~`` and ``${VAR}`` references and drops anything that doesn't + exist on disk, so a missing ``~/.bashrc`` never breaks the snapshot. + The ``auto_source_bashrc`` path runs only when the user hasn't supplied + an explicit list — once they have, Hermes trusts them. 
+ """ + explicit, auto_bashrc = _read_terminal_shell_init_config() + + candidates: list[str] = [] + if explicit: + candidates.extend(explicit) + elif auto_bashrc and not _IS_WINDOWS: + # Bash's login-shell invocation does NOT source ~/.bashrc by default, + # so tools like nvm / asdf / pyenv that self-install there stay + # invisible to the snapshot without this nudge. + candidates.append("~/.bashrc") + + resolved: list[str] = [] + for raw in candidates: + try: + path = os.path.expandvars(os.path.expanduser(raw)) + except Exception: + continue + if path and os.path.isfile(path): + resolved.append(path) + return resolved + + +def _prepend_shell_init(cmd_string: str, files: list[str]) -> str: + """Prepend ``source `` lines (guarded + silent) to a bash script. + + Each file is wrapped so a failing rc file doesn't abort the whole + bootstrap: ``set +e`` keeps going on errors, ``2>/dev/null`` hides + noisy prompts, and ``|| true`` neutralises the exit status. + """ + if not files: + return cmd_string + + prelude_parts = ["set +e"] + for path in files: + # shlex.quote isn't available here without an import; the files list + # comes from os.path.expanduser output so it's a concrete absolute + # path. Escape single quotes defensively anyway. + safe = path.replace("'", "'\\''") + prelude_parts.append(f"[ -r '{safe}' ] && . '{safe}' 2>/dev/null || true") + prelude = "\n".join(prelude_parts) + "\n" + return prelude + cmd_string + + class LocalEnvironment(BaseEnvironment): """Run commands directly on the host machine. @@ -255,6 +326,16 @@ class LocalEnvironment(BaseEnvironment): timeout: int = 120, stdin_data: str | None = None) -> subprocess.Popen: bash = _find_bash() + # For login-shell invocations (used by init_session to build the + # environment snapshot), prepend sources for the user's bashrc / + # custom init files so tools registered outside bash_profile + # (nvm, asdf, pyenv, …) end up on PATH in the captured snapshot. 
+ # Non-login invocations are already sourcing the snapshot and + # don't need this. + if login: + init_files = _resolve_shell_init_files() + if init_files: + cmd_string = _prepend_shell_init(cmd_string, init_files) args = [bash, "-l", "-c", cmd_string] if login else [bash, "-c", cmd_string] run_env = _make_run_env(self.env) diff --git a/tools/file_operations.py b/tools/file_operations.py index 8c3897bb2b..87ad139689 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -35,6 +35,13 @@ from pathlib import Path from hermes_constants import get_hermes_home from tools.binary_extensions import BINARY_EXTENSIONS +from agent.file_safety import ( + build_write_denied_paths, + build_write_denied_prefixes, + get_safe_write_root as _shared_get_safe_write_root, + is_write_denied as _shared_is_write_denied, +) + # --------------------------------------------------------------------------- # Write-path deny list — blocks writes to sensitive system/credential files @@ -42,41 +49,9 @@ from tools.binary_extensions import BINARY_EXTENSIONS _HOME = str(Path.home()) -WRITE_DENIED_PATHS = { - os.path.realpath(p) for p in [ - os.path.join(_HOME, ".ssh", "authorized_keys"), - os.path.join(_HOME, ".ssh", "id_rsa"), - os.path.join(_HOME, ".ssh", "id_ed25519"), - os.path.join(_HOME, ".ssh", "config"), - str(get_hermes_home() / ".env"), - os.path.join(_HOME, ".bashrc"), - os.path.join(_HOME, ".zshrc"), - os.path.join(_HOME, ".profile"), - os.path.join(_HOME, ".bash_profile"), - os.path.join(_HOME, ".zprofile"), - os.path.join(_HOME, ".netrc"), - os.path.join(_HOME, ".pgpass"), - os.path.join(_HOME, ".npmrc"), - os.path.join(_HOME, ".pypirc"), - "/etc/sudoers", - "/etc/passwd", - "/etc/shadow", - ] -} +WRITE_DENIED_PATHS = build_write_denied_paths(_HOME) -WRITE_DENIED_PREFIXES = [ - os.path.realpath(p) + os.sep for p in [ - os.path.join(_HOME, ".ssh"), - os.path.join(_HOME, ".aws"), - os.path.join(_HOME, ".gnupg"), - os.path.join(_HOME, ".kube"), - "/etc/sudoers.d", - 
"/etc/systemd", - os.path.join(_HOME, ".docker"), - os.path.join(_HOME, ".azure"), - os.path.join(_HOME, ".config", "gh"), - ] -] +WRITE_DENIED_PREFIXES = build_write_denied_prefixes(_HOME) def _get_safe_write_root() -> Optional[str]: @@ -87,33 +62,12 @@ def _get_safe_write_root() -> Optional[str]: not on the static deny list. Opt-in hardening for gateway/messaging deployments that should only touch a workspace checkout. """ - root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") - if not root: - return None - try: - return os.path.realpath(os.path.expanduser(root)) - except Exception: - return None + return _shared_get_safe_write_root() def _is_write_denied(path: str) -> bool: """Return True if path is on the write deny list.""" - resolved = os.path.realpath(os.path.expanduser(str(path))) - - # 1) Static deny list - if resolved in WRITE_DENIED_PATHS: - return True - for prefix in WRITE_DENIED_PREFIXES: - if resolved.startswith(prefix): - return True - - # 2) Optional safe-root sandbox - safe_root = _get_safe_write_root() - if safe_root: - if not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): - return True - - return False + return _shared_is_write_denied(path) # ============================================================================= @@ -784,12 +738,14 @@ class ShellFileOperations(FileOperations): content, old_string, new_string, replace_all ) - if error: - return PatchResult(error=error) - - if match_count == 0: - return PatchResult(error=f"Could not find match for old_string in {path}") - + if error or match_count == 0: + err_msg = error or f"Could not find match for old_string in {path}" + try: + from tools.fuzzy_match import format_no_match_hint + err_msg += format_no_match_hint(err_msg, match_count, old_string, content) + except Exception: + pass + return PatchResult(error=err_msg) # Write back write_result = self.write_file(path, new_content) if write_result.error: diff --git a/tools/file_tools.py b/tools/file_tools.py index 
3b2044c9da..5b44ff03d3 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -7,6 +7,9 @@ import logging import os import threading from pathlib import Path +from typing import Optional + +from agent.file_safety import get_read_block_error from tools.binary_extensions import has_binary_extension from tools.file_operations import ShellFileOperations from agent.redact import redact_sensitive_text @@ -373,24 +376,9 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # ── Hermes internal path guard ──────────────────────────────── # Prevent prompt injection via catalog or hub metadata files. - from hermes_constants import get_hermes_home as _get_hh - _hermes_home = _get_hh().resolve() - _blocked_dirs = [ - _hermes_home / "skills" / ".hub" / "index-cache", - _hermes_home / "skills" / ".hub", - ] - for _blocked in _blocked_dirs: - try: - _resolved.relative_to(_blocked) - return json.dumps({ - "error": ( - f"Access denied: {path} is an internal Hermes cache file " - "and cannot be read directly to prevent prompt injection. " - "Use the skills_list or skill_view tools instead." - ) - }) - except ValueError: - pass + block_error = get_read_block_error(path) + if block_error: + return json.dumps({"error": block_error}) # ── Dedup check ─────────────────────────────────────────────── # If we already read this exact (path, offset, limit) and the @@ -682,8 +670,11 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, result_json = json.dumps(result_dict, ensure_ascii=False) # Hint when old_string not found — saves iterations where the agent # retries with stale content instead of re-reading the file. + # Suppressed when patch_replace already attached a rich "Did you mean?" + # snippet (which is strictly more useful than the generic hint). if result_dict.get("error") and "Could not find" in str(result_dict["error"]): - result_json += "\n\n[Hint: old_string not found. 
Use read_file to verify the current content, or search_files to locate the text.]" + if "Did you mean one of these sections?" not in str(result_dict["error"]): + result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]" return result_json except Exception as e: return tool_error(str(e)) diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py index a9dc4272ef..9a922cd9b3 100644 --- a/tools/fuzzy_match.py +++ b/tools/fuzzy_match.py @@ -619,3 +619,86 @@ def _map_normalized_positions(original: str, normalized: str, original_matches.append((orig_start, min(orig_end, len(original)))) return original_matches + + +def find_closest_lines(old_string: str, content: str, context_lines: int = 2, max_results: int = 3) -> str: + """Find lines in content most similar to old_string for "did you mean?" feedback. + + Returns a formatted string showing the closest matching lines with context, + or empty string if no useful match is found. 
+ """ + if not old_string or not content: + return "" + + old_lines = old_string.splitlines() + content_lines = content.splitlines() + + if not old_lines or not content_lines: + return "" + + # Use first line of old_string as anchor for search + anchor = old_lines[0].strip() + if not anchor: + # Try second line if first is blank + candidates = [l.strip() for l in old_lines if l.strip()] + if not candidates: + return "" + anchor = candidates[0] + + # Score each line in content by similarity to anchor + scored = [] + for i, line in enumerate(content_lines): + stripped = line.strip() + if not stripped: + continue + ratio = SequenceMatcher(None, anchor, stripped).ratio() + if ratio > 0.3: + scored.append((ratio, i)) + + if not scored: + return "" + + # Take top matches + scored.sort(key=lambda x: -x[0]) + top = scored[:max_results] + + parts = [] + seen_ranges = set() + for _, line_idx in top: + start = max(0, line_idx - context_lines) + end = min(len(content_lines), line_idx + len(old_lines) + context_lines) + key = (start, end) + if key in seen_ranges: + continue + seen_ranges.add(key) + snippet = "\n".join( + f"{start + j + 1:4d}| {content_lines[start + j]}" + for j in range(end - start) + ) + parts.append(snippet) + + if not parts: + return "" + + return "\n---\n".join(parts) + + +def format_no_match_hint(error: Optional[str], match_count: int, + old_string: str, content: str) -> str: + """Return a '\\n\\nDid you mean...' snippet for plain no-match errors. + + Gated so the hint only fires for actual "old_string not found" failures. + Ambiguous-match ("Found N matches"), escape-drift, and identical-strings + errors all have ``match_count == 0`` but a "did you mean?" snippet would + be misleading — those failed for unrelated reasons. + + Returns an empty string when there's nothing useful to append. 
+ """ + if match_count != 0: + return "" + if not error or not error.startswith("Could not find"): + return "" + hint = find_closest_lines(old_string, content) + if not hint: + return "" + return "\n\nDid you mean one of these sections?\n" + hint diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index 858dfa2156..2865a10c0f 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -34,7 +34,11 @@ import httpx from tools.debug_helpers import DebugSession from tools.managed_tool_gateway import resolve_managed_tool_gateway -from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway +from tools.tool_backend_helpers import ( + fal_key_is_configured, + managed_nous_tools_enabled, + prefers_gateway, +) logger = logging.getLogger(__name__) @@ -287,7 +291,7 @@ _managed_fal_client_lock = threading.Lock() def _resolve_managed_fal_gateway(): """Return managed fal-queue gateway config when the user prefers the gateway or direct FAL credentials are absent.""" - if os.getenv("FAL_KEY") and not prefers_gateway("image_gen"): + if fal_key_is_configured() and not prefers_gateway("image_gen"): return None return resolve_managed_tool_gateway("fal-queue") @@ -630,7 +634,7 @@ def image_generate_tool( if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0: raise ValueError("Prompt is required and must be a non-empty string") - if not (os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()): + if not (fal_key_is_configured() or _resolve_managed_fal_gateway()): message = "FAL_KEY environment variable not set" if managed_nous_tools_enabled(): message += " and managed FAL gateway is unavailable" @@ -741,7 +745,7 @@ def image_generate_tool( def check_fal_api_key() -> bool: """True if the FAL.ai API key (direct or managed gateway) is available.""" - return bool(os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()) + return bool(fal_key_is_configured() or _resolve_managed_fal_gateway()) def 
check_image_generation_requirements() -> bool: diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index cdd1035594..a7c8313ad2 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1249,9 +1249,47 @@ _servers: Dict[str, MCPServerTask] = {} # _CIRCUIT_BREAKER_THRESHOLD consecutive failures, the handler returns # a "server unreachable" message that tells the model to stop retrying, # preventing the 90-iteration burn loop described in #10447. -# Reset to 0 on any successful call. +# +# State machine: +# closed — error count below threshold; all calls go through. +# open — threshold reached; calls short-circuit until the +# cooldown elapses. +# half-open — cooldown elapsed; the next call is a probe that +# actually hits the session. Probe success → closed. +# Probe failure → reopens (cooldown re-armed). +# +# ``_server_breaker_opened_at`` records the monotonic timestamp when +# the breaker most recently transitioned into the open state. Use the +# ``_bump_server_error`` / ``_reset_server_error`` helpers to mutate +# this state — they keep the count and timestamp in sync. _server_error_counts: Dict[str, int] = {} +_server_breaker_opened_at: Dict[str, float] = {} _CIRCUIT_BREAKER_THRESHOLD = 3 +_CIRCUIT_BREAKER_COOLDOWN_SEC = 60.0 + + +def _bump_server_error(server_name: str) -> None: + """Increment the consecutive-failure count for ``server_name``. + + When the count crosses :data:`_CIRCUIT_BREAKER_THRESHOLD`, stamp the + breaker-open timestamp so the cooldown clock starts (or re-starts, + for probe failures in the half-open state). + """ + n = _server_error_counts.get(server_name, 0) + 1 + _server_error_counts[server_name] = n + if n >= _CIRCUIT_BREAKER_THRESHOLD: + _server_breaker_opened_at[server_name] = time.monotonic() + + +def _reset_server_error(server_name: str) -> None: + """Fully close the breaker for ``server_name``. + + Clears both the failure count and the breaker-open timestamp. 
Call + this on any unambiguous success signal (successful tool call, + successful reconnect, manual /mcp refresh). + """ + _server_error_counts[server_name] = 0 + _server_breaker_opened_at.pop(server_name, None) # --------------------------------------------------------------------------- # Auth-failure detection helpers (Task 6 of MCP OAuth consolidation) @@ -1391,15 +1429,25 @@ def _handle_auth_error_and_retry( break time.sleep(0.25) + # A successful OAuth recovery is independent evidence that the + # server is viable again, so close the circuit breaker here — + # not only on retry success. Without this, a reconnect + # followed by a failing retry would leave the breaker pinned + # above threshold forever (the retry-exception branch below + # bumps the count again). The post-reset retry still goes + # through _bump_server_error on failure, so a genuinely broken + # server will re-trip the breaker as normal. + _reset_server_error(server_name) + try: result = retry_call() try: parsed = json.loads(result) if "error" not in parsed: - _server_error_counts[server_name] = 0 + _reset_server_error(server_name) return result except (json.JSONDecodeError, TypeError): - _server_error_counts[server_name] = 0 + _reset_server_error(server_name) return result except Exception as retry_exc: logger.warning( @@ -1410,7 +1458,7 @@ def _handle_auth_error_and_retry( # No recovery available, or retry also failed: surface a structured # needs_reauth error. Bumps the circuit breaker so the model stops # retrying the tool. - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) return json.dumps({ "error": ( f"MCP server '{server_name}' requires re-authentication. 
" @@ -1612,20 +1660,33 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): # Circuit breaker: if this server has failed too many times # consecutively, short-circuit with a clear message so the model # stops retrying and uses alternative approaches (#10447). + # + # Once the cooldown elapses, the breaker transitions to + # half-open: we let the *next* call through as a probe. On + # success the success-path below resets the breaker; on + # failure the error paths below bump the count again, which + # re-stamps the open-time via _bump_server_error (re-arming + # the cooldown). if _server_error_counts.get(server_name, 0) >= _CIRCUIT_BREAKER_THRESHOLD: - return json.dumps({ - "error": ( - f"MCP server '{server_name}' is unreachable after " - f"{_CIRCUIT_BREAKER_THRESHOLD} consecutive failures. " - f"Do NOT retry this tool — use alternative approaches " - f"or ask the user to check the MCP server." - ) - }, ensure_ascii=False) + opened_at = _server_breaker_opened_at.get(server_name, 0.0) + age = time.monotonic() - opened_at + if age < _CIRCUIT_BREAKER_COOLDOWN_SEC: + remaining = max(1, int(_CIRCUIT_BREAKER_COOLDOWN_SEC - age)) + return json.dumps({ + "error": ( + f"MCP server '{server_name}' is unreachable after " + f"{_server_error_counts[server_name]} consecutive " + f"failures. Auto-retry available in ~{remaining}s. " + f"Do NOT retry this tool yet — use alternative " + f"approaches or ask the user to check the MCP server." + ) + }, ensure_ascii=False) + # Cooldown elapsed → fall through as a half-open probe. 
with _lock: server = _servers.get(server_name) if not server or not server.session: - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) return json.dumps({ "error": f"MCP server '{server_name}' is not connected" }, ensure_ascii=False) @@ -1674,11 +1735,11 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): try: parsed = json.loads(result) if "error" in parsed: - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) else: - _server_error_counts[server_name] = 0 # success — reset + _reset_server_error(server_name) # success — reset except (json.JSONDecodeError, TypeError): - _server_error_counts[server_name] = 0 # non-JSON = success + _reset_server_error(server_name) # non-JSON = success return result except InterruptedError: return _interrupted_call_result() @@ -1693,7 +1754,7 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): if recovered is not None: return recovered - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) logger.error( "MCP tool %s/%s call failed: %s", server_name, tool_name, exc, diff --git a/tools/patch_parser.py b/tools/patch_parser.py index dd52ff92c3..dc6034f182 100644 --- a/tools/patch_parser.py +++ b/tools/patch_parser.py @@ -293,10 +293,16 @@ def _validate_operations( ) if count == 0: label = f"'{hunk.context_hint}'" if hunk.context_hint else "(no hint)" - errors.append( + msg = ( f"{op.file_path}: hunk {label} not found" + (f" — {match_error}" if match_error else "") ) + try: + from tools.fuzzy_match import format_no_match_hint + msg += format_no_match_hint(match_error, count, search_pattern, simulated) + except Exception: + pass + errors.append(msg) else: # Advance simulation so subsequent hunks validate correctly. # Reuse the result from the call above — no second fuzzy run. 
@@ -540,7 +546,13 @@ def _apply_update(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]: error = None if error: - return False, f"Could not apply hunk: {error}" + err_msg = f"Could not apply hunk: {error}" + try: + from tools.fuzzy_match import format_no_match_hint + err_msg += format_no_match_hint(error, 0, search_pattern, new_content) + except Exception: + pass + return False, err_msg else: # Addition-only hunk (no context or removed lines). # Insert at the location indicated by the context hint, or at end of file. diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 33d3976ea8..493b434c51 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -449,9 +449,15 @@ def _patch_skill( if match_error: # Show a short preview of the file so the model can self-correct preview = content[:500] + ("..." if len(content) > 500 else "") + err_msg = match_error + try: + from tools.fuzzy_match import format_no_match_hint + err_msg += format_no_match_hint(match_error, match_count, old_string, content) + except Exception: + pass return { "success": False, - "error": match_error, + "error": err_msg, "file_preview": preview, } diff --git a/tools/skills_tool.py b/tools/skills_tool.py index dcd1f8c5d1..6ff54230d5 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -507,13 +507,33 @@ def _get_disabled_skill_names() -> Set[str]: return get_disabled_skill_names() +def _get_session_platform() -> str: + """Resolve the current platform from gateway session context. + + Mirrors the platform-resolution logic in + ``agent.skill_utils.get_disabled_skill_names`` so that + ``_is_skill_disabled`` respects ``HERMES_SESSION_PLATFORM``. 
+ """ + try: + from gateway.session_context import get_session_env + return get_session_env("HERMES_SESSION_PLATFORM") or "" + except Exception: + return "" + + def _is_skill_disabled(name: str, platform: str = None) -> bool: - """Check if a skill is disabled in config.""" + """Check if a skill is disabled in config. + + Resolves the active platform from (in order of precedence): + 1. Explicit ``platform`` argument + 2. ``HERMES_PLATFORM`` environment variable + 3. ``HERMES_SESSION_PLATFORM`` from gateway session context + """ try: from hermes_cli.config import load_config config = load_config() skills_cfg = config.get("skills", {}) - resolved_platform = platform or os.getenv("HERMES_PLATFORM") + resolved_platform = platform or os.getenv("HERMES_PLATFORM") or _get_session_platform() if resolved_platform: platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform) if platform_disabled is not None: diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 7a7dc9c1a6..4a2a5fc0be 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -114,22 +114,44 @@ _cached_sudo_password: str = "" # Optional UI callbacks for interactive prompts. When set, these are called # instead of the default /dev/tty or input() readers. The CLI registers these # so prompts route through prompt_toolkit's event loop. -# _sudo_password_callback() -> str (return password or "" to skip) -# _approval_callback(command, description) -> str ("once"/"session"/"always"/"deny") -_sudo_password_callback = None -_approval_callback = None +# Callback slots used by the approval prompt and sudo password prompt +# routines. Stored in thread-local state so overlapping ACP sessions — +# each running in its own ThreadPoolExecutor thread — don't stomp on +# each other's callbacks. See GHSA-qg5c-hvr5-hjgr. +# +# CLI mode is single-threaded, so each thread (the only one) holds its +# own callback exactly like before. 
Gateway mode resolves approvals via +# the per-session queue in tools.approval, not through these callbacks, +# so it's unaffected. +import threading +_callback_tls = threading.local() + + +def _get_sudo_password_callback(): + return getattr(_callback_tls, "sudo_password", None) + + +def _get_approval_callback(): + return getattr(_callback_tls, "approval", None) def set_sudo_password_callback(cb): - """Register a callback for sudo password prompts (used by CLI).""" - global _sudo_password_callback - _sudo_password_callback = cb + """Register a callback for sudo password prompts (used by CLI). + + Per-thread scope — ACP sessions that run concurrently in a + ThreadPoolExecutor each have their own callback slot. + """ + _callback_tls.sudo_password = cb def set_approval_callback(cb): - """Register a callback for dangerous command approval prompts (used by CLI).""" - global _approval_callback - _approval_callback = cb + """Register a callback for dangerous command approval prompts. + + Per-thread scope — ACP sessions that run concurrently in a + ThreadPoolExecutor each have their own callback slot. See + GHSA-qg5c-hvr5-hjgr. + """ + _callback_tls.approval = cb # ============================================================================= # Dangerous Command Approval System @@ -144,7 +166,7 @@ from tools.approval import ( def _check_all_guards(command: str, env_type: str) -> dict: """Delegate to consolidated guard (tirith + dangerous cmd) with CLI callback.""" return _check_all_guards_impl(command, env_type, - approval_callback=_approval_callback) + approval_callback=_get_approval_callback()) # Allowlist: characters that can legitimately appear in directory paths. 
@@ -219,9 +241,10 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str: import sys # Use the registered callback when available (prompt_toolkit-compatible) - if _sudo_password_callback is not None: + _sudo_cb = _get_sudo_password_callback() + if _sudo_cb is not None: try: - return _sudo_password_callback() or "" + return _sudo_cb() or "" except Exception: return "" diff --git a/tools/tool_backend_helpers.py b/tools/tool_backend_helpers.py index a770fe7470..810a51c63d 100644 --- a/tools/tool_backend_helpers.py +++ b/tools/tool_backend_helpers.py @@ -119,3 +119,24 @@ def prefers_gateway(config_section: str) -> bool: except Exception: pass return False + + +def fal_key_is_configured() -> bool: + """Return True when FAL_KEY is set to a non-whitespace value. + + Consults both ``os.environ`` and ``~/.hermes/.env`` (via + ``hermes_cli.config.get_env_value`` when available) so tool-side + checks and CLI setup-time checks agree. A whitespace-only value + is treated as unset everywhere. + """ + value = os.getenv("FAL_KEY") + if value is None: + # Fall back to the .env file for CLI paths that may run before + # dotenv is loaded into os.environ. + try: + from hermes_cli.config import get_env_value + + value = get_env_value("FAL_KEY") + except Exception: + value = None + return bool(value and value.strip()) diff --git a/tools/tts_tool.py b/tools/tts_tool.py index adc6524c46..b83fa4d73e 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -79,6 +79,12 @@ def _import_sounddevice(): return sd +def _import_kittentts(): + """Lazy import KittenTTS. 
Returns the class or raises ImportError.""" + from kittentts import KittenTTS + return KittenTTS + + # =========================================================================== # Defaults # =========================================================================== @@ -88,6 +94,8 @@ DEFAULT_ELEVENLABS_VOICE_ID = "pNInz6obpgDQGcFmaJgB" # Adam DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2" DEFAULT_ELEVENLABS_STREAMING_MODEL_ID = "eleven_flash_v2_5" DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts" +DEFAULT_KITTENTTS_MODEL = "KittenML/kitten-tts-nano-0.8-int8" # 25MB +DEFAULT_KITTENTTS_VOICE = "Jasper" DEFAULT_OPENAI_VOICE = "alloy" DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1" DEFAULT_MINIMAX_MODEL = "speech-2.8-hd" @@ -695,6 +703,15 @@ def _check_neutts_available() -> bool: return False +def _check_kittentts_available() -> bool: + """Check if the kittentts engine is importable (installed locally).""" + try: + import importlib.util + return importlib.util.find_spec("kittentts") is not None + except Exception: + return False + + def _default_neutts_ref_audio() -> str: """Return path to the bundled default voice reference audio.""" return str(Path(__file__).parent / "neutts_samples" / "jo.wav") @@ -758,6 +775,69 @@ def _generate_neutts(text: str, output_path: str, tts_config: Dict[str, Any]) -> return output_path +# =========================================================================== +# Provider: KittenTTS (local, lightweight) +# =========================================================================== + +# Module-level cache for KittenTTS model instance +_kittentts_model_cache: Dict[str, Any] = {} + + +def _generate_kittentts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str: + """Generate speech using KittenTTS local ONNX model. + + KittenTTS is a lightweight TTS engine (25-80MB models) that runs + entirely on CPU without requiring a GPU or API key. + + Args: + text: Text to convert to speech. 
+ output_path: Where to save the audio file. + tts_config: TTS config dict. + + Returns: + Path to the saved audio file. + """ + KittenTTS = _import_kittentts() + kt_config = tts_config.get("kittentts", {}) + model_name = kt_config.get("model", DEFAULT_KITTENTTS_MODEL) + voice = kt_config.get("voice", DEFAULT_KITTENTTS_VOICE) + speed = kt_config.get("speed", 1.0) + clean_text = kt_config.get("clean_text", True) + + # Use cached model instance if available + global _kittentts_model_cache + if model_name not in _kittentts_model_cache: + logger.info("[KittenTTS] Loading model: %s", model_name) + _kittentts_model_cache[model_name] = KittenTTS(model_name) + logger.info("[KittenTTS] Model loaded successfully") + + model = _kittentts_model_cache[model_name] + + # Generate audio (returns numpy array at 24kHz) + audio = model.generate(text, voice=voice, speed=speed, clean_text=clean_text) + + # Save as WAV + import soundfile as sf + wav_path = output_path + if not output_path.endswith(".wav"): + wav_path = output_path.rsplit(".", 1)[0] + ".wav" + + sf.write(wav_path, audio, 24000) + + # Convert to desired format if needed + if wav_path != output_path: + ffmpeg = shutil.which("ffmpeg") + if ffmpeg: + conv_cmd = [ffmpeg, "-i", wav_path, "-y", "-loglevel", "error", output_path] + subprocess.run(conv_cmd, check=True, timeout=30) + os.remove(wav_path) + else: + # No ffmpeg — rename the WAV to the expected path + os.rename(wav_path, output_path) + + return output_path + + # =========================================================================== # Main tool function # =========================================================================== @@ -877,6 +957,19 @@ def text_to_speech_tool( logger.info("Generating speech with NeuTTS (local)...") _generate_neutts(text, file_str, tts_config) + elif provider == "kittentts": + try: + _import_kittentts() + except ImportError: + return json.dumps({ + "success": False, + "error": "KittenTTS provider selected but 'kittentts' package not 
installed. " + "Run 'hermes setup tts' and choose KittenTTS, or install manually: " + "pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl" + }, ensure_ascii=False) + logger.info("Generating speech with KittenTTS (local, ~25MB)...") + _generate_kittentts(text, file_str, tts_config) + else: # Default: Edge TTS (free), with NeuTTS as local fallback edge_available = True @@ -914,9 +1007,9 @@ def text_to_speech_tool( }, ensure_ascii=False) # Try Opus conversion for Telegram compatibility - # Edge TTS outputs MP3, NeuTTS outputs WAV — both need ffmpeg conversion + # Edge TTS outputs MP3, NeuTTS/KittenTTS output WAV — all need ffmpeg conversion voice_compatible = False - if provider in ("edge", "neutts", "minimax", "xai") and not file_str.endswith(".ogg"): + if provider in ("edge", "neutts", "minimax", "xai", "kittentts") and not file_str.endswith(".ogg"): opus_path = _convert_to_opus(file_str) if opus_path: file_str = opus_path @@ -1001,6 +1094,8 @@ def check_tts_requirements() -> bool: pass if _check_neutts_available(): return True + if _check_kittentts_available(): + return True return False diff --git a/utils.py b/utils.py index 6b998e2230..f3d38006d1 100644 --- a/utils.py +++ b/utils.py @@ -197,6 +197,39 @@ def env_bool(key: str, default: bool = False) -> bool: return is_truthy_value(os.getenv(key, ""), default=default) +# ─── Proxy Helpers ──────────────────────────────────────────────────────────── + + +_PROXY_ENV_KEYS = ( + "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", +) + + +def normalize_proxy_url(proxy_url: str | None) -> str | None: + """Normalize proxy URLs for httpx/aiohttp compatibility. + + WSL/Clash-style environments often export SOCKS proxies as + ``socks://127.0.0.1:PORT``. httpx rejects that alias and expects the + explicit ``socks5://`` scheme instead. 
+ """ + candidate = str(proxy_url or "").strip() + if not candidate: + return None + if candidate.lower().startswith("socks://"): + return f"socks5://{candidate[len('socks://'):]}" + return candidate + + +def normalize_proxy_env_vars() -> None: + """Rewrite supported proxy env vars to canonical URL forms in-place.""" + for key in _PROXY_ENV_KEYS: + value = os.getenv(key, "") + normalized = normalize_proxy_url(value) + if normalized and normalized != value: + os.environ[key] = normalized + + # ─── URL Parsing Helpers ────────────────────────────────────────────────────── @@ -236,4 +269,3 @@ def base_url_host_matches(base_url: str, domain: str) -> bool: if not domain: return False return hostname == domain or hostname.endswith("." + domain) - diff --git a/website/docs/developer-guide/creating-skills.md b/website/docs/developer-guide/creating-skills.md index 9fdb7fd115..43f088a9a3 100644 --- a/website/docs/developer-guide/creating-skills.md +++ b/website/docs/developer-guide/creating-skills.md @@ -272,6 +272,45 @@ Put the most common workflow first. Edge cases and advanced usage go at the bott For XML/JSON parsing or complex logic, include helper scripts in `scripts/` — don't expect the LLM to write parsers inline every time. 
+#### Referencing bundled scripts from SKILL.md + +When a skill is loaded, the activation message exposes the absolute skill directory as `[Skill directory: /abs/path]` and also substitutes two template tokens anywhere in the SKILL.md body: + +| Token | Replaced with | +|---|---| +| `${HERMES_SKILL_DIR}` | Absolute path to the skill's directory | +| `${HERMES_SESSION_ID}` | The active session id (left in place if there is no session) | + +So a SKILL.md can tell the agent to run a bundled script directly with: + +```markdown +To analyse the input, run: + + node ${HERMES_SKILL_DIR}/scripts/analyse.js +``` + +The agent sees the substituted absolute path and invokes the `terminal` tool with a ready-to-run command — no path math, no extra `skill_view` round-trip. Disable substitution globally with `skills.template_vars: false` in `config.yaml`. + +#### Inline shell snippets (opt-in) + +Skills can also embed inline shell snippets written as `` !`cmd` `` in the SKILL.md body. When enabled, each snippet's stdout is inlined into the message before the agent reads it, so skills can inject dynamic context: + +```markdown +Current date: !`date -u +%Y-%m-%d` +Git branch: !`git -C ${HERMES_SKILL_DIR} rev-parse --abbrev-ref HEAD` +``` + +This is **off by default** — any snippet in a SKILL.md runs on the host without approval, so only enable it for skill sources you trust: + +```yaml +# config.yaml +skills: + inline_shell: true + inline_shell_timeout: 10 # seconds per snippet +``` + +Snippets run with the skill directory as their working directory, and output is capped at 4000 characters. Failures (timeouts, non-zero exits) show up as a short `[inline-shell error: ...]` marker instead of breaking the whole skill. 
+ ### Test It Run the skill and verify the agent follows the instructions correctly: diff --git a/website/docs/guides/use-voice-mode-with-hermes.md b/website/docs/guides/use-voice-mode-with-hermes.md index 42b3355595..d43c0a0182 100644 --- a/website/docs/guides/use-voice-mode-with-hermes.md +++ b/website/docs/guides/use-voice-mode-with-hermes.md @@ -164,6 +164,7 @@ voice: record_key: "ctrl+b" max_recording_seconds: 120 auto_tts: false + beep_enabled: true silence_threshold: 200 silence_duration: 3.0 diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md index 132a4d00a9..8a8b9df414 100644 --- a/website/docs/reference/faq.md +++ b/website/docs/reference/faq.md @@ -160,6 +160,33 @@ brew install python@3.12 # macOS The installer handles this automatically — if you see this error during manual installation, upgrade Python first. +#### Terminal commands say `node: command not found` (or `nvm`, `pyenv`, `asdf`, …) + +**Cause:** Hermes builds a per-session environment snapshot by running `bash -l` once at startup. A bash login shell reads `/etc/profile`, `~/.bash_profile`, and `~/.profile`, but **does not source `~/.bashrc`** — so tools that install themselves there (`nvm`, `asdf`, `pyenv`, `cargo`, custom `PATH` exports) stay invisible to the snapshot. This most commonly happens when Hermes runs under systemd or in a minimal shell where nothing has pre-loaded the interactive shell profile. + +**Solution:** Hermes auto-sources `~/.bashrc` by default. If that's not enough — e.g. 
you're a zsh user whose PATH lives in `~/.zshrc`, or you init `nvm` from a standalone file — list the extra files to source in `~/.hermes/config.yaml`: + +```yaml +terminal: + shell_init_files: + - ~/.zshrc # zsh users: pulls zsh-managed PATH into the bash snapshot + - ~/.nvm/nvm.sh # direct nvm init (works regardless of shell) + - /etc/profile.d/cargo.sh # system-wide rc files + # When this list is set, the default ~/.bashrc auto-source is NOT added — + # include it explicitly if you want both: + # - ~/.bashrc + # - ~/.zshrc +``` + +Missing files are skipped silently. Sourcing happens in bash, so files that rely on zsh-only syntax may error — if that's a concern, source just the PATH-setting portion (e.g. nvm's `nvm.sh` directly) rather than the whole rc file. + +To disable the auto-source behaviour (strict login-shell semantics only): + +```yaml +terminal: + auto_source_bashrc: false +``` + #### `uv: command not found` **Cause:** The `uv` package manager isn't installed or not in PATH. diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 9cb1f386b8..ab48e036dd 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -58,6 +58,12 @@ hermes skills uninstall | **meme-generation** | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual `.png` meme files. | | **touchdesigner-mcp** | Control a running TouchDesigner instance via the twozero MCP plugin — create operators, set parameters, wire connections, execute Python, build real-time audio-reactive visuals and GLSL networks. 36 native tools. | +## Dogfood + +| Skill | Description | +|-------|-------------| +| **adversarial-ux-test** | Roleplay the most difficult, tech-resistant user for a product — browse in-persona, rant, then filter through a RED/YELLOW/WHITE/GREEN pragmatism layer so only real UX friction becomes tickets. 
| + ## DevOps | Skill | Description | diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 46c29929f9..301d7ee545 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -69,6 +69,7 @@ Internal dogfooding and QA skills used to test Hermes Agent itself. | Skill | Description | Path | |-------|-------------|------| | `dogfood` | Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports | `dogfood` | +| `adversarial-ux-test` | Roleplay the most difficult, tech-resistant user for a product — browse in-persona, rant, then filter through a RED/YELLOW/WHITE/GREEN pragmatism layer so only real UX friction becomes tickets. | `dogfood/adversarial-ux-test` | ## email diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 4eb0c56d95..c6afd83322 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -1049,6 +1049,7 @@ voice: record_key: "ctrl+b" # Push-to-talk key inside the CLI max_recording_seconds: 120 # Hard stop for long recordings auto_tts: false # Enable spoken replies automatically when /voice on + beep_enabled: true # Play record start/stop beeps in CLI voice mode silence_threshold: 200 # RMS threshold for speech detection silence_duration: 3.0 # Seconds of silence before auto-stop ``` diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md index 6f7fc89506..2bf6430ff7 100644 --- a/website/docs/user-guide/features/tts.md +++ b/website/docs/user-guide/features/tts.md @@ -14,7 +14,7 @@ If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription, ## Text-to-Speech -Convert text to speech with eight providers: +Convert text to speech with nine providers: | Provider | Quality | Cost | API Key | |----------|---------|------|---------| @@ -25,7 +25,8 @@ Convert text to 
speech with eight providers: | **Mistral (Voxtral TTS)** | Excellent | Paid | `MISTRAL_API_KEY` | | **Google Gemini TTS** | Excellent | Free tier | `GEMINI_API_KEY` | | **xAI TTS** | Excellent | Paid | `XAI_API_KEY` | -| **NeuTTS** | Good | Free | None needed | +| **NeuTTS** | Good | Free (local) | None needed | +| **KittenTTS** | Good | Free (local) | None needed | ### Platform Delivery @@ -41,7 +42,7 @@ Convert text to speech with eight providers: ```yaml # In ~/.hermes/config.yaml tts: - provider: "edge" # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts" + provider: "edge" # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts" | "kittentts" speed: 1.0 # Global speed multiplier (provider-specific settings override this) edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages @@ -77,6 +78,11 @@ tts: ref_text: '' model: neuphonic/neutts-air-q4-gguf device: cpu + kittentts: + model: KittenML/kitten-tts-nano-0.8-int8 # 25MB int8; also: kitten-tts-micro-0.8 (41MB), kitten-tts-mini-0.8 (80MB) + voice: Jasper # Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo + speed: 1.0 # 0.5 - 2.0 + clean_text: true # Expand numbers, currencies, units ``` **Speed control**: The global `tts.speed` value applies to all providers by default. Each provider can override it with its own `speed` setting (e.g., `tts.openai.speed: 1.5`). Provider-specific speed takes precedence over the global value. Default is `1.0` (normal speed). 
@@ -91,6 +97,7 @@ Telegram voice bubbles require Opus/OGG audio format: - **Google Gemini TTS** outputs raw PCM and uses **ffmpeg** to encode Opus directly for Telegram voice bubbles - **xAI TTS** outputs MP3 and needs **ffmpeg** to convert for Telegram voice bubbles - **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles +- **KittenTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles ```bash # Ubuntu/Debian @@ -103,7 +110,7 @@ brew install ffmpeg sudo dnf install ffmpeg ``` -Without ffmpeg, Edge TTS, MiniMax TTS, and NeuTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble). +Without ffmpeg, Edge TTS, MiniMax TTS, NeuTTS, and KittenTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble). :::tip If you want voice bubbles without installing ffmpeg, switch to the OpenAI, ElevenLabs, or Mistral provider. diff --git a/website/docs/user-guide/features/voice-mode.md b/website/docs/user-guide/features/voice-mode.md index 2befd59e0f..b82718cf04 100644 --- a/website/docs/user-guide/features/voice-mode.md +++ b/website/docs/user-guide/features/voice-mode.md @@ -149,7 +149,7 @@ Two-stage algorithm detects when you've finished speaking: If no speech is detected at all for 15 seconds, recording stops automatically. -Both `silence_threshold` and `silence_duration` are configurable in `config.yaml`. +Both `silence_threshold` and `silence_duration` are configurable in `config.yaml`. You can also disable the record start/stop beeps with `voice.beep_enabled: false`. 
### Streaming TTS @@ -383,6 +383,7 @@ voice: record_key: "ctrl+b" # Key to start/stop recording max_recording_seconds: 120 # Maximum recording length auto_tts: false # Auto-enable TTS when voice mode starts + beep_enabled: true # Play record start/stop beeps silence_threshold: 200 # RMS level (0-32767) below which counts as silence silence_duration: 3.0 # Seconds of silence before auto-stop