diff --git a/.github/workflows/deploy-site.yml b/.github/workflows/deploy-site.yml index 3e78bc61b..67f557bad 100644 --- a/.github/workflows/deploy-site.yml +++ b/.github/workflows/deploy-site.yml @@ -53,6 +53,9 @@ jobs: - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py + - name: Regenerate per-skill docs pages + catalogs + run: python3 website/scripts/generate-skill-docs.py + - name: Build skills index (if not already present) env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docs-site-checks.yml b/.github/workflows/docs-site-checks.yml index 2f985122c..80fe9ea9d 100644 --- a/.github/workflows/docs-site-checks.yml +++ b/.github/workflows/docs-site-checks.yml @@ -36,6 +36,9 @@ jobs: - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py + - name: Regenerate per-skill docs pages + catalogs + run: python3 website/scripts/generate-skill-docs.py + - name: Lint docs diagrams run: npm run lint:diagrams working-directory: website diff --git a/AGENTS.md b/AGENTS.md index ae78e005a..05a6742d4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -240,6 +240,19 @@ npm run fmt # prettier npm test # vitest ``` +### TUI in the Dashboard (`hermes dashboard` → `/chat`) + +The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`. + +- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths. +- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade). +- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not). 
+
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`. + +**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead. + +**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired. + --- ## Adding New Tools diff --git a/Dockerfile b/Dockerfile index 8904c4c74..4ab1d3804 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,9 +10,11 @@ ENV PYTHONUNBUFFERED=1 ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright # Install system dependencies in one layer, clear APT cache +# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.) +# that would otherwise accumulate when hermes runs as PID 1. See #15012. RUN apt-get update && \ apt-get install -y --no-install-recommends \ - build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \ + build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \ rm -rf /var/lib/apt/lists/* # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime @@ -41,9 +43,15 @@ COPY --chown=hermes:hermes . . 
# Build web dashboard (Vite outputs to hermes_cli/web_dist/) RUN cd web && npm run build +# ---------- Permissions ---------- +# Make install dir world-readable so any HERMES_UID can read it at runtime. +# The venv needs to be traversable too. +USER root +RUN chmod -R a+rX /opt/hermes +# Start as root so the entrypoint can usermod/groupmod + gosu. +# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000). + # ---------- Python virtualenv ---------- -RUN chown hermes:hermes /opt/hermes -USER hermes RUN uv venv && \ uv pip install --no-cache-dir -e ".[all]" @@ -52,4 +60,4 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist ENV HERMES_HOME=/opt/data ENV PATH="/opt/data/.local/bin:${PATH}" VOLUME [ "/opt/data" ] -ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ] +ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ] diff --git a/acp_adapter/server.py b/acp_adapter/server.py index d73c71157..612748d56 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -60,7 +60,7 @@ from acp_adapter.events import ( make_tool_progress_cb, ) from acp_adapter.permissions import make_approval_callback -from acp_adapter.session import SessionManager, SessionState +from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets logger = logging.getLogger(__name__) @@ -287,7 +287,11 @@ class HermesACPAgent(acp.Agent): try: from model_tools import get_tool_definitions - enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + enabled_toolsets = _expand_acp_enabled_toolsets( + getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"], + mcp_server_names=[server.name for server in mcp_servers], + ) + state.agent.enabled_toolsets = enabled_toolsets disabled_toolsets = getattr(state.agent, "disabled_toolsets", None) state.agent.tools = get_tool_definitions( enabled_toolsets=enabled_toolsets, @@ -754,7 +758,9 @@ class HermesACPAgent(acp.Agent): def _cmd_tools(self, args: 
str, state: SessionState) -> str: try: from model_tools import get_tool_definitions - toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + toolsets = _expand_acp_enabled_toolsets( + getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"] + ) tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True) if not tools: return "No tools available." diff --git a/acp_adapter/session.py b/acp_adapter/session.py index 3f5f78f9a..724573002 100644 --- a/acp_adapter/session.py +++ b/acp_adapter/session.py @@ -106,6 +106,24 @@ def _register_task_cwd(task_id: str, cwd: str) -> None: logger.debug("Failed to register ACP task cwd override", exc_info=True) +def _expand_acp_enabled_toolsets( + toolsets: List[str] | None = None, + mcp_server_names: List[str] | None = None, +) -> List[str]: + """Return ACP toolsets plus explicit MCP server toolsets for this session.""" + expanded: List[str] = [] + for name in list(toolsets or ["hermes-acp"]): + if name and name not in expanded: + expanded.append(name) + + for server_name in list(mcp_server_names or []): + toolset_name = f"mcp-{server_name}" + if server_name and toolset_name not in expanded: + expanded.append(toolset_name) + + return expanded + + def _clear_task_cwd(task_id: str) -> None: """Remove task-specific cwd overrides for an ACP session.""" if not task_id: @@ -537,9 +555,18 @@ class SessionManager: elif isinstance(model_cfg, str) and model_cfg.strip(): default_model = model_cfg.strip() + configured_mcp_servers = [ + name + for name, cfg in (config.get("mcp_servers") or {}).items() + if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False + ] + kwargs = { "platform": "acp", - "enabled_toolsets": ["hermes-acp"], + "enabled_toolsets": _expand_acp_enabled_toolsets( + ["hermes-acp"], + mcp_server_names=configured_mcp_servers, + ), "quiet_mode": True, "session_id": session_id, "model": model or default_model, diff --git a/agent/anthropic_adapter.py 
b/agent/anthropic_adapter.py index ea09c11ea..01fb8e48b 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -14,6 +14,8 @@ import copy import json import logging import os +import platform +import subprocess from pathlib import Path from hermes_constants import get_hermes_home @@ -277,8 +279,9 @@ def _is_oauth_token(key: str) -> bool: Positively identifies Anthropic OAuth tokens by their key format: - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow + - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN) - Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern + Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern and correctly return False. """ if not key: @@ -292,6 +295,9 @@ def _is_oauth_token(key: str) -> bool: # JWTs from Anthropic OAuth flow if key.startswith("eyJ"): return True + # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN) + if key.startswith("cc-"): + return True return False @@ -461,8 +467,72 @@ def build_anthropic_bedrock_client(region: str): ) +def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]: + """Read Claude Code OAuth credentials from the macOS Keychain. + + Claude Code >=2.1.114 stores credentials in the macOS Keychain under the + service name "Claude Code-credentials" rather than (or in addition to) + the JSON file at ~/.claude/.credentials.json. + + The password field contains a JSON string with the same claudeAiOauth + structure as the JSON file. + + Returns dict with {accessToken, refreshToken?, expiresAt?} or None. 
+ """ + import platform + import subprocess + + if platform.system() != "Darwin": + return None + + try: + # Read the "Claude Code-credentials" generic password entry + result = subprocess.run( + ["security", "find-generic-password", + "-s", "Claude Code-credentials", + "-w"], + capture_output=True, + text=True, + timeout=5, + ) + except (OSError, subprocess.TimeoutExpired): + logger.debug("Keychain: security command not available or timed out") + return None + + if result.returncode != 0: + logger.debug("Keychain: no entry found for 'Claude Code-credentials'") + return None + + raw = result.stdout.strip() + if not raw: + return None + + try: + data = json.loads(raw) + except json.JSONDecodeError: + logger.debug("Keychain: credentials payload is not valid JSON") + return None + + oauth_data = data.get("claudeAiOauth") + if oauth_data and isinstance(oauth_data, dict): + access_token = oauth_data.get("accessToken", "") + if access_token: + return { + "accessToken": access_token, + "refreshToken": oauth_data.get("refreshToken", ""), + "expiresAt": oauth_data.get("expiresAt", 0), + "source": "macos_keychain", + } + + return None + + def read_claude_code_credentials() -> Optional[Dict[str, Any]]: - """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json. + """Read refreshable Claude Code OAuth credentials. + + Checks two sources in order: + 1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry + 2. ~/.claude/.credentials.json file This intentionally excludes ~/.claude.json primaryApiKey. Opencode's subscription flow is OAuth/setup-token based with refreshable credentials, @@ -471,6 +541,12 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]: Returns dict with {accessToken, refreshToken?, expiresAt?} or None. 
""" + # Try macOS Keychain first (covers Claude Code >=2.1.114) + kc_creds = _read_claude_code_credentials_from_keychain() + if kc_creds: + return kc_creds + + # Fall back to JSON file cred_path = Path.home() / ".claude" / ".credentials.json" if cred_path.exists(): try: @@ -641,7 +717,9 @@ def _write_claude_code_credentials( existing["claudeAiOauth"] = oauth_data cred_path.parent.mkdir(parents=True, exist_ok=True) - cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8") + _tmp_cred = cred_path.with_suffix(".tmp") + _tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8") + _tmp_cred.replace(cred_path) # Restrict permissions (credentials file) cred_path.chmod(0o600) except (OSError, IOError) as e: @@ -908,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]: # --------------------------------------------------------------------------- +def _is_bedrock_model_id(model: str) -> bool: + """Detect AWS Bedrock model IDs that use dots as namespace separators. + + Bedrock model IDs come in two forms: + - Bare: ``anthropic.claude-opus-4-7`` + - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0`` + + In both cases the dots separate namespace components, not version + numbers, and must be preserved verbatim for the Bedrock API. + """ + lower = model.lower() + # Regional inference-profile prefixes + if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")): + return True + # Bare Bedrock model IDs: provider.model-family + if lower.startswith("anthropic."): + return True + return False + + def normalize_model_name(model: str, preserve_dots: bool = False) -> str: """Normalize a model name for the Anthropic API. @@ -915,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str: - Converts dots to hyphens in version numbers (OpenRouter uses dots, Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless preserve_dots is True (e.g. 
for Alibaba/DashScope: qwen3.5-plus). + - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and + regional inference profiles (``us.anthropic.claude-*``) whose dots + are namespace separators, not version separators. """ lower = model.lower() if lower.startswith("anthropic/"): model = model[len("anthropic/"):] if not preserve_dots: + # Bedrock model IDs use dots as namespace separators + # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*"). + # These must not be converted to hyphens. See issue #12295. + if _is_bedrock_model_id(model): + return model # OpenRouter uses dots for version separators (claude-opus-4.6), # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens. model = model.replace(".", "-") @@ -1598,4 +1704,3 @@ def build_anthropic_kwargs( return kwargs - diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index e812a337f..5e8a60e76 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -74,6 +74,12 @@ _PROVIDER_ALIASES = { "minimax_cn": "minimax-cn", "claude": "anthropic", "claude-code": "anthropic", + "github": "copilot", + "github-copilot": "copilot", + "github-model": "copilot", + "github-models": "copilot", + "github-copilot-acp": "copilot-acp", + "copilot-acp-agent": "copilot-acp", } @@ -89,10 +95,11 @@ def _normalize_aux_provider(provider: Optional[str]) -> str: if normalized == "main": # Resolve to the user's actual main provider so named custom providers # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly. 
- main_prov = _read_main_provider() + main_prov = (_read_main_provider() or "").strip().lower() if main_prov and main_prov not in ("auto", "main", ""): - return main_prov - return "custom" + normalized = main_prov + else: + return "custom" return _PROVIDER_ALIASES.get(normalized, normalized) @@ -1342,6 +1349,68 @@ def _is_auth_error(exc: Exception) -> bool: return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() +def _evict_cached_clients(provider: str) -> None: + """Drop cached auxiliary clients for a provider so fresh creds are used.""" + normalized = _normalize_aux_provider(provider) + with _client_cache_lock: + stale_keys = [ + key for key in _client_cache + if _normalize_aux_provider(str(key[0])) == normalized + ] + for key in stale_keys: + client = _client_cache.get(key, (None, None, None))[0] + if client is not None: + _force_close_async_httpx(client) + try: + close_fn = getattr(client, "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache.pop(key, None) + + +def _refresh_provider_credentials(provider: str) -> bool: + """Refresh short-lived credentials for OAuth-backed auxiliary providers.""" + normalized = _normalize_aux_provider(provider) + try: + if normalized == "openai-codex": + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(force_refresh=True) + if not str(creds.get("api_key", "") or "").strip(): + return False + _evict_cached_clients(normalized) + return True + if normalized == "nous": + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=True, + ) + if not str(creds.get("api_key", "") or "").strip(): + return False + _evict_cached_clients(normalized) + return True + if normalized == 
"anthropic": + from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token + + creds = read_claude_code_credentials() + token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") else None + if not str(token or "").strip(): + token = resolve_anthropic_token() + if not str(token or "").strip(): + return False + _evict_cached_clients(normalized) + return True + except Exception as exc: + logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc) + return False + return False + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1736,7 +1805,7 @@ def resolve_provider_client( "but no endpoint credentials found") return None, None - # ── Named custom providers (config.yaml custom_providers list) ─── + # ── Named custom providers (config.yaml providers dict / custom_providers list) ─── try: from hermes_cli.runtime_provider import _get_named_custom_provider custom_entry = _get_named_custom_provider(provider) @@ -1747,16 +1816,51 @@ def resolve_provider_client( if not custom_key and custom_key_env: custom_key = os.getenv(custom_key_env, "").strip() custom_key = custom_key or "no-key-required" + # An explicit per-task api_mode override (from _resolve_task_provider_model) + # wins; otherwise fall back to what the provider entry declared. 
+ entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip() if custom_base: final_model = _normalize_resolved_model( model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini", provider, ) - client = OpenAI(api_key=custom_key, base_url=custom_base) - client = _wrap_if_needed(client, final_model, custom_base) logger.debug( - "resolve_provider_client: named custom provider %r (%s)", - provider, final_model) + "resolve_provider_client: named custom provider %r (%s, api_mode=%s)", + provider, final_model, entry_api_mode or "chat_completions") + # anthropic_messages: route through the Anthropic Messages API + # via AnthropicAuxiliaryClient. Mirrors the anonymous-custom + # branch in _try_custom_endpoint(). See #15033. + if entry_api_mode == "anthropic_messages": + try: + from agent.anthropic_adapter import build_anthropic_client + real_client = build_anthropic_client(custom_key, custom_base) + except ImportError: + logger.warning( + "Named custom provider %r declares api_mode=" + "anthropic_messages but the anthropic SDK is not " + "installed — falling back to OpenAI-wire.", + provider, + ) + client = OpenAI(api_key=custom_key, base_url=custom_base) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + sync_anthropic = AnthropicAuxiliaryClient( + real_client, final_model, custom_key, custom_base, is_oauth=False, + ) + if async_mode: + return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model + return sync_anthropic, final_model + client = OpenAI(api_key=custom_key, base_url=custom_base) + # codex_responses or inherited auto-detect (via _wrap_if_needed). + # _wrap_if_needed reads the closed-over `api_mode` (the task-level + # override). Named-provider entry api_mode=codex_responses also + # flows through here. 
+ if entry_api_mode == "codex_responses" and not isinstance( + client, CodexAuxiliaryClient + ): + client = CodexAuxiliaryClient(client, final_model) + else: + client = _wrap_if_needed(client, final_model, custom_base) return (_to_async_client(client, final_model) if async_mode else (client, final_model)) logger.warning( @@ -1889,6 +1993,39 @@ def resolve_provider_client( "directly supported", provider) return None, None + elif pconfig.auth_type == "aws_sdk": + # AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via + # boto3's credential chain (IAM roles, SSO, env vars, instance metadata). + try: + from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region + from agent.anthropic_adapter import build_anthropic_bedrock_client + except ImportError: + logger.warning("resolve_provider_client: bedrock requested but " + "boto3 or anthropic SDK not installed") + return None, None + + if not has_aws_credentials(): + logger.debug("resolve_provider_client: bedrock requested but " + "no AWS credentials found") + return None, None + + region = resolve_bedrock_region() + default_model = "anthropic.claude-haiku-4-5-20251001-v1:0" + final_model = _normalize_resolved_model(model or default_model, provider) + try: + real_client = build_anthropic_bedrock_client(region) + except ImportError as exc: + logger.warning("resolve_provider_client: cannot create Bedrock " + "client: %s", exc) + return None, None + client = AnthropicAuxiliaryClient( + real_client, final_model, api_key="aws-sdk", + base_url=f"https://bedrock-runtime.{region}.amazonaws.com", + ) + logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + elif pconfig.auth_type in ("oauth_device_code", "oauth_external"): # OAuth providers — route through their specific try functions if provider == "nous": @@ -2857,6 +2994,49 @@ def call_llm( return _validate_llm_response( 
refreshed_client.chat.completions.create(**kwargs), task) + # ── Auth refresh retry ─────────────────────────────────────── + if (_is_auth_error(first_err) + and resolved_provider not in ("auto", "", None) + and not client_is_nous): + if _refresh_provider_credentials(resolved_provider): + logger.info( + "Auxiliary %s: refreshed %s credentials after auth error, retrying", + task or "call", resolved_provider, + ) + retry_client, retry_model = ( + resolve_vision_provider_client( + provider=resolved_provider, + model=final_model, + async_mode=False, + )[1:] + if task == "vision" + else _get_cached_client( + resolved_provider, + resolved_model, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + main_runtime=main_runtime, + ) + ) + if retry_client is not None: + retry_kwargs = _build_call_kwargs( + resolved_provider, + retry_model or final_model, + messages, + temperature=temperature, + max_tokens=max_tokens, + tools=tools, + timeout=effective_timeout, + extra_body=effective_extra_body, + base_url=resolved_base_url, + ) + _retry_base = str(getattr(retry_client, "base_url", "") or "") + if _is_anthropic_compat_endpoint(resolved_provider, _retry_base): + retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"]) + return _validate_llm_response( + retry_client.chat.completions.create(**retry_kwargs), task) + # ── Payment / credit exhaustion fallback ────────────────────── # When the resolved provider returns 402 or a credit-related error, # try alternative providers instead of giving up. 
This handles the @@ -3077,6 +3257,48 @@ async def async_call_llm( return _validate_llm_response( await refreshed_client.chat.completions.create(**kwargs), task) + # ── Auth refresh retry (mirrors sync call_llm) ─────────────── + if (_is_auth_error(first_err) + and resolved_provider not in ("auto", "", None) + and not client_is_nous): + if _refresh_provider_credentials(resolved_provider): + logger.info( + "Auxiliary %s (async): refreshed %s credentials after auth error, retrying", + task or "call", resolved_provider, + ) + if task == "vision": + _, retry_client, retry_model = resolve_vision_provider_client( + provider=resolved_provider, + model=final_model, + async_mode=True, + ) + else: + retry_client, retry_model = _get_cached_client( + resolved_provider, + resolved_model, + async_mode=True, + base_url=resolved_base_url, + api_key=resolved_api_key, + api_mode=resolved_api_mode, + ) + if retry_client is not None: + retry_kwargs = _build_call_kwargs( + resolved_provider, + retry_model or final_model, + messages, + temperature=temperature, + max_tokens=max_tokens, + tools=tools, + timeout=effective_timeout, + extra_body=effective_extra_body, + base_url=resolved_base_url, + ) + _retry_base = str(getattr(retry_client, "base_url", "") or "") + if _is_anthropic_compat_endpoint(resolved_provider, _retry_base): + retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"]) + return _validate_llm_response( + await retry_client.chat.completions.create(**retry_kwargs), task) + # ── Payment / connection fallback (mirrors sync call_llm) ───── should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) is_auto = resolved_provider in ("auto", "", None) diff --git a/agent/bedrock_adapter.py b/agent/bedrock_adapter.py index 9e4297581..48674a562 100644 --- a/agent/bedrock_adapter.py +++ b/agent/bedrock_adapter.py @@ -87,6 +87,114 @@ def reset_client_cache(): _bedrock_control_client_cache.clear() +def invalidate_runtime_client(region: 
str) -> bool: + """Evict the cached ``bedrock-runtime`` client for a single region. + + Per-region counterpart to :func:`reset_client_cache`. Used by the converse + call wrappers to discard clients whose underlying HTTP connection has + gone stale, so the next call allocates a fresh client (with a fresh + connection pool) instead of reusing a dead socket. + + Returns True if a cached entry was evicted, False if the region was not + cached. + """ + existed = region in _bedrock_runtime_client_cache + _bedrock_runtime_client_cache.pop(region, None) + return existed + + +# --------------------------------------------------------------------------- +# Stale-connection detection +# --------------------------------------------------------------------------- +# +# boto3 caches its HTTPS connection pool inside the client object. When a +# pooled connection is killed out from under us (NAT timeout, VPN flap, +# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as +# one of a handful of low-level exceptions — most commonly +# ``botocore.exceptions.ConnectionClosedError`` or +# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal +# ``assert`` in a couple of paths (connection pool state checks, chunked +# response readers) which bubbles up as a bare ``AssertionError`` with an +# empty ``str(exc)``. +# +# In all of these cases the client is the problem, not the request: retrying +# with the same cached client reproduces the failure until the process +# restarts. The fix is to evict the region's cached client so the next +# attempt builds a new one. 
+ +_STALE_LIB_MODULE_PREFIXES = ( + "urllib3.", + "botocore.", + "boto3.", +) + + +def _traceback_frames_modules(exc: BaseException): + """Yield ``__name__``-style module strings for each frame in exc's traceback.""" + tb = getattr(exc, "__traceback__", None) + while tb is not None: + frame = tb.tb_frame + module = frame.f_globals.get("__name__", "") + yield module or "" + tb = tb.tb_next + + +def is_stale_connection_error(exc: BaseException) -> bool: + """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection. + + Matches: + * ``botocore.exceptions.ConnectionError`` and subclasses + (``ConnectionClosedError``, ``EndpointConnectionError``, + ``ReadTimeoutError``, ``ConnectTimeoutError``). + * ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` / + ``ConnectionError`` (best-effort import — urllib3 is a transitive + dependency of botocore so it is always available in practice). + * Bare ``AssertionError`` raised from a frame inside urllib3, botocore, + or boto3. These are internal-invariant failures (typically triggered + by corrupted connection-pool state after a dropped socket) and are + recoverable by swapping the client. + + Non-library ``AssertionError``s (from application code or tests) are + intentionally not matched — only library-internal asserts signal stale + connection state. + """ + # botocore: the canonical signal — HTTPClientError is the umbrella for + # ConnectionClosedError, ReadTimeoutError, EndpointConnectionError, + # ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers + # the same family via a different branch of the hierarchy. 
+ try: + from botocore.exceptions import ( + ConnectionError as BotoConnectionError, + HTTPClientError, + ) + botocore_errors: tuple = (BotoConnectionError, HTTPClientError) + except ImportError: # pragma: no cover — botocore always present with boto3 + botocore_errors = () + if botocore_errors and isinstance(exc, botocore_errors): + return True + + # urllib3: low-level transport failures + try: + from urllib3.exceptions import ( + ProtocolError, + NewConnectionError, + ConnectionError as Urllib3ConnectionError, + ) + urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError) + except ImportError: # pragma: no cover + urllib3_errors = () + if urllib3_errors and isinstance(exc, urllib3_errors): + return True + + # Library-internal AssertionError (urllib3 / botocore / boto3) + if isinstance(exc, AssertionError): + for module in _traceback_frames_modules(exc): + if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES): + return True + + return False + + # --------------------------------------------------------------------------- # AWS credential detection # --------------------------------------------------------------------------- @@ -787,7 +895,17 @@ def call_converse( guardrail_config=guardrail_config, ) - response = client.converse(**kwargs) + try: + response = client.converse(**kwargs) + except Exception as exc: + if is_stale_connection_error(exc): + logger.warning( + "bedrock: stale-connection error on converse(region=%s, model=%s): " + "%s — evicting cached client so the next call reconnects.", + region, model, type(exc).__name__, + ) + invalidate_runtime_client(region) + raise return normalize_converse_response(response) @@ -819,7 +937,17 @@ def call_converse_stream( guardrail_config=guardrail_config, ) - response = client.converse_stream(**kwargs) + try: + response = client.converse_stream(**kwargs) + except Exception as exc: + if is_stale_connection_error(exc): + logger.warning( + "bedrock: stale-connection error on 
converse_stream(region=%s, " + "model=%s): %s — evicting cached client so the next call reconnects.", + region, model, type(exc).__name__, + ) + invalidate_runtime_client(region) + raise return normalize_converse_stream_events(response) diff --git a/agent/codex_responses_adapter.py b/agent/codex_responses_adapter.py index 4d3e5590b..3b007a762 100644 --- a/agent/codex_responses_adapter.py +++ b/agent/codex_responses_adapter.py @@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY logger = logging.getLogger(__name__) +# Matches Codex/Harmony tool-call serialization that occasionally leaks into +# assistant-message content when the model fails to emit a structured +# ``function_call`` item. Accepts the common forms: +# +# to=functions.exec_command +# assistant to=functions.exec_command +# <|channel|>commentary to=functions.exec_command +# +# ``to=functions.`` is the stable marker — the optional ``assistant`` or +# Harmony channel prefix varies by degeneration mode. Case-insensitive to +# cover lowercase/uppercase ``assistant`` variants. +_TOOL_CALL_LEAK_PATTERN = re.compile( + r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*", + re.IGNORECASE, +) + + # --------------------------------------------------------------------------- # Multimodal content helpers # --------------------------------------------------------------------------- @@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]: if isinstance(out_text, str): final_text = out_text.strip() + # ── Tool-call leak recovery ────────────────────────────────── + # gpt-5.x on the Codex Responses API sometimes degenerates and emits + # what should be a structured `function_call` item as plain assistant + # text using the Harmony/Codex serialization (``to=functions.foo + # {json}`` or ``assistant to=functions.foo {json}``). 
The model + # intended to call a tool, but the intent never made it into + # ``response.output`` as a ``function_call`` item, so ``tool_calls`` + # is empty here. If we pass this through, the parent sees a + # confident-looking summary with no audit trail (empty ``tool_trace``) + # and no tools actually ran — the Taiwan-embassy-email incident. + # + # Detection: leaked tokens always contain ``to=functions.`` and + # the assistant message has no real tool calls. Treat it as incomplete + # so the existing Codex-incomplete continuation path (3 retries, + # handled in run_agent.py) gets a chance to re-elicit a proper + # ``function_call`` item. The existing loop already handles message + # append, dedup, and retry budget. + leaked_tool_call_text = False + if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text): + leaked_tool_call_text = True + logger.warning( + "Codex response contains leaked tool-call text in assistant content " + "(no structured function_call items). Treating as incomplete so the " + "continuation path can re-elicit a proper tool call. Leaked snippet: %r", + final_text[:300], + ) + # Clear the text so downstream code doesn't surface the garbage as + # a summary. The encrypted reasoning items (if any) are preserved + # so the model keeps its chain-of-thought on the retry. 
+ final_text = "" + assistant_message = SimpleNamespace( content=final_text, tool_calls=tool_calls, @@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]: if tool_calls: finish_reason = "tool_calls" + elif leaked_tool_call_text: + finish_reason = "incomplete" elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): finish_reason = "incomplete" elif reasoning_items_raw and not final_text: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index f8036851f..ef40cbfaf 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine): self._context_probed = False self._context_probe_persistable = False self._previous_summary = None + self._last_summary_error = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 @@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine): self._last_compression_savings_pct: float = 100.0 self._ineffective_compression_count: int = 0 self._summary_failure_cooldown_until: float = 0.0 + self._last_summary_error: Optional[str] = None def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" @@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio self._previous_summary = summary self._summary_failure_cooldown_until = 0.0 self._summary_model_fallen_back = False + self._last_summary_error = None return self._with_summary_prefix(summary) except RuntimeError: # No provider configured — long cooldown, unlikely to self-resolve self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS + self._last_summary_error = "no auxiliary LLM provider configured" logging.warning("Context compression: no provider available for " "summary. 
Middle turns will be dropped without summary " "for %d seconds.", @@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio # Transient errors (timeout, rate limit, network) — shorter cooldown _transient_cooldown = 60 self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown + err_text = str(e).strip() or e.__class__.__name__ + if len(err_text) > 220: + err_text = err_text[:217].rstrip() + "..." + self._last_summary_error = err_text logging.warning( "Failed to generate context summary: %s. " "Further summary attempts paused for %d seconds.", @@ -1099,6 +1107,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio return max(cut_idx, head_end + 1) + # ------------------------------------------------------------------ + # ContextEngine: manual /compress preflight + # ------------------------------------------------------------------ + + def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: + """Return True if there is a non-empty middle region to compact. + + Overrides the ABC default so the gateway ``/compress`` guard can + skip the LLM call when the transcript is still entirely inside + the protected head/tail. + """ + compress_start = self._align_boundary_forward(messages, self.protect_first_n) + compress_end = self._find_tail_cut_by_tokens(messages, compress_start) + return compress_start < compress_end + # ------------------------------------------------------------------ # Main compression entry point # ------------------------------------------------------------------ diff --git a/agent/context_engine.py b/agent/context_engine.py index 6ae90b6cd..bbafcd29c 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -78,6 +78,7 @@ class ContextEngine(ABC): self, messages: List[Dict[str, Any]], current_tokens: int = None, + focus_topic: str = None, ) -> List[Dict[str, Any]]: """Compact the message list and return the new message list. 
@@ -86,6 +87,12 @@ class ContextEngine(ABC): context budget. The implementation is free to summarize, build a DAG, or do anything else — as long as the returned list is a valid OpenAI-format message sequence. + + Args: + focus_topic: Optional topic string from manual ``/compress ``. + Engines that support guided compression should prioritise + preserving information related to this topic. Engines that + don't support it may simply ignore this argument. """ # -- Optional: pre-flight check ---------------------------------------- @@ -98,6 +105,21 @@ class ContextEngine(ABC): """ return False + # -- Optional: manual /compress preflight ------------------------------ + + def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: + """Quick check: is there anything in ``messages`` that can be compacted? + + Used by the gateway ``/compress`` command as a preflight guard — + returning False lets the gateway report "nothing to compress yet" + without making an LLM call. + + Default returns True (always attempt). Engines with a cheap way + to introspect their own head/tail boundaries should override this + to return False when the transcript is still entirely protected. 
+ """ + return True + # -- Optional: session lifecycle --------------------------------------- def on_session_start(self, session_id: str, **kwargs) -> None: diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 783f94956..94d40d2d9 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -46,6 +46,47 @@ def _resolve_args() -> list[str]: return shlex.split(raw) +def _resolve_home_dir() -> str: + """Return a stable HOME for child ACP processes.""" + + try: + from hermes_constants import get_subprocess_home + + profile_home = get_subprocess_home() + if profile_home: + return profile_home + except Exception: + pass + + home = os.environ.get("HOME", "").strip() + if home: + return home + + expanded = os.path.expanduser("~") + if expanded and expanded != "~": + return expanded + + try: + import pwd + + resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() + if resolved: + return resolved + except Exception: + pass + + # Last resort: /tmp (writable on any POSIX system). Avoids crashing the + # subprocess with no HOME; callers can set HERMES_HOME explicitly if they + # need a different writable dir. 
+ return "/tmp" + + +def _build_subprocess_env() -> dict[str, str]: + env = os.environ.copy() + env["HOME"] = _resolve_home_dir() + return env + + def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: return { "jsonrpc": "2.0", @@ -382,6 +423,7 @@ class CopilotACPClient: text=True, bufsize=1, cwd=self._acp_cwd, + env=_build_subprocess_env(), ) except FileNotFoundError as exc: raise RuntimeError( diff --git a/agent/credential_pool.py b/agent/credential_pool.py index de8d03185..f6cb24dd6 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -455,6 +455,61 @@ class CredentialPool: logger.debug("Failed to sync from credentials file: %s", exc) return entry + def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential: + """Sync a Nous pool entry from auth.json if tokens differ. + + Nous OAuth refresh tokens are single-use. When another process + (e.g. a concurrent cron) refreshes the token via + ``resolve_nous_runtime_credentials``, it writes fresh tokens to + auth.json under ``_auth_store_lock``. The pool entry's tokens + become stale. This method detects that and adopts the newer pair, + avoiding a "refresh token reuse" revocation on the Nous Portal. 
+ """ + if self.provider != "nous" or entry.source != "device_code": + return entry + try: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "nous") + if not state: + return entry + store_refresh = state.get("refresh_token", "") + store_access = state.get("access_token", "") + if store_refresh and store_refresh != entry.refresh_token: + logger.debug( + "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)", + entry.id, + ) + field_updates: Dict[str, Any] = { + "access_token": store_access, + "refresh_token": store_refresh, + "last_status": None, + "last_status_at": None, + "last_error_code": None, + } + if state.get("expires_at"): + field_updates["expires_at"] = state["expires_at"] + if state.get("agent_key"): + field_updates["agent_key"] = state["agent_key"] + if state.get("agent_key_expires_at"): + field_updates["agent_key_expires_at"] = state["agent_key_expires_at"] + if state.get("inference_base_url"): + field_updates["inference_base_url"] = state["inference_base_url"] + extra_updates = dict(entry.extra) + for extra_key in ("obtained_at", "expires_in", "agent_key_id", + "agent_key_expires_in", "agent_key_reused", + "agent_key_obtained_at"): + val = state.get(extra_key) + if val is not None: + extra_updates[extra_key] = val + updated = replace(entry, extra=extra_updates, **field_updates) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync Nous entry from auth.json: %s", exc) + return entry + def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None: """Write refreshed pool entry tokens back to auth.json providers. 
@@ -561,6 +616,9 @@ class CredentialPool: last_refresh=refreshed.get("last_refresh"), ) elif self.provider == "nous": + synced = self._sync_nous_entry_from_auth_store(entry) + if synced is not entry: + entry = synced nous_state = { "access_token": entry.access_token, "refresh_token": entry.refresh_token, @@ -635,6 +693,26 @@ class CredentialPool: # Credentials file had a valid (non-expired) token — use it directly logger.debug("Credentials file has valid token, using without refresh") return synced + # For nous: another process may have consumed the refresh token + # between our proactive sync and the HTTP call. Re-sync from + # auth.json and adopt the fresh tokens if available. + if self.provider == "nous": + synced = self._sync_nous_entry_from_auth_store(entry) + if synced.refresh_token != entry.refresh_token: + logger.debug("Nous refresh failed but auth.json has newer tokens — adopting") + updated = replace( + synced, + last_status=STATUS_OK, + last_status_at=None, + last_error_code=None, + last_error_reason=None, + last_error_message=None, + last_error_reset_at=None, + ) + self._replace_entry(synced, updated) + self._persist() + self._sync_device_code_entry_to_auth_store(updated) + return updated self._mark_exhausted(entry, None) return None @@ -698,6 +776,17 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True + # For nous entries, sync from auth.json before status checks. + # Another process may have successfully refreshed via + # resolve_nous_runtime_credentials(), making this entry's + # exhausted status stale. 
+ if (self.provider == "nous" + and entry.source == "device_code" + and entry.last_status == STATUS_EXHAUSTED): + synced = self._sync_nous_entry_from_auth_store(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: @@ -739,8 +828,11 @@ class CredentialPool: if self._strategy == STRATEGY_LEAST_USED and len(available) > 1: entry = min(available, key=lambda e: e.request_count) + # Increment usage counter so subsequent selections distribute load + updated = replace(entry, request_count=entry.request_count + 1) + self._replace_entry(entry, updated) self._current_id = entry.id - return entry + return updated if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1: entry = available[0] @@ -1056,6 +1148,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup "inference_base_url": state.get("inference_base_url"), "agent_key": state.get("agent_key"), "agent_key_expires_at": state.get("agent_key_expires_at"), + # Carry the mint/refresh timestamps into the pool so + # freshness-sensitive consumers (self-heal hooks, pool + # pruning by age) can distinguish just-minted credentials + # from stale ones. Without these, fresh device_code + # entries get obtained_at=None and look older than they + # are (#15099). + "obtained_at": state.get("obtained_at"), + "expires_in": state.get("expires_in"), + "agent_key_id": state.get("agent_key_id"), + "agent_key_expires_in": state.get("agent_key_expires_in"), + "agent_key_reused": state.get("agent_key_reused"), + "agent_key_obtained_at": state.get("agent_key_obtained_at"), "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None, "label": seeded_label, }, @@ -1066,9 +1170,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). 
They don't live in # the auth store or credential pool, so we resolve them here. try: - from hermes_cli.copilot_auth import resolve_copilot_token + from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token token, source = resolve_copilot_token() if token: + api_token = get_copilot_api_token(token) source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" if not _is_suppressed(provider, source_name): active_sources.add(source_name) @@ -1080,7 +1185,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup { "source": source_name, "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, + "access_token": api_token, "base_url": pconfig.inference_base_url if pconfig else "", "label": source, }, diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 04875b6a5..87324d676 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -45,6 +45,7 @@ class FailoverReason(enum.Enum): # Model model_not_found = "model_not_found" # 404 or invalid model — fallback to different model + provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy # Request format format_error = "format_error" # 400 bad request — abort or strip + retry @@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [ "unsupported model", ] +# OpenRouter aggregator policy-block patterns. +# +# When a user's OpenRouter account privacy setting (or a per-request +# `provider.data_collection: deny` preference) excludes the only endpoint +# serving a model, OpenRouter returns 404 with a *specific* message that is +# distinct from "model not found": +# +# "No endpoints available matching your guardrail restrictions and +# data policy. 
Configure: https://openrouter.ai/settings/privacy" +# +# We classify this as `provider_policy_blocked` rather than +# `model_not_found` because: +# - The model *exists* — model_not_found is misleading in logs +# - Provider fallback won't help: the account-level setting applies to +# every call on the same OpenRouter account +# - The error body already contains the fix URL, so the user gets +# actionable guidance without us rewriting the message +_PROVIDER_POLICY_BLOCKED_PATTERNS = [ + "no endpoints available matching your guardrail", + "no endpoints available matching your data policy", + "no endpoints found matching your data policy", +] + # Auth patterns (non-status-code signals) _AUTH_PATTERNS = [ "invalid api key", @@ -319,6 +343,11 @@ def classify_api_error( """ status_code = _extract_status_code(error) error_type = type(error).__name__ + # Copilot/GitHub Models RateLimitError may not set .status_code; force 429 + # so downstream rate-limit handling (classifier reason, pool rotation, + # fallback gating) fires correctly instead of misclassifying as generic. + if status_code is None and error_type == "RateLimitError": + status_code = 429 body = _extract_error_body(error) error_code = _extract_error_code(body) @@ -523,6 +552,17 @@ def _classify_by_status( return _classify_402(error_msg, result_fn) if status_code == 404: + # OpenRouter policy-block 404 — distinct from "model not found". + # The model exists; the user's account privacy setting excludes the + # only endpoint serving it. Falling back to another provider won't + # help (same account setting applies). The error body already + # contains the fix URL, so just surface it. 
+ if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS): + return result_fn( + FailoverReason.provider_policy_blocked, + retryable=False, + should_fallback=False, + ) if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS): return result_fn( FailoverReason.model_not_found, @@ -640,6 +680,12 @@ def _classify_400( ) # Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter). + if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS): + return result_fn( + FailoverReason.provider_policy_blocked, + retryable=False, + should_fallback=False, + ) if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS): return result_fn( FailoverReason.model_not_found, @@ -812,6 +858,15 @@ def _classify_by_message( should_fallback=True, ) + # Provider policy-block (aggregator-side guardrail) — check before + # model_not_found so we don't mis-label as a missing model. + if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS): + return result_fn( + FailoverReason.provider_policy_blocked, + retryable=False, + should_fallback=False, + ) + # Model not found patterns if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS): return result_fn( diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py index 406e4a19b..5f64636f2 100644 --- a/agent/gemini_native_adapter.py +++ b/agent/gemini_native_adapter.py @@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool: return not normalized.endswith("/openai") +def probe_gemini_tier( + api_key: str, + base_url: str = DEFAULT_GEMINI_BASE_URL, + *, + model: str = "gemini-2.5-flash", + timeout: float = 10.0, +) -> str: + """Probe a Google AI Studio API key and return its tier. + + Returns one of: + + - ``"free"`` -- key is on the free tier (unusable with Hermes) + - ``"paid"`` -- key is on a paid tier + - ``"unknown"`` -- probe failed; callers should proceed without blocking. 
+ """ + key = (api_key or "").strip() + if not key: + return "unknown" + + normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/") + if not normalized_base: + normalized_base = DEFAULT_GEMINI_BASE_URL + if normalized_base.lower().endswith("/openai"): + normalized_base = normalized_base[: -len("/openai")] + + url = f"{normalized_base}/models/{model}:generateContent" + payload = { + "contents": [{"role": "user", "parts": [{"text": "hi"}]}], + "generationConfig": {"maxOutputTokens": 1}, + } + + try: + with httpx.Client(timeout=timeout) as client: + resp = client.post( + url, + params={"key": key}, + json=payload, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + logger.debug("probe_gemini_tier: network error: %s", exc) + return "unknown" + + headers_lower = {k.lower(): v for k, v in resp.headers.items()} + rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day") + if rpd_header: + try: + rpd_val = int(rpd_header) + except (TypeError, ValueError): + rpd_val = None + # Published free-tier daily caps (Dec 2025): + # gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000 + # Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free. + if rpd_val is not None and rpd_val <= 1000: + return "free" + if rpd_val is not None and rpd_val > 1000: + return "paid" + + if resp.status_code == 429: + body_text = "" + try: + body_text = resp.text or "" + except Exception: + body_text = "" + if "free_tier" in body_text.lower(): + return "free" + return "paid" + + if 200 <= resp.status_code < 300: + return "paid" + + return "unknown" + + +def is_free_tier_quota_error(error_message: str) -> bool: + """Return True when a Gemini 429 message indicates free-tier exhaustion.""" + if not error_message: + return False + return "free_tier" in error_message.lower() + + +_FREE_TIER_GUIDANCE = ( + "\n\nYour Google API key is on the free tier (<= 250 requests/day for " + "gemini-2.5-flash). 
Hermes typically makes 3-10 API calls per user turn, " + "so the free tier is exhausted in a handful of messages and cannot sustain " + "an agent session. Enable billing on your Google Cloud project and " + "regenerate the key in a billing-enabled project: " + "https://aistudio.google.com/apikey" +) + + class GeminiAPIError(Exception): """Error shape compatible with Hermes retry/error classification.""" @@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError: else: message = f"Gemini returned HTTP {status}: {body_text[:500]}" + # Free-tier quota exhaustion -> append actionable guidance so users who + # bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn + # that the free tier cannot sustain an agent session. + if status == 429 and is_free_tier_quota_error(err_message or body_text): + message = message + _FREE_TIER_GUIDANCE + return GeminiAPIError( message, code=code, @@ -704,6 +801,13 @@ class GeminiNativeClient: http_client: Optional[httpx.Client] = None, **_: Any, ) -> None: + if not (api_key or "").strip(): + raise RuntimeError( + "Gemini native client requires an API key, but none was provided. " + "Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env " + "(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` " + "to configure the Google provider." + ) self.api_key = api_key normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/") if normalized_base.endswith("/openai"): diff --git a/agent/gemini_schema.py b/agent/gemini_schema.py index 904c99d31..3608837a1 100644 --- a/agent/gemini_schema.py +++ b/agent/gemini_schema.py @@ -73,6 +73,20 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]: ] continue cleaned[key] = value + + # Gemini's Schema validator requires every ``enum`` entry to be a string, + # even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``. + # OpenAI / OpenRouter / Anthropic accept typed enums (e.g. 
Discord's + # ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``), + # so we only drop the ``enum`` when it would collide with Gemini's rule. + # Keeping ``type: integer`` plus the human-readable description gives the + # model enough guidance; the tool handler still validates the value. + enum_val = cleaned.get("enum") + type_val = cleaned.get("type") + if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}: + if any(not isinstance(item, str) for item in enum_val): + cleaned.pop("enum", None) + return cleaned diff --git a/agent/memory_manager.py b/agent/memory_manager.py index 2435c3f24..62cbd6ae1 100644 --- a/agent/memory_manager.py +++ b/agent/memory_manager.py @@ -31,6 +31,7 @@ from __future__ import annotations import json import logging import re +import inspect from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -312,7 +313,39 @@ class MemoryManager: ) return "\n\n".join(parts) - def on_memory_write(self, action: str, target: str, content: str) -> None: + @staticmethod + def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str: + """Return how to pass metadata to a provider's memory-write hook.""" + try: + signature = inspect.signature(provider.on_memory_write) + except (TypeError, ValueError): + return "keyword" + + params = list(signature.parameters.values()) + if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params): + return "keyword" + if "metadata" in signature.parameters: + return "keyword" + + accepted = [ + p for p in params + if p.kind in ( + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.KEYWORD_ONLY, + ) + ] + if len(accepted) >= 4: + return "positional" + return "legacy" + + def on_memory_write( + self, + action: str, + target: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: """Notify external providers when the built-in memory tool writes. 
Skips the builtin provider itself (it's the source of the write). @@ -321,7 +354,15 @@ class MemoryManager: if provider.name == "builtin": continue try: - provider.on_memory_write(action, target, content) + metadata_mode = self._provider_memory_write_metadata_mode(provider) + if metadata_mode == "keyword": + provider.on_memory_write( + action, target, content, metadata=dict(metadata or {}) + ) + elif metadata_mode == "positional": + provider.on_memory_write(action, target, content, dict(metadata or {})) + else: + provider.on_memory_write(action, target, content) except Exception as e: logger.debug( "Memory provider '%s' on_memory_write failed: %s", diff --git a/agent/memory_provider.py b/agent/memory_provider.py index 24593e334..535338f4e 100644 --- a/agent/memory_provider.py +++ b/agent/memory_provider.py @@ -26,7 +26,7 @@ Optional hooks (override to opt in): on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context on_session_end(messages) — end-of-session extraction on_pre_compress(messages) -> str — extract before context compression - on_memory_write(action, target, content) — mirror built-in memory writes + on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes on_delegation(task, result, **kwargs) — parent-side observation of subagent work """ @@ -34,7 +34,7 @@ from __future__ import annotations import logging from abc import ABC, abstractmethod -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) @@ -220,12 +220,21 @@ class MemoryProvider(ABC): should all have ``env_var`` set and this method stays no-op). """ - def on_memory_write(self, action: str, target: str, content: str) -> None: + def on_memory_write( + self, + action: str, + target: str, + content: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> None: """Called when the built-in memory tool writes an entry. 
action: 'add', 'replace', or 'remove' target: 'memory' or 'user' content: the entry content + metadata: structured provenance for the write, when available. Common + keys include ``write_origin``, ``execution_context``, ``session_id``, + ``parent_session_id``, ``platform``, and ``tool_name``. Use to mirror built-in memory writes to your backend. """ diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 8ce70da33..850e16662 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -6,6 +6,7 @@ and run_agent.py for pre-flight context checks. import ipaddress import logging +import os import re import time from pathlib import Path @@ -21,6 +22,25 @@ from hermes_constants import OPENROUTER_MODELS_URL logger = logging.getLogger(__name__) + +def _resolve_requests_verify() -> bool | str: + """Resolve SSL verify setting for `requests` calls from env vars. + + The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE + by default. Hermes also honours HERMES_CA_BUNDLE (its own convention) + and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so + that a single env var can cover both `requests` and `httpx` callsites + inside the same process. + + Returns either a filesystem path to a CA bundle, or True to defer to + the requests default (certifi). + """ + for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"): + val = os.getenv(env_var) + if val and os.path.isfile(val): + return val + return True + # Provider names that can appear as a "provider:" prefix before a model ID. # Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b") # are preserved so the full model name reaches cache lookups and server queries. @@ -123,8 +143,9 @@ DEFAULT_CONTEXT_LENGTHS = { "claude": 200000, # OpenAI — GPT-5 family (most have 400k; specific overrides first) # Source: https://developers.openai.com/api/docs/models - # GPT-5.5 (launched Apr 23 2026). 
Verified via live ChatGPT codex/models - # endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex. + # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we + # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of + # Apr 2026) and is resolved via _resolve_codex_oauth_context_length(). "gpt-5.5": 400000, "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) @@ -494,7 +515,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any return _model_metadata_cache try: - response = requests.get(OPENROUTER_MODELS_URL, timeout=10) + response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify()) response.raise_for_status() data = response.json() @@ -561,6 +582,7 @@ def fetch_endpoint_model_metadata( server_url.rstrip("/") + "/api/v1/models", headers=headers, timeout=10, + verify=_resolve_requests_verify(), ) response.raise_for_status() payload = response.json() @@ -609,7 +631,7 @@ def fetch_endpoint_model_metadata( for candidate in candidates: url = candidate.rstrip("/") + "/models" try: - response = requests.get(url, headers=headers, timeout=10) + response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify()) response.raise_for_status() payload = response.json() cache: Dict[str, Dict[str, Any]] = {} @@ -640,9 +662,10 @@ def fetch_endpoint_model_metadata( try: # Try /v1/props first (current llama.cpp); fall back to /props for older builds base = candidate.rstrip("/").replace("/v1", "") - props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5) + _verify = _resolve_requests_verify() + props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify) if not props_resp.ok: - props_resp = requests.get(base + "/props", headers=headers, timeout=5) + props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify) if 
props_resp.ok: props = props_resp.json() gen_settings = props.get("default_generation_settings", {}) @@ -714,6 +737,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]: return cache.get(key) +def _invalidate_cached_context_length(model: str, base_url: str) -> None: + """Drop a stale cache entry so it gets re-resolved on the next lookup.""" + key = f"{model}@{base_url}" + cache = _load_context_cache() + if key not in cache: + return + del cache[key] + path = _get_context_cache_path() + try: + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.dump({"context_lengths": cache}, f, default_flow_style=False) + except Exception as e: + logger.debug("Failed to invalidate context length cache entry %s: %s", key, e) + + def get_next_probe_tier(current_length: int) -> Optional[int]: """Return the next lower probe tier, or None if already at minimum.""" for tier in CONTEXT_PROBE_TIERS: @@ -991,7 +1030,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) -> "x-api-key": api_key, "anthropic-version": "2023-06-01", } - resp = requests.get(url, headers=headers, timeout=10) + resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify()) if resp.status_code != 200: return None data = resp.json() @@ -1005,6 +1044,116 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) -> return None +# Known ChatGPT Codex OAuth context windows (observed via live +# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the +# `context_window` values, which are what Codex actually enforces — the +# direct OpenAI API has larger limits for the same slugs, but Codex OAuth +# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). +# +# Used as a fallback when the live probe fails (no token, network error). +# Longest keys first so substring match picks the most specific entry. 
+_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = { + "gpt-5.1-codex-max": 272_000, + "gpt-5.1-codex-mini": 272_000, + "gpt-5.3-codex": 272_000, + "gpt-5.2-codex": 272_000, + "gpt-5.4-mini": 272_000, + "gpt-5.5": 272_000, + "gpt-5.4": 272_000, + "gpt-5.2": 272_000, + "gpt-5": 272_000, +} + + +_codex_oauth_context_cache: Dict[str, int] = {} +_codex_oauth_context_cache_time: float = 0.0 +_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour + + +def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]: + """Probe the ChatGPT Codex /models endpoint for per-slug context windows. + + Codex OAuth imposes its own context limits that differ from the direct + OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The + `context_window` field in each model entry is the authoritative source. + + Returns a ``{slug: context_window}`` dict. Empty on failure. + """ + global _codex_oauth_context_cache, _codex_oauth_context_cache_time + now = time.time() + if ( + _codex_oauth_context_cache + and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL + ): + return _codex_oauth_context_cache + + try: + resp = requests.get( + "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=10, + verify=_resolve_requests_verify(), + ) + if resp.status_code != 200: + logger.debug( + "Codex /models probe returned HTTP %s; falling back to hardcoded defaults", + resp.status_code, + ) + return {} + data = resp.json() + except Exception as exc: + logger.debug("Codex /models probe failed: %s", exc) + return {} + + entries = data.get("models", []) if isinstance(data, dict) else [] + result: Dict[str, int] = {} + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + ctx = item.get("context_window") + if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0: + result[slug.strip()] = ctx + + if result: + _codex_oauth_context_cache = result + 
_codex_oauth_context_cache_time = now + return result + + +def _resolve_codex_oauth_context_length( + model: str, access_token: str = "" +) -> Optional[int]: + """Resolve a Codex OAuth model's real context window. + + Prefers a live probe of chatgpt.com/backend-api/codex/models (when we + have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``. + """ + model_bare = _strip_provider_prefix(model).strip() + if not model_bare: + return None + + if access_token: + live = _fetch_codex_oauth_context_lengths(access_token) + if model_bare in live: + return live[model_bare] + # Case-insensitive match in case casing drifts + model_lower = model_bare.lower() + for slug, ctx in live.items(): + if slug.lower() == model_lower: + return ctx + + # Fallback: longest-key-first substring match over hardcoded defaults. + model_lower = model_bare.lower() + for slug, ctx in sorted( + _CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True + ): + if slug in model_lower: + return ctx + + return None + + def _resolve_nous_context_length(model: str) -> Optional[int]: """Resolve Nous Portal model context length via OpenRouter metadata. @@ -1050,6 +1199,7 @@ def get_model_context_length( Resolution order: 0. Explicit config override (model.context_length or custom_providers per-model) 1. Persistent cache (previously discovered via probing) + 1b. AWS Bedrock static table (must precede custom-endpoint probe) 2. Active endpoint metadata (/models for explicit custom endpoints) 3. Local server query (for local endpoints) 4. Anthropic /v1/models API (API-key users only, not OAuth) @@ -1072,7 +1222,41 @@ def get_model_context_length( if base_url: cached = get_cached_context_length(model, base_url) if cached is not None: - return cached + # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds + # resolved gpt-5.x to the direct-API value (e.g. 1.05M) via + # models.dev and persisted it. 
Codex OAuth caps at 272K for every + # slug, so any cached Codex entry at or above 400K is a leftover + # from the old resolution path. Drop it and fall through to the + # live /models probe in step 5 below. + if provider == "openai-codex" and cached >= 400_000: + logger.info( + "Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); " + "re-resolving via live /models probe", + model, base_url, f"{cached:,}", + ) + _invalidate_cached_context_length(model, base_url) + else: + return cached + + # 1b. AWS Bedrock — use static context length table. + # Bedrock's ListFoundationModels API doesn't expose context window sizes, + # so we maintain a curated table in bedrock_adapter.py that reflects + # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native + # Anthropic API). This must run BEFORE the custom-endpoint probe at + # step 2 — bedrock-runtime..amazonaws.com is not in + # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint, + # fail the /models probe (Bedrock doesn't expose that shape), and fall + # back to the 128K default before reaching the original step 4b branch. + if provider == "bedrock" or ( + base_url + and base_url_hostname(base_url).startswith("bedrock-runtime.") + and base_url_host_matches(base_url, "amazonaws.com") + ): + try: + from agent.bedrock_adapter import get_bedrock_context_length + return get_bedrock_context_length(model) + except ImportError: + pass # boto3 not installed — fall through to generic resolution # 2. Active endpoint metadata for truly custom/unknown endpoints. # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their @@ -1119,19 +1303,7 @@ def get_model_context_length( if ctx: return ctx - # 4b. AWS Bedrock — use static context length table. - # Bedrock's ListFoundationModels doesn't expose context window sizes, - # so we maintain a curated table in bedrock_adapter.py. 
- if provider == "bedrock" or ( - base_url - and base_url_hostname(base_url).startswith("bedrock-runtime.") - and base_url_host_matches(base_url, "amazonaws.com") - ): - try: - from agent.bedrock_adapter import get_bedrock_context_length - return get_bedrock_context_length(model) - except ImportError: - pass # boto3 not installed — fall through to generic resolution + # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.) # 5. Provider-aware lookups (before generic OpenRouter cache) # These are provider-specific and take priority over the generic OR cache, @@ -1145,10 +1317,32 @@ def get_model_context_length( if inferred: effective_provider = inferred + # 5a. Copilot live /models API — max_prompt_tokens from the user's account. + # This catches account-specific models (e.g. claude-opus-4.6-1m) that + # don't exist in models.dev. For models that ARE in models.dev, this + # returns the provider-enforced limit which is what users can actually use. + if effective_provider in ("copilot", "copilot-acp", "github-copilot"): + try: + from hermes_cli.models import get_copilot_model_context + ctx = get_copilot_model_context(model, api_key=api_key) + if ctx: + return ctx + except Exception: + pass # Fall through to models.dev + if effective_provider == "nous": ctx = _resolve_nous_context_length(model) if ctx: return ctx + if effective_provider == "openai-codex": + # Codex OAuth enforces lower context limits than the direct OpenAI + # API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K + # on Codex). Authoritative source is Codex's own /models endpoint. 
+ codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "") + if codex_ctx: + if base_url: + save_context_length(model, base_url, codex_ctx) + return codex_ctx if effective_provider: from agent.models_dev import lookup_models_dev_context ctx = lookup_models_dev_context(effective_provider, model) diff --git a/agent/moonshot_schema.py b/agent/moonshot_schema.py new file mode 100644 index 000000000..08585bab4 --- /dev/null +++ b/agent/moonshot_schema.py @@ -0,0 +1,190 @@ +"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset. + +Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI +tool calling. Requests that violate it fail with HTTP 400: + + tools.function.parameters is not a valid moonshot flavored json schema, + details: <...> + +Known rejection modes documented at +https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102 +and MoonshotAI/kimi-cli#1595: + +1. Every property schema must carry a ``type``. Standard JSON Schema allows + type to be omitted (the value is then unconstrained); Moonshot refuses. +2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not + the parent. Presence of both causes "type should be defined in anyOf + items instead of the parent schema". + +The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is +handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it +applies at MCP registration time for all providers. +""" + +from __future__ import annotations + +import copy +from typing import Any, Dict, List + +# Keys whose values are maps of name → schema (not schemas themselves). +# When we recurse, we walk the values of these maps as schemas, but we do +# NOT apply the missing-type repair to the map itself. +_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"}) + +# Keys whose values are lists of schemas. 
+_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"}) + +# Keys whose values are a single nested schema. +_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"}) + + +def _repair_schema(node: Any, is_schema: bool = True) -> Any: + """Recursively apply Moonshot repairs to a schema node. + + ``is_schema=True`` means this dict is a JSON Schema node and gets the + missing-type + anyOf-parent repairs applied. ``is_schema=False`` means + it's a container map (e.g. the value of ``properties``) and we only + recurse into its values. + """ + if isinstance(node, list): + # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so + # every element is itself a schema. + return [_repair_schema(item, is_schema=True) for item in node] + if not isinstance(node, dict): + return node + + # Walk the dict, deciding per-key whether recursion is into a schema + # node, a container map, or a scalar. + repaired: Dict[str, Any] = {} + for key, value in node.items(): + if key in _SCHEMA_MAP_KEYS and isinstance(value, dict): + # Map of name → schema. Don't treat the map itself as a schema + # (it has no type / properties of its own), but each value is. + repaired[key] = { + sub_key: _repair_schema(sub_val, is_schema=True) + for sub_key, sub_val in value.items() + } + elif key in _SCHEMA_LIST_KEYS and isinstance(value, list): + repaired[key] = [_repair_schema(v, is_schema=True) for v in value] + elif key in _SCHEMA_NODE_KEYS: + # items / not / additionalProperties: single nested schema. + # additionalProperties can also be a bool — leave those alone. + if isinstance(value, dict): + repaired[key] = _repair_schema(value, is_schema=True) + else: + repaired[key] = value + else: + # Scalars (description, title, format, enum values, etc.) pass through. + repaired[key] = value + + if not is_schema: + return repaired + + # Rule 2: when anyOf is present, type belongs only on the children. 
+ if "anyOf" in repaired and isinstance(repaired["anyOf"], list): + repaired.pop("type", None) + return repaired + + # Rule 1: property schemas without type need one. $ref nodes are exempt + # — their type comes from the referenced definition. + if "$ref" in repaired: + return repaired + return _fill_missing_type(repaired) + + +def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]: + """Infer a reasonable ``type`` if this schema node has none.""" + if "type" in node and node["type"] not in (None, ""): + return node + + # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum`` + # → type of first enum value, else fall back to ``string`` (safest scalar). + if "properties" in node or "required" in node or "additionalProperties" in node: + inferred = "object" + elif "items" in node or "prefixItems" in node: + inferred = "array" + elif "enum" in node and isinstance(node["enum"], list) and node["enum"]: + sample = node["enum"][0] + if isinstance(sample, bool): + inferred = "boolean" + elif isinstance(sample, int): + inferred = "integer" + elif isinstance(sample, float): + inferred = "number" + else: + inferred = "string" + else: + inferred = "string" + + return {**node, "type": inferred} + + +def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]: + """Normalize tool parameters to a Moonshot-compatible object schema. + + Returns a deep-copied schema with the two flavored-JSON-Schema repairs + applied. Input is not mutated. 
+ """ + if not isinstance(parameters, dict): + return {"type": "object", "properties": {}} + + repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True) + if not isinstance(repaired, dict): + return {"type": "object", "properties": {}} + + # Top-level must be an object schema + if repaired.get("type") != "object": + repaired["type"] = "object" + if "properties" not in repaired: + repaired["properties"] = {} + + return repaired + + +def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.""" + if not tools: + return tools + + sanitized: List[Dict[str, Any]] = [] + any_change = False + for tool in tools: + if not isinstance(tool, dict): + sanitized.append(tool) + continue + fn = tool.get("function") + if not isinstance(fn, dict): + sanitized.append(tool) + continue + params = fn.get("parameters") + repaired = sanitize_moonshot_tool_parameters(params) + if repaired is not params: + any_change = True + new_fn = {**fn, "parameters": repaired} + sanitized.append({**tool, "function": new_fn}) + else: + sanitized.append(tool) + + return sanitized if any_change else tools + + +def is_moonshot_model(model: str | None) -> bool: + """True for any Kimi / Moonshot model slug, regardless of aggregator prefix. + + Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator- + prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``). + Detection by model name covers Nous / OpenRouter / other aggregators that + route to Moonshot's inference, where the base URL is the aggregator's, not + ``api.moonshot.ai``. 
+ """ + if not model: + return False + bare = model.strip().lower() + # Last path segment (covers aggregator-prefixed slugs) + tail = bare.rsplit("/", 1)[-1] + if tail.startswith("kimi-") or tail == "kimi": + return True + # Vendor-prefixed forms commonly used on aggregators + if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"): + return True + return False diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 9c130ab84..6b73e83b3 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -1,154 +1,29 @@ -"""Shared slash command helpers for skills and built-in prompt-style modes. +"""Shared slash command helpers for skills. Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces -can invoke skills via /skill-name commands and prompt-only built-ins like -/plan. +can invoke skills via /skill-name commands. """ import json import logging import re -import subprocess -from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional from hermes_constants import display_hermes_home +from agent.skill_preprocessing import ( + expand_inline_shell as _expand_inline_shell, + load_skills_config as _load_skills_config, + substitute_template_vars as _substitute_template_vars, +) logger = logging.getLogger(__name__) _skill_commands: Dict[str, Dict[str, Any]] = {} -_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") # Patterns for sanitizing skill names into clean hyphen-separated slugs. _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") -# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. -# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are -# left as-is so the user can debug them. -_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") - -# Matches inline shell snippets like: !`date +%Y-%m-%d` -# Non-greedy, single-line only — no newlines inside the backticks. 
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") - -# Cap inline-shell output so a runaway command can't blow out the context. -_INLINE_SHELL_MAX_OUTPUT = 4000 - - -def _load_skills_config() -> dict: - """Load the ``skills`` section of config.yaml (best-effort).""" - try: - from hermes_cli.config import load_config - - cfg = load_config() or {} - skills_cfg = cfg.get("skills") - if isinstance(skills_cfg, dict): - return skills_cfg - except Exception: - logger.debug("Could not read skills config", exc_info=True) - return {} - - -def _substitute_template_vars( - content: str, - skill_dir: Path | None, - session_id: str | None, -) -> str: - """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. - - Only substitutes tokens for which a concrete value is available — - unresolved tokens are left in place so the author can spot them. - """ - if not content: - return content - - skill_dir_str = str(skill_dir) if skill_dir else None - - def _replace(match: re.Match) -> str: - token = match.group(1) - if token == "HERMES_SKILL_DIR" and skill_dir_str: - return skill_dir_str - if token == "HERMES_SESSION_ID" and session_id: - return str(session_id) - return match.group(0) - - return _SKILL_TEMPLATE_RE.sub(_replace, content) - - -def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: - """Execute a single inline-shell snippet and return its stdout (trimmed). - - Failures return a short ``[inline-shell error: ...]`` marker instead of - raising, so one bad snippet can't wreck the whole skill message. 
- """ - try: - completed = subprocess.run( - ["bash", "-c", command], - cwd=str(cwd) if cwd else None, - capture_output=True, - text=True, - timeout=max(1, int(timeout)), - check=False, - ) - except subprocess.TimeoutExpired: - return f"[inline-shell timeout after {timeout}s: {command}]" - except FileNotFoundError: - return f"[inline-shell error: bash not found]" - except Exception as exc: - return f"[inline-shell error: {exc}]" - - output = (completed.stdout or "").rstrip("\n") - if not output and completed.stderr: - output = completed.stderr.rstrip("\n") - if len(output) > _INLINE_SHELL_MAX_OUTPUT: - output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]" - return output - - -def _expand_inline_shell( - content: str, - skill_dir: Path | None, - timeout: int, -) -> str: - """Replace every !`cmd` snippet in ``content`` with its stdout. - - Runs each snippet with the skill directory as CWD so relative paths in - the snippet work the way the author expects. - """ - if "!`" not in content: - return content - - def _replace(match: re.Match) -> str: - cmd = match.group(1).strip() - if not cmd: - return "" - return _run_inline_shell(cmd, skill_dir, timeout) - - return _INLINE_SHELL_RE.sub(_replace, content) - - -def build_plan_path( - user_instruction: str = "", - *, - now: datetime | None = None, -) -> Path: - """Return the default workspace-relative markdown path for a /plan invocation. - - Relative paths are intentional: file tools are task/backend-aware and resolve - them against the active working directory for local, docker, ssh, modal, - daytona, and similar terminal backends. That keeps the plan with the active - workspace instead of the Hermes host's global home directory. 
- """ - slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else "" - slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-") - if slug: - slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-") - slug = slug or "conversation-plan" - timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S") - return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md" - - def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None: """Load a skill by name/path and return (loaded_payload, skill_dir, display_name).""" raw_identifier = (skill_identifier or "").strip() @@ -167,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu else: normalized = raw_identifier.lstrip("/") - loaded_skill = json.loads(skill_view(normalized, task_id=task_id)) + loaded_skill = json.loads( + skill_view(normalized, task_id=task_id, preprocess=False) + ) except Exception: return None diff --git a/agent/skill_preprocessing.py b/agent/skill_preprocessing.py new file mode 100644 index 000000000..b95d1ddda --- /dev/null +++ b/agent/skill_preprocessing.py @@ -0,0 +1,131 @@ +"""Shared SKILL.md preprocessing helpers.""" + +import logging +import re +import subprocess +from pathlib import Path + +logger = logging.getLogger(__name__) + +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only -- no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. 
+_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. + + Only substitutes tokens for which a concrete value is available -- + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). + + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. 
+ """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return "[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]" + return output + + +def expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. + """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + + +def preprocess_skill_content( + content: str, + skill_dir: Path | None, + session_id: str | None = None, + skills_cfg: dict | None = None, +) -> str: + """Apply configured SKILL.md template and inline-shell preprocessing.""" + if not content: + return content + + cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config() + if cfg.get("template_vars", True): + content = substitute_template_vars(content, skill_dir, session_id) + if cfg.get("inline_shell", False): + timeout = int(cfg.get("inline_shell_timeout", 10) or 10) + content = expand_inline_shell(content, skill_dir, timeout) + return content diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 900f59dcf..1cccf7e92 100644 --- 
a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly. import copy from typing import Any, Dict, List, Optional +from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools from agent.prompt_builder import DEVELOPER_ROLE_MODELS from agent.transports.base import ProviderTransport from agent.transports.types import NormalizedResponse, ToolCall, Usage @@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport): # Tools if tools: + # Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting + # tool parameters here keeps aggregator routes (Nous, OpenRouter, + # etc.) compatible, in addition to direct moonshot.ai endpoints. + if is_moonshot_model(model): + tools = sanitize_moonshot_tools(tools) api_kwargs["tools"] = tools # max_tokens resolution — priority: ephemeral > user > provider default diff --git a/batch_runner.py b/batch_runner.py index 7413ad59f..f3aaefa3d 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -951,13 +951,9 @@ class BatchRunner: root_logger.setLevel(original_level) # Aggregate all batch statistics and update checkpoint - all_completed_prompts = list(completed_prompts_set) total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0} - + for batch_result in results: - # Add newly completed prompts - all_completed_prompts.extend(batch_result.get("completed_prompts", [])) - # Aggregate tool stats for tool_name, stats in batch_result.get("tool_stats", {}).items(): if tool_name not in total_tool_stats: @@ -977,7 +973,7 @@ class BatchRunner: # Save final checkpoint (best-effort; incremental writes already happened) try: - checkpoint_data["completed_prompts"] = all_completed_prompts + checkpoint_data["completed_prompts"] = sorted(completed_prompts_set) self._save_checkpoint(checkpoint_data, lock=checkpoint_lock) except Exception as ckpt_err: print(f"⚠️ 
Warning: Failed to save final checkpoint: {ckpt_err}") diff --git a/cli-config.yaml.example b/cli-config.yaml.example index bd63901e1..977acbe92 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -326,6 +326,16 @@ compression: # To pin a specific model/provider for compression summaries, use the # auxiliary section below (auxiliary.compression.provider / model). +# ============================================================================= +# Anthropic prompt caching TTL +# ============================================================================= +# When prompt caching is active (Claude via OpenRouter or native Anthropic), +# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and +# "1h". Other values are ignored and "5m" is used. +# +prompt_caching: + cache_ttl: "5m" # use "1h" for long sessions with pauses between turns + # ============================================================================= # Auxiliary Models (Advanced — Experimental) # ============================================================================= diff --git a/cli.py b/cli.py index a289e3ab2..abd4d2391 100644 --- a/cli.py +++ b/cli.py @@ -1688,7 +1688,6 @@ def _looks_like_slash_command(text: str) -> bool: from agent.skill_commands import ( scan_skill_commands, build_skill_invocation_message, - build_plan_path, build_preloaded_skills_prompt, ) @@ -3084,6 +3083,8 @@ class HermesCLI: format_runtime_provider_error, ) + _primary_exc = None + runtime = None try: runtime = resolve_runtime_provider( requested=self.requested_provider, @@ -3091,7 +3092,34 @@ class HermesCLI: explicit_base_url=self._explicit_base_url, ) except Exception as exc: - message = format_runtime_provider_error(exc) + _primary_exc = exc + + # Primary provider auth failed — try fallback providers before giving up. 
+ if runtime is None and _primary_exc is not None: + from hermes_cli.auth import AuthError + if isinstance(_primary_exc, AuthError): + _fb_chain = self._fallback_model if isinstance(self._fallback_model, list) else [] + for _fb in _fb_chain: + _fb_provider = (_fb.get("provider") or "").strip().lower() + _fb_model = (_fb.get("model") or "").strip() + if not _fb_provider or not _fb_model: + continue + try: + runtime = resolve_runtime_provider(requested=_fb_provider) + logger.warning( + "Primary provider auth failed (%s). Falling through to fallback: %s/%s", + _primary_exc, _fb_provider, _fb_model, + ) + _cprint(f"⚠️ Primary auth failed — switching to fallback: {_fb_provider} / {_fb_model}") + self.requested_provider = _fb_provider + self.model = _fb_model + _primary_exc = None + break + except Exception: + continue + + if runtime is None: + message = format_runtime_provider_error(_primary_exc) if _primary_exc else "Provider resolution failed." ChatConsole().print(f"[bold red]{message}[/]") return False @@ -3254,6 +3282,23 @@ class HermesCLI: _cprint(f"\033[1;31mSession not found: {self.session_id}{_RST}") _cprint(f"{_DIM}Use a session ID from a previous CLI run (hermes sessions list).{_RST}") return False + # If the requested session is the (empty) head of a compression + # chain, walk to the descendant that actually holds the messages. + # See #15000 and SessionDB.resolve_resume_session_id. 
+ try: + resolved_id = self._session_db.resolve_resume_session_id(self.session_id) + except Exception: + resolved_id = self.session_id + if resolved_id and resolved_id != self.session_id: + ChatConsole().print( + f"[{_DIM}]Session {_escape(self.session_id)} was compressed into " + f"{_escape(resolved_id)}; resuming the descendant with your " + f"transcript.[/]" + ) + self.session_id = resolved_id + resolved_meta = self._session_db.get_session(self.session_id) + if resolved_meta: + session_meta = resolved_meta restored = self._session_db.get_messages_as_conversation(self.session_id) if restored: restored = [m for m in restored if m.get("role") != "session_meta"] @@ -3472,6 +3517,22 @@ class HermesCLI: ) return False + # If the requested session is the (empty) head of a compression chain, + # walk to the descendant that actually holds the messages. See #15000. + try: + resolved_id = self._session_db.resolve_resume_session_id(self.session_id) + except Exception: + resolved_id = self.session_id + if resolved_id and resolved_id != self.session_id: + self._console_print( + f"[dim]Session {self.session_id} was compressed into " + f"{resolved_id}; resuming the descendant with your transcript.[/]" + ) + self.session_id = resolved_id + resolved_meta = self._session_db.get_session(self.session_id) + if resolved_meta: + session_meta = resolved_meta + restored = self._session_db.get_messages_as_conversation(self.session_id) if restored: restored = [m for m in restored if m.get("role") != "session_meta"] @@ -4686,6 +4747,22 @@ class HermesCLI: _cprint(" Use /history or `hermes sessions list` to see available sessions.") return + # If the target is the empty head of a compression chain, redirect to + # the descendant that actually holds the transcript. See #15000. 
+ try: + resolved_id = self._session_db.resolve_resume_session_id(target_id) + except Exception: + resolved_id = target_id + if resolved_id and resolved_id != target_id: + _cprint( + f" Session {target_id} was compressed into {resolved_id}; " + f"resuming the descendant with your transcript." + ) + target_id = resolved_id + resolved_meta = self._session_db.get_session(target_id) + if resolved_meta: + session_meta = resolved_meta + if target_id == self.session_id: _cprint(" Already on that session.") return @@ -5297,29 +5374,26 @@ class HermesCLI: _cprint(f" ✓ Model switched: {result.new_model}") _cprint(f" Provider: {provider_label}") - # Rich metadata from models.dev + # Context: always resolve via the provider-aware chain so Codex OAuth, + # Copilot, and Nous-enforced caps win over the raw models.dev entry + # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth). mi = result.model_info + from hermes_cli.model_switch import resolve_display_context_length + ctx = resolve_display_context_length( + result.new_model, + result.target_provider, + base_url=result.base_url or self.base_url or "", + api_key=result.api_key or self.api_key or "", + model_info=mi, + ) + if ctx: + _cprint(f" Context: {ctx:,} tokens") if mi: - if mi.context_window: - _cprint(f" Context: {mi.context_window:,} tokens") if mi.max_output: _cprint(f" Max output: {mi.max_output:,} tokens") if mi.has_cost_data(): _cprint(f" Cost: {mi.format_cost()}") _cprint(f" Capabilities: {mi.format_capabilities()}") - else: - # Fallback to old context length lookup - try: - from agent.model_metadata import get_model_context_length - ctx = get_model_context_length( - result.new_model, - base_url=result.base_url or self.base_url, - api_key=result.api_key or self.api_key, - provider=result.target_provider, - ) - _cprint(f" Context: {ctx:,} tokens") - except Exception: - pass # Cache notice cache_enabled = ( @@ -5378,79 +5452,6 @@ class HermesCLI: except Exception: return False - def 
_show_model_and_providers(self): - """Show current model + provider and list all authenticated providers. - - Shows current model + provider, then lists all authenticated - providers with their available models. - """ - from hermes_cli.models import ( - curated_models_for_provider, list_available_providers, - normalize_provider, _PROVIDER_LABELS, - get_pricing_for_provider, format_model_pricing_table, - ) - from hermes_cli.auth import resolve_provider as _resolve_provider - - # Resolve current provider - raw_provider = normalize_provider(self.provider) - if raw_provider == "auto": - try: - current = _resolve_provider( - self.requested_provider, - explicit_api_key=self._explicit_api_key, - explicit_base_url=self._explicit_base_url, - ) - except Exception: - current = "openrouter" - else: - current = raw_provider - current_label = _PROVIDER_LABELS.get(current, current) - - print(f"\n Current: {self.model} via {current_label}") - print() - - # Show all authenticated providers with their models - providers = list_available_providers() - authed = [p for p in providers if p["authenticated"]] - unauthed = [p for p in providers if not p["authenticated"]] - - if authed: - print(" Authenticated providers & models:") - for p in authed: - is_active = p["id"] == current - marker = " ← active" if is_active else "" - print(f" [{p['id']}]{marker}") - curated = curated_models_for_provider(p["id"]) - # Fetch pricing for providers that support it (openrouter, nous) - pricing_map = get_pricing_for_provider(p["id"]) if p["id"] in ("openrouter", "nous") else {} - if curated and pricing_map: - cur_model = self.model if is_active else "" - for line in format_model_pricing_table(curated, pricing_map, current_model=cur_model): - print(line) - elif curated: - for mid, desc in curated: - current_marker = " ← current" if (is_active and mid == self.model) else "" - print(f" {mid}{current_marker}") - elif p["id"] == "custom": - from hermes_cli.models import _get_custom_base_url - custom_url = 
_get_custom_base_url() - if custom_url: - print(f" endpoint: {custom_url}") - if is_active: - print(f" model: {self.model} ← current") - print(" (use hermes model to change)") - else: - print(" (use hermes model to change)") - print() - - if unauthed: - names = ", ".join(p["label"] for p in unauthed) - print(f" Not configured: {names}") - print(" Run: hermes setup") - print() - - print(" To change model or provider, use: hermes model") - def _output_console(self): """Use prompt_toolkit-safe Rich rendering once the TUI is live.""" if getattr(self, "_app", None): @@ -6026,16 +6027,12 @@ class HermesCLI: self._handle_resume_command(cmd_original) elif canonical == "model": self._handle_model_switch(cmd_original) - elif canonical == "provider": - self._show_model_and_providers() elif canonical == "gquota": self._handle_gquota_command(cmd_original) elif canonical == "personality": # Use original case (handler lowercases the personality name itself) self._handle_personality_command(cmd_original) - elif canonical == "plan": - self._handle_plan_command(cmd_original) elif canonical == "retry": retry_msg = self.retry_last() if retry_msg and hasattr(self, '_pending_input'): @@ -6165,6 +6162,8 @@ class HermesCLI: self._handle_skin_command(cmd_original) elif canonical == "voice": self._handle_voice_command(cmd_original) + elif canonical == "busy": + self._handle_busy_command(cmd_original) else: # Check for user-defined quick commands (bypass agent loop, no LLM call) base_cmd = cmd_lower.split()[0] @@ -6270,32 +6269,6 @@ class HermesCLI: return True - def _handle_plan_command(self, cmd: str): - """Handle /plan [request] — load the bundled plan skill.""" - parts = cmd.strip().split(maxsplit=1) - user_instruction = parts[1].strip() if len(parts) > 1 else "" - - plan_path = build_plan_path(user_instruction) - msg = build_skill_invocation_message( - "/plan", - user_instruction, - task_id=self.session_id, - runtime_note=( - "Save the markdown plan with write_file to this exact 
relative path " - f"inside the active workspace/backend cwd: {plan_path}" - ), - ) - - if not msg: - ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]") - return - - _cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}") - if hasattr(self, '_pending_input'): - self._pending_input.put(msg) - else: - ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]") - def _handle_background_command(self, cmd: str): """Handle /background — run a prompt in a separate background session. @@ -6685,6 +6658,13 @@ class HermesCLI: print(f" ⚠ Port {_port} is not reachable at {cdp_url}") os.environ["BROWSER_CDP_URL"] = cdp_url + # Eagerly start the CDP supervisor so pending_dialogs + frame_tree + # show up in the next browser_snapshot. No-op if already started. + try: + from tools.browser_tool import _ensure_cdp_supervisor # type: ignore[import-not-found] + _ensure_cdp_supervisor("default") + except Exception: + pass print() print("🌐 Browser connected to live Chrome via CDP") print(f" Endpoint: {cdp_url}") @@ -6706,7 +6686,8 @@ class HermesCLI: if current: os.environ.pop("BROWSER_CDP_URL", None) try: - from tools.browser_tool import cleanup_all_browsers + from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor + _stop_cdp_supervisor("default") cleanup_all_browsers() except Exception: pass @@ -6919,6 +6900,36 @@ class HermesCLI: else: _cprint(f" {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}") + def _handle_busy_command(self, cmd: str): + """Handle /busy — control what Enter does while Hermes is working. 
+ + Usage: + /busy Show current busy input mode + /busy status Show current busy input mode + /busy queue Queue input for the next turn instead of interrupting + /busy interrupt Interrupt the current run on Enter (default) + """ + parts = cmd.strip().split(maxsplit=1) + if len(parts) < 2 or parts[1].strip().lower() == "status": + _cprint(f" {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}") + _cprint(f" {_DIM}Enter while busy: {'queues for next turn' if self.busy_input_mode == 'queue' else 'interrupts current run'}{_RST}") + _cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}") + return + + arg = parts[1].strip().lower() + if arg not in {"queue", "interrupt"}: + _cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}") + _cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}") + return + + self.busy_input_mode = arg + if save_config_value("display.busy_input_mode", arg): + behavior = "Enter will queue follow-up input while Hermes is busy." if arg == "queue" else "Enter will interrupt the current run while Hermes is busy." 
+ _cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (saved to config){_RST}") + _cprint(f" {_DIM}{behavior}{_RST}") + else: + _cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (session only){_RST}") + def _handle_fast_command(self, cmd: str): """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode).""" if not self._fast_command_available(): @@ -6997,51 +7008,52 @@ class HermesCLI: focus_topic = parts[1].strip() original_count = len(self.conversation_history) - try: - from agent.model_metadata import estimate_messages_tokens_rough - from agent.manual_compression_feedback import summarize_manual_compression - original_history = list(self.conversation_history) - approx_tokens = estimate_messages_tokens_rough(original_history) - if focus_topic: - print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), " - f"focus: \"{focus_topic}\"...") - else: - print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") + with self._busy_command("Compressing context..."): + try: + from agent.model_metadata import estimate_messages_tokens_rough + from agent.manual_compression_feedback import summarize_manual_compression + original_history = list(self.conversation_history) + approx_tokens = estimate_messages_tokens_rough(original_history) + if focus_topic: + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), " + f"focus: \"{focus_topic}\"...") + else: + print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...") - compressed, _ = self.agent._compress_context( - original_history, - self.agent._cached_system_prompt or "", - approx_tokens=approx_tokens, - focus_topic=focus_topic or None, - ) - self.conversation_history = compressed - # _compress_context ends the old session and creates a new child - # session on the agent (run_agent.py::_compress_context). 
Sync the - # CLI's session_id so /status, /resume, exit summary, and title - # generation all point at the live continuation session, not the - # ended parent. Without this, subsequent end_session() calls target - # the already-closed parent and the child is orphaned. - if ( - getattr(self.agent, "session_id", None) - and self.agent.session_id != self.session_id - ): - self.session_id = self.agent.session_id - self._pending_title = None - new_tokens = estimate_messages_tokens_rough(self.conversation_history) - summary = summarize_manual_compression( - original_history, - self.conversation_history, - approx_tokens, - new_tokens, - ) - icon = "🗜️" if summary["noop"] else "✅" - print(f" {icon} {summary['headline']}") - print(f" {summary['token_line']}") - if summary["note"]: - print(f" {summary['note']}") + compressed, _ = self.agent._compress_context( + original_history, + self.agent._cached_system_prompt or "", + approx_tokens=approx_tokens, + focus_topic=focus_topic or None, + ) + self.conversation_history = compressed + # _compress_context ends the old session and creates a new child + # session on the agent (run_agent.py::_compress_context). Sync the + # CLI's session_id so /status, /resume, exit summary, and title + # generation all point at the live continuation session, not the + # ended parent. Without this, subsequent end_session() calls target + # the already-closed parent and the child is orphaned. 
+ if ( + getattr(self.agent, "session_id", None) + and self.agent.session_id != self.session_id + ): + self.session_id = self.agent.session_id + self._pending_title = None + new_tokens = estimate_messages_tokens_rough(self.conversation_history) + summary = summarize_manual_compression( + original_history, + self.conversation_history, + approx_tokens, + new_tokens, + ) + icon = "🗜️" if summary["noop"] else "✅" + print(f" {icon} {summary['headline']}") + print(f" {summary['token_line']}") + if summary["note"]: + print(f" {summary['note']}") - except Exception as e: - print(f" ❌ Compression failed: {e}") + except Exception as e: + print(f" ❌ Compression failed: {e}") def _handle_debug_command(self): """Handle /debug — upload debug report + logs and print paste URLs.""" @@ -9543,9 +9555,20 @@ class HermesCLI: @kb.add('c-d') def handle_ctrl_d(event): - """Handle Ctrl+D - exit.""" - self._should_exit = True - event.app.exit() + """Ctrl+D: delete char under cursor (standard readline behaviour). + Only exit when the input is empty — same as bash/zsh. Pending + attached images count as input and block the EOF-exit so the + user doesn't lose them silently. + """ + buf = event.app.current_buffer + if buf.text: + buf.delete() + elif self._attached_images: + # Empty text but pending attachments — no-op, don't exit. + return + else: + self._should_exit = True + event.app.exit() _modal_prompt_active = Condition( lambda: bool(self._secret_state or self._sudo_state) diff --git a/cron/jobs.py b/cron/jobs.py index 4d34b1534..158f53654 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -371,6 +371,39 @@ def save_jobs(jobs: List[Dict[str, Any]]): raise +def _normalize_workdir(workdir: Optional[str]) -> Optional[str]: + """Normalize and validate a cron job workdir. + + Rules: + - Empty / None → None (feature off, preserves old behaviour). + - ``~`` is expanded. Relative paths are rejected — cron jobs run detached + from any shell cwd, so relative paths have no stable meaning. 
+ - The path must exist and be a directory at create/update time. We do + NOT re-check at run time (a user might briefly unmount the dir; the + scheduler will just fall back to old behaviour with a logged warning). + + Returns the absolute path string, or None when disabled. + Raises ValueError on invalid input. + """ + if workdir is None: + return None + raw = str(workdir).strip() + if not raw: + return None + expanded = Path(raw).expanduser() + if not expanded.is_absolute(): + raise ValueError( + f"Cron workdir must be an absolute path (got {raw!r}). " + f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous." + ) + resolved = expanded.resolve() + if not resolved.exists(): + raise ValueError(f"Cron workdir does not exist: {resolved}") + if not resolved.is_dir(): + raise ValueError(f"Cron workdir is not a directory: {resolved}") + return str(resolved) + + def create_job( prompt: str, schedule: str, @@ -385,6 +418,7 @@ def create_job( base_url: Optional[str] = None, script: Optional[str] = None, enabled_toolsets: Optional[List[str]] = None, + workdir: Optional[str] = None, ) -> Dict[str, Any]: """ Create a new cron job. @@ -407,6 +441,12 @@ def create_job( enabled_toolsets: Optional list of toolset names to restrict the agent to. When set, only tools from these toolsets are loaded, reducing token overhead. When omitted, all default tools are loaded. + workdir: Optional absolute path. When set, the job runs as if launched + from that directory: AGENTS.md / CLAUDE.md / .cursorrules from + that directory are injected into the system prompt, and the + terminal/file/code_exec tools use it as their working directory + (via TERMINAL_CWD). When unset, the old behaviour is preserved + (no context files injected, tools use the scheduler's cwd). 
Returns: The created job dict @@ -439,6 +479,7 @@ def create_job( normalized_script = normalized_script or None normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None normalized_toolsets = normalized_toolsets or None + normalized_workdir = _normalize_workdir(workdir) label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job" job = { @@ -471,6 +512,7 @@ def create_job( "deliver": deliver, "origin": origin, # Tracks where job was created for "origin" delivery "enabled_toolsets": normalized_toolsets, + "workdir": normalized_workdir, } jobs = load_jobs() @@ -504,6 +546,15 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]] if job["id"] != job_id: continue + # Validate / normalize workdir if present in updates. Empty string or + # None both mean "clear the field" (restore old behaviour). + if "workdir" in updates: + _wd = updates["workdir"] + if _wd in (None, "", False): + updates["workdir"] = None + else: + updates["workdir"] = _normalize_workdir(_wd) + updated = _apply_skill_fields({**job, **updates}) schedule_changed = "schedule" in updates diff --git a/cron/scheduler.py b/cron/scheduler.py index 979770374..3dbb54c7d 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now logger = logging.getLogger(__name__) + +def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None: + """Resolve the toolset list for a cron job. + + Precedence: + 1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update). + Keeps the agent's job-scoped toolset override intact — #6130. + 2. Per-platform ``hermes tools`` config for the ``cron`` platform. + Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``) + so users can gate cron toolsets globally without recreating every job. + 3. 
``None`` on any lookup failure — AIAgent loads the full default set + (legacy behavior before this change, preserved as the safety net). + + _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by + ``_get_platform_tools`` for unconfigured platforms, so fresh installs + get cron WITHOUT ``moa`` by default (issue reported by Norbert — + surprise $4.63 run). + """ + per_job = job.get("enabled_toolsets") + if per_job: + return per_job + try: + from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load + return sorted(_get_platform_tools(cfg or {}, "cron")) + except Exception as exc: + logger.warning( + "Cron toolset resolution failed, falling back to full default toolset: %s", + exc, + ) + return None + # Valid delivery platforms — used to validate user-supplied platform names # in cron delivery targets, preventing env var enumeration via crafted names. _KNOWN_DELIVERY_PLATFORMS = frozenset({ @@ -764,6 +795,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: chat_name=origin.get("chat_name", "") if origin else "", ) + # Per-job working directory. When set (and validated at create/update + # time), we point TERMINAL_CWD at it so: + # - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md / + # .cursorrules from the job's project dir, AND + # - the terminal, file, and code-exec tools run commands from there. + # + # tick() serializes workdir-jobs outside the parallel pool, so mutating + # os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less + # jobs we leave TERMINAL_CWD untouched — preserves the original behaviour + # (skip_context_files=True, tools use whatever cwd the scheduler has). + _job_workdir = (job.get("workdir") or "").strip() or None + if _job_workdir and not Path(_job_workdir).is_dir(): + # Directory was removed between create-time validation and now. Log + # and drop back to old behaviour rather than crashing the job. 
+ logger.warning( + "Job '%s': configured workdir %r no longer exists — running without it", + job_id, _job_workdir, + ) + _job_workdir = None + _prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_") + if _job_workdir: + os.environ["TERMINAL_CWD"] = _job_workdir + logger.info("Job '%s': using workdir %s", job_id, _job_workdir) + try: # Re-read .env and config.yaml fresh every run so provider/key # changes take effect without a gateway restart. @@ -840,6 +895,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: resolve_runtime_provider, format_runtime_provider_error, ) + from hermes_cli.auth import AuthError try: runtime_kwargs = { "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"), @@ -847,6 +903,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: if job.get("base_url"): runtime_kwargs["explicit_base_url"] = job.get("base_url") runtime = resolve_runtime_provider(**runtime_kwargs) + except AuthError as auth_exc: + # Primary provider auth failed — try fallback chain before giving up. 
+ logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc) + fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model") + fb_list = (fb if isinstance(fb, list) else [fb]) if fb else [] + runtime = None + for entry in fb_list: + if not isinstance(entry, dict): + continue + try: + fb_kwargs = {"requested": entry.get("provider")} + if entry.get("base_url"): + fb_kwargs["explicit_base_url"] = entry["base_url"] + if entry.get("api_key"): + fb_kwargs["explicit_api_key"] = entry["api_key"] + runtime = resolve_runtime_provider(**fb_kwargs) + logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider")) + break + except Exception as fb_exc: + logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc) + if runtime is None: + raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc except Exception as exc: message = format_runtime_provider_error(exc) raise RuntimeError(message) from exc @@ -886,10 +964,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), provider_sort=pr.get("sort"), - enabled_toolsets=job.get("enabled_toolsets") or None, + enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg), disabled_toolsets=["cronjob", "messaging", "clarify"], quiet_mode=True, - skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd + # When a workdir is configured, inject AGENTS.md / CLAUDE.md / + # .cursorrules from that directory; otherwise preserve the old + # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd). 
+ skip_context_files=not bool(_job_workdir), skip_memory=True, # Cron system prompts would corrupt user representations platform="cron", session_id=_cron_session_id, @@ -1028,6 +1109,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: return False, output, "", error_msg finally: + # Restore TERMINAL_CWD to whatever it was before this job ran. We + # only ever mutate it when the job has a workdir; see the setup block + # at the top of run_job for the serialization guarantee. + if _job_workdir: + if _prior_terminal_cwd == "_UNSET_": + os.environ.pop("TERMINAL_CWD", None) + else: + os.environ["TERMINAL_CWD"] = _prior_terminal_cwd # Clean up ContextVar session/delivery state for this job. clear_session_vars(_ctx_tokens) if _session_db: @@ -1155,14 +1244,28 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: mark_job_run(job["id"], False, str(e)) return False - # Run all due jobs concurrently, each in its own ContextVar copy - # so session/delivery state stays isolated per-thread. - with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool: - _futures = [] - for job in due_jobs: - _ctx = contextvars.copy_context() - _futures.append(_tick_pool.submit(_ctx.run, _process_job, job)) - _results = [f.result() for f in _futures] + # Partition due jobs: those with a per-job workdir mutate + # os.environ["TERMINAL_CWD"] inside run_job, which is process-global — + # so they MUST run sequentially to avoid corrupting each other. Jobs + # without a workdir leave env untouched and stay parallel-safe. + workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()] + parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()] + + _results: list = [] + + # Sequential pass for workdir jobs. + for job in workdir_jobs: + _ctx = contextvars.copy_context() + _results.append(_ctx.run(_process_job, job)) + + # Parallel pass for the rest — same behaviour as before. 
+ if parallel_jobs: + with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool: + _futures = [] + for job in parallel_jobs: + _ctx = contextvars.copy_context() + _futures.append(_tick_pool.submit(_ctx.run, _process_job, job)) + _results.extend(f.result() for f in _futures) return sum(_results) finally: diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..a0fe1a100 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,52 @@ +# +# docker-compose.yml for Hermes Agent +# +# Usage: +# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d +# +# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so +# files created inside the container stay readable/writable on the host. +# The entrypoint remaps the internal `hermes` user to these values via +# usermod/groupmod + gosu. +# +# Security notes: +# - The dashboard service binds to 127.0.0.1 by default. It stores API +# keys; exposing it on LAN without auth is unsafe. If you want remote +# access, use an SSH tunnel or put it behind a reverse proxy that +# adds authentication — do NOT pass --insecure --host 0.0.0.0. +# - The gateway's API server is off unless you uncomment API_SERVER_KEY +# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing +# this on an internet-facing host. +# +services: + gateway: + build: . 
+ image: hermes-agent + container_name: hermes + restart: unless-stopped + network_mode: host + volumes: + - ~/.hermes:/opt/data + environment: + - HERMES_UID=${HERMES_UID:-10000} + - HERMES_GID=${HERMES_GID:-10000} + # To expose the OpenAI-compatible API server beyond localhost, + # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth): + # - API_SERVER_HOST=0.0.0.0 + # - API_SERVER_KEY=${API_SERVER_KEY} + command: ["gateway", "run"] + + dashboard: + image: hermes-agent + container_name: hermes-dashboard + restart: unless-stopped + network_mode: host + depends_on: + - gateway + volumes: + - ~/.hermes:/opt/data + environment: + - HERMES_UID=${HERMES_UID:-10000} + - HERMES_GID=${HERMES_GID:-10000} + # Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`. + command: ["dashboard", "--host", "127.0.0.1", "--no-open"] diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 67d193f13..0be1d656c 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -22,9 +22,18 @@ if [ "$(id -u)" = "0" ]; then groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true fi + # Fix ownership of the data volume. When HERMES_UID remaps the hermes user, + # files created by previous runs (under the old UID) become inaccessible. + # Always chown -R when UID was remapped; otherwise only if top-level is wrong. actual_hermes_uid=$(id -u hermes) - if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then - echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing" + needs_chown=false + if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then + needs_chown=true + elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then + needs_chown=true + fi + if [ "$needs_chown" = true ]; then + echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)" # In rootless Podman the container's "root" is mapped to an unprivileged # host UID — chown will fail. 
That's fine: the volume is already owned # by the mapped user on the host side. diff --git a/gateway/config.py b/gateway/config.py index 67ebf7346..509737279 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -135,7 +135,7 @@ class SessionResetPolicy: mode=mode if mode is not None else "both", at_hour=at_hour if at_hour is not None else 4, idle_minutes=idle_minutes if idle_minutes is not None else 1440, - notify=notify if notify is not None else True, + notify=_coerce_bool(notify, True), notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"), ) @@ -178,7 +178,7 @@ class PlatformConfig: home_channel = HomeChannel.from_dict(data["home_channel"]) return cls( - enabled=data.get("enabled", False), + enabled=_coerce_bool(data.get("enabled"), False), token=data.get("token"), api_key=data.get("api_key"), home_channel=home_channel, @@ -435,7 +435,7 @@ class GatewayConfig: reset_triggers=data.get("reset_triggers", ["/new", "/reset"]), quick_commands=quick_commands, sessions_dir=sessions_dir, - always_log_local=data.get("always_log_local", True), + always_log_local=_coerce_bool(data.get("always_log_local"), True), stt_enabled=_coerce_bool(stt_enabled, True), group_sessions_per_user=_coerce_bool(group_sessions_per_user, True), thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False), @@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig: os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower() if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"): os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip() + if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"): + gac = telegram_cfg["group_allowed_chats"] + if isinstance(gac, list): + gac = ",".join(str(v) for v in gac) + os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac) if "disable_link_previews" in telegram_cfg: plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, 
{}) if not isinstance(plat_data, dict): diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index a6b52ff32..db3304a09 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter): If the client disconnects mid-stream, ``agent.interrupt()`` is called so the agent stops issuing upstream LLM calls, then the - asyncio task is cancelled. When ``store=True`` the full response - is persisted to the ResponseStore in a ``finally`` block so GET - /v1/responses/{id} and ``previous_response_id`` chaining work the - same as the batch path. + asyncio task is cancelled. When ``store=True`` an initial + ``in_progress`` snapshot is persisted immediately after + ``response.created`` and disconnects update it to an + ``incomplete`` snapshot so GET /v1/responses/{id} and + ``previous_response_id`` chaining still have something to + recover from. """ import queue as _q @@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter): final_response_text = "" agent_error: Optional[str] = None usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0} + terminal_snapshot_persisted = False + + def _persist_response_snapshot( + response_env: Dict[str, Any], + *, + conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None, + ) -> None: + if not store: + return + if conversation_history_snapshot is None: + conversation_history_snapshot = list(conversation_history) + conversation_history_snapshot.append({"role": "user", "content": user_message}) + self._response_store.put(response_id, { + "response": response_env, + "conversation_history": conversation_history_snapshot, + "instructions": instructions, + "session_id": session_id, + }) + if conversation: + self._response_store.set_conversation(conversation, response_id) + + def _persist_incomplete_if_needed() -> None: + """Persist an ``incomplete`` snapshot if no terminal one was written. 
+ + Called from both the client-disconnect (``ConnectionResetError``) + and server-cancellation (``asyncio.CancelledError``) paths so + GET /v1/responses/{id} and ``previous_response_id`` chaining keep + working after abrupt stream termination. + """ + if not store or terminal_snapshot_persisted: + return + incomplete_text = "".join(final_text_parts) or final_response_text + incomplete_items: List[Dict[str, Any]] = list(emitted_items) + if incomplete_text: + incomplete_items.append({ + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": incomplete_text}], + }) + incomplete_env = _envelope("incomplete") + incomplete_env["output"] = incomplete_items + incomplete_env["usage"] = { + "input_tokens": usage.get("input_tokens", 0), + "output_tokens": usage.get("output_tokens", 0), + "total_tokens": usage.get("total_tokens", 0), + } + incomplete_history = list(conversation_history) + incomplete_history.append({"role": "user", "content": user_message}) + if incomplete_text: + incomplete_history.append({"role": "assistant", "content": incomplete_text}) + _persist_response_snapshot( + incomplete_env, + conversation_history_snapshot=incomplete_history, + ) try: # response.created — initial envelope, status=in_progress @@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter): "type": "response.created", "response": created_env, }) + _persist_response_snapshot(created_env) last_activity = time.monotonic() async def _open_message_item() -> None: @@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter): "output_tokens": usage.get("output_tokens", 0), "total_tokens": usage.get("total_tokens", 0), } + _failed_history = list(conversation_history) + _failed_history.append({"role": "user", "content": user_message}) + if final_response_text or agent_error: + _failed_history.append({ + "role": "assistant", + "content": final_response_text or agent_error, + }) + _persist_response_snapshot( + failed_env, + 
conversation_history_snapshot=_failed_history, + ) + terminal_snapshot_persisted = True await _write_event("response.failed", { "type": "response.failed", "response": failed_env, @@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter): "output_tokens": usage.get("output_tokens", 0), "total_tokens": usage.get("total_tokens", 0), } + full_history = list(conversation_history) + full_history.append({"role": "user", "content": user_message}) + if isinstance(result, dict) and result.get("messages"): + full_history.extend(result["messages"]) + else: + full_history.append({"role": "assistant", "content": final_response_text}) + _persist_response_snapshot( + completed_env, + conversation_history_snapshot=full_history, + ) + terminal_snapshot_persisted = True await _write_event("response.completed", { "type": "response.completed", "response": completed_env, }) - # Persist for future chaining / GET retrieval, mirroring - # the batch path behavior. - if store: - full_history = list(conversation_history) - full_history.append({"role": "user", "content": user_message}) - if isinstance(result, dict) and result.get("messages"): - full_history.extend(result["messages"]) - else: - full_history.append({"role": "assistant", "content": final_response_text}) - self._response_store.put(response_id, { - "response": completed_env, - "conversation_history": full_history, - "instructions": instructions, - "session_id": session_id, - }) - if conversation: - self._response_store.set_conversation(conversation, response_id) - except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError): + _persist_incomplete_if_needed() # Client disconnected — interrupt the agent so it stops # making upstream LLM calls, then cancel the task. 
agent = agent_ref[0] if agent_ref else None @@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter): except (asyncio.CancelledError, Exception): pass logger.info("SSE client disconnected; interrupted agent task %s", response_id) + except asyncio.CancelledError: + # Server-side cancellation (e.g. shutdown, request timeout) — + # persist an incomplete snapshot so GET /v1/responses/{id} and + # previous_response_id chaining still work, then re-raise so the + # runtime's cancellation semantics are respected. + _persist_incomplete_if_needed() + agent = agent_ref[0] if agent_ref else None + if agent is not None: + try: + agent.interrupt("SSE task cancelled") + except Exception: + pass + if not agent_task.done(): + agent_task.cancel() + logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id) + raise return response diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index db7603498..fd325fde4 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None: return None -def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: +def _split_host_port(value: str) -> tuple[str, int | None]: + raw = str(value or "").strip() + if not raw: + return "", None + if "://" in raw: + parsed = urlsplit(raw) + return (parsed.hostname or "").lower().rstrip("."), parsed.port + if raw.startswith("[") and "]" in raw: + host, _, rest = raw[1:].partition("]") + port = None + if rest.startswith(":") and rest[1:].isdigit(): + port = int(rest[1:]) + return host.lower().rstrip("."), port + if raw.count(":") == 1: + host, _, maybe_port = raw.rpartition(":") + if maybe_port.isdigit(): + return host.lower().rstrip("."), int(maybe_port) + return raw.lower().strip("[]").rstrip("."), None + + +def _no_proxy_entries() -> list[str]: + entries: list[str] = [] + for key in ("NO_PROXY", "no_proxy"): + raw = os.environ.get(key, "") + entries.extend(part.strip() for 
part in raw.split(",") if part.strip()) + return entries + + +def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool: + token = str(entry or "").strip().lower() + if not token: + return False + if token == "*": + return True + + token_host, token_port = _split_host_port(token) + if token_port is not None and port is not None and token_port != port: + return False + if token_port is not None and port is None: + return False + if not token_host: + return False + + try: + network = ipaddress.ip_network(token_host, strict=False) + try: + return ipaddress.ip_address(host) in network + except ValueError: + return False + except ValueError: + pass + + try: + token_ip = ipaddress.ip_address(token_host) + try: + return ipaddress.ip_address(host) == token_ip + except ValueError: + return False + except ValueError: + pass + + if token_host.startswith("*."): + suffix = token_host[1:] + return host.endswith(suffix) + if token_host.startswith("."): + return host == token_host[1:] or host.endswith(token_host) + return host == token_host or host.endswith(f".{token_host}") + + +def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool: + """Return True when NO_PROXY/no_proxy matches at least one target host. + + Supports exact hosts, domain suffixes, wildcard suffixes, IP literals, + CIDR ranges, optional host:port entries, and ``*``. + """ + entries = _no_proxy_entries() + if not entries or not target_hosts: + return False + if isinstance(target_hosts, str): + candidates = [target_hosts] + else: + candidates = list(target_hosts) + for candidate in candidates: + host, port = _split_host_port(str(candidate)) + if not host: + continue + if any(_no_proxy_entry_matches(entry, host, port) for entry in entries): + return True + return False + + +def resolve_proxy_url( + platform_env_var: str | None = None, + *, + target_hosts: str | list[str] | tuple[str, ...] 
| set[str] | None = None, +) -> str | None: """Return a proxy URL from env vars, or macOS system proxy. Check order: @@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: 1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants) 2. macOS system proxy via ``scutil --proxy`` (auto-detect) - Returns *None* if no proxy is found. + Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one + of ``target_hosts``. """ if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: + if should_bypass_proxy(target_hosts): + return None return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: + if should_bypass_proxy(target_hosts): + return None return normalize_proxy_url(value) - return normalize_proxy_url(_detect_macos_system_proxy()) + detected = normalize_proxy_url(_detect_macos_system_proxy()) + if detected and should_bypass_proxy(target_hosts): + return None + return detected def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index 39d4e537e..afcbf1a7e 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str: class BlueBubblesAdapter(BasePlatformAdapter): platform = Platform.BLUEBUBBLES + SUPPORTS_MESSAGE_EDITING = False MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH def __init__(self, config: PlatformConfig): @@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter): # Text sending # ------------------------------------------------------------------ + @staticmethod + def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]: + # Use the base splitter but skip pagination indicators — iMessage + # bubbles flow naturally without "(1/3)" suffixes. 
+ chunks = BasePlatformAdapter.truncate_message(content, max_length) + return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks] + async def send( self, chat_id: str, @@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter): reply_to: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: - text = strip_markdown(content or "") + text = self.format_message(content) if not text: return SendResult(success=False, error="BlueBubbles send requires text") - chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH) + # Split on paragraph breaks first (double newlines) so each thought + # becomes its own iMessage bubble, then truncate any that are still + # too long. + paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()] + chunks: List[str] = [] + for para in (paragraphs or [text]): + if len(para) <= self.MAX_MESSAGE_LENGTH: + chunks.append(para) + else: + chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH)) last = SendResult(success=True) for chunk in chunks: guid = await self._resolve_chat_guid(chat_id) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index f741d45b5..3eaf6ac05 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -2246,10 +2246,6 @@ class DiscordAdapter(BasePlatformAdapter): async def slash_usage(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/usage") - @tree.command(name="provider", description="Show available providers") - async def slash_provider(interaction: discord.Interaction): - await self._run_simple_slash(interaction, "/provider") - @tree.command(name="help", description="Show available commands") async def slash_help(interaction: discord.Interaction): await self._run_simple_slash(interaction, "/help") @@ -2719,7 +2715,12 @@ class DiscordAdapter(BasePlatformAdapter): return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off") def 
_discord_free_response_channels(self) -> set: - """Return Discord channel IDs where no bot mention is required.""" + """Return Discord channel IDs where no bot mention is required. + + A single ``"*"`` entry (either from a list or a comma-separated + string) is preserved in the returned set so callers can short-circuit + on wildcard membership, consistent with ``allowed_channels``. + """ raw = self.config.extra.get("free_response_channels") if raw is None: raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "") @@ -3212,14 +3213,14 @@ class DiscordAdapter(BasePlatformAdapter): allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "") if allowed_channels_raw: allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()} - if not (channel_ids & allowed_channels): + if "*" not in allowed_channels and not (channel_ids & allowed_channels): logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids) return # Check ignored channels - never respond even when mentioned ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "") ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()} - if channel_ids & ignored_channels: + if "*" in ignored_channels or (channel_ids & ignored_channels): logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids) return @@ -3233,7 +3234,11 @@ class DiscordAdapter(BasePlatformAdapter): voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()} current_channel_id = str(message.channel.id) is_voice_linked_channel = current_channel_id in voice_linked_ids - is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel + is_free_channel = ( + "*" in free_channels + or bool(channel_ids & free_channels) + or is_voice_linked_channel + ) # Skip the mention check if the message is in a thread where # the bot has previously participated (auto-created or replied in). 
@@ -3866,6 +3871,15 @@ if DISCORD_AVAILABLE: self.resolved = True model_id = interaction.data["values"][0] + self.clear_items() + await interaction.response.edit_message( + embed=discord.Embed( + title="⚙ Switching Model", + description=f"Switching to `{model_id}`...", + color=discord.Color.blue(), + ), + view=None, + ) try: result_text = await self.on_model_selected( @@ -3876,14 +3890,13 @@ if DISCORD_AVAILABLE: except Exception as exc: result_text = f"Error switching model: {exc}" - self.clear_items() - await interaction.response.edit_message( + await interaction.edit_original_response( embed=discord.Embed( title="⚙ Model Switched", description=result_text, color=discord.Color.green(), ), - view=self, + view=None, ) async def _on_back(self, interaction: discord.Interaction): diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index bec0d690a..be1bf494c 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter): "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0), } - proxy_url = resolve_proxy_url("TELEGRAM_PROXY") disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on")) fallback_ips = self._fallback_ips() if not fallback_ips: @@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter): ", ".join(fallback_ips), ) + proxy_targets = ["api.telegram.org", *fallback_ips] + proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets) if fallback_ips and not proxy_url and not disable_fallback: logger.info( "[%s] Telegram fallback IPs active: %s", diff --git a/gateway/platforms/telegram_network.py b/gateway/platforms/telegram_network.py index ed2d60d79..b099adc50 100644 --- a/gateway/platforms/telegram_network.py +++ b/gateway/platforms/telegram_network.py @@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"] -def 
_resolve_proxy_url() -> str | None: +def _resolve_proxy_url(target_hosts=None) -> str | None: # Delegate to shared implementation (env vars + macOS system proxy detection) from gateway.platforms.base import resolve_proxy_url - return resolve_proxy_url("TELEGRAM_PROXY") + return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts) class TelegramFallbackTransport(httpx.AsyncBaseTransport): @@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport): def __init__(self, fallback_ips: Iterable[str], **transport_kwargs): self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))] - proxy_url = _resolve_proxy_url() + proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips]) if proxy_url and "proxy" not in transport_kwargs: transport_kwargs["proxy"] = proxy_url self._primary = httpx.AsyncHTTPTransport(**transport_kwargs) diff --git a/gateway/run.py b/gateway/run.py index db3f8b00d..f5c1858db 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -14,6 +14,7 @@ Usage: """ import asyncio +import dataclasses import json import logging import os @@ -297,50 +298,16 @@ from gateway.restart import ( ) -def _normalize_whatsapp_identifier(value: str) -> str: - """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.""" - return ( - str(value or "") - .strip() - .replace("+", "", 1) - .split(":", 1)[0] - .split("@", 1)[0] - ) +from gateway.whatsapp_identity import ( + canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401 + expand_whatsapp_aliases as _expand_whatsapp_auth_aliases, + normalize_whatsapp_identifier as _normalize_whatsapp_identifier, +) -def _expand_whatsapp_auth_aliases(identifier: str) -> set: - """Resolve WhatsApp phone/LID aliases using bridge session mapping files.""" - normalized = _normalize_whatsapp_identifier(identifier) - if not normalized: - return set() - - session_dir = _hermes_home / "whatsapp" / "session" - resolved = set() - queue = 
[normalized] - - while queue: - current = queue.pop(0) - if not current or current in resolved: - continue - - resolved.add(current) - for suffix in ("", "_reverse"): - mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" - if not mapping_path.exists(): - continue - try: - mapped = _normalize_whatsapp_identifier( - json.loads(mapping_path.read_text(encoding="utf-8")) - ) - except Exception: - continue - if mapped and mapped not in resolved: - queue.append(mapped) - - return resolved - logger = logging.getLogger(__name__) + # Sentinel placed into _running_agents immediately when a session starts # processing, *before* any await. Prevents a second message for the same # session from bypassing the "already running" guard during the async gap @@ -349,16 +316,30 @@ _AGENT_PENDING_SENTINEL = object() def _resolve_runtime_agent_kwargs() -> dict: - """Resolve provider credentials for gateway-created AIAgent instances.""" + """Resolve provider credentials for gateway-created AIAgent instances. + + If the primary provider fails with an authentication error, attempt to + resolve credentials using the fallback provider chain from config.yaml + before giving up. + """ from hermes_cli.runtime_provider import ( resolve_runtime_provider, format_runtime_provider_error, ) + from hermes_cli.auth import AuthError try: runtime = resolve_runtime_provider( requested=os.getenv("HERMES_INFERENCE_PROVIDER"), ) + except AuthError as auth_exc: + # Primary provider auth failed (expired token, revoked key, etc.). + # Try the fallback provider chain before raising. 
+ logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc) + fb_config = _try_resolve_fallback_provider() + if fb_config is not None: + return fb_config + raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc except Exception as exc: raise RuntimeError(format_runtime_provider_error(exc)) from exc @@ -373,6 +354,48 @@ def _resolve_runtime_agent_kwargs() -> dict: } +def _try_resolve_fallback_provider() -> dict | None: + """Attempt to resolve credentials from the fallback_model/fallback_providers config.""" + from hermes_cli.runtime_provider import resolve_runtime_provider + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if not cfg_path.exists(): + return None + with open(cfg_path, encoding="utf-8") as _f: + cfg = _y.safe_load(_f) or {} + fb = cfg.get("fallback_providers") or cfg.get("fallback_model") + if not fb: + return None + # Normalize to list + fb_list = fb if isinstance(fb, list) else [fb] + for entry in fb_list: + if not isinstance(entry, dict): + continue + try: + runtime = resolve_runtime_provider( + requested=entry.get("provider"), + explicit_base_url=entry.get("base_url"), + explicit_api_key=entry.get("api_key"), + ) + logger.info("Fallback provider resolved: %s", runtime.get("provider")) + return { + "api_key": runtime.get("api_key"), + "base_url": runtime.get("base_url"), + "provider": runtime.get("provider"), + "api_mode": runtime.get("api_mode"), + "command": runtime.get("command"), + "args": list(runtime.get("args") or []), + "credential_pool": runtime.get("credential_pool"), + } + except Exception as fb_exc: + logger.debug("Fallback entry %s failed: %s", entry.get("provider"), fb_exc) + continue + except Exception: + pass + return None + + def _build_media_placeholder(event) -> str: """Build a text placeholder for media-only events so they aren't dropped. 
@@ -2309,6 +2332,17 @@ class GatewayRunner: for key, entry in _expired_entries: try: await self._async_flush_memories(entry.session_id, key) + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _parts = key.split(":") + _platform = _parts[2] if len(_parts) > 2 else "" + _invoke_hook( + "on_session_finalize", + session_id=entry.session_id, + platform=_platform, + ) + except Exception: + pass # Shut down memory provider and close tool resources # on the cached agent. Idle agents live in # _agent_cache (not _running_agents), so look there. @@ -2969,6 +3003,7 @@ class GatewayRunner: Platform.QQBOT: "QQ_ALLOWED_USERS", } platform_group_env_map = { + Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS", Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS", } platform_allow_all_map = { @@ -3025,7 +3060,7 @@ class GatewayRunner: # Check platform-specific and global allowlists platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip() group_allowlist = "" - if source.chat_type == "group": + if source.chat_type in {"group", "forum"}: group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip() global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip() @@ -3034,7 +3069,7 @@ class GatewayRunner: return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") # Some platforms authorize group traffic by chat ID rather than sender ID. - if group_allowlist and source.chat_type == "group" and source.chat_id: + if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id: allowed_group_ids = { chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip() } @@ -3145,7 +3180,50 @@ class GatewayRunner: # Internal events (e.g. background-process completion notifications) # are system-generated and must skip user authorization. 
- if getattr(event, "internal", False): + is_internal = bool(getattr(event, "internal", False)) + + # Fire pre_gateway_dispatch plugin hook for user-originated messages. + # Plugins receive the MessageEvent and may return a dict influencing flow: + # {"action": "skip", "reason": ...} -> drop (no reply, plugin handled) + # {"action": "rewrite", "text": ...} -> replace event.text, continue + # {"action": "allow"} / None -> normal dispatch + # Hook runs BEFORE auth so plugins can handle unauthorized senders + # (e.g. customer handover ingest) without triggering the pairing flow. + if not is_internal: + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _hook_results = _invoke_hook( + "pre_gateway_dispatch", + event=event, + gateway=self, + session_store=self.session_store, + ) + except Exception as _hook_exc: + logger.warning("pre_gateway_dispatch invocation failed: %s", _hook_exc) + _hook_results = [] + + for _result in _hook_results: + if not isinstance(_result, dict): + continue + _action = _result.get("action") + if _action == "skip": + logger.info( + "pre_gateway_dispatch skip: reason=%s platform=%s chat=%s", + _result.get("reason"), + source.platform.value if source.platform else "unknown", + source.chat_id or "unknown", + ) + return None + if _action == "rewrite": + _new_text = _result.get("text") + if isinstance(_new_text, str): + event = dataclasses.replace(event, text=_new_text) + source = event.source + break + if _action == "allow": + break + + if is_internal: pass elif source.user_id is None: # Messages with no user identity (Telegram service messages, @@ -3442,7 +3520,7 @@ class GatewayRunner: # running-agent guard. Reject gracefully rather than falling # through to interrupt + discard. 
Without this, commands # like /model, /reasoning, /voice, /insights, /title, - # /resume, /retry, /undo, /compress, /usage, /provider, + # /resume, /retry, /undo, /compress, /usage, # /reload-mcp, /sethome, /reset (all registered as Discord # slash commands) would interrupt the agent AND get # silently discarded by the slash-command safety net, @@ -3513,6 +3591,10 @@ class GatewayRunner: if self._queue_during_drain_enabled() else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now." ) + if self._busy_input_mode == "queue": + logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20]) + self._queue_or_replace_pending_event(_quick_key, event) + return None logger.debug("PRIORITY interrupt for session %s", _quick_key[:20]) running_agent.interrupt(event.text) if _quick_key in self._pending_messages: @@ -3629,34 +3711,9 @@ class GatewayRunner: if canonical == "model": return await self._handle_model_command(event) - if canonical == "provider": - return await self._handle_provider_command(event) - if canonical == "personality": return await self._handle_personality_command(event) - if canonical == "plan": - try: - from agent.skill_commands import build_plan_path, build_skill_invocation_message - - user_instruction = event.get_command_args().strip() - plan_path = build_plan_path(user_instruction) - event.text = build_skill_invocation_message( - "/plan", - user_instruction, - task_id=_quick_key, - runtime_note=( - "Save the markdown plan with write_file to this exact relative path " - f"inside the active workspace/backend cwd: {plan_path}" - ), - ) - if not event.text: - return "Failed to load the bundled /plan skill." 
- canonical = None - except Exception as e: - logger.exception("Failed to prepare /plan command") - return f"Failed to enter plan mode: {e}" - if canonical == "retry": return await self._handle_retry_command(event) @@ -5602,9 +5659,17 @@ class GatewayRunner: lines = [f"Model switched to `{result.new_model}`"] lines.append(f"Provider: {plabel}") mi = result.model_info + from hermes_cli.model_switch import resolve_display_context_length + ctx = resolve_display_context_length( + result.new_model, + result.target_provider, + base_url=result.base_url or current_base_url or "", + api_key=result.api_key or current_api_key or "", + model_info=mi, + ) + if ctx: + lines.append(f"Context: {ctx:,} tokens") if mi: - if mi.context_window: - lines.append(f"Context: {mi.context_window:,} tokens") if mi.max_output: lines.append(f"Max output: {mi.max_output:,} tokens") if mi.has_cost_data(): @@ -5738,28 +5803,25 @@ class GatewayRunner: lines = [f"Model switched to `{result.new_model}`"] lines.append(f"Provider: {provider_label}") - # Rich metadata from models.dev + # Context: always resolve via the provider-aware chain so Codex OAuth, + # Copilot, and Nous-enforced caps win over the raw models.dev entry. 
mi = result.model_info + from hermes_cli.model_switch import resolve_display_context_length + ctx = resolve_display_context_length( + result.new_model, + result.target_provider, + base_url=result.base_url or current_base_url or "", + api_key=result.api_key or current_api_key or "", + model_info=mi, + ) + if ctx: + lines.append(f"Context: {ctx:,} tokens") if mi: - if mi.context_window: - lines.append(f"Context: {mi.context_window:,} tokens") if mi.max_output: lines.append(f"Max output: {mi.max_output:,} tokens") if mi.has_cost_data(): lines.append(f"Cost: {mi.format_cost()}") lines.append(f"Capabilities: {mi.format_capabilities()}") - else: - try: - from agent.model_metadata import get_model_context_length - ctx = get_model_context_length( - result.new_model, - base_url=result.base_url or current_base_url, - api_key=result.api_key or current_api_key, - provider=result.target_provider, - ) - lines.append(f"Context: {ctx:,} tokens") - except Exception: - pass # Cache notice cache_enabled = ( @@ -5779,63 +5841,6 @@ class GatewayRunner: return "\n".join(lines) - async def _handle_provider_command(self, event: MessageEvent) -> str: - """Handle /provider command - show available providers.""" - import yaml - from hermes_cli.models import ( - list_available_providers, - normalize_provider, - _PROVIDER_LABELS, - ) - - # Resolve current provider from config - current_provider = "openrouter" - model_cfg = {} - config_path = _hermes_home / 'config.yaml' - try: - if config_path.exists(): - with open(config_path, encoding="utf-8") as f: - cfg = yaml.safe_load(f) or {} - model_cfg = cfg.get("model", {}) - if isinstance(model_cfg, dict): - current_provider = model_cfg.get("provider", current_provider) - except Exception: - pass - - current_provider = normalize_provider(current_provider) - if current_provider == "auto": - try: - from hermes_cli.auth import resolve_provider as _resolve_provider - current_provider = _resolve_provider(current_provider) - except Exception: - 
current_provider = "openrouter" - - # Detect custom endpoint from config base_url - if current_provider == "openrouter": - _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else "" - if _cfg_base and "openrouter.ai" not in _cfg_base: - current_provider = "custom" - - current_label = _PROVIDER_LABELS.get(current_provider, current_provider) - - lines = [ - f"🔌 **Current provider:** {current_label} (`{current_provider}`)", - "", - "**Available providers:**", - ] - - providers = list_available_providers() - for p in providers: - marker = " ← active" if p["id"] == current_provider else "" - auth = "✅" if p["authenticated"] else "❌" - aliases = f" _(also: {', '.join(p['aliases'])})_" if p["aliases"] else "" - lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}") - - lines.append("") - lines.append("Switch: `/model provider:model-name`") - lines.append("Setup: `hermes setup`") - return "\n".join(lines) - async def _handle_personality_command(self, event: MessageEvent) -> str: """Handle /personality command - list or set a personality.""" import yaml @@ -7102,10 +7107,7 @@ class GatewayRunner: tmp_agent._print_fn = lambda *a, **kw: None compressor = tmp_agent.context_compressor - compress_start = compressor.protect_first_n - compress_start = compressor._align_boundary_forward(msgs, compress_start) - compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start) - if compress_start >= compress_end: + if not compressor.has_content_to_compress(msgs): return "Nothing to compress yet (the transcript is still all protected context)." loop = asyncio.get_running_loop() @@ -7231,13 +7233,19 @@ class GatewayRunner: logger.debug("Failed to list titled sessions: %s", e) return f"Could not list sessions: {e}" - # Resolve the name to a session ID + # Resolve the name to a session ID. 
target_id = self._session_db.resolve_session_by_title(name) if not target_id: return ( f"No session found matching '**{name}**'.\n" "Use `/resume` with no arguments to see available sessions." ) + # Compression creates child continuations that hold the live transcript. + # Follow that chain so gateway /resume matches CLI behavior (#15000). + try: + target_id = self._session_db.resolve_resume_session_id(target_id) + except Exception as e: + logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e) # Check if already on that session current_entry = self.session_store.get_or_create_session(source) diff --git a/gateway/session.py b/gateway/session.py index db90d3121..fe12e6ab3 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -60,6 +60,10 @@ from .config import ( SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py HomeChannel, ) +from .whatsapp_identity import ( + canonical_whatsapp_identifier, + normalize_whatsapp_identifier, +) @dataclass @@ -281,6 +285,18 @@ def build_session_context_prompt( "Do not promise to perform these actions. If the user asks, explain " "that you can only read messages sent directly to you and respond." ) + elif context.source.platform == Platform.BLUEBUBBLES: + lines.append("") + lines.append( + "**Platform notes:** You are responding via iMessage. " + "Keep responses short and conversational — think texts, not essays. " + "Structure longer replies as separate short thoughts, each separated " + "by a blank line (double newline). Each block between blank lines " + "will be delivered as its own iMessage bubble, so write accordingly: " + "one idea per bubble, 1–3 sentences each. " + "If the user needs a detailed answer, give the short version first " + "and offer to elaborate." 
+ ) # Connected platforms platforms_list = ["local (files on this machine)"] @@ -518,15 +534,24 @@ def build_session_key( """ platform = source.platform.value if source.chat_type == "dm": - if source.chat_id: + dm_chat_id = source.chat_id + if source.platform == Platform.WHATSAPP: + dm_chat_id = canonical_whatsapp_identifier(source.chat_id) + + if dm_chat_id: if source.thread_id: - return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}" - return f"agent:main:{platform}:dm:{source.chat_id}" + return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}" + return f"agent:main:{platform}:dm:{dm_chat_id}" if source.thread_id: return f"agent:main:{platform}:dm:{source.thread_id}" return f"agent:main:{platform}:dm" participant_id = source.user_id_alt or source.user_id + if participant_id and source.platform == Platform.WHATSAPP: + # Same JID/LID-flip bug as the DM case: without canonicalisation, a + # single group member gets two isolated per-user sessions when the + # bridge reshuffles alias forms. + participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id key_parts = ["agent:main", platform, source.chat_type] if source.chat_id: diff --git a/gateway/whatsapp_identity.py b/gateway/whatsapp_identity.py new file mode 100644 index 000000000..b0792daf7 --- /dev/null +++ b/gateway/whatsapp_identity.py @@ -0,0 +1,135 @@ +"""Shared helpers for canonicalising WhatsApp sender identity. + +WhatsApp's bridge can surface the same human under two different JID shapes +within a single conversation: + +- LID form: ``999999999999999@lid`` +- Phone form: ``15551234567@s.whatsapp.net`` + +Both the authorisation path (:mod:`gateway.run`) and the session-key path +(:mod:`gateway.session`) need to collapse these aliases to a single stable +identity. This module is the single source of truth for that resolution so +the two paths can never drift apart. 
+ +Public helpers: + +- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax + down to the bare numeric identifier. +- :func:`canonical_whatsapp_identifier` — walk the bridge's + ``lid-mapping-*.json`` files and return a stable canonical identity + across phone/LID variants. +- :func:`expand_whatsapp_aliases` — return the full alias set for an + identifier. Used by authorisation code that needs to match any known + form of a sender against an allow-list. + +Plugins that need per-sender behaviour on WhatsApp (role-based routing, +per-contact authorisation, policy gating in a gateway hook) should use +``canonical_whatsapp_identifier`` so their bookkeeping lines up with +Hermes' own session keys. +""" + +from __future__ import annotations + +import json +from typing import Set + +from hermes_constants import get_hermes_home + + +def normalize_whatsapp_identifier(value: str) -> str: + """Strip WhatsApp JID/LID syntax down to its stable numeric identifier. + + Accepts any of the identifier shapes the WhatsApp bridge may emit: + ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``, + ``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``. + Returns just the numeric identifier (``"60123456789"``) suitable for + equality comparisons. + + Useful for plugins that want to match sender IDs against + user-supplied config (phone numbers in ``config.yaml``) without + worrying about which variant the bridge happens to deliver. + """ + return ( + str(value or "") + .strip() + .replace("+", "", 1) + .split(":", 1)[0] + .split("@", 1)[0] + ) + + +def expand_whatsapp_aliases(identifier: str) -> Set[str]: + """Resolve WhatsApp phone/LID aliases via bridge session mapping files. + + Returns the set of all identifiers transitively reachable through the + bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files, + starting from ``identifier``. 
The result always includes the + normalized input itself, so callers can safely ``in`` check against + the return value without a separate fallback branch. + + Returns an empty set if ``identifier`` normalizes to empty. + """ + normalized = normalize_whatsapp_identifier(identifier) + if not normalized: + return set() + + session_dir = get_hermes_home() / "whatsapp" / "session" + resolved: Set[str] = set() + queue = [normalized] + + while queue: + current = queue.pop(0) + if not current or current in resolved: + continue + + resolved.add(current) + for suffix in ("", "_reverse"): + mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json" + if not mapping_path.exists(): + continue + try: + mapped = normalize_whatsapp_identifier( + json.loads(mapping_path.read_text(encoding="utf-8")) + ) + except Exception: + continue + if mapped and mapped not in resolved: + queue.append(mapped) + + return resolved + + +def canonical_whatsapp_identifier(identifier: str) -> str: + """Return a stable WhatsApp sender identity across phone-JID/LID variants. + + WhatsApp may surface the same person under either a phone-format JID + (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This + applies to a DM ``chat_id`` *and* to the ``participant_id`` of a + member inside a group chat — both represent a user identity, and the + bridge may flip between the two for the same human. + + This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json`` + files, walks the mapping transitively, and picks the shortest + (numeric-preferred) alias as the canonical identity. + :func:`gateway.session.build_session_key` uses this for both WhatsApp + DM chat_ids and WhatsApp group participant_ids, so callers get the + same session-key identity Hermes itself uses. 
+ + Plugins that need per-sender behaviour (role-based routing, + authorisation, per-contact policy) should use this so their + bookkeeping lines up with Hermes' session bookkeeping even when + the bridge reshuffles aliases. + + Returns an empty string if ``identifier`` normalizes to empty. If no + mapping files exist yet (fresh bridge install), returns the + normalized input unchanged. + """ + normalized = normalize_whatsapp_identifier(identifier) + if not normalized: + return "" + + # expand_whatsapp_aliases always includes `normalized` itself in the + # returned set, so the min() below degrades gracefully to `normalized` + # when no lid-mapping files are present. + aliases = expand_whatsapp_aliases(normalized) + return min(aliases, key=lambda candidate: (len(candidate), candidate)) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 28c5bd9a6..00685436d 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -22,6 +22,7 @@ import shutil import shlex import ssl import stat +import sys import base64 import hashlib import subprocess @@ -32,8 +33,10 @@ import webbrowser from contextlib import contextmanager from dataclasses import dataclass, field from datetime import datetime, timezone +from http.server import BaseHTTPRequestHandler, HTTPServer from pathlib import Path from typing import Any, Dict, List, Optional +from urllib.parse import parse_qs, urlencode, urlparse import httpx import yaml @@ -80,6 +83,27 @@ CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56" QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token" QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL = "https://accounts.spotify.com" +DEFAULT_SPOTIFY_API_BASE_URL = "https://api.spotify.com/v1" +DEFAULT_SPOTIFY_REDIRECT_URI = "http://127.0.0.1:43827/spotify/callback" +SPOTIFY_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify" +SPOTIFY_DASHBOARD_URL = 
"https://developer.spotify.com/dashboard" +SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +DEFAULT_SPOTIFY_SCOPE = " ".join(( + "user-modify-playback-state", + "user-read-playback-state", + "user-read-currently-playing", + "user-read-recently-played", + "playlist-read-private", + "playlist-read-collaborative", + "playlist-modify-public", + "playlist-modify-private", + "user-library-read", + "user-library-modify", +)) +SERVICE_PROVIDER_NAMES: Dict[str, str] = { + "spotify": "Spotify", +} # Google Gemini OAuth (google-gemini-cli provider, Cloud Code Assist backend) DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google" @@ -224,6 +248,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { api_key_env_vars=("DASHSCOPE_API_KEY",), base_url_env_var="DASHSCOPE_BASE_URL", ), + "alibaba-coding-plan": ProviderConfig( + id="alibaba-coding-plan", + name="Alibaba Cloud (Coding Plan)", + auth_type="api_key", + inference_base_url="https://coding-intl.dashscope.aliyuncs.com/v1", + api_key_env_vars=("ALIBABA_CODING_PLAN_API_KEY", "DASHSCOPE_API_KEY"), + base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL", + ), "minimax-cn": ProviderConfig( id="minimax-cn", name="MiniMax (China)", @@ -417,10 +449,10 @@ def _resolve_api_key_provider_secret( if provider_id == "copilot": # Use the dedicated copilot auth module for proper token validation try: - from hermes_cli.copilot_auth import resolve_copilot_token + from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token token, source = resolve_copilot_token() if token: - return token, source + return get_copilot_api_token(token), source except ValueError as exc: logger.warning("Copilot token validation failed: %s", exc) except Exception: @@ -711,7 +743,18 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]: try: raw = json.loads(auth_file.read_text()) - except Exception: + except Exception as exc: + corrupt_path = auth_file.with_suffix(".json.corrupt") + try: + import shutil + 
shutil.copy2(auth_file, corrupt_path) + except Exception: + pass + logger.warning( + "auth: failed to parse %s (%s) — starting with empty store. " + "Corrupt file preserved at %s", + auth_file, exc, corrupt_path, + ) return {"version": AUTH_STORE_VERSION, "providers": {}} if isinstance(raw, dict) and ( @@ -786,6 +829,34 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di auth_store["active_provider"] = provider_id +def _store_provider_state( + auth_store: Dict[str, Any], + provider_id: str, + state: Dict[str, Any], + *, + set_active: bool = True, +) -> None: + providers = auth_store.setdefault("providers", {}) + if not isinstance(providers, dict): + auth_store["providers"] = {} + providers = auth_store["providers"] + providers[provider_id] = state + if set_active: + auth_store["active_provider"] = provider_id + + +def is_known_auth_provider(provider_id: str) -> bool: + normalized = (provider_id or "").strip().lower() + return normalized in PROVIDER_REGISTRY or normalized in SERVICE_PROVIDER_NAMES + + +def get_auth_provider_display_name(provider_id: str) -> str: + normalized = (provider_id or "").strip().lower() + if normalized in PROVIDER_REGISTRY: + return PROVIDER_REGISTRY[normalized].name + return SERVICE_PROVIDER_NAMES.get(normalized, provider_id) + + def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]: """Return the persisted credential pool, or one provider slice.""" auth_store = _load_auth_store() @@ -946,10 +1017,12 @@ def clear_provider_auth(provider_id: Optional[str] = None) -> bool: del pool[target] cleared = True - if not cleared: - return False if auth_store.get("active_provider") == target: auth_store["active_provider"] = None + cleared = True + + if not cleared: + return False _save_auth_store(auth_store) return True @@ -1024,6 +1097,8 @@ def resolve_provider( "step": "stepfun", "stepfun-coding-plan": "stepfun", "arcee-ai": "arcee", "arceeai": "arcee", "minimax-china": "minimax-cn", 
"minimax_cn": "minimax-cn", + "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan", + "alibaba_coding_plan": "alibaba-coding-plan", "claude": "anthropic", "claude-code": "anthropic", "github": "copilot", "github-copilot": "copilot", "github-models": "copilot", "github-model": "copilot", @@ -1416,8 +1491,597 @@ def get_gemini_oauth_auth_status() -> Dict[str, Any]: "email": creds.email, "project_id": creds.project_id, } +# Spotify auth — PKCE tokens stored in ~/.hermes/auth.json +# ============================================================================= +def _spotify_scope_list(raw_scope: Optional[str] = None) -> List[str]: + scope_text = (raw_scope or DEFAULT_SPOTIFY_SCOPE).strip() + scopes = [part for part in scope_text.split() if part] + seen: set[str] = set() + ordered: List[str] = [] + for scope in scopes: + if scope not in seen: + seen.add(scope) + ordered.append(scope) + return ordered + + +def _spotify_scope_string(raw_scope: Optional[str] = None) -> str: + return " ".join(_spotify_scope_list(raw_scope)) + + +def _spotify_client_id( + explicit: Optional[str] = None, + state: Optional[Dict[str, Any]] = None, +) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + explicit, + get_env_value("HERMES_SPOTIFY_CLIENT_ID"), + get_env_value("SPOTIFY_CLIENT_ID"), + state.get("client_id") if isinstance(state, dict) else None, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip() + if cleaned: + return cleaned + raise AuthError( + "Spotify client_id is required. 
Set HERMES_SPOTIFY_CLIENT_ID or pass --client-id.", + provider="spotify", + code="spotify_client_id_missing", + ) + + +def _spotify_redirect_uri( + explicit: Optional[str] = None, + state: Optional[Dict[str, Any]] = None, +) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + explicit, + get_env_value("HERMES_SPOTIFY_REDIRECT_URI"), + get_env_value("SPOTIFY_REDIRECT_URI"), + state.get("redirect_uri") if isinstance(state, dict) else None, + DEFAULT_SPOTIFY_REDIRECT_URI, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip() + if cleaned: + return cleaned + return DEFAULT_SPOTIFY_REDIRECT_URI + + +def _spotify_api_base_url(state: Optional[Dict[str, Any]] = None) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + get_env_value("HERMES_SPOTIFY_API_BASE_URL"), + state.get("api_base_url") if isinstance(state, dict) else None, + DEFAULT_SPOTIFY_API_BASE_URL, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip().rstrip("/") + if cleaned: + return cleaned + return DEFAULT_SPOTIFY_API_BASE_URL + + +def _spotify_accounts_base_url(state: Optional[Dict[str, Any]] = None) -> str: + from hermes_cli.config import get_env_value + + candidates = ( + get_env_value("HERMES_SPOTIFY_ACCOUNTS_BASE_URL"), + state.get("accounts_base_url") if isinstance(state, dict) else None, + DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL, + ) + for candidate in candidates: + cleaned = str(candidate or "").strip().rstrip("/") + if cleaned: + return cleaned + return DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL + + +def _spotify_code_verifier(length: int = 64) -> str: + raw = base64.urlsafe_b64encode(os.urandom(length)).decode("ascii") + return raw.rstrip("=")[:128] + + +def _spotify_code_challenge(code_verifier: str) -> str: + digest = hashlib.sha256(code_verifier.encode("utf-8")).digest() + return base64.urlsafe_b64encode(digest).decode("ascii").rstrip("=") + + +def _spotify_build_authorize_url( + *, + client_id: str, + redirect_uri: str, 
+ scope: str, + state: str, + code_challenge: str, + accounts_base_url: str, +) -> str: + query = urlencode({ + "client_id": client_id, + "response_type": "code", + "redirect_uri": redirect_uri, + "scope": scope, + "state": state, + "code_challenge_method": "S256", + "code_challenge": code_challenge, + }) + return f"{accounts_base_url}/authorize?{query}" + + +def _spotify_validate_redirect_uri(redirect_uri: str) -> tuple[str, int, str]: + parsed = urlparse(redirect_uri) + if parsed.scheme != "http": + raise AuthError( + "Spotify PKCE redirect_uri must use http://localhost or http://127.0.0.1.", + provider="spotify", + code="spotify_redirect_invalid", + ) + host = parsed.hostname or "" + if host not in {"127.0.0.1", "localhost"}: + raise AuthError( + "Spotify PKCE redirect_uri must point to localhost or 127.0.0.1.", + provider="spotify", + code="spotify_redirect_invalid", + ) + if not parsed.port: + raise AuthError( + "Spotify PKCE redirect_uri must include an explicit localhost port.", + provider="spotify", + code="spotify_redirect_invalid", + ) + return host, parsed.port, parsed.path or "/" + + +def _make_spotify_callback_handler(expected_path: str) -> tuple[type[BaseHTTPRequestHandler], dict[str, Any]]: + result: dict[str, Any] = { + "code": None, + "state": None, + "error": None, + "error_description": None, + } + + class _SpotifyCallbackHandler(BaseHTTPRequestHandler): + def do_GET(self) -> None: # noqa: N802 + parsed = urlparse(self.path) + if parsed.path != expected_path: + self.send_response(404) + self.end_headers() + self.wfile.write(b"Not found.") + return + + params = parse_qs(parsed.query) + result["code"] = params.get("code", [None])[0] + result["state"] = params.get("state", [None])[0] + result["error"] = params.get("error", [None])[0] + result["error_description"] = params.get("error_description", [None])[0] + + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + if result["error"]: + body = "
<p>Spotify authorization failed.</p><p>You can close this tab.</p>" + else: + body = "
<p>Spotify authorization received.</p><p>
You can close this tab." + self.wfile.write(body.encode("utf-8")) + + def log_message(self, format: str, *args: Any) -> None: # noqa: A003 + return + + return _SpotifyCallbackHandler, result + + +def _spotify_wait_for_callback( + redirect_uri: str, + *, + timeout_seconds: float = 180.0, +) -> dict[str, Any]: + host, port, path = _spotify_validate_redirect_uri(redirect_uri) + handler_cls, result = _make_spotify_callback_handler(path) + + class _ReuseHTTPServer(HTTPServer): + allow_reuse_address = True + + try: + server = _ReuseHTTPServer((host, port), handler_cls) + except OSError as exc: + raise AuthError( + f"Could not bind Spotify callback server on {host}:{port}: {exc}", + provider="spotify", + code="spotify_callback_bind_failed", + ) from exc + + thread = threading.Thread(target=server.serve_forever, kwargs={"poll_interval": 0.1}, daemon=True) + thread.start() + deadline = time.time() + max(5.0, timeout_seconds) + try: + while time.time() < deadline: + if result["code"] or result["error"]: + return result + time.sleep(0.1) + finally: + server.shutdown() + server.server_close() + thread.join(timeout=1.0) + raise AuthError( + "Spotify authorization timed out waiting for the local callback.", + provider="spotify", + code="spotify_callback_timeout", + ) + + +def _spotify_token_payload_to_state( + token_payload: Dict[str, Any], + *, + client_id: str, + redirect_uri: str, + requested_scope: str, + accounts_base_url: str, + api_base_url: str, + previous_state: Optional[Dict[str, Any]] = None, +) -> Dict[str, Any]: + now = datetime.now(timezone.utc) + expires_in = _coerce_ttl_seconds(token_payload.get("expires_in", 0)) + expires_at = datetime.fromtimestamp(now.timestamp() + expires_in, tz=timezone.utc) + state = dict(previous_state or {}) + state.update({ + "client_id": client_id, + "redirect_uri": redirect_uri, + "accounts_base_url": accounts_base_url, + "api_base_url": api_base_url, + "scope": requested_scope, + "granted_scope": str(token_payload.get("scope") or 
requested_scope).strip(), + "token_type": str(token_payload.get("token_type", "Bearer") or "Bearer").strip() or "Bearer", + "access_token": str(token_payload.get("access_token", "") or "").strip(), + "refresh_token": str( + token_payload.get("refresh_token") + or state.get("refresh_token") + or "" + ).strip(), + "obtained_at": now.isoformat(), + "expires_at": expires_at.isoformat(), + "expires_in": expires_in, + "auth_type": "oauth_pkce", + }) + return state + + +def _spotify_exchange_code_for_tokens( + *, + client_id: str, + code: str, + redirect_uri: str, + code_verifier: str, + accounts_base_url: str, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + try: + response = httpx.post( + f"{accounts_base_url}/api/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "client_id": client_id, + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "code_verifier": code_verifier, + }, + timeout=timeout_seconds, + ) + except Exception as exc: + raise AuthError( + f"Spotify token exchange failed: {exc}", + provider="spotify", + code="spotify_token_exchange_failed", + ) from exc + + if response.status_code >= 400: + detail = response.text.strip() + raise AuthError( + "Spotify token exchange failed." + + (f" Response: {detail}" if detail else ""), + provider="spotify", + code="spotify_token_exchange_failed", + ) + payload = response.json() + if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip(): + raise AuthError( + "Spotify token response did not include an access_token.", + provider="spotify", + code="spotify_token_exchange_invalid", + ) + return payload + + +def _refresh_spotify_oauth_state( + state: Dict[str, Any], + *, + timeout_seconds: float = 20.0, +) -> Dict[str, Any]: + refresh_token = str(state.get("refresh_token", "") or "").strip() + if not refresh_token: + raise AuthError( + "Spotify refresh token missing. 
Run `hermes auth spotify` again.", + provider="spotify", + code="spotify_refresh_token_missing", + relogin_required=True, + ) + + client_id = _spotify_client_id(state=state) + accounts_base_url = _spotify_accounts_base_url(state) + try: + response = httpx.post( + f"{accounts_base_url}/api/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + }, + timeout=timeout_seconds, + ) + except Exception as exc: + raise AuthError( + f"Spotify token refresh failed: {exc}", + provider="spotify", + code="spotify_refresh_failed", + ) from exc + + if response.status_code >= 400: + detail = response.text.strip() + raise AuthError( + "Spotify token refresh failed. Run `hermes auth spotify` again." + + (f" Response: {detail}" if detail else ""), + provider="spotify", + code="spotify_refresh_failed", + relogin_required=True, + ) + + payload = response.json() + if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip(): + raise AuthError( + "Spotify refresh response did not include an access_token.", + provider="spotify", + code="spotify_refresh_invalid", + relogin_required=True, + ) + + return _spotify_token_payload_to_state( + payload, + client_id=client_id, + redirect_uri=_spotify_redirect_uri(state=state), + requested_scope=str(state.get("scope") or DEFAULT_SPOTIFY_SCOPE), + accounts_base_url=accounts_base_url, + api_base_url=_spotify_api_base_url(state), + previous_state=state, + ) + + +def resolve_spotify_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = SPOTIFY_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: + with _auth_store_lock(): + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "spotify") + if not state: + raise AuthError( + "Spotify is not authenticated. 
Run `hermes auth spotify` first.", + provider="spotify", + code="spotify_auth_missing", + relogin_required=True, + ) + + should_refresh = bool(force_refresh) + if not should_refresh and refresh_if_expiring: + should_refresh = _is_expiring(state.get("expires_at"), refresh_skew_seconds) + if should_refresh: + state = _refresh_spotify_oauth_state(state) + _store_provider_state(auth_store, "spotify", state, set_active=False) + _save_auth_store(auth_store) + + access_token = str(state.get("access_token", "") or "").strip() + if not access_token: + raise AuthError( + "Spotify access token missing. Run `hermes auth spotify` again.", + provider="spotify", + code="spotify_access_token_missing", + relogin_required=True, + ) + + return { + "provider": "spotify", + "access_token": access_token, + "api_key": access_token, + "token_type": str(state.get("token_type", "Bearer") or "Bearer"), + "base_url": _spotify_api_base_url(state), + "scope": str(state.get("granted_scope") or state.get("scope") or "").strip(), + "client_id": _spotify_client_id(state=state), + "redirect_uri": _spotify_redirect_uri(state=state), + "expires_at": state.get("expires_at"), + "refresh_token": str(state.get("refresh_token", "") or "").strip(), + } + + +def get_spotify_auth_status() -> Dict[str, Any]: + state = get_provider_auth_state("spotify") + if not state: + return {"logged_in": False} + + expires_at = state.get("expires_at") + refresh_token = str(state.get("refresh_token", "") or "").strip() + return { + "logged_in": bool(refresh_token or not _is_expiring(expires_at, 0)), + "auth_type": state.get("auth_type", "oauth_pkce"), + "client_id": state.get("client_id"), + "redirect_uri": state.get("redirect_uri"), + "scope": state.get("granted_scope") or state.get("scope"), + "expires_at": expires_at, + "api_base_url": state.get("api_base_url"), + "has_refresh_token": bool(refresh_token), + } + + +def _spotify_interactive_setup(redirect_uri_hint: str) -> str: + """Walk the user through creating a Spotify 
developer app, persist the + resulting client_id to ~/.hermes/.env, and return it. + + Raises SystemExit if the user aborts or submits an empty value. + """ + from hermes_cli.config import save_env_value + + print() + print("=" * 70) + print("Spotify first-time setup") + print("=" * 70) + print() + print("Spotify requires every user to register their own lightweight") + print("developer app. This takes about two minutes and only has to be") + print("done once per machine.") + print() + print(f"Full guide: {SPOTIFY_DOCS_URL}") + print() + print("Steps:") + print(f" 1. Opening {SPOTIFY_DASHBOARD_URL} in your browser...") + print(" 2. Click 'Create app' and fill in:") + print(" App name: anything (e.g. hermes-agent)") + print(" Description: anything") + print(f" Redirect URI: {redirect_uri_hint}") + print(" API/SDK: Web API") + print(" 3. Agree to the terms, click Save.") + print(" 4. Open the app's Settings page and copy the Client ID.") + print(" 5. Paste it below.") + print() + + if not _is_remote_session(): + try: + webbrowser.open(SPOTIFY_DASHBOARD_URL) + except Exception: + pass + + try: + raw = input("Spotify Client ID: ").strip() + except (EOFError, KeyboardInterrupt): + print() + raise SystemExit("Spotify setup cancelled.") + + if not raw: + print() + print(f"No Client ID entered. See {SPOTIFY_DOCS_URL} for the full guide.") + raise SystemExit("Spotify setup cancelled: empty Client ID.") + + # Persist so subsequent `hermes auth spotify` runs skip the wizard. + save_env_value("HERMES_SPOTIFY_CLIENT_ID", raw) + # Only persist the redirect URI if it's non-default, to avoid pinning + # users to a value the default might later change to. 
+ if redirect_uri_hint and redirect_uri_hint != DEFAULT_SPOTIFY_REDIRECT_URI: + save_env_value("HERMES_SPOTIFY_REDIRECT_URI", redirect_uri_hint) + + print() + print("Saved HERMES_SPOTIFY_CLIENT_ID to ~/.hermes/.env") + print() + return raw + + +def login_spotify_command(args) -> None: + existing_state = get_provider_auth_state("spotify") or {} + + # Interactive wizard: if no client_id is configured anywhere, walk the + # user through creating the Spotify developer app instead of crashing + # with "HERMES_SPOTIFY_CLIENT_ID is required". + explicit_client_id = getattr(args, "client_id", None) + try: + client_id = _spotify_client_id(explicit_client_id, existing_state) + except AuthError as exc: + if getattr(exc, "code", "") != "spotify_client_id_missing": + raise + client_id = _spotify_interactive_setup( + redirect_uri_hint=getattr(args, "redirect_uri", None) or DEFAULT_SPOTIFY_REDIRECT_URI, + ) + + redirect_uri = _spotify_redirect_uri(getattr(args, "redirect_uri", None), existing_state) + scope = _spotify_scope_string(getattr(args, "scope", None) or existing_state.get("scope")) + accounts_base_url = _spotify_accounts_base_url(existing_state) + api_base_url = _spotify_api_base_url(existing_state) + open_browser = not getattr(args, "no_browser", False) + + code_verifier = _spotify_code_verifier() + code_challenge = _spotify_code_challenge(code_verifier) + state_nonce = uuid.uuid4().hex + authorize_url = _spotify_build_authorize_url( + client_id=client_id, + redirect_uri=redirect_uri, + scope=scope, + state=state_nonce, + code_challenge=code_challenge, + accounts_base_url=accounts_base_url, + ) + + print("Starting Spotify PKCE login...") + print(f"Client ID: {client_id}") + print(f"Redirect URI: {redirect_uri}") + print("Make sure this redirect URI is allow-listed in your Spotify app settings.") + print() + print("Open this URL to authorize Hermes:") + print(authorize_url) + print() + print(f"Full setup guide: {SPOTIFY_DOCS_URL}") + print() + + if open_browser and not 
_is_remote_session(): + try: + opened = webbrowser.open(authorize_url) + except Exception: + opened = False + if opened: + print("Browser opened for Spotify authorization.") + else: + print("Could not open the browser automatically; use the URL above.") + + callback = _spotify_wait_for_callback( + redirect_uri, + timeout_seconds=float(getattr(args, "timeout", None) or 180.0), + ) + if callback.get("error"): + detail = callback.get("error_description") or callback["error"] + raise SystemExit(f"Spotify authorization failed: {detail}") + if callback.get("state") != state_nonce: + raise SystemExit("Spotify authorization failed: state mismatch.") + + token_payload = _spotify_exchange_code_for_tokens( + client_id=client_id, + code=str(callback.get("code") or ""), + redirect_uri=redirect_uri, + code_verifier=code_verifier, + accounts_base_url=accounts_base_url, + timeout_seconds=float(getattr(args, "timeout", None) or 20.0), + ) + spotify_state = _spotify_token_payload_to_state( + token_payload, + client_id=client_id, + redirect_uri=redirect_uri, + requested_scope=scope, + accounts_base_url=accounts_base_url, + api_base_url=api_base_url, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + _store_provider_state(auth_store, "spotify", spotify_state, set_active=False) + saved_to = _save_auth_store(auth_store) + + print("Spotify login successful!") + print(f" Auth state: {saved_to}") + print(" Provider state saved under providers.spotify") + print(f" Docs: {SPOTIFY_DOCS_URL}") # ============================================================================= # SSH / remote session detection @@ -1534,12 +2198,21 @@ def refresh_codex_oauth_pure( try: err = response.json() if isinstance(err, dict): - err_code = err.get("error") - if isinstance(err_code, str) and err_code.strip(): - code = err_code.strip() - err_desc = err.get("error_description") or err.get("message") - if isinstance(err_desc, str) and err_desc.strip(): - message = f"Codex token refresh failed: 
{err_desc.strip()}" + err_obj = err.get("error") + # OpenAI shape: {"error": {"code": "...", "message": "...", "type": "..."}} + if isinstance(err_obj, dict): + nested_code = err_obj.get("code") or err_obj.get("type") + if isinstance(nested_code, str) and nested_code.strip(): + code = nested_code.strip() + nested_msg = err_obj.get("message") + if isinstance(nested_msg, str) and nested_msg.strip(): + message = f"Codex token refresh failed: {nested_msg.strip()}" + # OAuth spec shape: {"error": "code_str", "error_description": "..."} + elif isinstance(err_obj, str) and err_obj.strip(): + code = err_obj.strip() + err_desc = err.get("error_description") or err.get("message") + if isinstance(err_desc, str) and err_desc.strip(): + message = f"Codex token refresh failed: {err_desc.strip()}" except Exception: pass if code in {"invalid_grant", "invalid_token", "invalid_request"}: @@ -1698,6 +2371,24 @@ def resolve_codex_runtime_credentials( # TLS verification helper # ============================================================================= +def _default_verify() -> bool | ssl.SSLContext: + """Platform-aware default SSL verify for httpx clients. + + On macOS with Homebrew Python, the system OpenSSL cannot locate the + system trust store and valid public certs fail verification. When + certifi is importable we pin its bundle explicitly; elsewhere we + defer to httpx's built-in default (certifi via its own dependency). + Mirrors the weixin fix in 3a0ec1d93. 
+ """ + if sys.platform == "darwin": + try: + import certifi + return ssl.create_default_context(cafile=certifi.where()) + except ImportError: + pass + return True + + def _resolve_verify( *, insecure: Optional[bool] = None, @@ -1716,6 +2407,7 @@ def _resolve_verify( or tls_state.get("ca_bundle") or os.getenv("HERMES_CA_BUNDLE") or os.getenv("SSL_CERT_FILE") + or os.getenv("REQUESTS_CA_BUNDLE") ) if effective_insecure: @@ -1727,9 +2419,9 @@ def _resolve_verify( "CA bundle path does not exist: %s — falling back to default certificates", ca_path, ) - return True + return _default_verify() return ssl.create_default_context(cafile=ca_path) - return True + return _default_verify() # ============================================================================= @@ -1848,6 +2540,28 @@ def _refresh_access_token( code = str(error_payload.get("error", "invalid_grant")) description = str(error_payload.get("error_description") or "Refresh token exchange failed") relogin = code in {"invalid_grant", "invalid_token"} + + # Detect the OAuth 2.1 "refresh token reuse" signal from the Nous portal + # server and surface an actionable message. This fires when an external + # process (health-check script, monitoring tool, custom self-heal hook) + # called POST /api/oauth/token with Hermes's refresh_token without + # persisting the rotated token back to auth.json — the server then + # retires the original RT, Hermes's next refresh uses it, and the whole + # session chain gets revoked as a token-theft signal (#15099). 
+ lowered = description.lower() + if "reuse" in lowered or "reuse detected" in lowered: + description = ( + "Nous Portal detected refresh-token reuse and revoked this session.\n" + "This usually means an external process (monitoring script, " + "custom self-heal hook, or another Hermes install sharing " + "~/.hermes/auth.json) called POST /api/oauth/token with Hermes's " + "refresh token without persisting the rotated token back.\n" + "Nous refresh tokens are single-use — only Hermes may call the " + "refresh endpoint. For health checks, use `hermes auth status` " + "instead.\n" + "Re-authenticate with: hermes auth add nous" + ) + raise AuthError(description, provider="nous", code=code, relogin_required=relogin) @@ -2456,59 +3170,116 @@ def resolve_nous_runtime_credentials( # Status helpers # ============================================================================= -def get_nous_auth_status() -> Dict[str, Any]: - """Status snapshot for `hermes status` output. +def _empty_nous_auth_status() -> Dict[str, Any]: + return { + "logged_in": False, + "portal_base_url": None, + "inference_base_url": None, + "access_expires_at": None, + "agent_key_expires_at": None, + "has_refresh_token": False, + } - Checks the credential pool first (where the dashboard device-code flow - and ``hermes auth`` store credentials), then falls back to the legacy - auth-store provider state. + +def _snapshot_nous_pool_status() -> Dict[str, Any]: + """Best-effort status from the credential pool. + + This is a fallback only. The auth-store provider state is the runtime source + of truth because it is what ``resolve_nous_runtime_credentials()`` refreshes + and mints against. """ - # Check credential pool first — the dashboard device-code flow saves - # here but may not have written to the auth store yet. 
try: from agent.credential_pool import load_pool - pool = load_pool("nous") - if pool and pool.has_credentials(): - entry = pool.select() - if entry is not None: - access_token = ( - getattr(entry, "access_token", None) - or getattr(entry, "runtime_api_key", "") - ) - if access_token: - return { - "logged_in": True, - "portal_base_url": getattr(entry, "portal_base_url", None) - or getattr(entry, "base_url", None), - "inference_base_url": getattr(entry, "inference_base_url", None) - or getattr(entry, "base_url", None), - "access_token": access_token, - "access_expires_at": getattr(entry, "expires_at", None), - "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None), - "has_refresh_token": bool(getattr(entry, "refresh_token", None)), - } - except Exception: - pass - # Fall back to auth-store provider state - state = get_provider_auth_state("nous") - if not state: + pool = load_pool("nous") + if not pool or not pool.has_credentials(): + return _empty_nous_auth_status() + + entries = list(pool.entries()) + if not entries: + return _empty_nous_auth_status() + + def _entry_sort_key(entry: Any) -> tuple[float, float, int]: + agent_exp = _parse_iso_timestamp(getattr(entry, "agent_key_expires_at", None)) or 0.0 + access_exp = _parse_iso_timestamp(getattr(entry, "expires_at", None)) or 0.0 + priority = int(getattr(entry, "priority", 0) or 0) + return (agent_exp, access_exp, -priority) + + entry = max(entries, key=_entry_sort_key) + access_token = ( + getattr(entry, "access_token", None) + or getattr(entry, "runtime_api_key", "") + ) + if not access_token: + return _empty_nous_auth_status() + return { - "logged_in": False, - "portal_base_url": None, - "inference_base_url": None, - "access_expires_at": None, - "agent_key_expires_at": None, - "has_refresh_token": False, + "logged_in": True, + "portal_base_url": getattr(entry, "portal_base_url", None) + or getattr(entry, "base_url", None), + "inference_base_url": getattr(entry, "inference_base_url", None) + or 
getattr(entry, "base_url", None), + "access_token": access_token, + "access_expires_at": getattr(entry, "expires_at", None), + "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None), + "has_refresh_token": bool(getattr(entry, "refresh_token", None)), + "source": f"pool:{getattr(entry, 'label', 'unknown')}", } - return { - "logged_in": bool(state.get("access_token")), - "portal_base_url": state.get("portal_base_url"), - "inference_base_url": state.get("inference_base_url"), - "access_expires_at": state.get("expires_at"), - "agent_key_expires_at": state.get("agent_key_expires_at"), - "has_refresh_token": bool(state.get("refresh_token")), - } + except Exception: + return _empty_nous_auth_status() + + +def get_nous_auth_status() -> Dict[str, Any]: + """Status snapshot for Nous auth. + + Prefer the auth-store provider state, because that is the live source of + truth for refresh + mint operations. When provider state exists, validate it + by resolving runtime credentials so revoked refresh sessions do not show up + as a healthy login. If provider state is absent, fall back to the credential + pool for the just-logged-in / not-yet-promoted case. 
+ """ + state = get_provider_auth_state("nous") + if state: + base_status = { + "logged_in": bool(state.get("access_token")), + "portal_base_url": state.get("portal_base_url"), + "inference_base_url": state.get("inference_base_url"), + "access_expires_at": state.get("expires_at"), + "agent_key_expires_at": state.get("agent_key_expires_at"), + "has_refresh_token": bool(state.get("refresh_token")), + "access_token": state.get("access_token"), + "source": "auth_store", + } + try: + creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=60) + refreshed_state = get_provider_auth_state("nous") or state + base_status.update( + { + "logged_in": True, + "portal_base_url": refreshed_state.get("portal_base_url") or base_status.get("portal_base_url"), + "inference_base_url": creds.get("base_url") + or refreshed_state.get("inference_base_url") + or base_status.get("inference_base_url"), + "access_expires_at": refreshed_state.get("expires_at") or base_status.get("access_expires_at"), + "agent_key_expires_at": creds.get("expires_at") + or refreshed_state.get("agent_key_expires_at") + or base_status.get("agent_key_expires_at"), + "has_refresh_token": bool(refreshed_state.get("refresh_token")), + "source": f"runtime:{creds.get('source', 'portal')}", + "key_id": creds.get("key_id"), + } + ) + return base_status + except AuthError as exc: + base_status.update({ + "logged_in": False, + "error": str(exc), + "relogin_required": bool(getattr(exc, "relogin_required", False)), + "error_code": getattr(exc, "code", None), + }) + return base_status + + return _snapshot_nous_pool_status() def get_codex_auth_status() -> Dict[str, Any]: @@ -2624,6 +3395,8 @@ def get_external_process_provider_status(provider_id: str) -> Dict[str, Any]: def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: """Generic auth status dispatcher.""" target = provider_id or get_active_provider() + if target == "spotify": + return get_spotify_auth_status() if target == "nous": return 
get_nous_auth_status() if target == "openai-codex": @@ -2796,6 +3569,46 @@ def _update_config_for_provider( return config_path +def _get_config_provider() -> Optional[str]: + """Return model.provider from config.yaml, normalized, if present.""" + try: + config = read_raw_config() + except Exception: + return None + if not config: + return None + model = config.get("model") + if not isinstance(model, dict): + return None + provider = model.get("provider") + if not isinstance(provider, str): + return None + provider = provider.strip().lower() + return provider or None + + +def _config_provider_matches(provider_id: Optional[str]) -> bool: + """Return True when config.yaml currently selects *provider_id*.""" + if not provider_id: + return False + return _get_config_provider() == provider_id.strip().lower() + + +def _logout_default_provider_from_config() -> Optional[str]: + """Fallback logout target when auth.json has no active provider. + + `hermes logout` historically keyed off auth.json.active_provider only. + That left users stuck when auth state had already been cleared but + config.yaml still selected an OAuth provider such as openai-codex for the + agent model: there was no active auth provider to target, so logout printed + "No provider is currently logged in" and never reset model.provider. + """ + provider = _get_config_provider() + if provider in {"nous", "openai-codex"}: + return provider + return None + + def _reset_config_provider() -> Path: """Reset config.yaml provider back to auto after logout.""" config_path = get_config_path() @@ -3016,52 +3829,61 @@ def login_command(args) -> None: raise SystemExit(0) -def _login_openai_codex(args, pconfig: ProviderConfig) -> None: +def _login_openai_codex( + args, + pconfig: ProviderConfig, + *, + force_new_login: bool = False, +) -> None: """OpenAI Codex login via device code flow. 
Tokens stored in ~/.hermes/auth.json.""" + del args, pconfig # kept for parity with other provider login helpers + # Check for existing Hermes-owned credentials - try: - existing = resolve_codex_runtime_credentials() - # Verify the resolved token is actually usable (not expired). - # resolve_codex_runtime_credentials attempts refresh, so if we get - # here the token should be valid — but double-check before telling - # the user "Login successful!". - _resolved_key = existing.get("api_key", "") - if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60): - print("Existing Codex credentials found in Hermes auth store.") - try: - reuse = input("Use existing credentials? [Y/n]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - reuse = "y" - if reuse in ("", "y", "yes"): - config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL)) - print() - print("Login successful!") - print(f" Config updated: {config_path} (model.provider=openai-codex)") - return - else: - print("Existing Codex credentials are expired. Starting fresh login...") - except AuthError: - pass + if not force_new_login: + try: + existing = resolve_codex_runtime_credentials() + # Verify the resolved token is actually usable (not expired). + # resolve_codex_runtime_credentials attempts refresh, so if we get + # here the token should be valid — but double-check before telling + # the user "Login successful!". + _resolved_key = existing.get("api_key", "") + if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60): + print("Existing Codex credentials found in Hermes auth store.") + try: + reuse = input("Use existing credentials? 
[Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL)) + print() + print("Login successful!") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + return + else: + print("Existing Codex credentials are expired. Starting fresh login...") + except AuthError: + pass # Check for existing Codex CLI tokens we can import - cli_tokens = _import_codex_cli_tokens() - if cli_tokens: - print("Found existing Codex CLI credentials at ~/.codex/auth.json") - print("Hermes will create its own session to avoid conflicts with Codex CLI / VS Code.") - try: - do_import = input("Import these credentials? (a separate login is recommended) [y/N]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - do_import = "n" - if do_import in ("y", "yes"): - _save_codex_tokens(cli_tokens) - base_url = os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") or DEFAULT_CODEX_BASE_URL - config_path = _update_config_for_provider("openai-codex", base_url) - print() - print("Credentials imported. Note: if Codex CLI refreshes its token,") - print("Hermes will keep working independently with its own session.") - print(f" Config updated: {config_path} (model.provider=openai-codex)") - return + if not force_new_login: + cli_tokens = _import_codex_cli_tokens() + if cli_tokens: + print("Found existing Codex CLI credentials at ~/.codex/auth.json") + print("Hermes will create its own session to avoid conflicts with Codex CLI / VS Code.") + try: + do_import = input("Import these credentials? 
(a separate login is recommended) [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + do_import = "n" + if do_import in ("y", "yes"): + _save_codex_tokens(cli_tokens) + base_url = os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") or DEFAULT_CODEX_BASE_URL + config_path = _update_config_for_provider("openai-codex", base_url) + print() + print("Credentials imported. Note: if Codex CLI refreshes its token,") + print("Hermes will keep working independently with its own session.") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + return # Run a fresh device code flow — Hermes gets its own OAuth session print() @@ -3489,20 +4311,21 @@ def logout_command(args) -> None: """Clear auth state for a provider.""" provider_id = getattr(args, "provider", None) - if provider_id and provider_id not in PROVIDER_REGISTRY: + if provider_id and not is_known_auth_provider(provider_id): print(f"Unknown provider: {provider_id}") raise SystemExit(1) active = get_active_provider() - target = provider_id or active + target = provider_id or active or _logout_default_provider_from_config() if not target: print("No provider is currently logged in.") return - provider_name = PROVIDER_REGISTRY[target].name if target in PROVIDER_REGISTRY else target + config_matches = _config_provider_matches(target) + provider_name = get_auth_provider_display_name(target) - if clear_provider_auth(target): + if clear_provider_auth(target) or config_matches: _reset_config_provider() print(f"Logged out of {provider_name}.") if os.getenv("OPENROUTER_API_KEY"): diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 9c3320010..94ea2559c 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -110,18 +110,40 @@ def _display_source(source: str) -> str: return source.split(":", 1)[1] if source.startswith("manual:") else source +def _classify_exhausted_status(entry) -> tuple[str, bool]: + code = getattr(entry, "last_error_code", 
None) + reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower() + message = str(getattr(entry, "last_error_message", "") or "").strip().lower() + + if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any( + token in message for token in ("rate limit", "usage limit", "quota", "too many requests") + ): + return "rate-limited", True + + if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any( + token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication") + ): + return "auth failed", False + + return "exhausted", True + + + def _format_exhausted_status(entry) -> str: if entry.last_status != STATUS_EXHAUSTED: return "" + label, show_retry_window = _classify_exhausted_status(entry) reason = getattr(entry, "last_error_reason", None) reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else "" code = f" ({entry.last_error_code})" if entry.last_error_code else "" + if not show_retry_window: + return f" {label}{reason_text}{code} (re-auth may be required)" exhausted_until = _exhausted_until(entry) if exhausted_until is None: - return f" exhausted{reason_text}{code}" + return f" {label}{reason_text}{code}" remaining = max(0, int(math.ceil(exhausted_until - time.time()))) if remaining <= 0: - return f" exhausted{reason_text}{code} (ready to retry)" + return f" {label}{reason_text}{code} (ready to retry)" minutes, seconds = divmod(remaining, 60) hours, minutes = divmod(minutes, 60) days, hours = divmod(hours, 24) @@ -133,7 +155,7 @@ def _format_exhausted_status(entry) -> str: wait = f"{minutes}m {seconds}s" else: wait = f"{seconds}s" - return f" exhausted{reason_text}{code} ({wait} left)" + return f" {label}{reason_text}{code} ({wait} left)" def auth_add_command(args) -> None: @@ -386,6 +408,44 @@ def auth_reset_command(args) -> None: print(f"Reset 
status on {count} {provider} credentials") +def auth_status_command(args) -> None: + provider = _normalize_provider(getattr(args, "provider", "") or "") + if not provider: + raise SystemExit("Provider is required. Example: `hermes auth status spotify`.") + status = auth_mod.get_auth_status(provider) + if not status.get("logged_in"): + reason = status.get("error") + if reason: + print(f"{provider}: logged out ({reason})") + else: + print(f"{provider}: logged out") + return + + print(f"{provider}: logged in") + for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"): + value = status.get(key) + if value: + print(f" {key}: {value}") + + +def auth_logout_command(args) -> None: + auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None))) + + +def auth_spotify_command(args) -> None: + action = str(getattr(args, "spotify_action", "") or "login").strip().lower() + if action in {"", "login"}: + auth_mod.login_spotify_command(args) + return + if action == "status": + auth_status_command(SimpleNamespace(provider="spotify")) + return + if action == "logout": + auth_logout_command(SimpleNamespace(provider="spotify")) + return + raise SystemExit(f"Unknown Spotify auth action: {action}") + + def _interactive_auth() -> None: """Interactive credential pool management when `hermes auth` is called bare.""" # Show current pool status first @@ -583,5 +643,14 @@ def auth_command(args) -> None: if action == "reset": auth_reset_command(args) return + if action == "status": + auth_status_command(args) + return + if action == "logout": + auth_logout_command(args) + return + if action == "spotify": + auth_spotify_command(args) + return # No subcommand — launch interactive mode _interactive_auth() diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index fb6068a81..0f792592f 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> 
Optional[dict]: return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)} +_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag" +_latest_release_cache: Optional[tuple] = None # (tag, url) once resolved + + +def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]: + """Return ``(tag, release_url)`` for the latest git tag, or None. + + Local-only — runs ``git describe --tags --abbrev=0`` against the + Hermes checkout. Cached per-process. Release URL always points at the + canonical NousResearch/hermes-agent repo (forks don't get a link). + """ + global _latest_release_cache + if _latest_release_cache is not None: + return _latest_release_cache or None + + repo_dir = repo_dir or _resolve_repo_dir() + if repo_dir is None: + _latest_release_cache = () # falsy sentinel — skip future lookups + return None + + try: + result = subprocess.run( + ["git", "describe", "--tags", "--abbrev=0"], + capture_output=True, + text=True, + timeout=3, + cwd=str(repo_dir), + ) + except Exception: + _latest_release_cache = () + return None + + if result.returncode != 0: + _latest_release_cache = () + return None + + tag = (result.stdout or "").strip() + if not tag: + _latest_release_cache = () + return None + + url = f"{_RELEASE_URL_BASE}/{tag}" + _latest_release_cache = (tag, url) + return _latest_release_cache + + def format_banner_version_label() -> str: """Return the version label shown in the startup banner title.""" base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})" @@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str, agent_name = _skin_branding("agent_name", "Hermes Agent") title_color = _skin_color("banner_title", "#FFD700") border_color = _skin_color("banner_border", "#CD7F32") + version_label = format_banner_version_label() + release_info = get_latest_release_tag() + if release_info: + _tag, _url = release_info + title_markup = f"[bold 
{title_color}][link={_url}]{version_label}[/link][/]" + else: + title_markup = f"[bold {title_color}]{version_label}[/]" outer_panel = Panel( layout_table, - title=f"[bold {title_color}]{format_banner_version_label()}[/]", + title=title_markup, border_style=border_color, padding=(0, 2), ) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 87d73af58..efff57180 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -77,7 +77,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("rollback", "List or restore filesystem checkpoints", "Session", args_hint="[number]"), CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session", - aliases=("snap",), args_hint="[create|restore |prune]"), + cli_only=True, aliases=("snap",), args_hint="[create|restore |prune]"), CommandDef("stop", "Kill all running background processes", "Session"), CommandDef("approve", "Approve a pending dangerous command", "Session", gateway_only=True, args_hint="[session|always]"), @@ -104,9 +104,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("config", "Show current configuration", "Configuration", cli_only=True), CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"), - CommandDef("provider", "Show available providers and current provider", - "Configuration"), - CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"), + CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info", + cli_only=True), CommandDef("personality", "Set a predefined personality", "Configuration", args_hint="[name]"), @@ -124,9 +123,12 @@ COMMAND_REGISTRY: list[CommandDef] = [ args_hint="[normal|fast|status]", subcommands=("normal", "fast", "status", "on", "off")), CommandDef("skin", "Show or change the display skin/theme", "Configuration", - args_hint="[name]"), + cli_only=True, args_hint="[name]"), CommandDef("voice", "Toggle voice mode", "Configuration", 
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")), + CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration", + cli_only=True, args_hint="[queue|interrupt|status]", + subcommands=("queue", "interrupt", "status")), # Tools & Skills CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills", @@ -139,7 +141,8 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("cron", "Manage scheduled tasks", "Tools & Skills", cli_only=True, args_hint="[subcommand]", subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")), - CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"), + CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills", + cli_only=True), CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills", aliases=("reload_mcp",)), CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills", @@ -317,7 +320,7 @@ def should_bypass_active_session(command_name: str | None) -> bool: safety net in gateway.run discards any command text that reaches the pending queue — which meant a mid-run /model (or /reasoning, /voice, /insights, /title, /resume, /retry, /undo, /compress, - /usage, /provider, /reload-mcp, /sethome, /reset) would silently + /usage, /reload-mcp, /sethome, /reset) would silently interrupt the agent AND get discarded, producing a zero-char response. See issue #5057 / PRs #6252, #10370, #4665. diff --git a/hermes_cli/config.py b/hermes_cli/config.py index cfcc7ff28..7678287a0 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -466,6 +466,12 @@ DEFAULT_CONFIG = { "record_sessions": False, # Auto-record browser sessions as WebM videos "allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.) 
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome + # CDP supervisor — dialog + frame detection via a persistent WebSocket. + # Active only when a CDP-capable backend is attached (Browserbase or + # local Chrome via /browser connect). See + # website/docs/developer-guide/browser-supervisor.md. + "dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept + "dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond "camofox": { # When true, Hermes sends a stable profile-scoped userId to Camofox # so the server maps it to a persistent Firefox profile automatically. @@ -486,7 +492,27 @@ DEFAULT_CONFIG = { # exceed this are rejected with guidance to use offset+limit. # 100K chars ≈ 25–35K tokens across typical tokenisers. "file_read_max_chars": 100_000, - + + # Tool-output truncation thresholds. When terminal output or a + # single read_file page exceeds these limits, Hermes truncates the + # payload sent to the model (keeping head + tail for terminal, + # enforcing pagination for read_file). Tuning these trades context + # footprint against how much raw output the model can see in one + # shot. Ported from anomalyco/opencode PR #23770. + # + # - max_bytes: terminal_tool output cap, in chars + # (default 50_000 ≈ 12-15K tokens). + # - max_lines: read_file pagination cap — the maximum `limit` + # a single read_file call can request before + # being clamped (default 2000). + # - max_line_length: per-line cap applied when read_file emits a + # line-numbered view (default 2000 chars). + "tool_output": { + "max_bytes": 50_000, + "max_lines": 2000, + "max_line_length": 2000, + }, + "compression": { "enabled": True, "threshold": 0.50, # compress when context usage exceeds this ratio @@ -495,6 +521,12 @@ DEFAULT_CONFIG = { }, + # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API). + # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored. 
+ "prompt_caching": { + "cache_ttl": "5m", + }, + # AWS Bedrock provider configuration. # Only used when model.provider is "bedrock". "bedrock": { @@ -739,6 +771,10 @@ DEFAULT_CONFIG = { "inherit_mcp_toolsets": True, "max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget, # independent of the parent's max_iterations) + "child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s, + # no ceiling). High-reasoning models on large tasks + # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets; + # raise if children time out before producing output. "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling diff --git a/hermes_cli/copilot_auth.py b/hermes_cli/copilot_auth.py index 24859da1a..348e4efe8 100644 --- a/hermes_cli/copilot_auth.py +++ b/hermes_cli/copilot_auth.py @@ -275,6 +275,99 @@ def copilot_device_code_login( return None +# ─── Copilot Token Exchange ──────────────────────────────────────────────── + +# Module-level cache for exchanged Copilot API tokens. +# Maps raw_token_fingerprint -> (api_token, expires_at_epoch). 
+_jwt_cache: dict[str, tuple[str, float]] = {} +_JWT_REFRESH_MARGIN_SECONDS = 120 # refresh 2 min before expiry + +# Token exchange endpoint and headers (matching VS Code / Copilot CLI) +_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token" +_EDITOR_VERSION = "vscode/1.104.1" +_EXCHANGE_USER_AGENT = "GitHubCopilotChat/0.26.7" + + +def _token_fingerprint(raw_token: str) -> str: + """Short fingerprint of a raw token for cache keying (avoids storing full token).""" + import hashlib + return hashlib.sha256(raw_token.encode()).hexdigest()[:16] + + +def exchange_copilot_token(raw_token: str, *, timeout: float = 10.0) -> tuple[str, float]: + """Exchange a raw GitHub token for a short-lived Copilot API token. + + Calls ``GET https://api.github.com/copilot_internal/v2/token`` with + the raw GitHub token and returns ``(api_token, expires_at)``. + + The returned token is a semicolon-separated string (not a standard JWT) + used as ``Authorization: Bearer `` for Copilot API requests. + + Results are cached in-process and reused until close to expiry. + Raises ``ValueError`` on failure. 
+ """ + import urllib.request + + fp = _token_fingerprint(raw_token) + + # Check cache first + cached = _jwt_cache.get(fp) + if cached: + api_token, expires_at = cached + if time.time() < expires_at - _JWT_REFRESH_MARGIN_SECONDS: + return api_token, expires_at + + req = urllib.request.Request( + _TOKEN_EXCHANGE_URL, + method="GET", + headers={ + "Authorization": f"token {raw_token}", + "User-Agent": _EXCHANGE_USER_AGENT, + "Accept": "application/json", + "Editor-Version": _EDITOR_VERSION, + }, + ) + + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + except Exception as exc: + raise ValueError(f"Copilot token exchange failed: {exc}") from exc + + api_token = data.get("token", "") + expires_at = data.get("expires_at", 0) + if not api_token: + raise ValueError("Copilot token exchange returned empty token") + + # Convert expires_at to float if needed + expires_at = float(expires_at) if expires_at else time.time() + 1800 + + _jwt_cache[fp] = (api_token, expires_at) + logger.debug( + "Copilot token exchanged, expires_at=%s", + expires_at, + ) + return api_token, expires_at + + +def get_copilot_api_token(raw_token: str) -> str: + """Exchange a raw GitHub token for a Copilot API token, with fallback. + + Convenience wrapper: returns the exchanged token on success, or the + raw token unchanged if the exchange fails (e.g. network error, unsupported + account type). This preserves existing behaviour for accounts that don't + need exchange while enabling access to internal-only models for those that do. 
+ """ + if not raw_token: + return raw_token + try: + api_token, _ = exchange_copilot_token(raw_token) + return api_token + except Exception as exc: + logger.debug("Copilot token exchange failed, using raw token: %s", exc) + return raw_token + + # ─── Copilot API Headers ─────────────────────────────────────────────────── def copilot_request_headers( diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index e0ab6007a..78639d465 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -93,6 +93,9 @@ def cron_list(show_all: bool = False): script = job.get("script") if script: print(f" Script: {script}") + workdir = job.get("workdir") + if workdir: + print(f" Workdir: {workdir}") # Execution history last_status = job.get("last_status") @@ -168,6 +171,7 @@ def cron_create(args): skill=getattr(args, "skill", None), skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)), script=getattr(args, "script", None), + workdir=getattr(args, "workdir", None), ) if not result.get("success"): print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED)) @@ -180,6 +184,8 @@ def cron_create(args): job_data = result.get("job", {}) if job_data.get("script"): print(f" Script: {job_data['script']}") + if job_data.get("workdir"): + print(f" Workdir: {job_data['workdir']}") print(f" Next run: {result['next_run_at']}") return 0 @@ -218,6 +224,7 @@ def cron_edit(args): repeat=getattr(args, "repeat", None), skills=final_skills, script=getattr(args, "script", None), + workdir=getattr(args, "workdir", None), ) if not result.get("success"): print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED)) @@ -233,6 +240,8 @@ def cron_edit(args): print(" Skills: none") if updated.get("script"): print(f" Script: {updated['script']}") + if updated.get("workdir"): + print(f" Workdir: {updated['workdir']}") return 0 diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 064b1d68d..cba4ebcdd 100644 --- 
a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -29,6 +29,7 @@ if _env_path.exists(): load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8") from hermes_cli.colors import Colors, color +from hermes_cli.models import _HERMES_USER_AGENT from hermes_constants import OPENROUTER_MODELS_URL from utils import base_url_host_matches @@ -295,16 +296,33 @@ def run_doctor(args): except Exception: pass try: - from hermes_cli.auth import resolve_provider as _resolve_provider + from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers + from hermes_cli.providers import resolve_provider_full as _resolve_provider_full except Exception: - _resolve_provider = None + _compatible_custom_providers = None + _resolve_provider_full = None + + custom_providers = [] + if _compatible_custom_providers is not None: + try: + custom_providers = _compatible_custom_providers(cfg) + except Exception: + custom_providers = [] + + user_providers = cfg.get("providers") + if isinstance(user_providers, dict): + known_providers.update(str(name).strip().lower() for name in user_providers if str(name).strip()) + for entry in custom_providers: + if not isinstance(entry, dict): + continue + name = str(entry.get("name") or "").strip() + if name: + known_providers.add("custom:" + name.lower().replace(" ", "-")) canonical_provider = provider - if provider and _resolve_provider is not None and provider != "auto": - try: - canonical_provider = _resolve_provider(provider) - except Exception: - canonical_provider = None + if provider and _resolve_provider_full is not None and provider != "auto": + provider_def = _resolve_provider_full(provider, user_providers, custom_providers) + canonical_provider = provider_def.id if provider_def is not None else None if provider and provider != "auto": if canonical_provider is None or (known_providers and canonical_provider not in known_providers): @@ -957,7 +975,10 @@ def run_doctor(args): if base_url_host_matches(_base, 
"api.kimi.com") and _base.rstrip("/").endswith("/coding"): _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url - _headers = {"Authorization": f"Bearer {_key}"} + _headers = { + "Authorization": f"Bearer {_key}", + "User-Agent": _HERMES_USER_AGENT, + } if base_url_host_matches(_base, "api.kimi.com"): _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( diff --git a/hermes_cli/dump.py b/hermes_cli/dump.py index 90364a261..3d7280244 100644 --- a/hermes_cli/dump.py +++ b/hermes_cli/dump.py @@ -267,6 +267,8 @@ def run_dump(args): ("ANTHROPIC_API_KEY", "anthropic"), ("ANTHROPIC_TOKEN", "anthropic_token"), ("NOUS_API_KEY", "nous"), + ("GOOGLE_API_KEY", "google/gemini"), + ("GEMINI_API_KEY", "gemini"), ("GLM_API_KEY", "glm/zai"), ("ZAI_API_KEY", "zai"), ("KIMI_API_KEY", "kimi"), diff --git a/hermes_cli/main.py b/hermes_cli/main.py index d7de30960..7de68d2cb 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -166,6 +166,27 @@ from hermes_cli.env_loader import load_hermes_dotenv load_hermes_dotenv(project_env=PROJECT_ROOT / ".env") +# Bridge security.redact_secrets from config.yaml → HERMES_REDACT_SECRETS env +# var BEFORE hermes_logging imports agent.redact (which snapshots the flag at +# module-import time). Without this, config.yaml's toggle is ignored because +# the setup_logging() call below imports agent.redact, which reads the env var +# exactly once. Env var in .env still wins — this is config.yaml fallback only. 
+try: + if "HERMES_REDACT_SECRETS" not in os.environ: + import yaml as _yaml_early + _cfg_path = get_hermes_home() / "config.yaml" + if _cfg_path.exists(): + with open(_cfg_path, encoding="utf-8") as _f: + _early_sec_cfg = (_yaml_early.safe_load(_f) or {}).get("security", {}) + if isinstance(_early_sec_cfg, dict): + _early_redact = _early_sec_cfg.get("redact_secrets") + if _early_redact is not None: + os.environ["HERMES_REDACT_SECRETS"] = str(_early_redact).lower() + del _early_sec_cfg + del _cfg_path +except Exception: + pass # best-effort — redaction stays at default (enabled) on config errors + # Initialize centralized file logging early — all `hermes` subcommands # (chat, setup, gateway, config, etc.) write to agent.log + errors.log. try: @@ -1429,6 +1450,7 @@ def select_provider_and_model(args=None): load_config, get_env_value, ) + from hermes_cli.providers import resolve_provider_full config = load_config() current_model = config.get("model") @@ -1446,14 +1468,30 @@ def select_provider_and_model(args=None): effective_provider = ( config_provider or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto" ) - try: - active = resolve_provider(effective_provider) - except AuthError as exc: - warning = format_auth_error(exc) - print(f"Warning: {warning} Falling back to auto provider detection.") + compatible_custom_providers = get_compatible_custom_providers(config) + active = None + if effective_provider != "auto": + active_def = resolve_provider_full( + effective_provider, + config.get("providers"), + compatible_custom_providers, + ) + if active_def is not None: + active = active_def.id + else: + warning = ( + f"Unknown provider '{effective_provider}'. Check 'hermes model' for " + "available providers, or run 'hermes doctor' to diagnose config " + "issues." 
+ ) + print(f"Warning: {warning} Falling back to auto provider detection.") + if active is None: try: active = resolve_provider("auto") - except AuthError: + except AuthError as exc: + if effective_provider == "auto": + warning = format_auth_error(exc) + print(f"Warning: {warning} Falling back to auto provider detection.") active = None # no provider yet; default to first in list # Detect custom endpoint @@ -2311,7 +2349,41 @@ def _model_flow_openai_codex(config, current_model=""): from hermes_cli.codex_models import get_codex_model_ids status = get_codex_auth_status() - if not status.get("logged_in"): + if status.get("logged_in"): + print(" OpenAI Codex credentials: ✓") + print() + print(" 1. Use existing credentials") + print(" 2. Reauthenticate (new OAuth login)") + print(" 3. Cancel") + print() + try: + choice = input(" Choice [1/2/3]: ").strip() + except (KeyboardInterrupt, EOFError): + choice = "1" + + if choice == "2": + print("Starting a fresh OpenAI Codex login...") + print() + try: + mock_args = argparse.Namespace() + _login_openai_codex( + mock_args, + PROVIDER_REGISTRY["openai-codex"], + force_new_login=True, + ) + except SystemExit: + print("Login cancelled or failed.") + return + except Exception as exc: + print(f"Login failed: {exc}") + return + status = get_codex_auth_status() + if not status.get("logged_in"): + print("Login failed.") + return + elif choice == "3": + return + else: print("Not logged into OpenAI Codex. 
Starting login...") print() try: @@ -2828,11 +2900,16 @@ def _model_flow_named_custom(config, provider_info): name = provider_info["name"] base_url = provider_info["base_url"] + api_mode = provider_info.get("api_mode", "") api_key = provider_info.get("api_key", "") key_env = provider_info.get("key_env", "") saved_model = provider_info.get("model", "") provider_key = (provider_info.get("provider_key") or "").strip() + # Resolve key from env var if api_key not set directly + if not api_key and key_env: + api_key = os.environ.get(key_env, "") + print(f" Provider: {name}") print(f" URL: {base_url}") if saved_model: @@ -2840,7 +2917,10 @@ def _model_flow_named_custom(config, provider_info): print() print("Fetching available models...") - models = fetch_api_models(api_key, base_url, timeout=8.0) + models = fetch_api_models( + api_key, base_url, timeout=8.0, + api_mode=api_mode or None, + ) if models: default_idx = 0 @@ -3930,12 +4010,71 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): print("Cancelled.") return save_env_value(key_env, new_key) + existing_key = new_key print("API key saved.") print() else: print(f" {pconfig.name} API key: {existing_key[:8]}... ✓") print() + # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash) + # are exhausted in a handful of agent turns, so refuse to wire up the + # provider with a free-tier key. Probe is best-effort; network or auth + # errors fall through without blocking. 
+ if provider_id == "gemini" and existing_key: + try: + from agent.gemini_native_adapter import probe_gemini_tier + except Exception: + probe_gemini_tier = None + if probe_gemini_tier is not None: + print(" Checking Gemini API tier...") + probe_base = ( + (get_env_value(base_url_env) if base_url_env else "") + or os.getenv(base_url_env or "", "") + or pconfig.inference_base_url + ) + tier = probe_gemini_tier(existing_key, probe_base) + if tier == "free": + print() + print( + "❌ This Google API key is on the free tier " + "(<= 250 requests/day for gemini-2.5-flash)." + ) + print( + " Hermes typically makes 3-10 API calls per user turn " + "(tool iterations + auxiliary tasks)," + ) + print( + " so the free tier is exhausted after a handful of " + "messages and cannot sustain" + ) + print(" an agent session.") + print() + print( + " To use Gemini with Hermes, enable billing on your " + "Google Cloud project and regenerate" + ) + print( + " the key in a billing-enabled project: " + "https://aistudio.google.com/apikey" + ) + print() + print( + " Alternatives with workable free usage: DeepSeek, " + "OpenRouter (free models), Groq, Nous." + ) + print() + print("Not saving Gemini as the default provider.") + return + if tier == "paid": + print(" Tier check: paid ✓") + else: + # "unknown" -- network issue, auth problem, unexpected response. + # Don't block; the runtime 429 handler will surface free-tier + # guidance if the key turns out to be free tier. 
+ print(" Tier check: could not verify (proceeding anyway).") + print() + # Optional base URL override current_base = "" if base_url_env: @@ -4177,6 +4316,8 @@ def _model_flow_anthropic(config, current_model=""): from agent.anthropic_adapter import ( read_claude_code_credentials, is_claude_code_token_valid, + _is_oauth_token, + _resolve_claude_code_token_from_credentials, ) cc_creds = read_claude_code_credentials() @@ -4185,7 +4326,14 @@ def _model_flow_anthropic(config, current_model=""): except Exception: pass - has_creds = bool(existing_key) or cc_available + # Stale-OAuth guard: if the only existing cred is an expired OAuth token + # (no valid cc_creds to fall back on), treat it as missing so the re-auth + # path is offered instead of silently accepting a broken token. + existing_is_stale_oauth = False + if existing_key and _is_oauth_token(existing_key) and not cc_available: + existing_is_stale_oauth = True + + has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available needs_auth = not has_creds if has_creds: @@ -6567,9 +6715,15 @@ def cmd_dashboard(args): try: import fastapi # noqa: F401 import uvicorn # noqa: F401 - except ImportError: - print("Web UI dependencies not installed.") - print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'") + except ImportError as e: + print("Web UI dependencies not installed (need fastapi + uvicorn).") + print( + f"Re-install the package into this interpreter so metadata updates apply:\n" + f" cd {PROJECT_ROOT}\n" + f" {sys.executable} -m pip install -e .\n" + "If `pip` is missing in this venv, use: uv pip install -e ." 
+ ) + print(f"Import error: {e}") sys.exit(1) if "HERMES_WEB_DIST" not in os.environ: @@ -6578,11 +6732,13 @@ def cmd_dashboard(args): from hermes_cli.web_server import start_server + embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1" start_server( host=args.host, port=args.port, open_browser=not args.no_open, allow_public=getattr(args, "insecure", False), + embedded_chat=embedded_chat, ) @@ -7185,7 +7341,7 @@ For more help on a command: ) logout_parser.add_argument( "--provider", - choices=["nous", "openai-codex"], + choices=["nous", "openai-codex", "spotify"], default=None, help="Provider to log out from (default: active provider)", ) @@ -7242,6 +7398,17 @@ For more help on a command: "reset", help="Clear exhaustion status for all credentials for a provider" ) auth_reset.add_argument("provider", help="Provider id") + auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider") + auth_status.add_argument("provider", help="Provider id") + auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state") + auth_logout.add_argument("provider", help="Provider id") + auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE") + auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login") + auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)") + auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app") + auth_spotify.add_argument("--scope", help="Override requested Spotify scopes") + auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically") + auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds") auth_parser.set_defaults(func=cmd_auth) # 
========================================================================= @@ -7298,6 +7465,10 @@ For more help on a command: "--script", help="Path to a Python script whose stdout is injected into the prompt each run", ) + cron_create.add_argument( + "--workdir", + help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).", + ) # cron edit cron_edit = cron_subparsers.add_parser( @@ -7336,6 +7507,10 @@ For more help on a command: "--script", help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.", ) + cron_edit.add_argument( + "--workdir", + help="Absolute path for the job to run from (injects AGENTS.md etc. and sets terminal cwd). Pass empty string to clear.", + ) # lifecycle actions cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job") @@ -8749,6 +8924,14 @@ Examples: action="store_true", help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)", ) + dashboard_parser.add_argument( + "--tui", + action="store_true", + help=( + "Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). " + "Alternatively set HERMES_DASHBOARD_TUI=1." + ), + ) dashboard_parser.set_defaults(func=cmd_dashboard) # ========================================================================= diff --git a/hermes_cli/model_normalize.py b/hermes_cli/model_normalize.py index 76dace065..99e6c34e4 100644 --- a/hermes_cli/model_normalize.py +++ b/hermes_cli/model_normalize.py @@ -12,8 +12,12 @@ Different LLM providers expect model identifiers in different formats: model IDs, but Claude still uses hyphenated native names like ``claude-sonnet-4-6``. - **OpenCode Go** preserves dots in model names: ``minimax-m2.7``. -- **DeepSeek** only accepts two model identifiers: - ``deepseek-chat`` and ``deepseek-reasoner``. 
+- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner`` + (R1-family), and the first-class V-series IDs (``deepseek-v4-pro``, + ``deepseek-v4-flash``, and any future ``deepseek-v-*``). Older + Hermes revisions folded every non-reasoner input into + ``deepseek-chat``, which on aggregators routes to V3 — so a user + picking V4 Pro was silently downgraded. - **Custom** and remaining providers pass the name through as-is. This module centralises that translation so callers can simply write:: @@ -25,6 +29,7 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern. from __future__ import annotations +import re from typing import Optional # --------------------------------------------------------------------------- @@ -100,6 +105,15 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({ "custom", }) +# Providers whose APIs require lowercase model IDs. Xiaomi's +# ``api.xiaomimimo.com`` rejects mixed-case names like ``MiMo-V2.5-Pro`` +# that users might copy from marketing docs — it only accepts +# ``mimo-v2.5-pro``. After stripping a matching provider prefix, these +# providers also get ``.lower()`` applied. +_LOWERCASE_MODEL_PROVIDERS: frozenset[str] = frozenset({ + "xiaomi", +}) + # --------------------------------------------------------------------------- # DeepSeek special handling # --------------------------------------------------------------------------- @@ -115,17 +129,30 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({ }) _DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({ - "deepseek-chat", - "deepseek-reasoner", + "deepseek-chat", # V3 on DeepSeek direct and most aggregators + "deepseek-reasoner", # R1-family reasoning model + "deepseek-v4-pro", # V4 Pro — first-class model ID + "deepseek-v4-flash", # V4 Flash — first-class model ID }) +# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``, +# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``). 
+# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns +# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called +# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases +# of ``deepseek-chat`` and must not be folded into it. +_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$") + def _normalize_for_deepseek(model_name: str) -> str: - """Map any model input to one of DeepSeek's two accepted identifiers. + """Map a model input to a DeepSeek-accepted identifier. Rules: - - Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through. - - Contains any reasoner keyword (r1, think, reasoning, cot, reasoner) + - Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/ + ``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through. + - Matches the V-series pattern ``deepseek-v...`` -> pass through + (covers future ``deepseek-v5-*`` and dated variants without a release). + - Contains a reasoner keyword (r1, think, reasoning, cot, reasoner) -> ``deepseek-reasoner``. - Everything else -> ``deepseek-chat``. @@ -133,13 +160,17 @@ def _normalize_for_deepseek(model_name: str) -> str: model_name: The bare model name (vendor prefix already stripped). Returns: - One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``. + A DeepSeek-accepted model identifier. 
""" bare = _strip_vendor_prefix(model_name).lower() if bare in _DEEPSEEK_CANONICAL_MODELS: return bare + # V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants) + if _DEEPSEEK_V_SERIES_RE.match(bare): + return bare + # Check for reasoner-like keywords anywhere in the name for keyword in _DEEPSEEK_REASONER_KEYWORDS: if keyword in bare: @@ -347,6 +378,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str: >>> normalize_model_for_provider("claude-sonnet-4.6", "zai") 'claude-sonnet-4.6' + + >>> normalize_model_for_provider("MiMo-V2.5-Pro", "xiaomi") + 'mimo-v2.5-pro' """ name = (model_input or "").strip() if not name: @@ -410,7 +444,12 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str: # --- Direct providers: repair matching provider prefixes only --- if provider in _MATCHING_PREFIX_STRIP_PROVIDERS: - return _strip_matching_provider_prefix(name, provider) + result = _strip_matching_provider_prefix(name, provider) + # Some providers require lowercase model IDs (e.g. Xiaomi's API + # rejects "MiMo-V2.5-Pro" but accepts "mimo-v2.5-pro"). + if provider in _LOWERCASE_MODEL_PROVIDERS: + result = result.lower() + return result # --- Authoritative native providers: preserve user-facing slugs as-is --- if provider in _AUTHORITATIVE_NATIVE_PROVIDERS: diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 41fbe36de..cc4ec055f 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -527,6 +527,42 @@ def _resolve_alias_fallback( return None +def resolve_display_context_length( + model: str, + provider: str, + base_url: str = "", + api_key: str = "", + model_info: Optional[ModelInfo] = None, +) -> Optional[int]: + """Resolve the context length to show in /model output. + + models.dev reports per-vendor context (e.g. gpt-5.5 = 1.05M on openai) + but provider-enforced limits can be lower (e.g. Codex OAuth caps the + same slug at 272k). 
The authoritative source is + ``agent.model_metadata.get_model_context_length`` which already knows + about Codex OAuth, Copilot, Nous, and falls back to models.dev for the + rest. + + Prefer the provider-aware value; fall back to ``model_info.context_window`` + only if the resolver returns nothing. + """ + try: + from agent.model_metadata import get_model_context_length + ctx = get_model_context_length( + model, + base_url=base_url or "", + api_key=api_key or "", + provider=provider or None, + ) + if ctx: + return int(ctx) + except Exception: + pass + if model_info is not None and model_info.context_window: + return int(model_info.context_window) + return None + + # --------------------------------------------------------------------------- # Core model-switching pipeline # --------------------------------------------------------------------------- @@ -771,7 +807,10 @@ def switch_model( if provider_changed or explicit_provider: try: - runtime = resolve_runtime_provider(requested=target_provider) + runtime = resolve_runtime_provider( + requested=target_provider, + target_model=new_model, + ) api_key = runtime.get("api_key", "") base_url = runtime.get("base_url", "") api_mode = runtime.get("api_mode", "") @@ -788,7 +827,10 @@ def switch_model( ) else: try: - runtime = resolve_runtime_provider(requested=current_provider) + runtime = resolve_runtime_provider( + requested=current_provider, + target_model=new_model, + ) api_key = runtime.get("api_key", "") base_url = runtime.get("base_url", "") api_mode = runtime.get("api_mode", "") @@ -815,6 +857,7 @@ def switch_model( target_provider, api_key=api_key, base_url=base_url, + api_mode=api_mode or None, ) except Exception as e: validation = { @@ -936,7 +979,7 @@ def list_authenticated_providers( from hermes_cli.auth import PROVIDER_REGISTRY from hermes_cli.models import ( OPENROUTER_MODELS, _PROVIDER_MODELS, - _MODELS_DEV_PREFERRED, _merge_with_models_dev, + _MODELS_DEV_PREFERRED, _merge_with_models_dev, 
provider_model_ids, ) results: List[dict] = [] @@ -984,6 +1027,14 @@ def list_authenticated_providers( # Check if any env var is set has_creds = any(os.environ.get(ev) for ev in env_vars) + if not has_creds: + try: + from hermes_cli.auth import _load_auth_store + store = _load_auth_store() + if store and hermes_id in store.get("credential_pool", {}): + has_creds = True + except Exception: + pass if not has_creds: continue @@ -1095,11 +1146,14 @@ def list_authenticated_providers( if not has_creds: continue - # Use curated list — look up by Hermes slug, fall back to overlay key - model_ids = curated.get(hermes_slug, []) or curated.get(pid, []) - # Merge with models.dev for preferred providers (same rationale as above). - if hermes_slug in _MODELS_DEV_PREFERRED: - model_ids = _merge_with_models_dev(hermes_slug, model_ids) + if hermes_slug in {"copilot", "copilot-acp"}: + model_ids = provider_model_ids(hermes_slug) + else: + # Use curated list — look up by Hermes slug, fall back to overlay key + model_ids = curated.get(hermes_slug, []) or curated.get(pid, []) + # Merge with models.dev for preferred providers (same rationale as above). + if hermes_slug in _MODELS_DEV_PREFERRED: + model_ids = _merge_with_models_dev(hermes_slug, model_ids) total = len(model_ids) top = model_ids[:max_models] @@ -1222,6 +1276,15 @@ def list_authenticated_providers( if m and m not in models_list: models_list.append(m) + # Official OpenAI API rows in providers: often have base_url but no + # explicit models: dict — avoid a misleading zero count in /model. 
+ if not models_list: + url_lower = str(api_url).strip().lower() + if "api.openai.com" in url_lower: + fb = curated.get("openai") or [] + if fb: + models_list = list(fb) + # Try to probe /v1/models if URL is set (but don't block on it) # For now just show what we know from config results.append({ diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a1f2cbec6..3a902ffdf 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"] # (model_id, display description shown in menus) OPENROUTER_MODELS: list[tuple[str, str]] = [ ("moonshotai/kimi-k2.6", "recommended"), + ("deepseek/deepseek-v4-pro", ""), + ("deepseek/deepseek-v4-flash", ""), ("anthropic/claude-opus-4.7", ""), ("anthropic/claude-opus-4.6", ""), ("anthropic/claude-sonnet-4.6", ""), @@ -40,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("anthropic/claude-sonnet-4.5", ""), ("anthropic/claude-haiku-4.5", ""), ("openrouter/elephant-alpha", "free"), - ("openai/gpt-5.4", ""), + ("openai/gpt-5.5", ""), ("openai/gpt-5.4-mini", ""), ("xiaomi/mimo-v2.5-pro", ""), ("xiaomi/mimo-v2.5", ""), @@ -63,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("nvidia/nemotron-3-super-120b-a12b:free", "free"), ("arcee-ai/trinity-large-preview:free", "free"), ("arcee-ai/trinity-large-thinking", ""), - ("openai/gpt-5.4-pro", ""), + ("openai/gpt-5.5-pro", ""), ("openai/gpt-5.4-nano", ""), ] @@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]: _PROVIDER_MODELS: dict[str, list[str]] = { "nous": [ "moonshotai/kimi-k2.6", + "deepseek/deepseek-v4-pro", + "deepseek/deepseek-v4-flash", "xiaomi/mimo-v2.5-pro", "xiaomi/mimo-v2.5", "anthropic/claude-opus-4.7", @@ -116,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "anthropic/claude-sonnet-4.6", "anthropic/claude-sonnet-4.5", "anthropic/claude-haiku-4.5", - "openai/gpt-5.4", + "openai/gpt-5.5", "openai/gpt-5.4-mini", "openai/gpt-5.3-codex", "google/gemini-3-pro-preview", @@ 
-135,9 +139,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", "arcee-ai/trinity-large-thinking", - "openai/gpt-5.4-pro", + "openai/gpt-5.5-pro", "openai/gpt-5.4-nano", ], + # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and + # provider_model_ids fallback when /v1/models is unavailable. + "openai": [ + "gpt-5.4", + "gpt-5.4-mini", + "gpt-5-mini", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-4.1", + "gpt-4o", + "gpt-4o-mini", + ], "openai-codex": _codex_curated_models(), "copilot-acp": [ "copilot-acp", @@ -151,10 +167,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "gpt-4.1", "gpt-4o", "gpt-4o-mini", - "claude-opus-4.6", "claude-sonnet-4.6", + "claude-sonnet-4", "claude-sonnet-4.5", "claude-haiku-4.5", + "gemini-3.1-pro-preview", + "gemini-3-pro-preview", + "gemini-3-flash-preview", "gemini-2.5-pro", "grok-code-fast-1", ], @@ -246,6 +265,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "claude-haiku-4-5-20251001", ], "deepseek": [ + "deepseek-v4-pro", + "deepseek-v4-flash", "deepseek-chat", "deepseek-reasoner", ], @@ -676,7 +697,7 @@ def get_nous_recommended_aux_model( # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity. # Every code path that lists, displays, or iterates providers derives from -# this list: hermes model, /model, /provider, list_authenticated_providers. +# this list: hermes model, /model, list_authenticated_providers. 
# # Fields: # slug — internal provider ID (used in config.yaml, --provider flag) @@ -1104,7 +1125,10 @@ def fetch_models_with_pricing( return _pricing_cache[cache_key] url = cache_key.rstrip("/") + "/v1/models" - headers: dict[str, str] = {"Accept": "application/json"} + headers: dict[str, str] = { + "Accept": "application/json", + "User-Agent": _HERMES_USER_AGENT, + } if api_key: headers["Authorization"] = f"Bearer {api_key}" @@ -1736,6 +1760,17 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) live = fetch_ollama_cloud_models(force_refresh=force_refresh) if live: return live + if normalized == "openai": + api_key = os.getenv("OPENAI_API_KEY", "").strip() + if api_key: + base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/") + base = base_raw or "https://api.openai.com/v1" + try: + live = fetch_api_models(api_key, base) + if live: + return live + except Exception: + pass if normalized == "custom": base_url = _get_custom_base_url() if base_url: @@ -1890,6 +1925,51 @@ def fetch_github_model_catalog( return None +# ─── Copilot catalog context-window helpers ───────────────────────────────── + +# Module-level cache: {model_id: max_prompt_tokens} +_copilot_context_cache: dict[str, int] = {} +_copilot_context_cache_time: float = 0.0 +_COPILOT_CONTEXT_CACHE_TTL = 3600 # 1 hour + + +def get_copilot_model_context(model_id: str, api_key: Optional[str] = None) -> Optional[int]: + """Look up max_prompt_tokens for a Copilot model from the live /models API. + + Results are cached in-process for 1 hour to avoid repeated API calls. + Returns the token limit or None if not found. 
+ """ + global _copilot_context_cache, _copilot_context_cache_time + + # Serve from cache if fresh + if _copilot_context_cache and (time.time() - _copilot_context_cache_time < _COPILOT_CONTEXT_CACHE_TTL): + if model_id in _copilot_context_cache: + return _copilot_context_cache[model_id] + # Cache is fresh but model not in it — don't re-fetch + return None + + # Fetch and populate cache + catalog = fetch_github_model_catalog(api_key=api_key) + if not catalog: + return None + + cache: dict[str, int] = {} + for item in catalog: + mid = str(item.get("id") or "").strip() + if not mid: + continue + caps = item.get("capabilities") or {} + limits = caps.get("limits") or {} + max_prompt = limits.get("max_prompt_tokens") + if isinstance(max_prompt, int) and max_prompt > 0: + cache[mid] = max_prompt + + _copilot_context_cache = cache + _copilot_context_cache_time = time.time() + + return cache.get(model_id) + + def _is_github_models_base_url(base_url: Optional[str]) -> bool: normalized = (base_url or "").strip().rstrip("/").lower() return ( @@ -1923,6 +2003,7 @@ _COPILOT_MODEL_ALIASES = { "openai/o4-mini": "gpt-5-mini", "anthropic/claude-opus-4.6": "claude-opus-4.6", "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6", + "anthropic/claude-sonnet-4": "claude-sonnet-4", "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5", "anthropic/claude-haiku-4.5": "claude-haiku-4.5", # Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use @@ -1932,10 +2013,12 @@ _COPILOT_MODEL_ALIASES = { # "model_not_supported". See issue #6879. 
"claude-opus-4-6": "claude-opus-4.6", "claude-sonnet-4-6": "claude-sonnet-4.6", + "claude-sonnet-4-0": "claude-sonnet-4", "claude-sonnet-4-5": "claude-sonnet-4.5", "claude-haiku-4-5": "claude-haiku-4.5", "anthropic/claude-opus-4-6": "claude-opus-4.6", "anthropic/claude-sonnet-4-6": "claude-sonnet-4.6", + "anthropic/claude-sonnet-4-0": "claude-sonnet-4", "anthropic/claude-sonnet-4-5": "claude-sonnet-4.5", "anthropic/claude-haiku-4-5": "claude-haiku-4.5", } @@ -2160,8 +2243,15 @@ def probe_api_models( api_key: Optional[str], base_url: Optional[str], timeout: float = 5.0, + api_mode: Optional[str] = None, ) -> dict[str, Any]: - """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics.""" + """Probe a ``/models`` endpoint with light URL heuristics. + + For ``anthropic_messages`` mode, uses ``x-api-key`` and + ``anthropic-version`` headers (Anthropic's native auth) instead of + ``Authorization: Bearer``. The response shape (``data[].id``) is + identical, so the same parser works for both. 
+ """ normalized = (base_url or "").strip().rstrip("/") if not normalized: return { @@ -2193,7 +2283,10 @@ def probe_api_models( tried: list[str] = [] headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT} - if api_key: + if api_key and api_mode == "anthropic_messages": + headers["x-api-key"] = api_key + headers["anthropic-version"] = "2023-06-01" + elif api_key: headers["Authorization"] = f"Bearer {api_key}" if normalized.startswith(COPILOT_BASE_URL): headers.update(copilot_default_headers()) @@ -2235,7 +2328,10 @@ def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]: base_url = AI_GATEWAY_BASE_URL url = base_url.rstrip("/") + "/models" - headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"} + headers: dict[str, str] = { + "Authorization": f"Bearer {api_key}", + "User-Agent": _HERMES_USER_AGENT, + } req = urllib.request.Request(url, headers=headers) try: with urllib.request.urlopen(req, timeout=timeout) as resp: @@ -2255,13 +2351,14 @@ def fetch_api_models( api_key: Optional[str], base_url: Optional[str], timeout: float = 5.0, + api_mode: Optional[str] = None, ) -> Optional[list[str]]: """Fetch the list of available model IDs from the provider's ``/models`` endpoint. Returns a list of model ID strings, or ``None`` if the endpoint could not be reached (network error, timeout, auth failure, etc.). """ - return probe_api_models(api_key, base_url, timeout=timeout).get("models") + return probe_api_models(api_key, base_url, timeout=timeout, api_mode=api_mode).get("models") # --------------------------------------------------------------------------- @@ -2389,6 +2486,7 @@ def validate_requested_model( *, api_key: Optional[str] = None, base_url: Optional[str] = None, + api_mode: Optional[str] = None, ) -> dict[str, Any]: """ Validate a ``/model`` value for the active provider. 
@@ -2430,7 +2528,11 @@ def validate_requested_model( } if normalized == "custom": - probe = probe_api_models(api_key, base_url) + # Try probing with correct auth for the api_mode. + if api_mode == "anthropic_messages": + probe = probe_api_models(api_key, base_url, api_mode=api_mode) + else: + probe = probe_api_models(api_key, base_url) api_models = probe.get("models") if api_models is not None: if requested_for_lookup in set(api_models): @@ -2479,12 +2581,17 @@ def validate_requested_model( f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. " f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification." ) + if api_mode == "anthropic_messages": + message += ( + "\n Many Anthropic-compatible proxies do not implement the Models API " + "(GET /v1/models). The model name has been accepted without verification." + ) if probe.get("suggested_base_url"): message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`" return { - "accepted": False, - "persist": False, + "accepted": api_mode == "anthropic_messages", + "persist": True, "recognized": False, "message": message, } @@ -2572,10 +2679,100 @@ def validate_requested_model( ), } + # Native Anthropic provider: /v1/models requires x-api-key (or Bearer for + # OAuth) plus anthropic-version headers. The generic OpenAI-style probe + # below uses plain Bearer auth and 401s against Anthropic, so dispatch to + # the native fetcher which handles both API keys and Claude-Code OAuth + # tokens. (The api_mode=="anthropic_messages" branch below handles the + # Messages-API transport case separately.) 
+ if normalized == "anthropic": + anthropic_models = _fetch_anthropic_models() + if anthropic_models is not None: + if requested_for_lookup in set(anthropic_models): + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + auto = get_close_matches(requested_for_lookup, anthropic_models, n=1, cutoff=0.9) + if auto: + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": auto[0], + "message": f"Auto-corrected `{requested}` → `{auto[0]}`", + } + suggestions = get_close_matches(requested, anthropic_models, n=3, cutoff=0.5) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) + # Accept anyway — Anthropic sometimes gates newer/preview models + # (e.g. snapshot IDs, early-access releases) behind accounts + # even though they aren't listed on /v1/models. + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in Anthropic's /v1/models listing. " + f"It may still work if you have early-access or snapshot IDs." + f"{suggestion_text}" + ), + } + # _fetch_anthropic_models returned None — no token resolvable or + # network failure. Fall through to the generic warning below. + + # Anthropic Messages API: many proxies don't implement /v1/models. + # Try probing with correct auth; if it fails, accept with a warning. 
+ if api_mode == "anthropic_messages": + api_models = fetch_api_models(api_key, base_url, api_mode=api_mode) + if api_models is not None: + if requested_for_lookup in set(api_models): + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9) + if auto: + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": auto[0], + "message": f"Auto-corrected `{requested}` → `{auto[0]}`", + } + # Probe failed or model not found — accept anyway (proxy likely + # doesn't implement the Anthropic Models API). + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: could not verify `{requested}` against this endpoint's " + f"model listing. Many Anthropic-compatible proxies do not " + f"implement GET /v1/models. The model name has been accepted " + f"without verification." + ), + } + # Probe the live API to check if the model actually exists api_models = fetch_api_models(api_key, base_url) if api_models is not None: + # Gemini's OpenAI-compat /v1beta/openai/models endpoint returns IDs + # prefixed with "models/" (e.g. "models/gemini-2.5-flash") — native + # Gemini-API convention. Our curated list and user input both use + # the bare ID, so a direct set-membership check drops every known + # Gemini model. Strip the prefix before comparison. See #12532. 
+ if normalized == "gemini": + api_models = [ + m[len("models/"):] if isinstance(m, str) and m.startswith("models/") else m + for m in api_models + ] if requested_for_lookup in set(api_models): # API confirmed the model exists return { diff --git a/hermes_cli/platforms.py b/hermes_cli/platforms.py index 1fc3a3a85..05507eace 100644 --- a/hermes_cli/platforms.py +++ b/hermes_cli/platforms.py @@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([ ("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")), ("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")), ("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")), + ("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")), ]) diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 28cb3b1b5..7eb9a400c 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -71,6 +71,14 @@ VALID_HOOKS: Set[str] = { "on_session_finalize", "on_session_reset", "subagent_stop", + # Gateway pre-dispatch hook. Fired once per incoming MessageEvent + # after the internal-event guard but BEFORE auth/pairing and agent + # dispatch. Plugins may return a dict to influence flow: + # {"action": "skip", "reason": "..."} -> drop message (no reply) + # {"action": "rewrite", "text": "..."} -> replace event.text, continue + # {"action": "allow"} / None -> normal dispatch + # Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store. 
+ "pre_gateway_dispatch", } ENTRY_POINTS_GROUP = "hermes_agent.plugins" diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index e842086a4..f65ceac7a 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -116,6 +116,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { transport="openai_chat", base_url_env_var="DASHSCOPE_BASE_URL", ), + "alibaba-coding-plan": HermesOverlay( + transport="openai_chat", + base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL", + ), "vercel": HermesOverlay( transport="openai_chat", is_aggregator=True, @@ -259,6 +263,9 @@ ALIASES: Dict[str, str] = { "aliyun": "alibaba", "qwen": "alibaba", "alibaba-cloud": "alibaba", + "alibaba_coding": "alibaba-coding-plan", + "alibaba-coding": "alibaba-coding-plan", + "alibaba_coding_plan": "alibaba-coding-plan", # google-gemini-cli (OAuth + Code Assist) "gemini-cli": "google-gemini-cli", diff --git a/hermes_cli/pty_bridge.py b/hermes_cli/pty_bridge.py new file mode 100644 index 000000000..9a8a73bad --- /dev/null +++ b/hermes_cli/pty_bridge.py @@ -0,0 +1,229 @@ +"""PTY bridge for `hermes dashboard` chat tab. + +Wraps a child process behind a pseudo-terminal so its ANSI output can be +streamed to a browser-side terminal emulator (xterm.js) and typed +keystrokes can be fed back in. The only caller today is the +``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``. + +Design constraints: + +* **POSIX-only.** Hermes Agent supports Windows exclusively via WSL, which + exposes a native POSIX PTY via ``openpty(3)``. Native Windows Python + has no PTY; :class:`PtyUnavailableError` is raised with a user-readable + install/platform message so the dashboard can render a banner instead of + crashing. +* **Zero Node dependency on the server side.** We use :mod:`ptyprocess`, + which is a pure-Python wrapper around the OS calls. 
The browser talks + to the same ``hermes --tui`` binary it would launch from the CLI, so + every TUI feature (slash popover, model picker, tool rows, markdown, + skin engine, clarify/sudo/approval prompts) ships automatically. +* **Byte-safe I/O.** Reads and writes go through the PTY master fd + directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because + streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land + mid-read. +""" + +from __future__ import annotations + +import errno +import fcntl +import os +import select +import signal +import struct +import sys +import termios +import time +from typing import Optional, Sequence + +try: + import ptyprocess # type: ignore + _PTY_AVAILABLE = not sys.platform.startswith("win") +except ImportError: # pragma: no cover - dev env without ptyprocess + ptyprocess = None # type: ignore + _PTY_AVAILABLE = False + + +__all__ = ["PtyBridge", "PtyUnavailableError"] + + +class PtyUnavailableError(RuntimeError): + """Raised when a PTY cannot be created on this platform. + + Today this means native Windows (no ConPTY bindings) or a dev + environment missing the ``ptyprocess`` dependency. The dashboard + surfaces the message to the user as a chat-tab banner. + """ + + +class PtyBridge: + """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming. + + Not thread-safe. A single bridge is owned by the WebSocket handler + that spawned it; the reader runs in an executor thread while writes + happen on the event-loop thread. Both sides are OK because the + kernel PTY is the actual synchronization point — we never call + :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and + ``os.write`` on the master fd, which is safe. 
+ """ + + def __init__(self, proc: "ptyprocess.PtyProcess"): # type: ignore[name-defined] + self._proc = proc + self._fd: int = proc.fd + self._closed = False + + # -- lifecycle -------------------------------------------------------- + + @classmethod + def is_available(cls) -> bool: + """True if a PTY can be spawned on this platform.""" + return bool(_PTY_AVAILABLE) + + @classmethod + def spawn( + cls, + argv: Sequence[str], + *, + cwd: Optional[str] = None, + env: Optional[dict] = None, + cols: int = 80, + rows: int = 24, + ) -> "PtyBridge": + """Spawn ``argv`` behind a new PTY and return a bridge. + + Raises :class:`PtyUnavailableError` if the platform can't host a + PTY. Raises :class:`FileNotFoundError` or :class:`OSError` for + ordinary exec failures (missing binary, bad cwd, etc.). + """ + if not _PTY_AVAILABLE: + if sys.platform.startswith("win"): + raise PtyUnavailableError( + "Pseudo-terminals are unavailable on this platform. " + "Hermes Agent supports Windows only via WSL." + ) + if ptyprocess is None: + raise PtyUnavailableError( + "The `ptyprocess` package is missing. " + "Install with: pip install ptyprocess " + "(or pip install -e '.[pty]')." + ) + raise PtyUnavailableError("Pseudo-terminals are unavailable.") + # Let caller-supplied env fully override inheritance; if they pass + # None we inherit the server's env (same semantics as subprocess). 
+ spawn_env = os.environ.copy() if env is None else env + proc = ptyprocess.PtyProcess.spawn( # type: ignore[union-attr] + list(argv), + cwd=cwd, + env=spawn_env, + dimensions=(rows, cols), + ) + return cls(proc) + + @property + def pid(self) -> int: + return int(self._proc.pid) + + def is_alive(self) -> bool: + if self._closed: + return False + try: + return bool(self._proc.isalive()) + except Exception: + return False + + # -- I/O -------------------------------------------------------------- + + def read(self, timeout: float = 0.2) -> Optional[bytes]: + """Read up to 64 KiB of raw bytes from the PTY master. + + Returns: + * bytes — zero or more bytes of child output + * empty bytes (``b""``) — no data available within ``timeout`` + * None — child has exited and the master fd is at EOF + + Never blocks longer than ``timeout`` seconds. Safe to call after + :meth:`close`; returns ``None`` in that case. + """ + if self._closed: + return None + try: + readable, _, _ = select.select([self._fd], [], [], timeout) + except (OSError, ValueError): + return None + if not readable: + return b"" + try: + data = os.read(self._fd, 65536) + except OSError as exc: + # EIO on Linux = slave side closed. EBADF = already closed. + if exc.errno in (errno.EIO, errno.EBADF): + return None + raise + if not data: + return None + return data + + def write(self, data: bytes) -> None: + """Write raw bytes to the PTY master (i.e. the child's stdin).""" + if self._closed or not data: + return + # os.write can return a short write under load; loop until drained. 
+ view = memoryview(data) + while view: + try: + n = os.write(self._fd, view) + except OSError as exc: + if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE): + return + raise + if n <= 0: + return + view = view[n:] + + def resize(self, cols: int, rows: int) -> None: + """Forward a terminal resize to the child via ``TIOCSWINSZ``.""" + if self._closed: + return + # struct winsize: rows, cols, xpixel, ypixel (all unsigned short) + winsize = struct.pack("HHHH", max(1, rows), max(1, cols), 0, 0) + try: + fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize) + except OSError: + pass + + # -- teardown --------------------------------------------------------- + + def close(self) -> None: + """Terminate the child (SIGTERM → 0.5s grace → SIGKILL) and close fds. + + Idempotent. Reaping the child is important so we don't leak + zombies across the lifetime of the dashboard process. + """ + if self._closed: + return + self._closed = True + + # SIGHUP is the conventional "your terminal went away" signal. + # We escalate if the child ignores it. + for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL): + if not self._proc.isalive(): + break + try: + self._proc.kill(sig) + except Exception: + pass + deadline = time.monotonic() + 0.5 + while self._proc.isalive() and time.monotonic() < deadline: + time.sleep(0.02) + + try: + self._proc.close(force=True) + except Exception: + pass + + # Context-manager sugar — handy in tests and ad-hoc scripts. 
+ def __enter__(self) -> "PtyBridge": + return self + + def __exit__(self, *_exc) -> None: + self.close() diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 922946e2a..cbfcbdbd6 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -36,6 +36,29 @@ def _normalize_custom_provider_name(value: str) -> str: return value.strip().lower().replace(" ", "-") +def _loopback_hostname(host: str) -> bool: + h = (host or "").lower().rstrip(".") + return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"} + + +def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool: + """Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution. + + GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a + previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``, + so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions. + """ + cfg_provider_norm = (cfg_provider or "").strip().lower() + bu = (cfg_base_url or "").strip() + if not bu: + return False + if cfg_provider_norm == "custom": + return True + if base_url_host_matches(bu, "openrouter.ai"): + return False + return _loopback_hostname(base_url_hostname(bu)) + + def _detect_api_mode_for_url(base_url: str) -> Optional[str]: """Auto-detect api_mode from the resolved base URL. @@ -160,8 +183,16 @@ def _resolve_runtime_from_pool_entry( requested_provider: str, model_cfg: Optional[Dict[str, Any]] = None, pool: Optional[CredentialPool] = None, + target_model: Optional[str] = None, ) -> Dict[str, Any]: model_cfg = model_cfg or _get_model_config() + # When the caller is resolving for a specific target model (e.g. a /model + # mid-session switch), prefer that over the persisted model.default. 
This + # prevents api_mode being computed from a stale config default that no + # longer matches the model actually being used — the bug that caused + # opencode-zen /v1 to be stripped for chat_completions requests when + # config.default was still a Claude model. + effective_model = (target_model or model_cfg.get("default") or "") base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/") api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") api_mode = "chat_completions" @@ -207,7 +238,7 @@ def _resolve_runtime_from_pool_entry( api_mode = configured_mode elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode - api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) + api_mode = opencode_model_api_mode(provider, effective_model) else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, # Kimi /coding, api.openai.com → codex_responses, api.x.ai → @@ -323,12 +354,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An # Found match by provider key base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or "" if base_url: - return { + result = { "name": entry.get("name", ep_name), "base_url": base_url.strip(), "api_key": resolved_api_key, "model": entry.get("default_model", ""), } + api_mode = _parse_api_mode(entry.get("api_mode")) + if api_mode: + result["api_mode"] = api_mode + return result # Also check the 'name' field if present display_name = entry.get("name", "") if display_name: @@ -337,12 +372,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An # Found match by display name base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or "" if base_url: - return { + result = { "name": display_name, "base_url": base_url.strip(), "api_key": resolved_api_key, "model": entry.get("default_model", ""), } + 
api_mode = _parse_api_mode(entry.get("api_mode")) + if api_mode: + result["api_mode"] = api_mode + return result # Fall back to custom_providers: list (legacy format) custom_providers = config.get("custom_providers") @@ -464,6 +503,7 @@ def _resolve_openrouter_runtime( cfg_provider = cfg_provider.strip().lower() env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() + env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip() # Use config base_url when available and the provider context matches. # OPENAI_BASE_URL env var is no longer consulted — config.yaml is @@ -473,11 +513,14 @@ def _resolve_openrouter_runtime( if requested_norm == "auto": if not cfg_provider or cfg_provider == "auto": use_config_base_url = True - elif requested_norm == "custom" and cfg_provider == "custom": + elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom( + cfg_base_url, cfg_provider + ): use_config_base_url = True base_url = ( (explicit_base_url or "").strip() + or env_custom_base_url or (cfg_base_url.strip() if use_config_base_url else "") or env_openrouter_base_url or OPENROUTER_BASE_URL @@ -689,8 +732,18 @@ def resolve_runtime_provider( requested: Optional[str] = None, explicit_api_key: Optional[str] = None, explicit_base_url: Optional[str] = None, + target_model: Optional[str] = None, ) -> Dict[str, Any]: - """Resolve runtime provider credentials for agent execution.""" + """Resolve runtime provider credentials for agent execution. + + target_model: Optional override for model_cfg.get("default") when + computing provider-specific api_mode (e.g. OpenCode Zen/Go where different + models route through different API surfaces). Callers performing an + explicit mid-session model switch should pass the new model here so + api_mode is derived from the model they are switching TO, not the stale + persisted default. Other callers can leave it None to preserve existing + behavior (api_mode derived from config). 
+ """ requested_provider = resolve_requested_provider(requested) custom_runtime = _resolve_named_custom_runtime( @@ -772,6 +825,7 @@ def resolve_runtime_provider( requested_provider=requested_provider, model_cfg=model_cfg, pool=pool, + target_model=target_model, ) if provider == "nous": @@ -990,7 +1044,11 @@ def resolve_runtime_provider( api_mode = configured_mode elif provider in ("opencode-zen", "opencode-go"): from hermes_cli.models import opencode_model_api_mode - api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) + # Prefer the target_model from the caller (explicit mid-session + # switch) over the stale model.default; see _resolve_runtime_from_pool_entry + # for the same rationale. + _effective = target_model or model_cfg.get("default", "") + api_mode = opencode_model_api_mode(provider, _effective) else: # Auto-detect Anthropic-compatible endpoints by URL convention # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index ebc7de940..e28acd41b 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -500,6 +500,15 @@ def _print_setup_summary(config: dict, hermes_home): if get_env_value("HASS_TOKEN"): tool_status.append(("Smart Home (Home Assistant)", True, None)) + # Spotify (OAuth via hermes auth spotify — check auth.json, not env vars) + try: + from hermes_cli.auth import get_provider_auth_state + _spotify_state = get_provider_auth_state("spotify") or {} + if _spotify_state.get("access_token") or _spotify_state.get("refresh_token"): + tool_status.append(("Spotify (PKCE OAuth)", True, None)) + except Exception: + pass + # Skills Hub if get_env_value("GITHUB_TOKEN"): tool_status.append(("Skills Hub (GitHub)", True, None)) diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 8541f0a05..d07e1a822 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -164,19 +164,26 @@ def show_status(args): qwen_status = {} nous_logged_in = 
bool(nous_status.get("logged_in")) + nous_error = nous_status.get("error") + nous_label = "logged in" if nous_logged_in else "not logged in (run: hermes auth add nous --type oauth)" print( f" {'Nous Portal':<12} {check_mark(nous_logged_in)} " - f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}" + f"{nous_label}" ) - if nous_logged_in: - portal_url = nous_status.get("portal_base_url") or "(unknown)" - access_exp = _format_iso_timestamp(nous_status.get("access_expires_at")) - key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at")) - refresh_label = "yes" if nous_status.get("has_refresh_token") else "no" + portal_url = nous_status.get("portal_base_url") or "(unknown)" + access_exp = _format_iso_timestamp(nous_status.get("access_expires_at")) + key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at")) + refresh_label = "yes" if nous_status.get("has_refresh_token") else "no" + if nous_logged_in or portal_url != "(unknown)" or nous_error: print(f" Portal URL: {portal_url}") + if nous_logged_in or nous_status.get("access_expires_at"): print(f" Access exp: {access_exp}") + if nous_logged_in or nous_status.get("agent_key_expires_at"): print(f" Key exp: {key_exp}") + if nous_logged_in or nous_status.get("has_refresh_token"): print(f" Refresh: {refresh_label}") + if nous_error and not nous_logged_in: + print(f" Error: {nous_error}") codex_logged_in = bool(codex_status.get("logged_in")) print( diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 0c1bebe67..db66e1db1 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default 
(delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index e89f96178..32645aea3 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -67,12 +67,13 @@ CONFIGURABLE_TOOLSETS = [ ("messaging", "📨 Cross-Platform Messaging", "send_message"), ("rl", "🧪 RL Training", "Tinker-Atropos training tools"), ("homeassistant", "🏠 Home Assistant", "smart home device control"), + ("spotify", "🎵 Spotify", "playback, search, playlists, library"), ] # Toolsets that are OFF by default for new installs. # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled), # but the setup checklist won't pre-select them for first-time users. -_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"} +_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"} def _get_effective_configurable_toolsets(): @@ -361,6 +362,18 @@ TOOL_CATEGORIES = { }, ], }, + "spotify": { + "name": "Spotify", + "icon": "🎵", + "providers": [ + { + "name": "Spotify Web API", + "tag": "PKCE OAuth — opens the setup wizard", + "env_vars": [], + "post_setup": "spotify", + }, + ], + }, "rl": { "name": "RL Training", "icon": "🧪", @@ -461,6 +474,35 @@ def _run_post_setup(post_setup_key: str): _print_warning(" kittentts install timed out (>5min)") _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "spotify": + # Run the full `hermes auth spotify` flow — if the user has no + # client_id yet, this drops them into the interactive wizard + # (opens the Spotify dashboard, prompts for client_id, persists + # to ~/.hermes/.env), then continues straight into PKCE. 
If they + # already have an app, it skips the wizard and just does OAuth. + from types import SimpleNamespace + try: + from hermes_cli.auth import login_spotify_command + except Exception as exc: + _print_warning(f" Could not load Spotify auth: {exc}") + _print_info(" Run manually: hermes auth spotify") + return + _print_info(" Starting Spotify login...") + try: + login_spotify_command(SimpleNamespace( + client_id=None, redirect_uri=None, scope=None, + no_browser=False, timeout=None, + )) + _print_success(" Spotify authenticated") + except SystemExit as exc: + # User aborted the wizard, or OAuth failed — don't fail the + # toolset enable; they can retry with `hermes auth spotify`. + _print_warning(f" Spotify login did not complete: {exc}") + _print_info(" Run later: hermes auth spotify") + except Exception as exc: + _print_warning(f" Spotify login failed: {exc}") + _print_info(" Run manually: hermes auth spotify") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") @@ -590,7 +632,10 @@ def _get_platform_tools( default_off.remove(platform) enabled_toolsets -= default_off - # Plugin toolsets: enabled by default unless explicitly disabled. + # Plugin toolsets: enabled by default unless explicitly disabled, or + # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify — + # shipped as a bundled plugin but user must opt in via `hermes tools` + # so we don't ship 7 Spotify tool schemas to users who don't use it). # A plugin toolset is "known" for a platform once `hermes tools` # has been saved for that platform (tracked via known_plugin_toolsets). # Unknown plugins default to enabled; known-but-absent = disabled. 
@@ -602,6 +647,9 @@ def _get_platform_tools( if pts in toolset_names: # Explicitly listed in config — enabled enabled_toolsets.add(pts) + elif pts in _DEFAULT_OFF_TOOLSETS: + # Opt-in plugin toolset — stay off until user picks it + continue elif pts not in known_for_platform: # New plugin not yet seen by hermes tools — default enabled enabled_toolsets.add(pts) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py new file mode 100644 index 000000000..0a355ce4f --- /dev/null +++ b/hermes_cli/voice.py @@ -0,0 +1,548 @@ +"""Process-wide voice recording + TTS API for the TUI gateway. + +Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool`` +(text-to-speech) behind idempotent, stateful entry points that the gateway's +``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can +call from a dedicated thread. The gateway imports this module lazily so that +missing optional audio deps (sounddevice, faster-whisper, numpy) surface as +an ``ImportError`` at call time, not at startup. + +Two usage modes are exposed: + +* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single + manually-bounded capture used when the caller drives the start/stop pair + explicitly. +* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors + the classic CLI voice mode: recording auto-stops on silence, transcribes, + hands the result to a callback, and then auto-restarts for the next turn. + Three consecutive no-speech cycles stop the loop and fire + ``on_silent_limit`` so the UI can turn the mode off. +""" + +from __future__ import annotations + +import logging +import os +import sys +import threading +from typing import Any, Callable, Optional + +from tools.voice_mode import ( + create_audio_recorder, + is_whisper_hallucination, + play_audio_file, + transcribe_recording, +) + +logger = logging.getLogger(__name__) + + +def _debug(msg: str) -> None: + """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1. 
+ + Goes to stderr so the TUI gateway wraps it as a gateway.stderr event, + which createGatewayEventHandler shows as an Activity line — exactly + what we need to diagnose "why didn't the loop auto-restart?" in the + user's real terminal without shipping a separate debug RPC. + + Any OSError / BrokenPipeError is swallowed because this fires from + background threads (silence callback, TTS daemon, beep) where a + broken stderr pipe must not kill the whole gateway — the main + command pipe (stdin+stdout) is what actually matters. + """ + if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1": + return + try: + print(f"[voice] {msg}", file=sys.stderr, flush=True) + except (BrokenPipeError, OSError): + pass + + +def _beeps_enabled() -> bool: + """CLI parity: voice.beep_enabled in config.yaml (default True).""" + try: + from hermes_cli.config import load_config + + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + + +def _play_beep(frequency: int, count: int = 1) -> None: + """Audible cue matching cli.py's record/stop beeps. + + 880 Hz single-beep on start (cli.py:_voice_start_recording line 7532), + 660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585). + Best-effort — sounddevice failures are silently swallowed so the + voice loop never breaks because a speaker was unavailable. 
+ """ + if not _beeps_enabled(): + return + try: + from tools.voice_mode import play_beep + + play_beep(frequency=frequency, count=count) + except Exception as e: + _debug(f"beep {frequency}Hz failed: {e}") + +# ── Push-to-talk state ─────────────────────────────────────────────── +_recorder = None +_recorder_lock = threading.Lock() + +# ── Continuous (VAD) state ─────────────────────────────────────────── +_continuous_lock = threading.Lock() +_continuous_active = False +_continuous_recorder: Any = None + +# ── TTS-vs-STT feedback guard ──────────────────────────────────────── +# When TTS plays the agent reply over the speakers, the live microphone +# picks it up and transcribes the agent's own voice as user input — an +# infinite loop the agent happily joins ("Ha, looks like we're in a loop"). +# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is +# playing, set while silent. _continuous_on_silence waits on it before +# re-arming the recorder, and speak_text itself cancels any live capture +# before starting playback so the tail of the previous utterance doesn't +# leak into the mic. +_tts_playing = threading.Event() +_tts_playing.set() # initially "not playing" +_continuous_on_transcript: Optional[Callable[[str], None]] = None +_continuous_on_status: Optional[Callable[[str], None]] = None +_continuous_on_silent_limit: Optional[Callable[[], None]] = None +_continuous_no_speech_count = 0 +_CONTINUOUS_NO_SPEECH_LIMIT = 3 + + +# ── Push-to-talk API ───────────────────────────────────────────────── + + +def start_recording() -> None: + """Begin capturing from the default input device (push-to-talk). + + Idempotent — calling again while a recording is in progress is a no-op. 
+ """ + global _recorder + + with _recorder_lock: + if _recorder is not None and getattr(_recorder, "is_recording", False): + return + rec = create_audio_recorder() + rec.start() + _recorder = rec + + +def stop_and_transcribe() -> Optional[str]: + """Stop the active push-to-talk recording, transcribe, return text. + + Returns ``None`` when no recording is active, when the microphone + captured no speech, or when Whisper returned a known hallucination. + """ + global _recorder + + with _recorder_lock: + rec = _recorder + _recorder = None + + if rec is None: + return None + + wav_path = rec.stop() + if not wav_path: + return None + + try: + result = transcribe_recording(wav_path) + except Exception as e: + logger.warning("voice transcription failed: %s", e) + return None + finally: + try: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception: + pass + + # transcribe_recording returns {"success": bool, "transcript": str, ...} + # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript"). + if not result.get("success"): + return None + text = (result.get("transcript") or "").strip() + if not text or is_whisper_hallucination(text): + return None + + return text + + +# ── Continuous (VAD) API ───────────────────────────────────────────── + + +def start_continuous( + on_transcript: Callable[[str], None], + on_status: Optional[Callable[[str], None]] = None, + on_silent_limit: Optional[Callable[[], None]] = None, + silence_threshold: int = 200, + silence_duration: float = 3.0, +) -> None: + """Start a VAD-driven continuous recording loop. + + The loop calls ``on_transcript(text)`` each time speech is detected and + transcribed successfully, then auto-restarts. After + ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech + picked up at all) the loop stops itself and calls ``on_silent_limit`` + so the UI can reflect "voice off". Idempotent — calling while already + active is a no-op. 
+ + ``on_status`` is called with ``"listening"`` / ``"transcribing"`` / + ``"idle"`` so the UI can show a live indicator. + """ + global _continuous_active, _continuous_recorder + global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit + global _continuous_no_speech_count + + with _continuous_lock: + if _continuous_active: + _debug("start_continuous: already active — no-op") + return + _continuous_active = True + _continuous_on_transcript = on_transcript + _continuous_on_status = on_status + _continuous_on_silent_limit = on_silent_limit + _continuous_no_speech_count = 0 + + if _continuous_recorder is None: + _continuous_recorder = create_audio_recorder() + + _continuous_recorder._silence_threshold = silence_threshold + _continuous_recorder._silence_duration = silence_duration + rec = _continuous_recorder + + _debug( + f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)" + ) + + # CLI parity: single 880 Hz beep *before* opening the stream — placing + # the beep after stream.start() on macOS triggers a CoreAudio conflict + # (cli.py:7528 comment). + _play_beep(frequency=880, count=1) + + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to start continuous recording: %s", e) + _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + raise + + if on_status: + try: + on_status("listening") + except Exception: + pass + + +def stop_continuous() -> None: + """Stop the active continuous loop and release the microphone. + + Idempotent — calling while not active is a no-op. Any in-flight + transcription completes but its result is discarded (the callback + checks ``_continuous_active`` before firing). 
+ """ + global _continuous_active, _continuous_on_transcript + global _continuous_on_status, _continuous_on_silent_limit + global _continuous_recorder, _continuous_no_speech_count + + with _continuous_lock: + if not _continuous_active: + return + _continuous_active = False + rec = _continuous_recorder + on_status = _continuous_on_status + _continuous_on_transcript = None + _continuous_on_status = None + _continuous_on_silent_limit = None + _continuous_no_speech_count = 0 + + if rec is not None: + try: + # cancel() (not stop()) discards buffered frames — the loop + # is over, we don't want to transcribe a half-captured turn. + rec.cancel() + except Exception as e: + logger.warning("failed to cancel recorder: %s", e) + + # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the + # silence-auto-stop path plays). + _play_beep(frequency=660, count=2) + + if on_status: + try: + on_status("idle") + except Exception: + pass + + +def is_continuous_active() -> bool: + """Whether a continuous voice loop is currently running.""" + with _continuous_lock: + return _continuous_active + + +def _continuous_on_silence() -> None: + """AudioRecorder silence callback — runs in a daemon thread. + + Stops the current capture, transcribes, delivers the text via + ``on_transcript``, and — if the loop is still active — starts the + next capture. Three consecutive silent cycles end the loop. 
+ """ + global _continuous_active, _continuous_no_speech_count + + _debug("_continuous_on_silence: fired") + + with _continuous_lock: + if not _continuous_active: + _debug("_continuous_on_silence: loop inactive — abort") + return + rec = _continuous_recorder + on_transcript = _continuous_on_transcript + on_status = _continuous_on_status + on_silent_limit = _continuous_on_silent_limit + + if rec is None: + _debug("_continuous_on_silence: no recorder — abort") + return + + if on_status: + try: + on_status("transcribing") + except Exception: + pass + + wav_path = rec.stop() + # Peak RMS is the critical diagnostic when stop() returns None despite + # the VAD firing — tells us at a glance whether the mic was too quiet + # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree. + peak_rms = getattr(rec, "_peak_rms", -1) + _debug( + f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})" + ) + + # CLI parity: double 660 Hz beep after the stream stops (safe from the + # CoreAudio conflict that blocks pre-start beeps). + _play_beep(frequency=660, count=2) + + transcript: Optional[str] = None + + if wav_path: + try: + result = transcribe_recording(wav_path) + # transcribe_recording returns {"success": bool, "transcript": str, + # "error": str?} — NOT {"text": str}. Using the wrong key silently + # produced empty transcripts even when Groq/local STT returned fine, + # which masqueraded as "not hearing the user" to the caller. 
+ success = bool(result.get("success")) + text = (result.get("transcript") or "").strip() + err = result.get("error") + _debug( + f"_continuous_on_silence: transcribe -> success={success} " + f"text={text!r} err={err!r}" + ) + if success and text and not is_whisper_hallucination(text): + transcript = text + except Exception as e: + logger.warning("continuous transcription failed: %s", e) + _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}") + finally: + try: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception: + pass + + with _continuous_lock: + if not _continuous_active: + # User stopped us while we were transcribing — discard. + _debug("_continuous_on_silence: stopped during transcribe — no restart") + return + if transcript: + _continuous_no_speech_count = 0 + else: + _continuous_no_speech_count += 1 + should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT + no_speech = _continuous_no_speech_count + + if transcript and on_transcript: + try: + on_transcript(transcript) + except Exception as e: + logger.warning("on_transcript callback raised: %s", e) + + if should_halt: + _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting") + with _continuous_lock: + _continuous_active = False + _continuous_no_speech_count = 0 + if on_silent_limit: + try: + on_silent_limit() + except Exception: + pass + try: + rec.cancel() + except Exception: + pass + if on_status: + try: + on_status("idle") + except Exception: + pass + return + + # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to + # finish before re-arming the mic, then leave a small gap to avoid + # catching the tail of the speaker output. Without this the voice + # loop becomes a feedback loop — the agent's spoken reply lands + # back in the mic and gets re-submitted. 
+ if not _tts_playing.is_set(): + _debug("_continuous_on_silence: waiting for TTS to finish") + _tts_playing.wait(timeout=60) + import time as _time + _time.sleep(0.3) + + # User may have stopped the loop during the wait. + with _continuous_lock: + if not _continuous_active: + _debug("_continuous_on_silence: stopped while waiting for TTS") + return + + # Restart for the next turn. + _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") + _play_beep(frequency=880, count=1) + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to restart continuous recording: %s", e) + _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + return + + if on_status: + try: + on_status("listening") + except Exception: + pass + + +# ── TTS API ────────────────────────────────────────────────────────── + + +def speak_text(text: str) -> None: + """Synthesize ``text`` with the configured TTS provider and play it. + + Mirrors cli.py:_voice_speak_response exactly — same markdown strip + pipeline, same 4000-char cap, same explicit mp3 output path, same + MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup + of both extensions. Keeping these in sync means a voice-mode TTS + session in the TUI sounds identical to one in the classic CLI. + + While playback is in flight the module-level _tts_playing Event is + cleared so the continuous-recording loop knows to wait before + re-arming the mic (otherwise the agent's spoken reply feedback-loops + through the microphone and the agent ends up replying to itself). + """ + if not text or not text.strip(): + return + + import re + import tempfile + import time + + # Cancel any live capture before we open the speakers — otherwise the + # last ~200ms of the user's turn tail + the first syllables of our TTS + # both end up in the next recording window. 
The continuous loop will + # re-arm itself after _tts_playing flips back (see _continuous_on_silence). + paused_recording = False + with _continuous_lock: + if ( + _continuous_active + and _continuous_recorder is not None + and getattr(_continuous_recorder, "is_recording", False) + ): + try: + _continuous_recorder.cancel() + paused_recording = True + except Exception as e: + logger.warning("failed to pause recorder for TTS: %s", e) + + _tts_playing.clear() + _debug(f"speak_text: TTS begin (paused_recording={paused_recording})") + + try: + from tools.tts_tool import text_to_speech_tool + + tts_text = text[:4000] if len(text) > 4000 else text + tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text) # fenced code blocks + tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text) # [text](url) → text + tts_text = re.sub(r'https?://\S+', '', tts_text) # bare URLs + tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold + tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic + tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # inline code + tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers + tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list bullets + tts_text = re.sub(r'---+', '', tts_text) # horizontal rules + tts_text = re.sub(r'\n{3,}', '\n\n', tts_text) # excess newlines + tts_text = tts_text.strip() + if not tts_text: + return + + # MP3 output path, pre-chosen so we can play the MP3 directly even + # when text_to_speech_tool auto-converts to OGG for messaging + # platforms. afplay's OGG support is flaky, MP3 always works. 
+ os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True) + mp3_path = os.path.join( + tempfile.gettempdir(), + "hermes_voice", + f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3", + ) + + _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}") + text_to_speech_tool(text=tts_text, output_path=mp3_path) + + if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0: + _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)") + play_audio_file(mp3_path) + try: + os.unlink(mp3_path) + ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg" + if os.path.isfile(ogg_path): + os.unlink(ogg_path) + except OSError: + pass + else: + _debug(f"speak_text: TTS tool produced no audio at {mp3_path}") + except Exception as e: + logger.warning("Voice TTS playback failed: %s", e) + _debug(f"speak_text raised {type(e).__name__}: {e}") + finally: + _tts_playing.set() + _debug("speak_text: TTS done") + + # Re-arm the mic so the user can answer without pressing Ctrl+B. + # Small delay lets the OS flush speaker output and afplay fully + # release the audio device before sounddevice re-opens the input. 
+ if paused_recording: + time.sleep(0.3) + with _continuous_lock: + if _continuous_active and _continuous_recorder is not None: + try: + _continuous_recorder.start( + on_silence_stop=_continuous_on_silence + ) + _debug("speak_text: recording resumed after TTS") + except Exception as e: + logger.warning( + "failed to resume recorder after TTS: %s", e + ) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 083e0714f..8c33a383e 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -49,7 +49,7 @@ from hermes_cli.config import ( from gateway.status import get_running_pid, read_runtime_status try: - from fastapi import FastAPI, HTTPException, Request + from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import FileResponse, HTMLResponse, JSONResponse from fastapi.staticfiles import StaticFiles @@ -73,6 +73,10 @@ app = FastAPI(title="Hermes Agent", version=__version__) _SESSION_TOKEN = secrets.token_urlsafe(32) _SESSION_HEADER_NAME = "X-Hermes-Session-Token" +# In-browser Chat tab (/chat, /api/pty, …). Off unless ``hermes dashboard --tui`` +# or HERMES_DASHBOARD_TUI=1. Set from :func:`start_server`. 
+_DASHBOARD_EMBEDDED_CHAT_ENABLED = False + # Simple rate limiter for the reveal endpoint _reveal_timestamps: List[float] = [] _REVEAL_MAX_PER_WINDOW = 5 @@ -283,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = { "display.busy_input_mode": { "type": "select", "description": "Input behavior while agent is running", - "options": ["queue", "interrupt", "block"], + "options": ["interrupt", "queue"], }, "memory.provider": { "type": "select", @@ -1529,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]: with urllib.request.urlopen(req, timeout=20) as resp: result = json.loads(resp.read().decode()) except Exception as e: - sess["status"] = "error" - sess["error_message"] = f"Token exchange failed: {e}" + with _oauth_sessions_lock: + sess["status"] = "error" + sess["error_message"] = f"Token exchange failed: {e}" return {"ok": False, "status": "error", "message": sess["error_message"]} access_token = result.get("access_token", "") refresh_token = result.get("refresh_token", "") expires_in = int(result.get("expires_in") or 3600) if not access_token: - sess["status"] = "error" - sess["error_message"] = "No access token returned" + with _oauth_sessions_lock: + sess["status"] = "error" + sess["error_message"] = "No access token returned" return {"ok": False, "status": "error", "message": sess["error_message"]} expires_at_ms = int(time.time() * 1000) + (expires_in * 1000) try: _save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms) except Exception as e: - sess["status"] = "error" - sess["error_message"] = f"Save failed: {e}" + with _oauth_sessions_lock: + sess["status"] = "error" + sess["error_message"] = f"Save failed: {e}" return {"ok": False, "status": "error", "message": sess["error_message"]} - sess["status"] = "approved" + with _oauth_sessions_lock: + sess["status"] = "approved" _log.info("oauth/pkce: anthropic login completed (session=%s)", session_id) return {"ok": True, "status": "approved"} @@ 
-2263,6 +2271,329 @@ async def get_usage_analytics(days: int = 30): db.close() +# --------------------------------------------------------------------------- +# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab. +# +# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind +# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a +# WebSocket. The browser renders the ANSI through xterm.js (see +# web/src/pages/ChatPage.tsx). +# +# Auth: ``?token=`` query param (browsers can't set +# Authorization on the WS upgrade). Same ephemeral ``_SESSION_TOKEN`` as +# REST. Localhost-only — we defensively reject non-loopback clients even +# though uvicorn binds to 127.0.0.1. +# --------------------------------------------------------------------------- + +import re +import asyncio + +from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError + +_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]") +_PTY_READ_CHUNK_TIMEOUT = 0.2 +_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$") +# Starlette's TestClient reports the peer as "testclient"; treat it as +# loopback so tests don't need to rewrite request scope. +_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"}) + +# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard) +# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id +# the chat tab generates on mount; entries auto-evict when the last subscriber +# drops AND the publisher has disconnected. +_event_channels: dict[str, set] = {} +_event_lock = asyncio.Lock() + + +def _resolve_chat_argv( + resume: Optional[str] = None, + sidecar_url: Optional[str] = None, +) -> tuple[list[str], Optional[str], Optional[dict]]: + """Resolve the argv + cwd + env for the chat PTY. + + Default: whatever ``hermes --tui`` would run. 
Tests monkeypatch this + function to inject a tiny fake command (``cat``, ``sh -c 'printf …'``) + so nothing has to build Node or the TUI bundle. + + Session resume is propagated via the ``HERMES_TUI_RESUME`` env var — + matching what ``hermes_cli.main._launch_tui`` does for the CLI path. + Appending ``--resume `` to argv doesn't work because ``ui-tui`` does + not parse its argv. + + `sidecar_url` (when set) is forwarded as ``HERMES_TUI_SIDECAR_URL`` so + the spawned ``tui_gateway.entry`` can mirror dispatcher emits to the + dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`). + """ + from hermes_cli.main import PROJECT_ROOT, _make_tui_argv + + argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False) + env: Optional[dict] = None + + if resume or sidecar_url: + env = os.environ.copy() + + if resume: + env["HERMES_TUI_RESUME"] = resume + + if sidecar_url: + env["HERMES_TUI_SIDECAR_URL"] = sidecar_url + + return list(argv), str(cwd) if cwd else None, env + + +def _build_sidecar_url(channel: str) -> Optional[str]: + """ws:// URL the PTY child should publish events to, or None when unbound.""" + host = getattr(app.state, "bound_host", None) + port = getattr(app.state, "bound_port", None) + + if not host or not port: + return None + + netloc = f"[{host}]:{port}" if ":" in host and not host.startswith("[") else f"{host}:{port}" + qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel}) + + return f"ws://{netloc}/api/pub?{qs}" + + +async def _broadcast_event(channel: str, payload: str) -> None: + """Fan out one publisher frame to every subscriber on `channel`.""" + async with _event_lock: + subs = list(_event_channels.get(channel, ())) + + for sub in subs: + try: + await sub.send_text(payload) + except Exception: + # Subscriber went away mid-send; the /api/events finally clause + # will remove it from the registry on its next iteration. 
+ pass + + +def _channel_or_close_code(ws: WebSocket) -> Optional[str]: + """Return the channel id from the query string or None if invalid.""" + channel = ws.query_params.get("channel", "") + + return channel if _VALID_CHANNEL_RE.match(channel) else None + + +@app.websocket("/api/pty") +async def pty_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + # --- auth + loopback check (before accept so we can close cleanly) --- + token = ws.query_params.get("token", "") + expected = _SESSION_TOKEN + if not hmac.compare_digest(token.encode(), expected.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + await ws.accept() + + # --- spawn PTY ------------------------------------------------------ + resume = ws.query_params.get("resume") or None + channel = _channel_or_close_code(ws) + sidecar_url = _build_sidecar_url(channel) if channel else None + + try: + argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url) + except SystemExit as exc: + # _make_tui_argv calls sys.exit(1) when node/npm is missing. 
+ await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n") + await ws.close(code=1011) + return + + + try: + bridge = PtyBridge.spawn(argv, cwd=cwd, env=env) + except PtyUnavailableError as exc: + await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n") + await ws.close(code=1011) + return + except (FileNotFoundError, OSError) as exc: + await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n") + await ws.close(code=1011) + return + + loop = asyncio.get_running_loop() + + # --- reader task: PTY master → WebSocket ---------------------------- + async def pump_pty_to_ws() -> None: + while True: + chunk = await loop.run_in_executor( + None, bridge.read, _PTY_READ_CHUNK_TIMEOUT + ) + if chunk is None: # EOF + return + if not chunk: # no data this tick; yield control and retry + await asyncio.sleep(0) + continue + try: + await ws.send_bytes(chunk) + except Exception: + return + + reader_task = asyncio.create_task(pump_pty_to_ws()) + + # --- writer loop: WebSocket → PTY master ---------------------------- + try: + while True: + msg = await ws.receive() + msg_type = msg.get("type") + if msg_type == "websocket.disconnect": + break + raw = msg.get("bytes") + if raw is None: + text = msg.get("text") + raw = text.encode("utf-8") if isinstance(text, str) else b"" + if not raw: + continue + + # Resize escape is consumed locally, never written to the PTY. + match = _RESIZE_RE.match(raw) + if match and match.end() == len(raw): + cols = int(match.group(1)) + rows = int(match.group(2)) + bridge.resize(cols=cols, rows=rows) + continue + + bridge.write(raw) + except WebSocketDisconnect: + pass + finally: + reader_task.cancel() + try: + await reader_task + except (asyncio.CancelledError, Exception): + pass + bridge.close() + + +# --------------------------------------------------------------------------- +# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab. 
+# +# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the +# dashboard can render structured metadata (model badge, tool-call sidebar, +# slash launcher, session info) alongside the xterm.js terminal that PTY +# already paints. Both transports bind to the same session id when one is +# active, so a tool.start emitted by the agent fans out to both sinks. +# --------------------------------------------------------------------------- + + +@app.websocket("/api/ws") +async def gateway_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + token = ws.query_params.get("token", "") + if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + from tui_gateway.ws import handle_ws + + await handle_ws(ws) + + +# --------------------------------------------------------------------------- +# /api/pub + /api/events — chat-tab event broadcast. +# +# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by +# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every +# dispatcher emit through it. The dashboard fans those frames out to any +# subscriber that opened /api/events on the same channel id. This is what +# gives the React sidebar its tool-call feed without breaking the PTY +# child's stdio handshake with Ink. 
+# --------------------------------------------------------------------------- + + +@app.websocket("/api/pub") +async def pub_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + token = ws.query_params.get("token", "") + if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + channel = _channel_or_close_code(ws) + if not channel: + await ws.close(code=4400) + return + + await ws.accept() + + try: + while True: + await _broadcast_event(channel, await ws.receive_text()) + except WebSocketDisconnect: + pass + + +@app.websocket("/api/events") +async def events_ws(ws: WebSocket) -> None: + if not _DASHBOARD_EMBEDDED_CHAT_ENABLED: + await ws.close(code=4403) + return + + token = ws.query_params.get("token", "") + if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()): + await ws.close(code=4401) + return + + client_host = ws.client.host if ws.client else "" + if client_host and client_host not in _LOOPBACK_HOSTS: + await ws.close(code=4403) + return + + channel = _channel_or_close_code(ws) + if not channel: + await ws.close(code=4400) + return + + await ws.accept() + + async with _event_lock: + _event_channels.setdefault(channel, set()).add(ws) + + try: + while True: + # Subscribers don't speak — the receive() just blocks until + # disconnect so the connection stays open as long as the + # browser holds it. + await ws.receive_text() + except WebSocketDisconnect: + pass + finally: + async with _event_lock: + subs = _event_channels.get(channel) + + if subs is not None: + subs.discard(ws) + + if not subs: + _event_channels.pop(channel, None) + + def mount_spa(application: FastAPI): """Mount the built SPA. Falls back to index.html for client-side routing. 
@@ -2284,8 +2615,10 @@ def mount_spa(application: FastAPI): def _serve_index(): """Return index.html with the session token injected.""" html = _index_path.read_text() + chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false" token_script = ( - f'' + f'" ) html = html.replace("", f"{token_script}", 1) return HTMLResponse( @@ -2798,10 +3131,15 @@ def start_server( port: int = 9119, open_browser: bool = True, allow_public: bool = False, + *, + embedded_chat: bool = False, ): """Start the web UI server.""" import uvicorn + global _DASHBOARD_EMBEDDED_CHAT_ENABLED + _DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat + _LOCALHOST = ("127.0.0.1", "localhost", "::1") if host not in _LOCALHOST and not allow_public: raise SystemExit( @@ -2817,7 +3155,10 @@ def start_server( # Record the bound host so host_header_middleware can validate incoming # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). + # bound_port is also stashed so /api/pty can build the back-WS URL the + # PTY child uses to publish events to the dashboard sidebar. app.state.bound_host = host + app.state.bound_port = port if open_browser: import webbrowser diff --git a/hermes_state.py b/hermes_state.py index 0ea9815b5..ed95d25f4 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -1039,6 +1039,71 @@ class SessionDB: result.append(msg) return result + def resolve_resume_session_id(self, session_id: str) -> str: + """Redirect a resume target to the descendant session that holds the messages. + + Context compression ends the current session and forks a new child session + (linked via ``parent_session_id``). The flush cursor is reset, so the + child is where new messages actually land — the parent ends up with + ``message_count = 0`` rows unless messages had already been flushed to + it before compression. See #15000. + + This helper walks ``parent_session_id`` forward from ``session_id`` and + returns the first descendant in the chain that has at least one message + row. 
If the original session already has messages, or no descendant + has any, the original ``session_id`` is returned unchanged. + + The chain is always walked via the child whose ``started_at`` is + latest; that matches the single-chain shape that compression creates. + A depth cap (32) guards against accidental loops in malformed data. + """ + if not session_id: + return session_id + + with self._lock: + # If this session already has messages, nothing to redirect. + try: + row = self._conn.execute( + "SELECT 1 FROM messages WHERE session_id = ? LIMIT 1", + (session_id,), + ).fetchone() + except Exception: + return session_id + if row is not None: + return session_id + + # Walk descendants: at each step, pick the most-recently-started + # child session; stop once we find one with messages. + current = session_id + seen = {current} + for _ in range(32): + try: + child_row = self._conn.execute( + "SELECT id FROM sessions " + "WHERE parent_session_id = ? " + "ORDER BY started_at DESC, id DESC LIMIT 1", + (current,), + ).fetchone() + except Exception: + return session_id + if child_row is None: + return session_id + child_id = child_row["id"] if hasattr(child_row, "keys") else child_row[0] + if not child_id or child_id in seen: + return session_id + seen.add(child_id) + try: + msg_row = self._conn.execute( + "SELECT 1 FROM messages WHERE session_id = ? LIMIT 1", + (child_id,), + ).fetchone() + except Exception: + return session_id + if msg_row is not None: + return child_id + current = child_id + return session_id + def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]: """ Load messages in the OpenAI conversation format (role + content dicts). 
diff --git a/model_tools.py b/model_tools.py index bee80f49b..36cea8f30 100644 --- a/model_tools.py +++ b/model_tools.py @@ -343,6 +343,18 @@ def get_tool_definitions( global _last_resolved_tool_names _last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools] + # Sanitize schemas for broad backend compatibility. llama.cpp's + # json-schema-to-grammar converter (used by its OAI server to build + # GBNF tool-call parsers) rejects some shapes that cloud providers + # silently accept — bare "type": "object" with no properties, + # string-valued schema nodes from malformed MCP servers, etc. This + # is a no-op for schemas that are already well-formed. + try: + from tools.schema_sanitizer import sanitize_tool_schemas + filtered_tools = sanitize_tool_schemas(filtered_tools) + except Exception as e: # pragma: no cover — defensive + logger.warning("Schema sanitization skipped: %s", e) + return filtered_tools @@ -418,6 +430,31 @@ def _coerce_value(value: str, expected_type): return _coerce_number(value, integer_only=(expected_type == "integer")) if expected_type == "boolean": return _coerce_boolean(value) + if expected_type == "array": + return _coerce_json(value, list) + if expected_type == "object": + return _coerce_json(value, dict) + return value + + +def _coerce_json(value: str, expected_python_type: type): + """Parse *value* as JSON when the schema expects an array or object. + + Handles model output drift where a complex oneOf/discriminated-union schema + causes the LLM to emit the array/object as a JSON string instead of a native + structure. Returns the original string if parsing fails or yields the wrong + Python type. 
+ """ + try: + parsed = json.loads(value) + except (ValueError, TypeError): + return value + if isinstance(parsed, expected_python_type): + logger.debug( + "coerce_tool_args: coerced string to %s via json.loads", + expected_python_type.__name__, + ) + return parsed return value @@ -427,9 +464,9 @@ def _coerce_number(value: str, integer_only: bool = False): f = float(value) except (ValueError, OverflowError): return value - # Guard against inf/nan before int() conversion + # Guard against inf/nan — not JSON-serializable, keep original string if f != f or f == float("inf") or f == float("-inf"): - return f + return value # If it looks like an integer (no fractional part), return int if f == int(f): return int(f) diff --git a/nix/lib.nix b/nix/lib.nix index ee28537a6..e53a989f8 100644 --- a/nix/lib.nix +++ b/nix/lib.nix @@ -156,7 +156,7 @@ for entry in "''${ENTRIES[@]}"; do IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry" echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)" - OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1) + OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1) STATUS=$? 
if [ "$STATUS" -eq 0 ]; then echo " ok" diff --git a/nix/web.nix b/nix/web.nix index fc7772896..e79826fee 100644 --- a/nix/web.nix +++ b/nix/web.nix @@ -4,7 +4,7 @@ let src = ../web; npmDeps = pkgs.fetchNpmDeps { inherit src; - hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg="; + hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o="; }; npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; }; diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md index 3fbdc2aba..4c7e0f6be 100644 --- a/plugins/memory/hindsight/README.md +++ b/plugins/memory/hindsight/README.md @@ -59,7 +59,8 @@ Config file: `~/.hermes/hindsight/config.json` | Key | Default | Description | |-----|---------|-------------| -| `bank_id` | `hermes` | Memory bank name | +| `bank_id` | `hermes` | Memory bank name (static fallback used when `bank_id_template` is unset or resolves empty) | +| `bank_id_template` | — | Optional template to derive the bank name dynamically. Placeholders: `{profile}`, `{workspace}`, `{platform}`, `{user}`, `{session}`. Example: `hermes-{profile}` isolates memory per active Hermes profile. Empty placeholders collapse cleanly (e.g. `hermes-{user}` with no user becomes `hermes`). | | `bank_mission` | — | Reflect mission (identity/framing for reflect reasoning). Applied via Banks API. | | `bank_retain_mission` | — | Retain mission (steers what gets extracted). Applied via Banks API. | diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index 2b233e265..bc82bc40f 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -3,6 +3,8 @@ Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud (API key) and local modes. +Configurable timeout via HINDSIGHT_TIMEOUT env var or config.json. + Original PR #1811 by benfrank241, adapted to MemoryProvider ABC. 
Config via environment variables: @@ -11,6 +13,7 @@ Config via environment variables: HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid) HINDSIGHT_API_URL — API endpoint HINDSIGHT_MODE — cloud or local (default: cloud) + HINDSIGHT_TIMEOUT — API request timeout in seconds (default: 120) HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories HINDSIGHT_RETAIN_USER_PREFIX — label used before user turns in retained transcripts @@ -23,6 +26,7 @@ Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to from __future__ import annotations import asyncio +import importlib import json import logging import os @@ -40,6 +44,7 @@ logger = logging.getLogger(__name__) _DEFAULT_API_URL = "https://api.hindsight.vectorize.io" _DEFAULT_LOCAL_URL = "http://localhost:8888" _MIN_CLIENT_VERSION = "0.4.22" +_DEFAULT_TIMEOUT = 120 # seconds — cloud API can take 30-40s per request _VALID_BUDGETS = {"low", "mid", "high"} _PROVIDER_DEFAULT_MODELS = { "openai": "gpt-4o-mini", @@ -54,6 +59,22 @@ _PROVIDER_DEFAULT_MODELS = { } +def _check_local_runtime() -> tuple[bool, str | None]: + """Return whether local embedded Hindsight imports cleanly. + + On older CPUs, importing the local Hindsight stack can raise a runtime + error from NumPy before the daemon starts. Treat that as "unavailable" + so Hermes can degrade gracefully instead of repeatedly trying to start + a broken local memory backend. + """ + try: + importlib.import_module("hindsight") + importlib.import_module("hindsight_embed.daemon_embed_manager") + return True, None + except Exception as exc: + return False, str(exc) + + # --------------------------------------------------------------------------- # Dedicated event loop for Hindsight async calls (one per process, reused). # Avoids creating ephemeral loops that leak aiohttp sessions. 
@@ -81,13 +102,18 @@ def _get_loop() -> asyncio.AbstractEventLoop: return _loop -def _run_sync(coro, timeout: float = 120.0): +def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT): """Schedule *coro* on the shared loop and block until done.""" loop = _get_loop() future = asyncio.run_coroutine_threadsafe(coro, loop) return future.result(timeout=timeout) +# --------------------------------------------------------------------------- +# Backward-compatible alias — instances use self._run_sync() instead. +# --------------------------------------------------------------------------- + + # --------------------------------------------------------------------------- # Tool schemas # --------------------------------------------------------------------------- @@ -233,6 +259,126 @@ def _utc_timestamp() -> str: return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z") +def _embedded_profile_name(config: dict[str, Any]) -> str: + """Return the Hindsight embedded profile name for this Hermes config.""" + profile = config.get("profile", "hermes") + return str(profile or "hermes") + + +def _load_simple_env(path) -> dict[str, str]: + """Parse a simple KEY=VALUE env file, ignoring comments and blank lines.""" + if not path.exists(): + return {} + + values: dict[str, str] = {} + for line in path.read_text(encoding="utf-8").splitlines(): + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + values[key.strip()] = value.strip() + return values + + +def _build_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | None = None) -> dict[str, str]: + """Build the profile-scoped env file that standalone hindsight-embed consumes.""" + current_key = llm_api_key + if current_key is None: + current_key = ( + config.get("llmApiKey") + or config.get("llm_api_key") + or os.environ.get("HINDSIGHT_LLM_API_KEY", "") + ) + + current_provider = config.get("llm_provider", "") + current_model = 
config.get("llm_model", "") + current_base_url = config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "") + + # The embedded daemon expects OpenAI wire format for these providers. + daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider + + env_values = { + "HINDSIGHT_API_LLM_PROVIDER": str(daemon_provider), + "HINDSIGHT_API_LLM_API_KEY": str(current_key or ""), + "HINDSIGHT_API_LLM_MODEL": str(current_model), + "HINDSIGHT_API_LOG_LEVEL": "info", + } + if current_base_url: + env_values["HINDSIGHT_API_LLM_BASE_URL"] = str(current_base_url) + return env_values + + +def _embedded_profile_env_path(config: dict[str, Any]): + from pathlib import Path + + return Path.home() / ".hindsight" / "profiles" / f"{_embedded_profile_name(config)}.env" + + +def _materialize_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | None = None): + """Write the profile-scoped env file that standalone hindsight-embed uses.""" + profile_env = _embedded_profile_env_path(config) + profile_env.parent.mkdir(parents=True, exist_ok=True) + env_values = _build_embedded_profile_env(config, llm_api_key=llm_api_key) + profile_env.write_text( + "".join(f"{key}={value}\n" for key, value in env_values.items()), + encoding="utf-8", + ) + return profile_env + +def _sanitize_bank_segment(value: str) -> str: + """Sanitize a bank_id_template placeholder value. + + Bank IDs should be safe for URL paths and filesystem use. Replaces any + character that isn't alphanumeric, dash, or underscore with a dash, and + collapses runs of dashes. 
+ """ + if not value: + return "" + out = [] + prev_dash = False + for ch in str(value): + if ch.isalnum() or ch == "-" or ch == "_": + out.append(ch) + prev_dash = False + else: + if not prev_dash: + out.append("-") + prev_dash = True + return "".join(out).strip("-_") + + +def _resolve_bank_id_template(template: str, fallback: str, **placeholders: str) -> str: + """Resolve a bank_id template string with the given placeholders. + + Supported placeholders (each is sanitized before substitution): + {profile} — active Hermes profile name (from agent_identity) + {workspace} — Hermes workspace name (from agent_workspace) + {platform} — "cli", "telegram", "discord", etc. + {user} — platform user id (gateway sessions) + {session} — current session id + + Missing/empty placeholders are rendered as the empty string and then + collapsed — e.g. ``hermes-{user}`` with no user becomes ``hermes``. + + If the template is empty, resolution falls back to *fallback*. + Returns the sanitized bank id. + """ + if not template: + return fallback + sanitized = {k: _sanitize_bank_segment(v) for k, v in placeholders.items()} + try: + rendered = template.format(**sanitized) + except (KeyError, IndexError) as exc: + logger.warning("Invalid bank_id_template %r: %s — using fallback %r", + template, exc, fallback) + return fallback + while "--" in rendered: + rendered = rendered.replace("--", "-") + while "__" in rendered: + rendered = rendered.replace("__", "_") + rendered = rendered.strip("-_") + return rendered or fallback + + # --------------------------------------------------------------------------- # MemoryProvider implementation # --------------------------------------------------------------------------- @@ -262,13 +408,17 @@ class HindsightMemoryProvider(MemoryProvider): self._chat_type = "" self._thread_id = "" self._agent_identity = "" + self._agent_workspace = "" self._turn_index = 0 self._client = None + self._timeout = _DEFAULT_TIMEOUT self._prefetch_result = "" 
self._prefetch_lock = threading.Lock() self._prefetch_thread = None self._sync_thread = None self._session_id = "" + self._parent_session_id = "" + self._document_id = "" # Tags self._tags: list[str] | None = None @@ -293,6 +443,7 @@ class HindsightMemoryProvider(MemoryProvider): # Bank self._bank_mission = "" self._bank_retain_mission: str | None = None + self._bank_id_template = "" @property def name(self) -> str: @@ -302,9 +453,16 @@ class HindsightMemoryProvider(MemoryProvider): try: cfg = _load_config() mode = cfg.get("mode", "cloud") - if mode in ("local", "local_embedded", "local_external"): + if mode in ("local", "local_embedded"): + available, _ = _check_local_runtime() + return available + if mode == "local_external": return True - has_key = bool(cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")) + has_key = bool( + cfg.get("apiKey") + or cfg.get("api_key") + or os.environ.get("HINDSIGHT_API_KEY", "") + ) has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", "")) return has_key or has_url except Exception: @@ -363,7 +521,7 @@ class HindsightMemoryProvider(MemoryProvider): else: deps_to_install = [cloud_dep] - print(f"\n Checking dependencies...") + print("\n Checking dependencies...") uv_path = shutil.which("uv") if not uv_path: print(" ⚠ uv not found — install it: curl -LsSf https://astral.sh/uv/install.sh | sh") @@ -374,14 +532,14 @@ class HindsightMemoryProvider(MemoryProvider): [uv_path, "pip", "install", "--python", sys.executable, "--quiet", "--upgrade"] + deps_to_install, check=True, timeout=120, capture_output=True, ) - print(f" ✓ Dependencies up to date") + print(" ✓ Dependencies up to date") except Exception as e: print(f" ⚠ Install failed: {e}") print(f" Run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}") # Step 3: Mode-specific config if mode == "cloud": - print(f"\n Get your API key at https://ui.hindsight.vectorize.io\n") + print("\n Get your API key at 
https://ui.hindsight.vectorize.io\n") existing_key = os.environ.get("HINDSIGHT_API_KEY", "") if existing_key: masked = f"...{existing_key[-4:]}" if len(existing_key) > 4 else "set" @@ -434,13 +592,19 @@ class HindsightMemoryProvider(MemoryProvider): sys.stdout.write(" LLM API key: ") sys.stdout.flush() llm_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip() - if llm_key: - env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key + # Always write explicitly (including empty) so the provider sees "" + # rather than a missing variable. The daemon reads from .env at + # startup and fails when HINDSIGHT_LLM_API_KEY is unset. + env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key # Step 4: Save everything provider_config["bank_id"] = "hermes" provider_config["recall_budget"] = "mid" - bank_id = "hermes" + # Read existing timeout from config if present, otherwise use default + existing_timeout = self._config.get("timeout") if self._config else None + timeout_val = existing_timeout if existing_timeout else _DEFAULT_TIMEOUT + provider_config["timeout"] = timeout_val + env_writes["HINDSIGHT_TIMEOUT"] = str(timeout_val) config["memory"]["provider"] = "hindsight" save_config(config) @@ -466,10 +630,32 @@ class HindsightMemoryProvider(MemoryProvider): new_lines.append(f"{k}={v}") env_path.write_text("\n".join(new_lines) + "\n") + if mode == "local_embedded": + materialized_config = dict(provider_config) + config_path = Path(hermes_home) / "hindsight" / "config.json" + try: + materialized_config = json.loads(config_path.read_text(encoding="utf-8")) + except Exception: + pass + + llm_api_key = env_writes.get("HINDSIGHT_LLM_API_KEY", "") + if not llm_api_key: + llm_api_key = _load_simple_env(Path(hermes_home) / ".env").get("HINDSIGHT_LLM_API_KEY", "") + if not llm_api_key: + llm_api_key = _load_simple_env(_embedded_profile_env_path(materialized_config)).get( + "HINDSIGHT_API_LLM_API_KEY", + "", + ) + + _materialize_embedded_profile_env( + materialized_config, + 
llm_api_key=llm_api_key or None, + ) + print(f"\n ✓ Hindsight memory configured ({mode} mode)") if env_writes: - print(f" API keys saved to .env") - print(f"\n Start a new session to activate.\n") + print(" API keys saved to .env") + print("\n Start a new session to activate.\n") def get_config_schema(self): return [ @@ -485,7 +671,8 @@ class HindsightMemoryProvider(MemoryProvider): {"key": "llm_base_url", "description": "Endpoint URL (e.g. http://192.168.1.10:8080/v1)", "default": "", "when": {"mode": "local_embedded", "llm_provider": "openai_compatible"}}, {"key": "llm_api_key", "description": "LLM API key (optional for openai_compatible)", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local_embedded"}}, {"key": "llm_model", "description": "LLM model", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local_embedded"}}, - {"key": "bank_id", "description": "Memory bank name", "default": "hermes"}, + {"key": "bank_id", "description": "Memory bank name (static fallback when bank_id_template is unset)", "default": "hermes"}, + {"key": "bank_id_template", "description": "Optional template to derive bank_id dynamically. Placeholders: {profile}, {workspace}, {platform}, {user}, {session}. 
Example: hermes-{profile}", "default": ""}, {"key": "bank_mission", "description": "Mission/purpose description for the memory bank"}, {"key": "bank_retain_mission", "description": "Custom extraction prompt for memory retention"}, {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]}, @@ -505,12 +692,19 @@ class HindsightMemoryProvider(MemoryProvider): {"key": "recall_max_tokens", "description": "Maximum tokens for recall results", "default": 4096}, {"key": "recall_max_input_chars", "description": "Maximum input query length for auto-recall", "default": 800}, {"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"}, + {"key": "timeout", "description": "API request timeout in seconds", "default": _DEFAULT_TIMEOUT}, ] def _get_client(self): """Return the cached Hindsight client (created once, reused).""" if self._client is None: if self._mode == "local_embedded": + available, reason = _check_local_runtime() + if not available: + raise RuntimeError( + "Hindsight local runtime is unavailable" + + (f": {reason}" if reason else "") + ) from hindsight import HindsightEmbedded HindsightEmbedded.__del__ = lambda self: None llm_provider = self._config.get("llm_provider", "") @@ -529,16 +723,30 @@ class HindsightMemoryProvider(MemoryProvider): self._client = HindsightEmbedded(**kwargs) else: from hindsight_client import Hindsight - kwargs = {"base_url": self._api_url, "timeout": 30.0} + timeout = self._timeout or _DEFAULT_TIMEOUT + kwargs = {"base_url": self._api_url, "timeout": float(timeout)} if self._api_key: kwargs["api_key"] = self._api_key - logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s)", - self._api_url, bool(self._api_key)) + logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s, timeout=%s)", + self._api_url, bool(self._api_key), kwargs["timeout"]) self._client = Hindsight(**kwargs) return self._client + def _run_sync(self, coro): 
+ """Schedule *coro* on the shared loop using the configured timeout.""" + return _run_sync(coro, timeout=self._timeout) + def initialize(self, session_id: str, **kwargs) -> None: self._session_id = str(session_id or "").strip() + self._parent_session_id = str(kwargs.get("parent_session_id", "") or "").strip() + + # Each process lifecycle gets its own document_id. Reusing session_id + # alone caused overwrites on /resume — the reloaded session starts + # with an empty _session_turns, so the next retain would replace the + # previously stored content. session_id stays in tags so processes + # for the same session remain filterable together. + start_ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f") + self._document_id = f"{self._session_id}-{start_ts}" # Check client version and auto-upgrade if needed try: @@ -548,7 +756,9 @@ class HindsightMemoryProvider(MemoryProvider): if Version(installed) < Version(_MIN_CLIENT_VERSION): logger.warning("hindsight-client %s is outdated (need >=%s), attempting upgrade...", installed, _MIN_CLIENT_VERSION) - import shutil, subprocess, sys + import shutil + import subprocess + import sys uv_path = shutil.which("uv") if uv_path: try: @@ -575,19 +785,41 @@ class HindsightMemoryProvider(MemoryProvider): self._chat_type = str(kwargs.get("chat_type") or "").strip() self._thread_id = str(kwargs.get("thread_id") or "").strip() self._agent_identity = str(kwargs.get("agent_identity") or "").strip() + self._agent_workspace = str(kwargs.get("agent_workspace") or "").strip() self._turn_index = 0 self._session_turns = [] self._mode = self._config.get("mode", "cloud") + # Read timeout from config or env var, fall back to default + self._timeout = self._config.get("timeout") or int(os.environ.get("HINDSIGHT_TIMEOUT", str(_DEFAULT_TIMEOUT))) # "local" is a legacy alias for "local_embedded" if self._mode == "local": self._mode = "local_embedded" + if self._mode == "local_embedded": + available, reason = _check_local_runtime() + if not available: + 
logger.warning( + "Hindsight local mode disabled because its runtime could not be imported: %s", + reason, + ) + self._mode = "disabled" + return self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "") default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url) self._llm_base_url = self._config.get("llm_base_url", "") banks = self._config.get("banks", {}).get("hermes", {}) - self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes") + static_bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes") + self._bank_id_template = self._config.get("bank_id_template", "") or "" + self._bank_id = _resolve_bank_id_template( + self._bank_id_template, + fallback=static_bank_id, + profile=self._agent_identity, + workspace=self._agent_workspace, + platform=self._platform, + user=self._user_id, + session=self._session_id, + ) budget = self._config.get("recall_budget") or self._config.get("budget") or banks.get("budget", "mid") self._budget = budget if budget in _VALID_BUDGETS else "mid" @@ -640,6 +872,10 @@ class HindsightMemoryProvider(MemoryProvider): pass logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s", self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version) + if self._bank_id_template: + logger.debug("Hindsight bank resolved from template %r: profile=%s workspace=%s platform=%s user=%s -> bank=%s", + self._bank_id_template, self._agent_identity, self._agent_workspace, + self._platform, self._user_id, self._bank_id) logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, " "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s", 
self._auto_retain, self._auto_recall, self._retain_every_n_turns, @@ -669,42 +905,13 @@ class HindsightMemoryProvider(MemoryProvider): # Update the profile .env to match our current config so # the daemon always starts with the right settings. # If the config changed and the daemon is running, stop it. - from pathlib import Path as _Path - profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env" - current_key = self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", "") - current_provider = self._config.get("llm_provider", "") - current_model = self._config.get("llm_model", "") - current_base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "") - # Map openai_compatible/openrouter → openai for the daemon (OpenAI wire format) - daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider - - # Read saved profile config - saved = {} - if profile_env.exists(): - for line in profile_env.read_text().splitlines(): - if "=" in line and not line.startswith("#"): - k, v = line.split("=", 1) - saved[k.strip()] = v.strip() - - config_changed = ( - saved.get("HINDSIGHT_API_LLM_PROVIDER") != daemon_provider or - saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or - saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key or - saved.get("HINDSIGHT_API_LLM_BASE_URL", "") != current_base_url - ) + profile_env = _embedded_profile_env_path(self._config) + expected_env = _build_embedded_profile_env(self._config) + saved = _load_simple_env(profile_env) + config_changed = saved != expected_env if config_changed: - # Write updated profile .env - profile_env.parent.mkdir(parents=True, exist_ok=True) - env_lines = ( - f"HINDSIGHT_API_LLM_PROVIDER={daemon_provider}\n" - f"HINDSIGHT_API_LLM_API_KEY={current_key}\n" - f"HINDSIGHT_API_LLM_MODEL={current_model}\n" - f"HINDSIGHT_API_LOG_LEVEL=info\n" - ) - if current_base_url: - env_lines += 
f"HINDSIGHT_API_LLM_BASE_URL={current_base_url}\n" - profile_env.write_text(env_lines) + profile_env = _materialize_embedded_profile_env(self._config) if client._manager.is_running(profile): with open(log_path, "a") as f: f.write("\n=== Config changed, restarting daemon ===\n") @@ -777,7 +984,7 @@ class HindsightMemoryProvider(MemoryProvider): client = self._get_client() if self._prefetch_method == "reflect": logger.debug("Prefetch: calling reflect (bank=%s, query_len=%d)", self._bank_id, len(query)) - resp = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget)) + resp = self._run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget)) text = resp.text or "" else: recall_kwargs: dict = { @@ -791,7 +998,7 @@ class HindsightMemoryProvider(MemoryProvider): recall_kwargs["types"] = self._recall_types logger.debug("Prefetch: calling recall (bank=%s, query_len=%d, budget=%s)", self._bank_id, len(query), self._budget) - resp = _run_sync(client.arecall(**recall_kwargs)) + resp = self._run_sync(client.arecall(**recall_kwargs)) num_results = len(resp.results) if resp.results else 0 logger.debug("Prefetch: recall returned %d results", num_results) text = "\n".join(f"- {r.text}" for r in resp.results if r.text) if resp.results else "" @@ -888,7 +1095,7 @@ class HindsightMemoryProvider(MemoryProvider): if session_id: self._session_id = str(session_id).strip() - turn = json.dumps(self._build_turn_messages(user_content, assistant_content)) + turn = json.dumps(self._build_turn_messages(user_content, assistant_content), ensure_ascii=False) self._session_turns.append(turn) self._turn_counter += 1 self._turn_index = self._turn_counter @@ -902,6 +1109,12 @@ class HindsightMemoryProvider(MemoryProvider): len(self._session_turns), sum(len(t) for t in self._session_turns)) content = "[" + ",".join(self._session_turns) + "]" + lineage_tags: list[str] = [] + if self._session_id: + lineage_tags.append(f"session:{self._session_id}") + if 
self._parent_session_id: + lineage_tags.append(f"parent:{self._parent_session_id}") + def _sync(): try: client = self._get_client() @@ -912,15 +1125,16 @@ class HindsightMemoryProvider(MemoryProvider): message_count=len(self._session_turns) * 2, turn_index=self._turn_index, ), + tags=lineage_tags or None, ) item.pop("bank_id", None) item.pop("retain_async", None) logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d", - self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns)) - _run_sync(client.aretain_batch( + self._bank_id, self._document_id, self._retain_async, len(content), len(self._session_turns)) + self._run_sync(client.aretain_batch( bank_id=self._bank_id, items=[item], - document_id=self._session_id, + document_id=self._document_id, retain_async=self._retain_async, )) logger.debug("Hindsight retain succeeded") @@ -957,7 +1171,7 @@ class HindsightMemoryProvider(MemoryProvider): ) logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s", self._bank_id, len(content), context) - _run_sync(client.aretain(**retain_kwargs)) + self._run_sync(client.aretain(**retain_kwargs)) logger.debug("Tool hindsight_retain: success") return json.dumps({"result": "Memory stored successfully."}) except Exception as e: @@ -980,7 +1194,7 @@ class HindsightMemoryProvider(MemoryProvider): recall_kwargs["types"] = self._recall_types logger.debug("Tool hindsight_recall: bank=%s, query_len=%d, budget=%s", self._bank_id, len(query), self._budget) - resp = _run_sync(client.arecall(**recall_kwargs)) + resp = self._run_sync(client.arecall(**recall_kwargs)) num_results = len(resp.results) if resp.results else 0 logger.debug("Tool hindsight_recall: %d results", num_results) if not resp.results: @@ -998,7 +1212,7 @@ class HindsightMemoryProvider(MemoryProvider): try: logger.debug("Tool hindsight_reflect: bank=%s, query_len=%d, budget=%s", self._bank_id, len(query), self._budget) - resp = 
_run_sync(client.areflect( + resp = self._run_sync(client.areflect( bank_id=self._bank_id, query=query, budget=self._budget )) logger.debug("Tool hindsight_reflect: response_len=%d", len(resp.text or "")) @@ -1011,7 +1225,6 @@ class HindsightMemoryProvider(MemoryProvider): def shutdown(self) -> None: logger.debug("Hindsight shutdown: waiting for background threads") - global _loop, _loop_thread for t in (self._prefetch_thread, self._sync_thread): if t and t.is_alive(): t.join(timeout=5.0) @@ -1026,17 +1239,21 @@ class HindsightMemoryProvider(MemoryProvider): except RuntimeError: pass else: - _run_sync(self._client.aclose()) + self._run_sync(self._client.aclose()) except Exception: pass self._client = None - # Stop the background event loop so no tasks are pending at exit - if _loop is not None and _loop.is_running(): - _loop.call_soon_threadsafe(_loop.stop) - if _loop_thread is not None: - _loop_thread.join(timeout=5.0) - _loop = None - _loop_thread = None + # The module-global background event loop (_loop / _loop_thread) + # is intentionally NOT stopped here. It is shared across every + # HindsightMemoryProvider instance in the process — the plugin + # loader creates a new provider per AIAgent, and the gateway + # creates one AIAgent per concurrent chat session. Stopping the + # loop from one provider's shutdown() strands the aiohttp + # ClientSession + TCPConnector owned by every sibling provider + # on a dead loop, which surfaces as the "Unclosed client session" + # / "Unclosed connector" warnings reported in #11923. The loop + # runs on a daemon thread and is reclaimed on process exit; + # per-session cleanup happens via self._client.aclose() above. def register(ctx) -> None: diff --git a/plugins/spotify/__init__.py b/plugins/spotify/__init__.py new file mode 100644 index 000000000..0f68bba1f --- /dev/null +++ b/plugins/spotify/__init__.py @@ -0,0 +1,66 @@ +"""Spotify integration plugin — bundled, auto-loaded. 
+ +Registers 7 tools (playback, devices, queue, search, playlists, albums, +library) into the ``spotify`` toolset. Each tool's handler is gated by +``_check_spotify_available()`` — when the user has not run ``hermes auth +spotify``, the tools remain registered (so they appear in ``hermes +tools``) but the runtime check prevents dispatch. + +Why a plugin instead of a top-level ``tools/`` file? + +- ``plugins/`` is where third-party service integrations live (see + ``plugins/image_gen/`` for the backend-provider pattern, ``plugins/ + disk-cleanup/`` for the standalone pattern). ``tools/`` is reserved + for foundational capabilities (terminal, read_file, web_search, etc.). +- Mirroring the image_gen plugin layout (``plugins/<category>/<plugin>/`` + for categories, flat ``plugins/<plugin>/`` for standalones) makes new + service integrations a pattern contributors can copy. +- Bundled + ``kind: backend`` auto-loads on startup just like image_gen + backends — no user opt-in needed, no ``plugins.enabled`` config. + +The Spotify auth flow (``hermes auth spotify``), CLI plumbing, and docs +are unchanged. This move is purely structural.
+""" + +from __future__ import annotations + +from plugins.spotify.tools import ( + SPOTIFY_ALBUMS_SCHEMA, + SPOTIFY_DEVICES_SCHEMA, + SPOTIFY_LIBRARY_SCHEMA, + SPOTIFY_PLAYBACK_SCHEMA, + SPOTIFY_PLAYLISTS_SCHEMA, + SPOTIFY_QUEUE_SCHEMA, + SPOTIFY_SEARCH_SCHEMA, + _check_spotify_available, + _handle_spotify_albums, + _handle_spotify_devices, + _handle_spotify_library, + _handle_spotify_playback, + _handle_spotify_playlists, + _handle_spotify_queue, + _handle_spotify_search, +) + +_TOOLS = ( + ("spotify_playback", SPOTIFY_PLAYBACK_SCHEMA, _handle_spotify_playback, "🎵"), + ("spotify_devices", SPOTIFY_DEVICES_SCHEMA, _handle_spotify_devices, "🔈"), + ("spotify_queue", SPOTIFY_QUEUE_SCHEMA, _handle_spotify_queue, "📻"), + ("spotify_search", SPOTIFY_SEARCH_SCHEMA, _handle_spotify_search, "🔎"), + ("spotify_playlists", SPOTIFY_PLAYLISTS_SCHEMA, _handle_spotify_playlists, "📚"), + ("spotify_albums", SPOTIFY_ALBUMS_SCHEMA, _handle_spotify_albums, "💿"), + ("spotify_library", SPOTIFY_LIBRARY_SCHEMA, _handle_spotify_library, "❤️"), +) + + +def register(ctx) -> None: + """Register all Spotify tools. 
Called once by the plugin loader.""" + for name, schema, handler, emoji in _TOOLS: + ctx.register_tool( + name=name, + toolset="spotify", + schema=schema, + handler=handler, + check_fn=_check_spotify_available, + emoji=emoji, + ) diff --git a/plugins/spotify/client.py b/plugins/spotify/client.py new file mode 100644 index 000000000..2195cc20a --- /dev/null +++ b/plugins/spotify/client.py @@ -0,0 +1,435 @@ +"""Thin Spotify Web API helper used by Hermes native tools.""" + +from __future__ import annotations + +import json +from typing import Any, Dict, Iterable, Optional +from urllib.parse import urlparse + +import httpx + +from hermes_cli.auth import ( + AuthError, + resolve_spotify_runtime_credentials, +) + + +class SpotifyError(RuntimeError): + """Base Spotify tool error.""" + + +class SpotifyAuthRequiredError(SpotifyError): + """Raised when the user needs to authenticate with Spotify first.""" + + +class SpotifyAPIError(SpotifyError): + """Structured Spotify API failure.""" + + def __init__( + self, + message: str, + *, + status_code: Optional[int] = None, + response_body: Optional[str] = None, + ) -> None: + super().__init__(message) + self.status_code = status_code + self.response_body = response_body + self.path = None + + +class SpotifyClient: + def __init__(self) -> None: + self._runtime = self._resolve_runtime(refresh_if_expiring=True) + + def _resolve_runtime(self, *, force_refresh: bool = False, refresh_if_expiring: bool = True) -> Dict[str, Any]: + try: + return resolve_spotify_runtime_credentials( + force_refresh=force_refresh, + refresh_if_expiring=refresh_if_expiring, + ) + except AuthError as exc: + raise SpotifyAuthRequiredError(str(exc)) from exc + + @property + def base_url(self) -> str: + return str(self._runtime.get("base_url") or "").rstrip("/") + + def _headers(self) -> Dict[str, str]: + return { + "Authorization": f"Bearer {self._runtime['access_token']}", + "Content-Type": "application/json", + } + + def request( + self, + method: str, + 
path: str, + *, + params: Optional[Dict[str, Any]] = None, + json_body: Optional[Dict[str, Any]] = None, + allow_retry_on_401: bool = True, + empty_response: Optional[Dict[str, Any]] = None, + ) -> Any: + url = f"{self.base_url}{path}" + response = httpx.request( + method, + url, + headers=self._headers(), + params=_strip_none(params), + json=_strip_none(json_body) if json_body is not None else None, + timeout=30.0, + ) + if response.status_code == 401 and allow_retry_on_401: + self._runtime = self._resolve_runtime(force_refresh=True, refresh_if_expiring=True) + return self.request( + method, + path, + params=params, + json_body=json_body, + allow_retry_on_401=False, + ) + if response.status_code >= 400: + self._raise_api_error(response, method=method, path=path) + if response.status_code == 204 or not response.content: + return empty_response or {"success": True, "status_code": response.status_code, "empty": True} + if "application/json" in response.headers.get("content-type", ""): + return response.json() + return {"success": True, "text": response.text} + + def _raise_api_error(self, response: httpx.Response, *, method: str, path: str) -> None: + detail = response.text.strip() + message = _friendly_spotify_error_message( + status_code=response.status_code, + detail=_extract_spotify_error_detail(response, fallback=detail), + method=method, + path=path, + retry_after=response.headers.get("Retry-After"), + ) + error = SpotifyAPIError(message, status_code=response.status_code, response_body=detail) + error.path = path + raise error + + def get_devices(self) -> Any: + return self.request("GET", "/me/player/devices") + + def transfer_playback(self, *, device_id: str, play: bool = False) -> Any: + return self.request("PUT", "/me/player", json_body={ + "device_ids": [device_id], + "play": play, + }) + + def get_playback_state(self, *, market: Optional[str] = None) -> Any: + return self.request( + "GET", + "/me/player", + params={"market": market}, + empty_response={ + 
"status_code": 204, + "empty": True, + "message": "No active Spotify playback session was found. Open Spotify on a device and start playback, or transfer playback to an available device.", + }, + ) + + def get_currently_playing(self, *, market: Optional[str] = None) -> Any: + return self.request( + "GET", + "/me/player/currently-playing", + params={"market": market}, + empty_response={ + "status_code": 204, + "empty": True, + "message": "Spotify is not currently playing anything. Start playback in Spotify and try again.", + }, + ) + + def start_playback( + self, + *, + device_id: Optional[str] = None, + context_uri: Optional[str] = None, + uris: Optional[list[str]] = None, + offset: Optional[Dict[str, Any]] = None, + position_ms: Optional[int] = None, + ) -> Any: + return self.request( + "PUT", + "/me/player/play", + params={"device_id": device_id}, + json_body={ + "context_uri": context_uri, + "uris": uris, + "offset": offset, + "position_ms": position_ms, + }, + ) + + def pause_playback(self, *, device_id: Optional[str] = None) -> Any: + return self.request("PUT", "/me/player/pause", params={"device_id": device_id}) + + def skip_next(self, *, device_id: Optional[str] = None) -> Any: + return self.request("POST", "/me/player/next", params={"device_id": device_id}) + + def skip_previous(self, *, device_id: Optional[str] = None) -> Any: + return self.request("POST", "/me/player/previous", params={"device_id": device_id}) + + def seek(self, *, position_ms: int, device_id: Optional[str] = None) -> Any: + return self.request("PUT", "/me/player/seek", params={ + "position_ms": position_ms, + "device_id": device_id, + }) + + def set_repeat(self, *, state: str, device_id: Optional[str] = None) -> Any: + return self.request("PUT", "/me/player/repeat", params={"state": state, "device_id": device_id}) + + def set_shuffle(self, *, state: bool, device_id: Optional[str] = None) -> Any: + return self.request("PUT", "/me/player/shuffle", params={"state": str(bool(state)).lower(), 
"device_id": device_id}) + + def set_volume(self, *, volume_percent: int, device_id: Optional[str] = None) -> Any: + return self.request("PUT", "/me/player/volume", params={ + "volume_percent": volume_percent, + "device_id": device_id, + }) + + def get_queue(self) -> Any: + return self.request("GET", "/me/player/queue") + + def add_to_queue(self, *, uri: str, device_id: Optional[str] = None) -> Any: + return self.request("POST", "/me/player/queue", params={"uri": uri, "device_id": device_id}) + + def search( + self, + *, + query: str, + search_types: list[str], + limit: int = 10, + offset: int = 0, + market: Optional[str] = None, + include_external: Optional[str] = None, + ) -> Any: + return self.request("GET", "/search", params={ + "q": query, + "type": ",".join(search_types), + "limit": limit, + "offset": offset, + "market": market, + "include_external": include_external, + }) + + def get_my_playlists(self, *, limit: int = 20, offset: int = 0) -> Any: + return self.request("GET", "/me/playlists", params={"limit": limit, "offset": offset}) + + def get_playlist(self, *, playlist_id: str, market: Optional[str] = None) -> Any: + return self.request("GET", f"/playlists/{playlist_id}", params={"market": market}) + + def create_playlist( + self, + *, + name: str, + public: bool = False, + collaborative: bool = False, + description: Optional[str] = None, + ) -> Any: + return self.request("POST", "/me/playlists", json_body={ + "name": name, + "public": public, + "collaborative": collaborative, + "description": description, + }) + + def add_playlist_items( + self, + *, + playlist_id: str, + uris: list[str], + position: Optional[int] = None, + ) -> Any: + return self.request("POST", f"/playlists/{playlist_id}/items", json_body={ + "uris": uris, + "position": position, + }) + + def remove_playlist_items( + self, + *, + playlist_id: str, + uris: list[str], + snapshot_id: Optional[str] = None, + ) -> Any: + return self.request("DELETE", f"/playlists/{playlist_id}/items", 
json_body={ + "items": [{"uri": uri} for uri in uris], + "snapshot_id": snapshot_id, + }) + + def update_playlist_details( + self, + *, + playlist_id: str, + name: Optional[str] = None, + public: Optional[bool] = None, + collaborative: Optional[bool] = None, + description: Optional[str] = None, + ) -> Any: + return self.request("PUT", f"/playlists/{playlist_id}", json_body={ + "name": name, + "public": public, + "collaborative": collaborative, + "description": description, + }) + + def get_album(self, *, album_id: str, market: Optional[str] = None) -> Any: + return self.request("GET", f"/albums/{album_id}", params={"market": market}) + + def get_album_tracks(self, *, album_id: str, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any: + return self.request("GET", f"/albums/{album_id}/tracks", params={ + "limit": limit, + "offset": offset, + "market": market, + }) + + def get_saved_tracks(self, *, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any: + return self.request("GET", "/me/tracks", params={"limit": limit, "offset": offset, "market": market}) + + def save_library_items(self, *, uris: list[str]) -> Any: + return self.request("PUT", "/me/library", params={"uris": ",".join(uris)}) + + def library_contains(self, *, uris: list[str]) -> Any: + return self.request("GET", "/me/library/contains", params={"uris": ",".join(uris)}) + + def get_saved_albums(self, *, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any: + return self.request("GET", "/me/albums", params={"limit": limit, "offset": offset, "market": market}) + + def remove_saved_tracks(self, *, track_ids: list[str]) -> Any: + uris = [f"spotify:track:{track_id}" for track_id in track_ids] + return self.request("DELETE", "/me/library", params={"uris": ",".join(uris)}) + + def remove_saved_albums(self, *, album_ids: list[str]) -> Any: + uris = [f"spotify:album:{album_id}" for album_id in album_ids] + return self.request("DELETE", "/me/library", 
params={"uris": ",".join(uris)}) + + def get_recently_played( + self, + *, + limit: int = 20, + after: Optional[int] = None, + before: Optional[int] = None, + ) -> Any: + return self.request("GET", "/me/player/recently-played", params={ + "limit": limit, + "after": after, + "before": before, + }) + + +def _extract_spotify_error_detail(response: httpx.Response, *, fallback: str) -> str: + detail = fallback + try: + payload = response.json() + if isinstance(payload, dict): + error_obj = payload.get("error") + if isinstance(error_obj, dict): + detail = str(error_obj.get("message") or detail) + elif isinstance(error_obj, str): + detail = error_obj + except Exception: + pass + return detail.strip() + + +def _friendly_spotify_error_message( + *, + status_code: int, + detail: str, + method: str, + path: str, + retry_after: Optional[str], +) -> str: + normalized_detail = detail.lower() + is_playback_path = path.startswith("/me/player") + + if status_code == 401: + return "Spotify authentication failed or expired. Run `hermes auth spotify` again." + + if status_code == 403: + if is_playback_path: + return ( + "Spotify rejected this playback request. Playback control usually requires a Spotify Premium account " + "and an active Spotify Connect device." + ) + if "scope" in normalized_detail or "permission" in normalized_detail: + return "Spotify rejected the request because the current auth scope is insufficient. Re-run `hermes auth spotify` to refresh permissions." + return "Spotify rejected the request. The account may not have permission for this action." + + if status_code == 404: + if is_playback_path: + return "Spotify could not find an active playback device or player session for this request." + return "Spotify resource not found." + + if status_code == 429: + message = "Spotify rate limit exceeded." + if retry_after: + message += f" Retry after {retry_after} seconds." 
+ return message + + if detail: + return detail + return f"Spotify API request failed with status {status_code}." + + +def _strip_none(payload: Optional[Dict[str, Any]]) -> Dict[str, Any]: + if not payload: + return {} + return {key: value for key, value in payload.items() if value is not None} + + +def normalize_spotify_id(value: str, expected_type: Optional[str] = None) -> str: + cleaned = (value or "").strip() + if not cleaned: + raise SpotifyError("Spotify id/uri/url is required.") + if cleaned.startswith("spotify:"): + parts = cleaned.split(":") + if len(parts) >= 3: + item_type = parts[1] + if expected_type and item_type != expected_type: + raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.") + return parts[2] + if "open.spotify.com" in cleaned: + parsed = urlparse(cleaned) + path_parts = [part for part in parsed.path.split("/") if part] + if len(path_parts) >= 2: + item_type, item_id = path_parts[0], path_parts[1] + if expected_type and item_type != expected_type: + raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.") + return item_id + return cleaned + + +def normalize_spotify_uri(value: str, expected_type: Optional[str] = None) -> str: + cleaned = (value or "").strip() + if not cleaned: + raise SpotifyError("Spotify URI/url/id is required.") + if cleaned.startswith("spotify:"): + if expected_type: + parts = cleaned.split(":") + if len(parts) >= 3 and parts[1] != expected_type: + raise SpotifyError(f"Expected a Spotify {expected_type}, got {parts[1]}.") + return cleaned + item_id = normalize_spotify_id(cleaned, expected_type) + if expected_type: + return f"spotify:{expected_type}:{item_id}" + return cleaned + + +def normalize_spotify_uris(values: Iterable[str], expected_type: Optional[str] = None) -> list[str]: + uris: list[str] = [] + for value in values: + uri = normalize_spotify_uri(str(value), expected_type) + if uri not in uris: + uris.append(uri) + if not uris: + raise SpotifyError("At least one Spotify 
item is required.") + return uris + + +def compact_json(data: Any) -> str: + return json.dumps(data, ensure_ascii=False) diff --git a/plugins/spotify/plugin.yaml b/plugins/spotify/plugin.yaml new file mode 100644 index 000000000..e9e1283e7 --- /dev/null +++ b/plugins/spotify/plugin.yaml @@ -0,0 +1,13 @@ +name: spotify +version: 1.0.0 +description: "Native Spotify integration — 7 tools (playback, devices, queue, search, playlists, albums, library) using Spotify Web API + PKCE OAuth. Auth via `hermes auth spotify`. Tools gate on `providers.spotify` in ~/.hermes/auth.json." +author: NousResearch +kind: backend +provides_tools: + - spotify_playback + - spotify_devices + - spotify_queue + - spotify_search + - spotify_playlists + - spotify_albums + - spotify_library diff --git a/plugins/spotify/tools.py b/plugins/spotify/tools.py new file mode 100644 index 000000000..f6022ff5a --- /dev/null +++ b/plugins/spotify/tools.py @@ -0,0 +1,454 @@ +"""Native Spotify tools for Hermes (registered via plugins/spotify).""" + +from __future__ import annotations + +from typing import Any, Dict, List + +from hermes_cli.auth import get_auth_status +from plugins.spotify.client import ( + SpotifyAPIError, + SpotifyAuthRequiredError, + SpotifyClient, + SpotifyError, + normalize_spotify_id, + normalize_spotify_uri, + normalize_spotify_uris, +) +from tools.registry import tool_error, tool_result + + +def _check_spotify_available() -> bool: + try: + return bool(get_auth_status("spotify").get("logged_in")) + except Exception: + return False + + +def _spotify_client() -> SpotifyClient: + return SpotifyClient() + + +def _spotify_tool_error(exc: Exception) -> str: + if isinstance(exc, (SpotifyError, SpotifyAuthRequiredError)): + return tool_error(str(exc)) + if isinstance(exc, SpotifyAPIError): + return tool_error(str(exc), status_code=exc.status_code) + return tool_error(f"Spotify tool failed: {type(exc).__name__}: {exc}") + + +def _coerce_limit(raw: Any, *, default: int = 20, minimum: int = 1, 
maximum: int = 50) -> int: + try: + value = int(raw) + except Exception: + value = default + return max(minimum, min(maximum, value)) + + +def _coerce_bool(raw: Any, default: bool = False) -> bool: + if isinstance(raw, bool): + return raw + if isinstance(raw, str): + cleaned = raw.strip().lower() + if cleaned in {"1", "true", "yes", "on"}: + return True + if cleaned in {"0", "false", "no", "off"}: + return False + return default + + +def _as_list(raw: Any) -> List[str]: + if raw is None: + return [] + if isinstance(raw, list): + return [str(item).strip() for item in raw if str(item).strip()] + return [str(raw).strip()] if str(raw).strip() else [] + + +def _describe_empty_playback(payload: Any, *, action: str) -> dict | None: + if not isinstance(payload, dict) or not payload.get("empty"): + return None + if action == "get_currently_playing": + return { + "success": True, + "action": action, + "is_playing": False, + "status_code": payload.get("status_code", 204), + "message": payload.get("message") or "Spotify is not currently playing anything.", + } + if action == "get_state": + return { + "success": True, + "action": action, + "has_active_device": False, + "status_code": payload.get("status_code", 204), + "message": payload.get("message") or "No active Spotify playback session was found.", + } + return None + + +def _handle_spotify_playback(args: dict, **kw) -> str: + action = str(args.get("action") or "get_state").strip().lower() + client = _spotify_client() + try: + if action == "get_state": + payload = client.get_playback_state(market=args.get("market")) + empty_result = _describe_empty_playback(payload, action=action) + return tool_result(empty_result or payload) + if action == "get_currently_playing": + payload = client.get_currently_playing(market=args.get("market")) + empty_result = _describe_empty_playback(payload, action=action) + return tool_result(empty_result or payload) + if action == "play": + offset = args.get("offset") + if isinstance(offset, dict): 
+ payload_offset = {k: v for k, v in offset.items() if v is not None} + else: + payload_offset = None + uris = normalize_spotify_uris(_as_list(args.get("uris")), "track") if args.get("uris") else None + context_uri = None + if args.get("context_uri"): + raw_context = str(args.get("context_uri")) + context_type = None + if raw_context.startswith("spotify:album:") or "/album/" in raw_context: + context_type = "album" + elif raw_context.startswith("spotify:playlist:") or "/playlist/" in raw_context: + context_type = "playlist" + elif raw_context.startswith("spotify:artist:") or "/artist/" in raw_context: + context_type = "artist" + context_uri = normalize_spotify_uri(raw_context, context_type) + result = client.start_playback( + device_id=args.get("device_id"), + context_uri=context_uri, + uris=uris, + offset=payload_offset, + position_ms=args.get("position_ms"), + ) + return tool_result({"success": True, "action": action, "result": result}) + if action == "pause": + result = client.pause_playback(device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "next": + result = client.skip_next(device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "previous": + result = client.skip_previous(device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "seek": + if args.get("position_ms") is None: + return tool_error("position_ms is required for action='seek'") + result = client.seek(position_ms=int(args["position_ms"]), device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "set_repeat": + state = str(args.get("state") or "").strip().lower() + if state not in {"track", "context", "off"}: + return tool_error("state must be one of: track, context, off") + result = client.set_repeat(state=state, 
device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "set_shuffle": + result = client.set_shuffle(state=_coerce_bool(args.get("state")), device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "set_volume": + if args.get("volume_percent") is None: + return tool_error("volume_percent is required for action='set_volume'") + result = client.set_volume(volume_percent=max(0, min(100, int(args["volume_percent"]))), device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "result": result}) + if action == "recently_played": + after = args.get("after") + before = args.get("before") + if after and before: + return tool_error("Provide only one of 'after' or 'before'") + return tool_result(client.get_recently_played( + limit=_coerce_limit(args.get("limit"), default=20), + after=int(after) if after is not None else None, + before=int(before) if before is not None else None, + )) + return tool_error(f"Unknown spotify_playback action: {action}") + except Exception as exc: + return _spotify_tool_error(exc) + + +def _handle_spotify_devices(args: dict, **kw) -> str: + action = str(args.get("action") or "list").strip().lower() + client = _spotify_client() + try: + if action == "list": + return tool_result(client.get_devices()) + if action == "transfer": + device_id = str(args.get("device_id") or "").strip() + if not device_id: + return tool_error("device_id is required for action='transfer'") + result = client.transfer_playback(device_id=device_id, play=_coerce_bool(args.get("play"))) + return tool_result({"success": True, "action": action, "result": result}) + return tool_error(f"Unknown spotify_devices action: {action}") + except Exception as exc: + return _spotify_tool_error(exc) + + +def _handle_spotify_queue(args: dict, **kw) -> str: + action = str(args.get("action") or "get").strip().lower() + client = 
_spotify_client() + try: + if action == "get": + return tool_result(client.get_queue()) + if action == "add": + uri = normalize_spotify_uri(str(args.get("uri") or ""), None) + result = client.add_to_queue(uri=uri, device_id=args.get("device_id")) + return tool_result({"success": True, "action": action, "uri": uri, "result": result}) + return tool_error(f"Unknown spotify_queue action: {action}") + except Exception as exc: + return _spotify_tool_error(exc) + + +def _handle_spotify_search(args: dict, **kw) -> str: + client = _spotify_client() + query = str(args.get("query") or "").strip() + if not query: + return tool_error("query is required") + raw_types = _as_list(args.get("types") or args.get("type") or ["track"]) + search_types = [value.lower() for value in raw_types if value.lower() in {"album", "artist", "playlist", "track", "show", "episode", "audiobook"}] + if not search_types: + return tool_error("types must contain one or more of: album, artist, playlist, track, show, episode, audiobook") + try: + return tool_result(client.search( + query=query, + search_types=search_types, + limit=_coerce_limit(args.get("limit"), default=10), + offset=max(0, int(args.get("offset") or 0)), + market=args.get("market"), + include_external=args.get("include_external"), + )) + except Exception as exc: + return _spotify_tool_error(exc) + + +def _handle_spotify_playlists(args: dict, **kw) -> str: + action = str(args.get("action") or "list").strip().lower() + client = _spotify_client() + try: + if action == "list": + return tool_result(client.get_my_playlists( + limit=_coerce_limit(args.get("limit"), default=20), + offset=max(0, int(args.get("offset") or 0)), + )) + if action == "get": + playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist") + return tool_result(client.get_playlist(playlist_id=playlist_id, market=args.get("market"))) + if action == "create": + name = str(args.get("name") or "").strip() + if not name: + return tool_error("name is 
required for action='create'") + return tool_result(client.create_playlist( + name=name, + public=_coerce_bool(args.get("public")), + collaborative=_coerce_bool(args.get("collaborative")), + description=args.get("description"), + )) + if action == "add_items": + playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist") + uris = normalize_spotify_uris(_as_list(args.get("uris"))) + return tool_result(client.add_playlist_items( + playlist_id=playlist_id, + uris=uris, + position=args.get("position"), + )) + if action == "remove_items": + playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist") + uris = normalize_spotify_uris(_as_list(args.get("uris"))) + return tool_result(client.remove_playlist_items( + playlist_id=playlist_id, + uris=uris, + snapshot_id=args.get("snapshot_id"), + )) + if action == "update_details": + playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist") + return tool_result(client.update_playlist_details( + playlist_id=playlist_id, + name=args.get("name"), + public=args.get("public"), + collaborative=args.get("collaborative"), + description=args.get("description"), + )) + return tool_error(f"Unknown spotify_playlists action: {action}") + except Exception as exc: + return _spotify_tool_error(exc) + + +def _handle_spotify_albums(args: dict, **kw) -> str: + action = str(args.get("action") or "get").strip().lower() + client = _spotify_client() + try: + album_id = normalize_spotify_id(str(args.get("album_id") or args.get("id") or ""), "album") + if action == "get": + return tool_result(client.get_album(album_id=album_id, market=args.get("market"))) + if action == "tracks": + return tool_result(client.get_album_tracks( + album_id=album_id, + limit=_coerce_limit(args.get("limit"), default=20), + offset=max(0, int(args.get("offset") or 0)), + market=args.get("market"), + )) + return tool_error(f"Unknown spotify_albums action: {action}") + except Exception as exc: + return 
_spotify_tool_error(exc) + + +def _handle_spotify_library(args: dict, **kw) -> str: + """Unified handler for saved tracks + saved albums (formerly two tools).""" + kind = str(args.get("kind") or "").strip().lower() + if kind not in {"tracks", "albums"}: + return tool_error("kind must be one of: tracks, albums") + action = str(args.get("action") or "list").strip().lower() + item_type = "track" if kind == "tracks" else "album" + client = _spotify_client() + try: + if action == "list": + limit = _coerce_limit(args.get("limit"), default=20) + offset = max(0, int(args.get("offset") or 0)) + market = args.get("market") + if kind == "tracks": + return tool_result(client.get_saved_tracks(limit=limit, offset=offset, market=market)) + return tool_result(client.get_saved_albums(limit=limit, offset=offset, market=market)) + if action == "save": + uris = normalize_spotify_uris(_as_list(args.get("uris") or args.get("items")), item_type) + return tool_result(client.save_library_items(uris=uris)) + if action == "remove": + ids = [normalize_spotify_id(item, item_type) for item in _as_list(args.get("ids") or args.get("items"))] + if not ids: + return tool_error("ids/items is required for action='remove'") + if kind == "tracks": + return tool_result(client.remove_saved_tracks(track_ids=ids)) + return tool_result(client.remove_saved_albums(album_ids=ids)) + return tool_error(f"Unknown spotify_library action: {action}") + except Exception as exc: + return _spotify_tool_error(exc) + + +COMMON_STRING = {"type": "string"} + +SPOTIFY_PLAYBACK_SCHEMA = { + "name": "spotify_playback", + "description": "Control Spotify playback, inspect the active playback state, or fetch recently played tracks.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["get_state", "get_currently_playing", "play", "pause", "next", "previous", "seek", "set_repeat", "set_shuffle", "set_volume", "recently_played"]}, + "device_id": COMMON_STRING, + "market": COMMON_STRING, 
+ "context_uri": COMMON_STRING, + "uris": {"type": "array", "items": COMMON_STRING}, + "offset": {"type": "object"}, + "position_ms": {"type": "integer"}, + "state": {"description": "For set_repeat use track/context/off. For set_shuffle use boolean-like true/false.", "oneOf": [{"type": "string"}, {"type": "boolean"}]}, + "volume_percent": {"type": "integer"}, + "limit": {"type": "integer", "description": "For recently_played: number of tracks (max 50)"}, + "after": {"type": "integer", "description": "For recently_played: Unix ms cursor (after this timestamp)"}, + "before": {"type": "integer", "description": "For recently_played: Unix ms cursor (before this timestamp)"}, + }, + "required": ["action"], + }, +} + +SPOTIFY_DEVICES_SCHEMA = { + "name": "spotify_devices", + "description": "List Spotify Connect devices or transfer playback to a different device.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["list", "transfer"]}, + "device_id": COMMON_STRING, + "play": {"type": "boolean"}, + }, + "required": ["action"], + }, +} + +SPOTIFY_QUEUE_SCHEMA = { + "name": "spotify_queue", + "description": "Inspect the user's Spotify queue or add an item to it.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["get", "add"]}, + "uri": COMMON_STRING, + "device_id": COMMON_STRING, + }, + "required": ["action"], + }, +} + +SPOTIFY_SEARCH_SCHEMA = { + "name": "spotify_search", + "description": "Search the Spotify catalog for tracks, albums, artists, playlists, shows, or episodes.", + "parameters": { + "type": "object", + "properties": { + "query": COMMON_STRING, + "types": {"type": "array", "items": COMMON_STRING}, + "type": COMMON_STRING, + "limit": {"type": "integer"}, + "offset": {"type": "integer"}, + "market": COMMON_STRING, + "include_external": COMMON_STRING, + }, + "required": ["query"], + }, +} + +SPOTIFY_PLAYLISTS_SCHEMA = { + "name": "spotify_playlists", + "description": 
"List, inspect, create, update, and modify Spotify playlists.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["list", "get", "create", "add_items", "remove_items", "update_details"]}, + "playlist_id": COMMON_STRING, + "market": COMMON_STRING, + "limit": {"type": "integer"}, + "offset": {"type": "integer"}, + "name": COMMON_STRING, + "description": COMMON_STRING, + "public": {"type": "boolean"}, + "collaborative": {"type": "boolean"}, + "uris": {"type": "array", "items": COMMON_STRING}, + "position": {"type": "integer"}, + "snapshot_id": COMMON_STRING, + }, + "required": ["action"], + }, +} + +SPOTIFY_ALBUMS_SCHEMA = { + "name": "spotify_albums", + "description": "Fetch Spotify album metadata or album tracks.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["get", "tracks"]}, + "album_id": COMMON_STRING, + "id": COMMON_STRING, + "market": COMMON_STRING, + "limit": {"type": "integer"}, + "offset": {"type": "integer"}, + }, + "required": ["action"], + }, +} + +SPOTIFY_LIBRARY_SCHEMA = { + "name": "spotify_library", + "description": "List, save, or remove the user's saved Spotify tracks or albums. 
Use `kind` to select which.", + "parameters": { + "type": "object", + "properties": { + "kind": {"type": "string", "enum": ["tracks", "albums"], "description": "Which library to operate on"}, + "action": {"type": "string", "enum": ["list", "save", "remove"]}, + "limit": {"type": "integer"}, + "offset": {"type": "integer"}, + "market": COMMON_STRING, + "uris": {"type": "array", "items": COMMON_STRING}, + "ids": {"type": "array", "items": COMMON_STRING}, + "items": {"type": "array", "items": COMMON_STRING}, + }, + "required": ["kind", "action"], + }, +} diff --git a/pyproject.toml b/pyproject.toml index 2b76537fc..4b7e8816a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,6 +78,16 @@ termux = [ ] dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"] feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"] +google = [ + # Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts, + # Sheets, Docs). Declared here so packagers (Nix, Homebrew) ship them with + # the [all] extra and users don't hit runtime `pip install` paths that fail + # in environments without pip (e.g. Nix-managed Python). + "google-api-python-client>=2.100,<3", + "google-auth-oauthlib>=1.0,<2", + "google-auth-httplib2>=0.2,<1", +] +# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean. 
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"] rl = [ "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30", @@ -109,6 +119,7 @@ all = [ "hermes-agent[voice]", "hermes-agent[dingtalk]", "hermes-agent[feishu]", + "hermes-agent[google]", "hermes-agent[mistral]", "hermes-agent[bedrock]", "hermes-agent[web]", diff --git a/run_agent.py b/run_agent.py index affcbbd72..f7a929118 100644 --- a/run_agent.py +++ b/run_agent.py @@ -31,11 +31,13 @@ logger = logging.getLogger(__name__) import os import random import re +import ssl import sys import tempfile import time import threading from types import SimpleNamespace +import urllib.request import uuid from typing import List, Dict, Any, Optional from openai import OpenAI @@ -181,6 +183,25 @@ def _get_proxy_from_env() -> Optional[str]: return None +def _get_proxy_for_base_url(base_url: Optional[str]) -> Optional[str]: + """Return an env-configured proxy unless NO_PROXY excludes this base URL.""" + proxy = _get_proxy_from_env() + if not proxy or not base_url: + return proxy + + host = base_url_hostname(base_url) + if not host: + return proxy + + try: + if urllib.request.proxy_bypass_environment(host): + return None + except Exception: + pass + + return proxy + + def _install_safe_stdio() -> None: """Wrap stdout/stderr so best-effort console output cannot crash the agent.""" for stream_name in ("stdout", "stderr"): @@ -481,6 +502,48 @@ def _sanitize_messages_surrogates(messages: list) -> bool: return found +def _escape_invalid_chars_in_json_strings(raw: str) -> str: + """Escape unescaped control chars inside JSON string values. + + Walks the raw JSON character-by-character, tracking whether we are + inside a double-quoted string. Inside strings, replaces literal + control characters (0x00-0x1F) that aren't already part of an escape + sequence with their ``\\uXXXX`` equivalents. Pass-through for everything + else. 
+ + Ported from #12093 — complements the other repair passes in + ``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is + not enough (e.g. llama.cpp backends that emit literal apostrophes or + tabs alongside other malformations). + """ + out: list[str] = [] + in_string = False + i = 0 + n = len(raw) + while i < n: + ch = raw[i] + if in_string: + if ch == "\\" and i + 1 < n: + # Already-escaped char — pass through as-is + out.append(ch) + out.append(raw[i + 1]) + i += 2 + continue + if ch == '"': + in_string = False + out.append(ch) + elif ord(ch) < 0x20: + out.append(f"\\u{ord(ch):04x}") + else: + out.append(ch) + else: + if ch == '"': + in_string = True + out.append(ch) + i += 1 + return "".join(out) + + def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: """Attempt to repair malformed tool_call argument JSON. @@ -502,6 +565,23 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name) return "{}" + # Repair pass 0: llama.cpp backends sometimes emit literal control + # characters (tabs, newlines) inside JSON string values. json.loads + # with strict=False accepts these and lets us re-serialise the + # result into wire-valid JSON without any string surgery. This is + # the most common local-model repair case (#12068). + try: + parsed = json.loads(raw_stripped, strict=False) + reserialised = json.dumps(parsed, separators=(",", ":")) + if reserialised != raw_stripped: + logger.warning( + "Repaired unescaped control chars in tool_call arguments for %s", + tool_name, + ) + return reserialised + except (json.JSONDecodeError, TypeError, ValueError): + pass + # Attempt common JSON repairs fixed = raw_stripped # 1. 
Strip trailing commas before } or ] @@ -536,6 +616,21 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str: except json.JSONDecodeError: pass + # Repair pass 4: escape unescaped control chars inside JSON strings, + # then retry. Catches cases where strict=False alone fails because + # other malformations are present too. + try: + escaped = _escape_invalid_chars_in_json_strings(fixed) + if escaped != fixed: + json.loads(escaped) + logger.warning( + "Repaired control-char-laced tool_call arguments for %s: %s → %s", + tool_name, raw_stripped[:80], escaped[:80], + ) + return escaped + except (json.JSONDecodeError, TypeError, ValueError): + pass + # Last resort: replace with empty object so the API request doesn't # crash the entire session. logger.warning( @@ -664,6 +759,40 @@ def _sanitize_structure_non_ascii(payload: Any) -> bool: _QWEN_CODE_VERSION = "0.14.1" +def _routermint_headers() -> dict: + """Return the User-Agent RouterMint needs to avoid Cloudflare 1010 blocks.""" + from hermes_cli import __version__ as _HERMES_VERSION + + return { + "User-Agent": f"HermesAgent/{_HERMES_VERSION}", + } + + +def _pool_may_recover_from_rate_limit(pool) -> bool: + """Decide whether to wait for credential-pool rotation instead of falling back. + + The existing pool-rotation path requires the pool to (1) exist and (2) have + at least one entry not currently in exhaustion cooldown. But rotation is + only meaningful when the pool has more than one entry. + + With a single-credential pool (common for Gemini OAuth, Vertex service + accounts, and any "one personal key" configuration), the primary entry + just 429'd and there is nothing to rotate to. Waiting for the pool + cooldown to expire means retrying against the same exhausted quota — the + daily-quota 429 will recur immediately, and the retry budget is burned. + + In that case we must fall back to the configured ``fallback_model`` + instead. Returns True only when rotation has somewhere to go. 
+ + See issue #11314. + """ + if pool is None: + return False + if not pool.has_available(): + return False + return len(pool.entries()) > 1 + + def _qwen_portal_headers() -> dict: """Return default HTTP headers required by Qwen Portal API.""" import platform as _plat @@ -685,6 +814,11 @@ class AIAgent: for AI models that support function calling. """ + _TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER = ( + "[hermes-agent: tool call arguments were corrupted in this session and " + "have been dropped to keep the conversation alive. See issue #15236.]" + ) + @property def base_url(self) -> str: return self._base_url @@ -1007,8 +1141,21 @@ class AIAgent: self._use_prompt_caching, self._use_native_cache_layout = ( self._anthropic_prompt_cache_policy() ) - self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) - + # Anthropic supports "5m" (default) and "1h" cache TTL tiers. Read from + # config.yaml under prompt_caching.cache_ttl; unknown values keep "5m". + # 1h tier costs 2x on write vs 1.25x for 5m, but amortizes across long + # sessions with >5-minute pauses between turns (#14971). + self._cache_ttl = "5m" + try: + from hermes_cli.config import load_config as _load_pc_cfg + + _pc_cfg = _load_pc_cfg().get("prompt_caching", {}) or {} + _ttl = _pc_cfg.get("cache_ttl", "5m") + if _ttl in ("5m", "1h"): + self._cache_ttl = _ttl + except Exception: + pass + # Iteration budget: the LLM is only notified when it actually exhausts # the iteration budget (api_call_count >= max_iterations). 
At that # point we inject ONE message, allow one final API call, and if the @@ -1180,6 +1327,8 @@ class AIAgent: "X-OpenRouter-Title": "Hermes Agent", "X-OpenRouter-Categories": "productivity,cli-agent", } + elif base_url_host_matches(effective_base, "api.routermint.com"): + client_kwargs["default_headers"] = _routermint_headers() elif base_url_host_matches(effective_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -1367,6 +1516,8 @@ class AIAgent: # Track conversation messages for session logging self._session_messages: List[Dict[str, Any]] = [] + self._memory_write_origin = "assistant_tool" + self._memory_write_context = "foreground" # Cached system prompt -- built once per session, only rebuilt on compression self._cached_system_prompt: Optional[str] = None @@ -2049,12 +2200,14 @@ class AIAgent: # ("switched to anthropic, tui keeps trying openrouter"). old_norm = (old_provider or "").strip().lower() new_norm = (new_provider or "").strip().lower() + fallback_chain = list(getattr(self, "_fallback_chain", []) or []) if old_norm and new_norm and old_norm != new_norm: - self._fallback_chain = [ - entry for entry in self._fallback_chain + fallback_chain = [ + entry for entry in fallback_chain if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm} ] - self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None + self._fallback_chain = fallback_chain + self._fallback_model = fallback_chain[0] if fallback_chain else None logging.info( "Model switched in-place: %s (%s) -> %s (%s)", @@ -2159,6 +2312,34 @@ class AIAgent: except Exception: logger.debug("status_callback error in _emit_status", exc_info=True) + def _emit_warning(self, message: str) -> None: + """Emit a user-visible warning through the same status plumbing. 
+ + Unlike debug logs, these warnings are meant for degraded side paths + such as auxiliary compression or memory flushes where the main turn can + continue but the user needs to know something important failed. + """ + try: + self._vprint(f"{self.log_prefix}{message}", force=True) + except Exception: + pass + if self.status_callback: + try: + self.status_callback("warn", message) + except Exception: + logger.debug("status_callback error in _emit_warning", exc_info=True) + + def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None: + """Surface a compact warning for failed auxiliary work.""" + try: + detail = self._summarize_api_error(exc) + except Exception: + detail = str(exc) + detail = (detail or exc.__class__.__name__).strip() + if len(detail) > 220: + detail = detail[:217].rstrip() + "..." + self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}") + def _current_main_runtime(self) -> Dict[str, str]: """Return the live main runtime for session-scoped auxiliary routing.""" return { @@ -2876,6 +3057,69 @@ class AIAgent: "If nothing stands out, just say 'Nothing to save.' and stop." ) + @staticmethod + def _summarize_background_review_actions( + review_messages: List[Dict], + prior_snapshot: List[Dict], + ) -> List[str]: + """Build the human-facing action summary for a background review pass. + + Walks the review agent's session messages and collects "successful tool + action" descriptions to surface to the user (e.g. "Memory updated"). + Tool messages already present in ``prior_snapshot`` are skipped so we + don't re-surface stale results from the prior conversation that the + review agent inherited via ``conversation_history`` (issue #14944). + + Matching is by ``tool_call_id`` when available, with a content-equality + fallback for tool messages that lack one. 
+ """ + existing_tool_call_ids = set() + existing_tool_contents = set() + for prior in prior_snapshot or []: + if not isinstance(prior, dict) or prior.get("role") != "tool": + continue + tcid = prior.get("tool_call_id") + if tcid: + existing_tool_call_ids.add(tcid) + else: + content = prior.get("content") + if isinstance(content, str): + existing_tool_contents.add(content) + + actions: List[str] = [] + for msg in review_messages or []: + if not isinstance(msg, dict) or msg.get("role") != "tool": + continue + tcid = msg.get("tool_call_id") + if tcid and tcid in existing_tool_call_ids: + continue + if not tcid: + content_str = msg.get("content") + if isinstance(content_str, str) and content_str in existing_tool_contents: + continue + try: + data = json.loads(msg.get("content", "{}")) + except (json.JSONDecodeError, TypeError): + continue + if not isinstance(data, dict) or not data.get("success"): + continue + message = data.get("message", "") + target = data.get("target", "") + if "created" in message.lower(): + actions.append(message) + elif "updated" in message.lower(): + actions.append(message) + elif "added" in message.lower() or (target and "add" in message.lower()): + label = "Memory" if target == "memory" else "User profile" if target == "user" else target + actions.append(f"{label} updated") + elif "Entry added" in message: + label = "Memory" if target == "memory" else "User profile" if target == "user" else target + actions.append(f"{label} updated") + elif "removed" in message.lower() or "replaced" in message.lower(): + label = "Memory" if target == "memory" else "User profile" if target == "user" else target + actions.append(f"{label} updated") + return actions + def _spawn_background_review( self, messages_snapshot: List[Dict], @@ -2912,7 +3156,10 @@ class AIAgent: quiet_mode=True, platform=self.platform, provider=self.provider, + parent_session_id=self.session_id, ) + review_agent._memory_write_origin = "background_review" + 
review_agent._memory_write_context = "background_review" review_agent._memory_store = self._memory_store review_agent._memory_enabled = self._memory_enabled review_agent._user_profile_enabled = self._user_profile_enabled @@ -2925,32 +3172,15 @@ class AIAgent: ) # Scan the review agent's messages for successful tool actions - # and surface a compact summary to the user. - actions = [] - for msg in getattr(review_agent, "_session_messages", []): - if not isinstance(msg, dict) or msg.get("role") != "tool": - continue - try: - data = json.loads(msg.get("content", "{}")) - except (json.JSONDecodeError, TypeError): - continue - if not data.get("success"): - continue - message = data.get("message", "") - target = data.get("target", "") - if "created" in message.lower(): - actions.append(message) - elif "updated" in message.lower(): - actions.append(message) - elif "added" in message.lower() or (target and "add" in message.lower()): - label = "Memory" if target == "memory" else "User profile" if target == "user" else target - actions.append(f"{label} updated") - elif "Entry added" in message: - label = "Memory" if target == "memory" else "User profile" if target == "user" else target - actions.append(f"{label} updated") - elif "removed" in message.lower() or "replaced" in message.lower(): - label = "Memory" if target == "memory" else "User profile" if target == "user" else target - actions.append(f"{label} updated") + # and surface a compact summary to the user. Tool messages + # already present in messages_snapshot must be skipped, since + # the review agent inherits that history and would otherwise + # re-surface stale "created"/"updated" messages from the prior + # conversation as if they just happened (issue #14944). 
+ actions = self._summarize_background_review_actions( + getattr(review_agent, "_session_messages", []), + messages_snapshot, + ) if actions: summary = " · ".join(dict.fromkeys(actions)) @@ -2963,7 +3193,8 @@ class AIAgent: pass except Exception as e: - logger.debug("Background memory/skill review failed: %s", e) + logger.warning("Background memory/skill review failed: %s", e) + self._emit_auxiliary_failure("background review", e) finally: # Close all resources (httpx client, subprocesses, etc.) so # GC doesn't try to clean them up on a dead asyncio event @@ -2977,6 +3208,32 @@ class AIAgent: t = threading.Thread(target=_run_review, daemon=True, name="bg-review") t.start() + def _build_memory_write_metadata( + self, + *, + write_origin: Optional[str] = None, + execution_context: Optional[str] = None, + task_id: Optional[str] = None, + tool_call_id: Optional[str] = None, + ) -> Dict[str, Any]: + """Build provenance metadata for external memory-provider mirrors.""" + metadata: Dict[str, Any] = { + "write_origin": write_origin or getattr(self, "_memory_write_origin", "assistant_tool"), + "execution_context": ( + execution_context + or getattr(self, "_memory_write_context", "foreground") + ), + "session_id": self.session_id or "", + "parent_session_id": self._parent_session_id or "", + "platform": self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"), + "tool_name": "memory", + } + if task_id: + metadata["task_id"] = task_id + if tool_call_id: + metadata["tool_call_id"] = tool_call_id + return {k: v for k, v in metadata.items() if v not in (None, "")} + def _apply_persist_user_message_override(self, messages: List[Dict]) -> None: """Rewrite the current-turn user message before persistence/return. @@ -3905,6 +4162,49 @@ class AIAgent: except Exception: pass + def _sync_external_memory_for_turn( + self, + *, + original_user_message: Any, + final_response: Any, + interrupted: bool, + ) -> None: + """Mirror a completed turn into external memory providers. 
+ + Called at the end of ``run_conversation`` with the cleaned user + message (``original_user_message``) and the finalised assistant + response. The external memory backend gets both ``sync_all`` (to + persist the exchange) and ``queue_prefetch_all`` (to start + warming context for the next turn) in one shot. + + Uses ``original_user_message`` rather than ``user_message`` + because the latter may carry injected skill content that bloats + or breaks provider queries. + + Interrupted turns are skipped entirely (#15218). A partial + assistant output, an aborted tool chain, or a mid-stream reset + is not durable conversational truth — mirroring it into an + external memory backend pollutes future recall with state the + user never saw completed. The prefetch is gated on the same + flag: the user's next message is almost certainly a retry of + the same intent, and a prefetch keyed on the interrupted turn + would fire against stale context. + + Normal completed turns still sync as before. The whole body is + wrapped in ``try/except Exception`` because external memory + providers are strictly best-effort — a misconfigured or offline + backend must not block the user from seeing their response. + """ + if interrupted: + return + if not (self._memory_manager and final_response and original_user_message): + return + try: + self._memory_manager.sync_all(original_user_message, final_response) + self._memory_manager.queue_prefetch_all(original_user_message) + except Exception: + pass + def release_clients(self) -> None: """Release LLM client resources WITHOUT tearing down session tool state. @@ -4356,25 +4656,69 @@ class AIAgent: def _repair_tool_call(self, tool_name: str) -> str | None: """Attempt to repair a mismatched tool name before aborting. - 1. Try lowercase - 2. Try normalized (lowercase + hyphens/spaces -> underscores) - 3. 
Try fuzzy match (difflib, cutoff=0.7) + Models sometimes emit variants of a tool name that differ only + in casing, separators, or class-like suffixes. Normalize + aggressively before falling back to fuzzy match: + + 1. Lowercase direct match. + 2. Lowercase + hyphens/spaces -> underscores. + 3. CamelCase -> snake_case (TodoTool -> todo_tool). + 4. Strip trailing ``_tool`` / ``-tool`` / ``tool`` suffix that + Claude-style models sometimes tack on (TodoTool_tool -> + TodoTool -> Todo -> todo). Applied twice so double-tacked + suffixes like ``TodoTool_tool`` reduce all the way. + 5. Fuzzy match (difflib, cutoff=0.7). + + See #14784 for the original reports (TodoTool_tool, Patch_tool, + BrowserClick_tool were all returning "Unknown tool" before). Returns the repaired name if found in valid_tool_names, else None. """ + import re from difflib import get_close_matches - # 1. Lowercase + if not tool_name: + return None + + def _norm(s: str) -> str: + return s.lower().replace("-", "_").replace(" ", "_") + + def _camel_snake(s: str) -> str: + return re.sub(r"(? str | None: + lc = s.lower() + for suffix in ("_tool", "-tool", "tool"): + if lc.endswith(suffix): + return s[: -len(suffix)].rstrip("_-") + return None + + # Cheap fast-paths first — these cover the common case. lowered = tool_name.lower() if lowered in self.valid_tool_names: return lowered - - # 2. Normalize - normalized = lowered.replace("-", "_").replace(" ", "_") + normalized = _norm(tool_name) if normalized in self.valid_tool_names: return normalized - # 3. Fuzzy match + # Build the full candidate set for class-like emissions. + cands: set[str] = {tool_name, lowered, normalized, _camel_snake(tool_name)} + # Strip trailing tool-suffix up to twice — TodoTool_tool needs it. 
+ for _ in range(2): + extra: set[str] = set() + for c in cands: + stripped = _strip_tool_suffix(c) + if stripped: + extra.add(stripped) + extra.add(_norm(stripped)) + extra.add(_camel_snake(stripped)) + cands |= extra + + for c in cands: + if c and c in self.valid_tool_names: + return c + + # Fuzzy match as last resort. matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7) if matches: return matches[0] @@ -4466,7 +4810,7 @@ class AIAgent: return False @staticmethod - def _build_keepalive_http_client() -> Any: + def _build_keepalive_http_client(base_url: str = "") -> Any: try: import httpx as _httpx import socket as _socket @@ -4480,8 +4824,9 @@ class AIAgent: _sock_opts.append((_socket.IPPROTO_TCP, _socket.TCP_KEEPALIVE, 30)) # When a custom transport is provided, httpx won't auto-read proxy # from env vars (allow_env_proxies = trust_env and transport is None). - # Explicitly read proxy settings to ensure HTTP_PROXY/HTTPS_PROXY work. - _proxy = _get_proxy_from_env() + # Explicitly read proxy settings while still honoring NO_PROXY for + # loopback / local endpoints such as a locally hosted sub2api. + _proxy = _get_proxy_for_base_url(base_url) return _httpx.Client( transport=_httpx.HTTPTransport(socket_options=_sock_opts), proxy=_proxy, @@ -4539,7 +4884,7 @@ class AIAgent: if k in {"api_key", "base_url", "default_headers", "timeout", "http_client"} } if "http_client" not in safe_kwargs: - keepalive_http = self._build_keepalive_http_client() + keepalive_http = self._build_keepalive_http_client(base_url) if keepalive_http is not None: safe_kwargs["http_client"] = keepalive_http client = GeminiNativeClient(**safe_kwargs) @@ -4568,7 +4913,7 @@ class AIAgent: # Tests in ``tests/run_agent/test_create_openai_client_reuse.py`` and # ``tests/run_agent/test_sequential_chats_live.py`` pin this invariant. 
if "http_client" not in client_kwargs: - keepalive_http = self._build_keepalive_http_client() + keepalive_http = self._build_keepalive_http_client(client_kwargs.get("base_url", "")) if keepalive_http is not None: client_kwargs["http_client"] = keepalive_http client = OpenAI(**client_kwargs) @@ -5044,6 +5389,41 @@ class AIAgent: return True + def _try_refresh_copilot_client_credentials(self) -> bool: + """Refresh Copilot credentials and rebuild the shared OpenAI client. + + Copilot tokens may remain the same string across refreshes (`gh auth token` + returns a stable OAuth token in many setups). We still rebuild the client + on 401 so retries recover from stale auth/client state without requiring + a session restart. + """ + if self.provider != "copilot": + return False + + try: + from hermes_cli.copilot_auth import resolve_copilot_token + + new_token, token_source = resolve_copilot_token() + except Exception as exc: + logger.debug("Copilot credential refresh failed: %s", exc) + return False + + if not isinstance(new_token, str) or not new_token.strip(): + return False + + new_token = new_token.strip() + + self.api_key = new_token + self._client_kwargs["api_key"] = self.api_key + self._client_kwargs["base_url"] = self.base_url + self._apply_client_headers_for_base_url(str(self.base_url or "")) + + if not self._replace_primary_openai_client(reason="copilot_credential_refresh"): + return False + + logger.info("Copilot credentials refreshed from %s", token_source) + return True + def _try_refresh_anthropic_client_credentials(self) -> bool: if self.api_mode != "anthropic_messages" or not hasattr(self, "_anthropic_api_key"): return False @@ -5097,6 +5477,8 @@ class AIAgent: self._client_kwargs["default_headers"] = dict(_OR_HEADERS) elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"): self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS) + elif base_url_host_matches(base_url, "api.routermint.com"): + self._client_kwargs["default_headers"] = 
_routermint_headers() elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -5175,7 +5557,7 @@ class AIAgent: effective_reason = FailoverReason.billing elif status_code == 429: effective_reason = FailoverReason.rate_limit - elif status_code == 401: + elif status_code in (401, 403): effective_reason = FailoverReason.auth if effective_reason == FailoverReason.billing: @@ -5232,6 +5614,26 @@ class AIAgent: self._try_refresh_anthropic_client_credentials() return self._anthropic_client.messages.create(**api_kwargs) + def _rebuild_anthropic_client(self) -> None: + """Rebuild the Anthropic client after an interrupt or stale call. + + Handles both direct Anthropic and Bedrock-hosted Anthropic models + correctly — rebuilding with the Bedrock SDK when provider is bedrock, + rather than always falling back to build_anthropic_client() which + requires a direct Anthropic API key. + """ + if getattr(self, "provider", None) == "bedrock": + from agent.anthropic_adapter import build_anthropic_bedrock_client + region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1" + self._anthropic_client = build_anthropic_bedrock_client(region) + else: + from agent.anthropic_adapter import build_anthropic_client + self._anthropic_client = build_anthropic_client( + self._anthropic_api_key, + getattr(self, "_anthropic_base_url", None), + timeout=get_provider_request_timeout(self.provider, self.model), + ) + def _interruptible_api_call(self, api_kwargs: dict): """ Run the API call in a background thread so the main conversation loop @@ -5267,12 +5669,21 @@ class AIAgent: # bedrock responses like chat_completions responses. 
from agent.bedrock_adapter import ( _get_bedrock_runtime_client, + invalidate_runtime_client, + is_stale_connection_error, normalize_converse_response, ) region = api_kwargs.pop("__bedrock_region__", "us-east-1") api_kwargs.pop("__bedrock_converse__", None) client = _get_bedrock_runtime_client(region) - raw_response = client.converse(**api_kwargs) + try: + raw_response = client.converse(**api_kwargs) + except Exception as _bedrock_exc: + # Evict the cached client on stale-connection failures + # so the outer retry loop builds a fresh client/pool. + if is_stale_connection_error(_bedrock_exc): + invalidate_runtime_client(region) + raise result["response"] = normalize_converse_response(raw_response) else: request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request") @@ -5330,14 +5741,8 @@ class AIAgent: ) try: if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_client - self._anthropic_client.close() - self._anthropic_client = build_anthropic_client( - self._anthropic_api_key, - getattr(self, "_anthropic_base_url", None), - timeout=get_provider_request_timeout(self.provider, self.model), - ) + self._rebuild_anthropic_client() else: rc = request_client_holder.get("client") if rc is not None: @@ -5362,14 +5767,8 @@ class AIAgent: # seed future retries. 
try: if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_client - self._anthropic_client.close() - self._anthropic_client = build_anthropic_client( - self._anthropic_api_key, - getattr(self, "_anthropic_base_url", None), - timeout=get_provider_request_timeout(self.provider, self.model), - ) + self._rebuild_anthropic_client() else: request_client = request_client_holder.get("client") if request_client is not None: @@ -5525,12 +5924,21 @@ class AIAgent: try: from agent.bedrock_adapter import ( _get_bedrock_runtime_client, + invalidate_runtime_client, + is_stale_connection_error, stream_converse_with_callbacks, ) region = api_kwargs.pop("__bedrock_region__", "us-east-1") api_kwargs.pop("__bedrock_converse__", None) client = _get_bedrock_runtime_client(region) - raw_response = client.converse_stream(**api_kwargs) + try: + raw_response = client.converse_stream(**api_kwargs) + except Exception as _bedrock_exc: + # Evict the cached client on stale-connection failures + # so the outer retry loop builds a fresh client/pool. + if is_stale_connection_error(_bedrock_exc): + invalidate_runtime_client(region) + raise def _on_text(text): _fire_first() @@ -5782,11 +6190,25 @@ class AIAgent: for idx in sorted(tool_calls_acc): tc = tool_calls_acc[idx] arguments = tc["function"]["arguments"] + tool_name = tc["function"]["name"] or "?" if arguments and arguments.strip(): try: json.loads(arguments) except json.JSONDecodeError: - has_truncated_tool_args = True + # Attempt repair before flagging as truncated. + # Models like GLM-5.1 via Ollama produce trailing + # commas, unclosed brackets, Python None, etc. + # Without repair, these hit the truncation handler + # and kill the session. _repair_tool_call_arguments + # returns "{}" for unrepairable args, which is far + # better than a crashed session. 
+ repaired = _repair_tool_call_arguments(arguments, tool_name) + if repaired != "{}": + # Successfully repaired — use the fixed args + arguments = repaired + else: + # Unrepairable — flag for truncation handling + has_truncated_tool_args = True mock_tool_calls.append(SimpleNamespace( id=tc["id"], type=tc["type"], @@ -6210,14 +6632,8 @@ class AIAgent: if self._interrupt_requested: try: if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_client - self._anthropic_client.close() - self._anthropic_client = build_anthropic_client( - self._anthropic_api_key, - getattr(self, "_anthropic_base_url", None), - timeout=get_provider_request_timeout(self.provider, self.model), - ) + self._rebuild_anthropic_client() else: request_client = request_client_holder.get("client") if request_client is not None: @@ -6294,7 +6710,7 @@ class AIAgent: # ── Provider fallback ────────────────────────────────────────────────── - def _try_activate_fallback(self) -> bool: + def _try_activate_fallback(self, reason: "FailoverReason | None" = None) -> bool: """Switch to the next fallback model/provider in the chain. Called when the current model is failing after retries. Swaps the @@ -6306,6 +6722,15 @@ class AIAgent: auth resolution and client construction — no duplicated provider→key mappings. """ + if reason in (FailoverReason.rate_limit, FailoverReason.billing): + # Only start cooldown when leaving the primary provider. If we're + # already on a fallback and chain-switching, the primary wasn't the + # source of the 429 so the cooldown should not be reset/extended. 
+ fallback_already_active = bool(getattr(self, "_fallback_activated", False)) + current_provider = (getattr(self, "provider", "") or "").strip().lower() + primary_provider = ((self._primary_runtime or {}).get("provider") or "").strip().lower() + if (not fallback_already_active) or (primary_provider and current_provider == primary_provider): + self._rate_limited_until = time.monotonic() + 60 if self._fallback_index >= len(self._fallback_chain): return False @@ -6442,11 +6867,15 @@ class AIAgent: # Without this, compression decisions use the primary model's # context window (e.g. 200K) instead of the fallback's (e.g. 32K), # causing oversized sessions to overflow the fallback. + # Also pass _config_context_length so the explicit config override + # (model.context_length in config.yaml) is respected — without this, + # the fallback activation drops to 128K even when config says 204800. if hasattr(self, 'context_compressor') and self.context_compressor: from agent.model_metadata import get_model_context_length fb_context_length = get_model_context_length( self.model, base_url=self.base_url, api_key=self.api_key, provider=self.provider, + config_context_length=getattr(self, "_config_context_length", None), ) self.context_compressor.update_model( model=self.model, @@ -6485,6 +6914,9 @@ class AIAgent: if not self._fallback_activated: return False + if getattr(self, "_rate_limited_until", 0) > time.monotonic(): + return False # primary still in rate-limit cooldown, stay on fallback + rt = self._primary_runtime try: # ── Core runtime state ── @@ -7193,6 +7625,12 @@ class AIAgent: raw_reasoning_content = getattr(assistant_message, "reasoning_content", None) if raw_reasoning_content is not None: msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content) + elif msg.get("tool_calls") and self._needs_deepseek_tool_reasoning(): + # DeepSeek thinking mode requires reasoning_content on every + # assistant tool-call message. 
Without it, replaying the + # persisted message causes HTTP 400. Include empty string + # as a defensive compatibility fallback (refs #15250). + msg["reasoning_content"] = "" if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: # Pass reasoning_details back unmodified so providers (OpenRouter, @@ -7268,6 +7706,35 @@ class AIAgent: return msg + def _needs_kimi_tool_reasoning(self) -> bool: + """Return True when the current provider is Kimi / Moonshot thinking mode. + + Kimi ``/coding`` and Moonshot thinking mode both require + ``reasoning_content`` on every assistant tool-call message; omitting + it causes the next replay to fail with HTTP 400. + """ + return ( + self.provider in {"kimi-coding", "kimi-coding-cn"} + or base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) + + def _needs_deepseek_tool_reasoning(self) -> bool: + """Return True when the current provider is DeepSeek thinking mode. + + DeepSeek V4 thinking mode requires ``reasoning_content`` on every + assistant tool-call turn; omitting it causes HTTP 400 when the + message is replayed in a subsequent API request (#15250). 
+ """ + provider = (self.provider or "").lower() + model = (self.model or "").lower() + return ( + provider == "deepseek" + or "deepseek" in model + or base_url_host_matches(self.base_url, "api.deepseek.com") + ) + def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None: """Copy provider-facing reasoning fields onto an API replay message.""" if source_msg.get("role") != "assistant": @@ -7283,13 +7750,14 @@ class AIAgent: api_msg["reasoning_content"] = normalized_reasoning return - kimi_requires_reasoning = ( - self.provider in {"kimi-coding", "kimi-coding-cn"} - or base_url_host_matches(self.base_url, "api.kimi.com") - or base_url_host_matches(self.base_url, "moonshot.ai") - or base_url_host_matches(self.base_url, "moonshot.cn") - ) - if kimi_requires_reasoning and source_msg.get("tool_calls"): + # Providers that require an echoed reasoning_content on every + # assistant tool-call turn. Detection logic lives in the per-provider + # helpers so both the creation path (_build_assistant_message) and + # this replay path stay in sync. 
+ if source_msg.get("tool_calls") and ( + self._needs_kimi_tool_reasoning() + or self._needs_deepseek_tool_reasoning() + ): api_msg["reasoning_content"] = "" @staticmethod @@ -7320,6 +7788,115 @@ class AIAgent: ] return api_msg + @staticmethod + def _sanitize_tool_call_arguments( + messages: list, + *, + logger=None, + session_id: str = None, + ) -> int: + """Repair corrupted assistant tool-call argument JSON in-place.""" + log = logger or logging.getLogger(__name__) + if not isinstance(messages, list): + return 0 + + repaired = 0 + marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER + + def _prepend_marker(tool_msg: dict) -> None: + existing = tool_msg.get("content") + if isinstance(existing, str): + if not existing: + tool_msg["content"] = marker + elif not existing.startswith(marker): + tool_msg["content"] = f"{marker}\n{existing}" + return + if existing is None: + tool_msg["content"] = marker + return + try: + existing_text = json.dumps(existing) + except TypeError: + existing_text = str(existing) + tool_msg["content"] = f"{marker}\n{existing_text}" + + message_index = 0 + while message_index < len(messages): + msg = messages[message_index] + if not isinstance(msg, dict) or msg.get("role") != "assistant": + message_index += 1 + continue + + tool_calls = msg.get("tool_calls") + if not isinstance(tool_calls, list) or not tool_calls: + message_index += 1 + continue + + insert_at = message_index + 1 + for tool_call in tool_calls: + if not isinstance(tool_call, dict): + continue + function = tool_call.get("function") + if not isinstance(function, dict): + continue + + arguments = function.get("arguments") + if arguments is None or arguments == "": + function["arguments"] = "{}" + continue + if isinstance(arguments, str) and not arguments.strip(): + function["arguments"] = "{}" + continue + if not isinstance(arguments, str): + continue + + try: + json.loads(arguments) + except json.JSONDecodeError: + tool_call_id = tool_call.get("id") + function_name = 
function.get("name", "?") + preview = arguments[:80] + log.warning( + "Corrupted tool_call arguments repaired before request " + "(session=%s, message_index=%s, tool_call_id=%s, function=%s, preview=%r)", + session_id or "-", + message_index, + tool_call_id or "-", + function_name, + preview, + ) + function["arguments"] = "{}" + + existing_tool_msg = None + scan_index = message_index + 1 + while scan_index < len(messages): + candidate = messages[scan_index] + if not isinstance(candidate, dict) or candidate.get("role") != "tool": + break + if candidate.get("tool_call_id") == tool_call_id: + existing_tool_msg = candidate + break + scan_index += 1 + + if existing_tool_msg is None: + messages.insert( + insert_at, + { + "role": "tool", + "tool_call_id": tool_call_id, + "content": marker, + }, + ) + insert_at += 1 + else: + _prepend_marker(existing_tool_msg) + + repaired += 1 + + message_index += 1 + + return repaired + def _should_sanitize_tool_calls(self) -> bool: """Determine if tool_calls need sanitization for strict APIs. 
@@ -7417,6 +7994,7 @@ class AIAgent: _flush_temperature = _fixed_temp else: _flush_temperature = 0.3 + aux_error = None try: response = _call_llm( task="flush_memories", @@ -7426,14 +8004,19 @@ class AIAgent: max_tokens=5120, # timeout resolved from auxiliary.flush_memories.timeout config ) - except RuntimeError: + except Exception as e: + aux_error = e _aux_available = False response = None if not _aux_available and self.api_mode == "codex_responses": # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) - codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def]) + _ct_flush = self._get_transport() + if _ct_flush is not None: + codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def]) + elif not codex_kwargs.get("tools"): + codex_kwargs["tools"] = [memory_tool_def] if _flush_temperature is not None: codex_kwargs["temperature"] = _flush_temperature else: @@ -7465,11 +8048,37 @@ class AIAgent: **api_kwargs, timeout=_get_task_timeout("flush_memories") ) + if aux_error is not None: + logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error) + self._emit_auxiliary_failure("memory flush", aux_error) + + def _openai_tool_calls(resp): + if resp is not None and hasattr(resp, "choices") and resp.choices: + msg = getattr(resp.choices[0], "message", None) + calls = getattr(msg, "tool_calls", None) + if calls: + return calls + return [] + + def _codex_output_tool_calls(resp): + calls = [] + for item in getattr(resp, "output", []) or []: + if getattr(item, "type", None) == "function_call": + calls.append(SimpleNamespace( + id=getattr(item, "call_id", None), + type="function", + function=SimpleNamespace( + name=getattr(item, "name", ""), + arguments=getattr(item, "arguments", "{}"), + ), + )) + return calls + # Extract tool calls from the response, handling all API formats tool_calls = [] if self.api_mode == "codex_responses" and not _aux_available: _ct_flush = 
self._get_transport() - _cnr_flush = _ct_flush.normalize_response(response) + _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None if _cnr_flush and _cnr_flush.tool_calls: tool_calls = [ SimpleNamespace( @@ -7477,6 +8086,8 @@ class AIAgent: function=SimpleNamespace(name=tc.name, arguments=tc.arguments), ) for tc in _cnr_flush.tool_calls ] + else: + tool_calls = _codex_output_tool_calls(response) elif self.api_mode == "anthropic_messages" and not _aux_available: _tfn = self._get_transport() _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) @@ -7489,15 +8100,16 @@ class AIAgent: ] elif self.api_mode in ("chat_completions", "bedrock_converse"): # chat_completions / bedrock — normalize through transport - _flush_result = self._get_transport().normalize_response(response) - if _flush_result.tool_calls: + _tfn = self._get_transport() + _flush_result = _tfn.normalize_response(response) if _tfn is not None else None + if _flush_result and _flush_result.tool_calls: tool_calls = _flush_result.tool_calls + else: + tool_calls = _openai_tool_calls(response) elif _aux_available and hasattr(response, "choices") and response.choices: # Auxiliary client returned OpenAI-shaped response while main # api_mode is codex/anthropic — extract tool_calls from .choices - _aux_msg = response.choices[0].message - if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls: - tool_calls = _aux_msg.tool_calls + tool_calls = _openai_tool_calls(response) for tc in tool_calls: if tc.function.name == "memory": @@ -7512,12 +8124,27 @@ class AIAgent: old_text=args.get("old_text"), store=self._memory_store, ) + if self._memory_manager and args.get("action") in ("add", "replace"): + try: + self._memory_manager.on_memory_write( + args.get("action", ""), + flush_target, + args.get("content", ""), + metadata=self._build_memory_write_metadata( + write_origin="memory_flush", + execution_context="flush_memories", + ), + ) + except 
Exception: + pass if not self.quiet_mode: print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") except Exception as e: - logger.debug("Memory flush tool call failed: %s", e) + logger.warning("Memory flush tool call failed: %s", e) + self._emit_auxiliary_failure("memory flush tool", e) except Exception as e: - logger.debug("Memory flush API call failed: %s", e) + logger.warning("Memory flush API call failed: %s", e) + self._emit_auxiliary_failure("memory flush", e) finally: # Strip flush artifacts: remove everything from the flush message onward. # Use sentinel marker instead of identity check for robustness. @@ -7556,7 +8183,21 @@ class AIAgent: except Exception: pass - compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic) + try: + compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens, focus_topic=focus_topic) + except TypeError: + # Plugin context engine with strict signature that doesn't accept + # focus_topic — fall back to calling without it. + compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens) + + summary_error = getattr(self.context_compressor, "_last_summary_error", None) + if summary_error: + if getattr(self, "_last_compression_summary_warning", None) != summary_error: + self._last_compression_summary_warning = summary_error + self._emit_warning( + f"⚠ Compression summary failed: {summary_error}. " + "Inserted a fallback context marker." 
+ ) todo_snapshot = self._todo_store.format_for_injection() if todo_snapshot: @@ -7727,6 +8368,10 @@ class AIAgent: function_args.get("action", ""), target, function_args.get("content", ""), + metadata=self._build_memory_write_metadata( + task_id=effective_task_id, + tool_call_id=tool_call_id, + ), ) except Exception: pass @@ -8238,6 +8883,10 @@ class AIAgent: function_args.get("action", ""), target, function_args.get("content", ""), + metadata=self._build_memory_write_metadata( + task_id=effective_task_id, + tool_call_id=getattr(tool_call, "id", None), + ), ) except Exception: pass @@ -8482,6 +9131,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() + self._copy_reasoning_content_for_api(msg, api_msg) for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"): api_msg.pop(internal_field, None) if _needs_sanitize: @@ -9112,6 +9762,19 @@ class AIAgent: # Note: Reasoning is embedded in content via tags for trajectory storage. # However, providers like Moonshot AI require a separate 'reasoning_content' field # on assistant messages with tool_calls. We handle both cases here. 
+ request_logger = getattr(self, "logger", None) or logging.getLogger(__name__) + repaired_tool_calls = self._sanitize_tool_call_arguments( + messages, + logger=request_logger, + session_id=self.session_id, + ) + if repaired_tool_calls > 0: + request_logger.info( + "Sanitized %s corrupted tool_call arguments before request (session=%s)", + repaired_tool_calls, + self.session_id or "-", + ) + api_messages = [] for idx, msg in enumerate(messages): api_msg = msg.copy() @@ -9277,6 +9940,7 @@ class AIAgent: codex_auth_retry_attempted=False anthropic_auth_retry_attempted=False nous_auth_retry_attempted=False + copilot_auth_retry_attempted=False thinking_sig_retry_attempted = False has_retried_429 = False restart_with_compressed_messages = False @@ -9434,28 +10098,47 @@ class AIAgent: response_invalid = True error_details.append("response is None") else: - # output_text fallback: stream backfill may have failed - # but normalize can still recover from output_text - _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" - if _out_text_stripped: - logger.debug( - "Codex response.output is empty but output_text is present " - "(%d chars); deferring to normalization.", - len(_out_text_stripped), + # Provider returned a terminal failure (e.g. quota exhaustion). + # Treat as invalid so the fallback chain is triggered instead of + # letting the error bubble up outside the retry/fallback loop. 
+ _codex_resp_status = str(getattr(response, "status", "") or "").strip().lower() + if _codex_resp_status in {"failed", "cancelled"}: + _codex_error_obj = getattr(response, "error", None) + _codex_error_msg = ( + _codex_error_obj.get("message") if isinstance(_codex_error_obj, dict) + else str(_codex_error_obj) if _codex_error_obj + else f"Responses API returned status '{_codex_resp_status}'" ) - else: - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logger.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", + logging.warning( + "Codex response status='%s' (error=%s). Routing to fallback. %s", + _codex_resp_status, _codex_error_msg, + self._client_log_context(), ) response_invalid = True - error_details.append("response.output is empty") + error_details.append(f"response.status={_codex_resp_status}: {_codex_error_msg}") + else: + # output_text fallback: stream backfill may have failed + # but normalize can still recover from output_text + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": _tv = self._get_transport() if not _tv.validate_response(response): @@ -10221,6 +10904,15 @@ class AIAgent: print(f"{self.log_prefix} • Check credits / billing: https://portal.nousresearch.com") print(f"{self.log_prefix} • Verify stored credentials: {_dhh}/auth.json") print(f"{self.log_prefix} • Switch providers temporarily: /model --provider openrouter") + if ( + self.provider == "copilot" + and status_code == 401 + and not copilot_auth_retry_attempted + ): + copilot_auth_retry_attempted = True + if self._try_refresh_copilot_client_credentials(): + self._vprint(f"{self.log_prefix}🔐 Copilot credentials refreshed after 401. Retrying request...") + continue if ( self.api_mode == "anthropic_messages" and status_code == 401 @@ -10420,14 +11112,14 @@ class AIAgent: ) if is_rate_limited and self._fallback_index < len(self._fallback_chain): # Don't eagerly fallback if credential pool rotation may - # still recover. The pool's retry-then-rotate cycle needs - # at least one more attempt to fire — jumping to a fallback - # provider here short-circuits it. - pool = self._credential_pool - pool_may_recover = pool is not None and pool.has_available() + # still recover. See _pool_may_recover_from_rate_limit + # for the single-credential-pool exception. Fixes #11314. 
+ pool_may_recover = _pool_may_recover_from_rate_limit( + self._credential_pool + ) if not pool_may_recover: self._emit_status("⚠️ Rate limited — switching to fallback provider...") - if self._try_activate_fallback(): + if self._try_activate_fallback(reason=classified.reason): retry_count = 0 compression_attempts = 0 primary_recovery_attempted = False @@ -10680,9 +11372,26 @@ class AIAgent: # already accounts for 413, 429, 529 (transient), context # overflow, and generic-400 heuristics. Local validation # errors (ValueError, TypeError) are programming bugs. + # Exclude UnicodeEncodeError — it's a ValueError subclass + # but is handled separately by the surrogate sanitization + # path above. Exclude json.JSONDecodeError — also a + # ValueError subclass, but it indicates a transient + # provider/network failure (malformed response body, + # truncated stream, routing layer corruption), not a + # local programming bug, and should be retried (#14782). is_local_validation_error = ( isinstance(api_error, (ValueError, TypeError)) - and not isinstance(api_error, UnicodeEncodeError) + and not isinstance( + api_error, (UnicodeEncodeError, json.JSONDecodeError) + ) + # ssl.SSLError (and its subclass SSLCertVerificationError) + # inherits from OSError *and* ValueError via Python MRO, + # so the isinstance(ValueError) check above would + # misclassify a TLS transport failure as a local + # programming bug and abort without retrying. Exclude + # ssl.SSLError explicitly so the error classifier's + # retryable=True mapping takes effect instead. + and not isinstance(api_error, ssl.SSLError) ) is_client_error = ( is_local_validation_error @@ -11895,14 +12604,11 @@ class AIAgent: self._iters_since_skill = 0 # External memory provider: sync the completed turn + queue next prefetch. - # Use original_user_message (clean input) — user_message may contain - # injected skill content that bloats / breaks provider queries. 
- if self._memory_manager and final_response and original_user_message: - try: - self._memory_manager.sync_all(original_user_message, final_response) - self._memory_manager.queue_prefetch_all(original_user_message) - except Exception: - pass + self._sync_external_memory_for_turn( + original_user_message=original_user_message, + final_response=final_response, + interrupted=interrupted, + ) # Background memory/skill review — runs AFTER the response is delivered # so it never competes with the user's task for model attention. diff --git a/scripts/release.py b/scripts/release.py index 5a38adc4f..c77f8581d 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -44,9 +44,13 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", "343873859@qq.com": "DrStrangerUJN", + "uzmpsk.dilekakbas@gmail.com": "dlkakbs", "jefferson@heimdallstrategy.com": "Mind-Dragon", "130918800+devorun@users.noreply.github.com": "devorun", "maks.mir@yahoo.com": "say8hi", + "web3blind@users.noreply.github.com": "web3blind", + "julia@alexland.us": "alexg0bot", + "1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl", # contributors (from noreply pattern) "david.vv@icloud.com": "davidvv", "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", @@ -58,13 +62,19 @@ AUTHOR_MAP = { "keifergu@tencent.com": "keifergu", "kshitijk4poor@users.noreply.github.com": "kshitijk4poor", "abner.the.foreman@agentmail.to": "Abnertheforeman", + "thomasgeorgevii09@gmail.com": "tochukwuada", "harryykyle1@gmail.com": "hharry11", "kshitijk4poor@gmail.com": "kshitijk4poor", + "keira.voss94@gmail.com": "keiravoss94", "16443023+stablegenius49@users.noreply.github.com": "stablegenius49", + "simbamax99@gmail.com": "simbam99", "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk", + 
"cyprian@ironin.pl": "iRonin", "valdi.jorge@gmail.com": "jvcl", + "q19dcp@gmail.com": "aj-nt", + "ebukau84@gmail.com": "UgwujaGeorge", "francip@gmail.com": "francip", "omni@comelse.com": "omnissiah-comelse", "oussama.redcode@gmail.com": "mavrickdeveloper", @@ -77,6 +87,7 @@ AUTHOR_MAP = { "77628552+raulvidis@users.noreply.github.com": "raulvidis", "145567217+Aum08Desai@users.noreply.github.com": "Aum08Desai", "256820943+kshitij-eliza@users.noreply.github.com": "kshitij-eliza", + "jiechengwu@pony.ai": "Jason2031", "44278268+shitcoinsherpa@users.noreply.github.com": "shitcoinsherpa", "104278804+Sertug17@users.noreply.github.com": "Sertug17", "112503481+caentzminger@users.noreply.github.com": "caentzminger", @@ -103,6 +114,7 @@ AUTHOR_MAP = { "30841158+n-WN@users.noreply.github.com": "n-WN", "tsuijinglei@gmail.com": "hiddenpuppy", "jerome@clawwork.ai": "HiddenPuppy", + "jerome.benoit@sap.com": "jerome-benoit", "wysie@users.noreply.github.com": "Wysie", "leoyuan0099@gmail.com": "keyuyuan", "bxzt2006@163.com": "Only-Code-A", @@ -167,6 +179,39 @@ AUTHOR_MAP = { "socrates1024@gmail.com": "socrates1024", "seanalt555@gmail.com": "Salt-555", "satelerd@gmail.com": "satelerd", + "dan@danlynn.com": "danklynn", + "mattmaximo@hotmail.com": "MattMaximo", + "149063006+j3ffffff@users.noreply.github.com": "j3ffffff", + "A-FdL-Prog@users.noreply.github.com": "A-FdL-Prog", + "l0hde@users.noreply.github.com": "l0hde", + "difujia@users.noreply.github.com": "difujia", + "vominh1919@gmail.com": "vominh1919", + "yue.gu2023@gmail.com": "YueLich", + "51783311+andyylin@users.noreply.github.com": "andyylin", + "me@jakubkrcmar.cz": "jakubkrcmar", + "prasadus92@gmail.com": "prasadus92", + "michael@make.software": "mssteuer", + "der@konsi.org": "konsisumer", + "abogale2@gmail.com": "amanuel2", + "alexazzjjtt@163.com": "alexzhu0", + "pub_forgreatagent@antgroup.com": "AntAISecurityLab", + "252620095+briandevans@users.noreply.github.com": "briandevans", + "danielrpike9@gmail.com": "Bartok9", + 
"skozyuk@cruxexperts.com": "CruxExperts", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", + "mgparkprint@gmail.com": "vlwkaos", + "tranquil_flow@protonmail.com": "Tranquil-Flow", + "wangshengyang2004@163.com": "Wangshengyang2004", + "hasan.ali13381@gmail.com": "H-Ali13381", + "xienb@proton.me": "XieNBi", + "139681654+maymuneth@users.noreply.github.com": "maymuneth", + "zengwei@nightq.cn": "nightq", + "1434494126@qq.com": "5park1e", + "158153005+5park1e@users.noreply.github.com": "5park1e", + "innocarpe@gmail.com": "innocarpe", + "noreply@ked.com": "qike-ms", + "andrekurait@gmail.com": "AndreKurait", + "bsgdigital@users.noreply.github.com": "bsgdigital", "numman.ali@gmail.com": "nummanali", "rohithsaimidigudla@gmail.com": "whitehatjr1001", "0xNyk@users.noreply.github.com": "0xNyk", @@ -185,6 +230,11 @@ AUTHOR_MAP = { "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", "hermes@nousresearch.com": "NousResearch", + "reginaldasr@gmail.com": "ReginaldasR", + "ntconguit@gmail.com": "0xharryriddle", + "agent@wildcat.local": "ericnicolaides", + "georgex8001@gmail.com": "georgex8001", + "stefan@dimagents.ai": "dimitrovi", "hermes@noushq.ai": "benbarclay", "chinmingcock@gmail.com": "ChimingLiu", "openclaw@sparklab.ai": "openclaw", @@ -333,6 +383,9 @@ AUTHOR_MAP = { "brian@bde.io": "briandevans", "hubin_ll@qq.com": "LLQWQ", "memosr_email@gmail.com": "memosr", + "jperlow@gmail.com": "perlowja", + "tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc", + "harryplusplus@gmail.com": "harryplusplus", "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", @@ -436,6 +489,12 @@ AUTHOR_MAP = { "topcheer@me.com": "topcheer", "walli@tencent.com": "walli", "zhuofengwang@tencent.com": "Zhuofeng-Wang", + # April 2026 salvage-PR batch (#14920, #14986, #14966) + "mrunmayeerane17@gmail.com": "mrunmayee17", + "69489633+camaragon@users.noreply.github.com": "camaragon", + "shamork@outlook.com": "shamork", + # 
April 2026 Discord Copilot /model salvage (#15030) + "cshong2017@outlook.com": "Nicecsh", # no-github-match — keep as display names "clio-agent@sisyphuslabs.ai": "Sisyphus", "marco@rutimka.de": "Marco Rutsch", @@ -443,6 +502,7 @@ AUTHOR_MAP = { "zhangxicen@example.com": "zhangxicen", "codex@openai.invalid": "teknium1", "screenmachine@gmail.com": "teknium1", + "chenzeshi@live.com": "chen1749144759", } diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index d19471c80..4ed03a904 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -248,7 +248,6 @@ Type these during an interactive chat session. ``` /config Show config (CLI) /model [name] Show or change model -/provider Show provider info /personality [name] Set personality /reasoning [level] Set reasoning (none|minimal|low|medium|high|xhigh|show|hide) /verbose Cycle: off → new → all → verbose diff --git a/skills/creative/design-md/SKILL.md b/skills/creative/design-md/SKILL.md new file mode 100644 index 000000000..36c4138db --- /dev/null +++ b/skills/creative/design-md/SKILL.md @@ -0,0 +1,196 @@ +--- +name: design-md +description: Author, validate, diff, and export DESIGN.md files — Google's open-source format spec that gives coding agents a persistent, structured understanding of a design system (tokens + rationale in one file). Use when building a design system, porting style rules between projects, generating UI with consistent brand, or auditing accessibility/contrast. +version: 1.0.0 +author: Hermes Agent +license: MIT +metadata: + hermes: + tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google] + related_skills: [popular-web-designs, excalidraw, architecture-diagram] +--- + +# DESIGN.md Skill + +DESIGN.md is Google's open spec (Apache-2.0, `google-labs-code/design.md`) for +describing a visual identity to coding agents. 
One file combines: + +- **YAML front matter** — machine-readable design tokens (normative values) +- **Markdown body** — human-readable rationale, organized into canonical sections + +Tokens give exact values. Prose tells agents *why* those values exist and how to +apply them. The CLI (`npx @google/design.md`) lints structure + WCAG contrast, +diffs versions for regressions, and exports to Tailwind or W3C DTCG JSON. + +## When to use this skill + +- User asks for a DESIGN.md file, design tokens, or a design system spec +- User wants consistent UI/brand across multiple projects or tools +- User pastes an existing DESIGN.md and asks to lint, diff, export, or extend it +- User asks to port a style guide into a format agents can consume +- User wants contrast / WCAG accessibility validation on their color palette + +For purely visual inspiration or layout examples, use `popular-web-designs` +instead. This skill is for the *formal spec file* itself. + +## File anatomy + +```md +--- +version: alpha +name: Heritage +description: Architectural minimalism meets journalistic gravitas. +colors: + primary: "#1A1C1E" + secondary: "#6C7278" + tertiary: "#B8422E" + neutral: "#F7F5F2" +typography: + h1: + fontFamily: Public Sans + fontSize: 3rem + fontWeight: 700 + lineHeight: 1.1 + letterSpacing: "-0.02em" + body-md: + fontFamily: Public Sans + fontSize: 1rem +rounded: + sm: 4px + md: 8px + lg: 16px +spacing: + sm: 8px + md: 16px + lg: 24px +components: + button-primary: + backgroundColor: "{colors.tertiary}" + textColor: "#FFFFFF" + rounded: "{rounded.sm}" + padding: 12px + button-primary-hover: + backgroundColor: "{colors.primary}" +--- + +## Overview + +Architectural Minimalism meets Journalistic Gravitas... + +## Colors + +- **Primary (#1A1C1E):** Deep ink for headlines and core text. +- **Tertiary (#B8422E):** "Boston Clay" — the sole driver for interaction. + +## Typography + +Public Sans for everything except small all-caps labels... 
+ +## Components + +`button-primary` is the only high-emphasis action on a page... +``` + +## Token types + +| Type | Format | Example | +|------|--------|---------| +| Color | `#` + hex (sRGB) | `"#1A1C1E"` | +| Dimension | number + unit (`px`, `em`, `rem`) | `48px`, `-0.02em` | +| Token reference | `{path.to.token}` | `{colors.primary}` | +| Typography | object with `fontFamily`, `fontSize`, `fontWeight`, `lineHeight`, `letterSpacing`, `fontFeature`, `fontVariation` | see above | + +Component property whitelist: `backgroundColor`, `textColor`, `typography`, +`rounded`, `padding`, `size`, `height`, `width`. Variants (hover, active, +pressed) are **separate component entries** with related key names +(`button-primary-hover`), not nested. + +## Canonical section order + +Sections are optional, but present ones MUST appear in this order. Duplicate +headings reject the file. + +1. Overview (alias: Brand & Style) +2. Colors +3. Typography +4. Layout (alias: Layout & Spacing) +5. Elevation & Depth (alias: Elevation) +6. Shapes +7. Components +8. Do's and Don'ts + +Unknown sections are preserved, not errored. Unknown token names are accepted +if the value type is valid. Unknown component properties produce a warning. + +## Workflow: authoring a new DESIGN.md + +1. **Ask the user** (or infer) the brand tone, accent color, and typography + direction. If they provided a site, image, or vibe, translate it to the + token shape above. +2. **Write `DESIGN.md`** in their project root using `write_file`. Always + include `name:` and `colors:`; other sections optional but encouraged. +3. **Use token references** (`{colors.primary}`) in the `components:` section + instead of re-typing hex values. Keeps the palette single-source. +4. **Lint it** (see below). Fix any broken references or WCAG failures + before returning. +5. **If the user has an existing project**, also write Tailwind or DTCG + exports next to the file (`tailwind.theme.json`, `tokens.json`). 
+
+## Workflow: lint / diff / export
+
+The CLI is `@google/design.md` (Node). Use `npx` — no global install needed.
+
+```bash
+# Validate structure + token references + WCAG contrast
+npx -y @google/design.md lint DESIGN.md
+
+# Compare two versions, fail on regression (exit 1 = regression)
+npx -y @google/design.md diff DESIGN.md DESIGN-v2.md
+
+# Export to Tailwind theme JSON
+npx -y @google/design.md export --format tailwind DESIGN.md > tailwind.theme.json
+
+# Export to W3C DTCG (Design Tokens Format Module) JSON
+npx -y @google/design.md export --format dtcg DESIGN.md > tokens.json
+
+# Print the spec itself — useful when injecting into an agent prompt
+npx -y @google/design.md spec --rules-only --format json
+```
+
+All commands accept `-` for stdin. `lint` returns exit 1 on errors. Use the
+`--format json` flag and parse the output if you need to report findings
+structurally.
+
+### Lint rule reference (what the 7 rules catch)
+
+- `broken-ref` (error) — `{colors.missing}` points at a non-existent token
+- `duplicate-section` (error) — same `## Heading` appears twice
+- `invalid-color`, `invalid-dimension`, `invalid-typography` (error)
+- `wcag-contrast` (warning/info) — component `textColor` vs `backgroundColor`
+  ratio against WCAG AA (4.5:1) and AAA (7:1)
+- `unknown-component-property` (warning) — outside the whitelist above
+
+When the user cares about accessibility, call this out explicitly in your
+summary — WCAG findings are the most load-bearing reason to use the CLI.
+
+## Pitfalls
+
+- **Don't nest component variants.** `button-primary.hover` is wrong;
+  `button-primary-hover` as a sibling key is right.
+- **Hex colors must be quoted strings.** An unquoted `#` starts a YAML
+  comment, so a bare value like `#1A1C1E` is silently dropped or truncated.
+- **Negative dimensions need quotes too.** An unquoted `letterSpacing: -0.02em`
+  can be misread by YAML tooling — write `letterSpacing: "-0.02em"`.
+- **Section order is enforced.** If the user gives you prose in a random order, + reorder it to match the canonical list before saving. +- **`version: alpha` is the current spec version** (as of Apr 2026). The spec + is marked alpha — watch for breaking changes. +- **Token references resolve by dotted path.** `{colors.primary}` works; + `{primary}` does not. + +## Spec source of truth + +- Repo: https://github.com/google-labs-code/design.md (Apache-2.0) +- CLI: `@google/design.md` on npm +- License of generated DESIGN.md files: whatever the user's project uses; + the spec itself is Apache-2.0. diff --git a/skills/creative/design-md/templates/starter.md b/skills/creative/design-md/templates/starter.md new file mode 100644 index 000000000..03d54785f --- /dev/null +++ b/skills/creative/design-md/templates/starter.md @@ -0,0 +1,99 @@ +--- +version: alpha +name: MyBrand +description: One-sentence description of the visual identity. +colors: + primary: "#0F172A" + secondary: "#64748B" + tertiary: "#2563EB" + neutral: "#F8FAFC" + on-primary: "#FFFFFF" + on-tertiary: "#FFFFFF" +typography: + h1: + fontFamily: Inter + fontSize: 3rem + fontWeight: 700 + lineHeight: 1.1 + letterSpacing: "-0.02em" + h2: + fontFamily: Inter + fontSize: 2rem + fontWeight: 600 + lineHeight: 1.2 + body-md: + fontFamily: Inter + fontSize: 1rem + lineHeight: 1.5 + label-caps: + fontFamily: Inter + fontSize: 0.75rem + fontWeight: 600 + letterSpacing: "0.08em" +rounded: + sm: 4px + md: 8px + lg: 16px + full: 9999px +spacing: + xs: 4px + sm: 8px + md: 16px + lg: 24px + xl: 48px +components: + button-primary: + backgroundColor: "{colors.tertiary}" + textColor: "{colors.on-tertiary}" + rounded: "{rounded.sm}" + padding: 12px + button-primary-hover: + backgroundColor: "{colors.primary}" + textColor: "{colors.on-primary}" + card: + backgroundColor: "{colors.neutral}" + textColor: "{colors.primary}" + rounded: "{rounded.md}" + padding: 24px +--- + +## Overview + +Describe the voice and feel of the brand in 
one or two paragraphs. What mood +does it evoke? What emotional response should a user have on first impression? + +## Colors + +- **Primary ({colors.primary}):** Core text, headlines, high-emphasis surfaces. +- **Secondary ({colors.secondary}):** Supporting text, borders, metadata. +- **Tertiary ({colors.tertiary}):** Interaction driver — buttons, links, + selected states. Use sparingly to preserve its signal. +- **Neutral ({colors.neutral}):** Page background and surface fills. + +## Typography + +Inter for everything. Weight and size carry hierarchy, not font family. Tight +letter-spacing on display sizes; default tracking on body. + +## Layout + +Spacing scale is a 4px baseline. Use `md` (16px) for intra-component gaps, +`lg` (24px) for inter-component gaps, `xl` (48px) for section breaks. + +## Shapes + +Rounded corners are modest — `sm` on interactive elements, `md` on cards. +`full` is reserved for avatars and pill badges. + +## Components + +- `button-primary` is the only high-emphasis action per screen. +- `card` is the default surface for grouped content. No shadow by default. + +## Do's and Don'ts + +- **Do** use token references (`{colors.primary}`) instead of literal hex in + component definitions. +- **Don't** introduce colors outside the palette — extend the palette first. +- **Don't** nest component variants. `button-primary-hover` is a sibling, + not a child. diff --git a/skills/media/spotify/SKILL.md b/skills/media/spotify/SKILL.md new file mode 100644 index 000000000..612eec16f --- /dev/null +++ b/skills/media/spotify/SKILL.md @@ -0,0 +1,134 @@ +--- +name: spotify +description: Control Spotify — play music, search the catalog, manage playlists and library, inspect devices and playback state. Loads when the user asks to play/pause/queue music, search tracks/albums/artists, manage playlists, or check what's playing. Assumes the Hermes Spotify toolset is enabled and `hermes auth spotify` has been run. 
+version: 1.0.0
+author: Hermes Agent
+license: MIT
+prerequisites:
+  tools: [spotify_playback, spotify_devices, spotify_queue, spotify_search, spotify_playlists, spotify_albums, spotify_library]
+metadata:
+  hermes:
+    tags: [spotify, music, playback, playlists, media]
+    related_skills: [gif-search]
+---
+
+# Spotify
+
+Control the user's Spotify account via the Hermes Spotify toolset (7 tools). Setup guide: https://hermes-agent.nousresearch.com/docs/user-guide/features/spotify
+
+## When to use this skill
+
+The user says something like "play X", "pause", "skip", "queue up X", "what's playing", "search for X", "add to my X playlist", "make a playlist", "save this to my library", etc.
+
+## The 7 tools
+
+- `spotify_playback` — play, pause, next, previous, seek, set_repeat, set_shuffle, set_volume, get_state, get_currently_playing, recently_played
+- `spotify_devices` — list, transfer
+- `spotify_queue` — get, add
+- `spotify_search` — search the catalog
+- `spotify_playlists` — list, get, create, add_items, remove_items, update_details
+- `spotify_albums` — get, tracks
+- `spotify_library` — list/save/remove with `kind: "tracks"|"albums"`
+
+Playback-mutating actions require Spotify Premium; search/library/playlist ops work on Free.
+
+## Canonical patterns (minimize tool calls)
+
+### "Play <album / artist / track>"
+One search, then play by URI. Do NOT loop through search results describing them unless the user asked for options.
+
+```
+spotify_search({"query": "miles davis kind of blue", "types": ["album"], "limit": 1})
+→ got album URI spotify:album:1weenld61qoidwYuZ1GESA
+spotify_playback({"action": "play", "context_uri": "spotify:album:1weenld61qoidwYuZ1GESA"})
+```
+
+For "play some <artist>" (no specific song), prefer `types: ["artist"]` and play the artist context URI — Spotify handles smart shuffle. If the user says "the song" or "that track", search `types: ["track"]` and pass `uris: [track_uri]` to play.
+
+### "What's playing?" / "What am I listening to?"
+Single call — don't chain get_state after get_currently_playing.
+
+```
+spotify_playback({"action": "get_currently_playing"})
+```
+
+If it returns 204/empty (`is_playing: false`), tell the user nothing is playing. Don't retry.
+
+### "Pause" / "Skip" / "Volume 50"
+Direct action, no preflight inspection needed.
+
+```
+spotify_playback({"action": "pause"})
+spotify_playback({"action": "next"})
+spotify_playback({"action": "set_volume", "volume_percent": 50})
+```
+
+### "Add to my playlist"
+1. `spotify_playlists list` to find the playlist ID by name
+2. Get the track URI (from currently playing, or search)
+3. `spotify_playlists add_items` with the playlist_id and URIs
+
+```
+spotify_playlists({"action": "list"})
+→ found "Late Night Jazz" = 37i9dQZF1DX4wta20PHgwo
+spotify_playback({"action": "get_currently_playing"})
+→ current track uri = spotify:track:0DiWol3AO6WpXZgp0goxAV
+spotify_playlists({"action": "add_items",
+  "playlist_id": "37i9dQZF1DX4wta20PHgwo",
+  "uris": ["spotify:track:0DiWol3AO6WpXZgp0goxAV"]})
+```
+
+### "Create a playlist called X and add the last 3 songs I played"
+```
+spotify_playback({"action": "recently_played", "limit": 3})
+spotify_playlists({"action": "create", "name": "Focus 2026"})
+→ got playlist_id back in response
+spotify_playlists({"action": "add_items", "playlist_id": <playlist_id>, "uris": [<3 uris>]})
+```
+
+### "Save / unsave / is this saved?"
+Use `spotify_library` with the right `kind`.
+
+```
+spotify_library({"kind": "tracks", "action": "save", "uris": ["spotify:track:..."]})
+spotify_library({"kind": "albums", "action": "list", "limit": 50})
+```
+
+### "Transfer playback to my <device>"
+```
+spotify_devices({"action": "list"})
+→ pick the device_id by matching name/type
+spotify_devices({"action": "transfer", "device_id": "<device_id>", "play": true})
+```
+
+## Critical failure modes
+
+**`403 Forbidden — No active device found`** on any playback action means Spotify isn't running anywhere. 
Tell the user: "Open Spotify on your phone/desktop/web player first, start any track for a second, then retry." Don't retry the tool call blindly — it will fail the same way. You can call `spotify_devices list` to confirm; an empty list means no active device. + +**`403 Forbidden — Premium required`** means the user is on Free and tried to mutate playback. Don't retry; tell them this action needs Premium. Reads still work (search, playlists, library, get_state). + +**`204 No Content` on `get_currently_playing`** is NOT an error — it means nothing is playing. The tool returns `is_playing: false`. Just report that to the user. + +**`429 Too Many Requests`** = rate limit. Wait and retry once. If it keeps happening, you're looping — stop. + +**`401 Unauthorized` after a retry** — refresh token revoked. Tell the user to run `hermes auth spotify` again. + +## URI and ID formats + +Spotify uses three interchangeable ID formats. The tools accept all three and normalize: + +- URI: `spotify:track:0DiWol3AO6WpXZgp0goxAV` (preferred) +- URL: `https://open.spotify.com/track/0DiWol3AO6WpXZgp0goxAV` +- Bare ID: `0DiWol3AO6WpXZgp0goxAV` + +When in doubt, use full URIs. Search results return URIs in the `uri` field — pass those directly. + +Entity types: `track`, `album`, `artist`, `playlist`, `show`, `episode`. Use the right type for the action — `spotify_playback.play` with a `context_uri` expects album/playlist/artist; `uris` expects an array of track URIs. + +## What NOT to do + +- **Don't call `get_state` before every action.** Spotify accepts play/pause/skip without preflight. Only inspect state when the user asked "what's playing" or you need to reason about device/track. +- **Don't describe search results unless asked.** If the user said "play X", search, grab the top URI, play it. They'll hear it's wrong if it's wrong. +- **Don't retry on `403 Premium required` or `403 No active device`.** Those are permanent until user action. 
+- **Don't use `spotify_search` to find a playlist by name** — that searches the public Spotify catalog. User playlists come from `spotify_playlists list`. +- **Don't mix `kind: "tracks"` with album URIs** in `spotify_library` (or vice versa). The tool normalizes IDs but the API endpoint differs. diff --git a/skills/mlops/models/segment-anything/SKILL.md b/skills/mlops/models/segment-anything/SKILL.md index 14b766e5b..2fea76141 100644 --- a/skills/mlops/models/segment-anything/SKILL.md +++ b/skills/mlops/models/segment-anything/SKILL.md @@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks( ### Model architecture + ``` SAM Architecture: ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ @@ -144,6 +145,7 @@ SAM Architecture: Image Embeddings Prompt Embeddings Masks + IoU (computed once) (per prompt) predictions ``` + ### Model variants diff --git a/skills/productivity/google-workspace/scripts/_hermes_home.py b/skills/productivity/google-workspace/scripts/_hermes_home.py new file mode 100644 index 000000000..456eaa930 --- /dev/null +++ b/skills/productivity/google-workspace/scripts/_hermes_home.py @@ -0,0 +1,42 @@ +"""Resolve HERMES_HOME for standalone skill scripts. + +Skill scripts may run outside the Hermes process (e.g. system Python, +nix env, CI) where ``hermes_constants`` is not importable. This module +provides the same ``get_hermes_home()`` and ``display_hermes_home()`` +contracts as ``hermes_constants`` without requiring it on ``sys.path``. + +When ``hermes_constants`` IS available it is used directly so that any +future enhancements (profile resolution, Docker detection, etc.) are +picked up automatically. The fallback path replicates the core logic +from ``hermes_constants.py`` using only the stdlib. + +All scripts under ``google-workspace/scripts/`` should import from here +instead of duplicating the ``HERMES_HOME = Path(os.getenv(...))`` pattern. 
+""" + +from __future__ import annotations + +import os +from pathlib import Path + +try: + from hermes_constants import display_hermes_home as display_hermes_home + from hermes_constants import get_hermes_home as get_hermes_home +except (ModuleNotFoundError, ImportError): + + def get_hermes_home() -> Path: + """Return the Hermes home directory (default: ~/.hermes). + + Mirrors ``hermes_constants.get_hermes_home()``.""" + val = os.environ.get("HERMES_HOME", "").strip() + return Path(val) if val else Path.home() / ".hermes" + + def display_hermes_home() -> str: + """Return a user-friendly ``~/``-shortened display string. + + Mirrors ``hermes_constants.display_hermes_home()``.""" + home = get_hermes_home() + try: + return "~/" + str(home.relative_to(Path.home())) + except ValueError: + return str(home) diff --git a/skills/productivity/google-workspace/scripts/google_api.py b/skills/productivity/google-workspace/scripts/google_api.py index 6504c098b..0c39e091f 100644 --- a/skills/productivity/google-workspace/scripts/google_api.py +++ b/skills/productivity/google-workspace/scripts/google_api.py @@ -31,7 +31,14 @@ from datetime import datetime, timedelta, timezone from email.mime.text import MIMEText from pathlib import Path -HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) +# Ensure sibling modules (_hermes_home) are importable when run standalone. 
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from _hermes_home import get_hermes_home + +HERMES_HOME = get_hermes_home() TOKEN_PATH = HERMES_HOME / "google_token.json" CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json" diff --git a/skills/productivity/google-workspace/scripts/gws_bridge.py b/skills/productivity/google-workspace/scripts/gws_bridge.py index 0477749d7..e3cc9f147 100755 --- a/skills/productivity/google-workspace/scripts/gws_bridge.py +++ b/skills/productivity/google-workspace/scripts/gws_bridge.py @@ -10,9 +10,12 @@ import sys from datetime import datetime, timezone from pathlib import Path +# Ensure sibling modules (_hermes_home) are importable when run standalone. +_SCRIPTS_DIR = str(Path(__file__).resolve().parent) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) -def get_hermes_home() -> Path: - return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) +from _hermes_home import get_hermes_home def get_token_path() -> Path: diff --git a/skills/productivity/google-workspace/scripts/setup.py b/skills/productivity/google-workspace/scripts/setup.py index bf4fb39ca..851d8911b 100644 --- a/skills/productivity/google-workspace/scripts/setup.py +++ b/skills/productivity/google-workspace/scripts/setup.py @@ -21,6 +21,8 @@ Agent workflow: 6. Run --check to verify. Done. """ +from __future__ import annotations # allow PEP 604 `X | None` on Python 3.9+ + import argparse import json import os @@ -28,13 +30,12 @@ import subprocess import sys from pathlib import Path -try: - from hermes_constants import display_hermes_home, get_hermes_home -except ModuleNotFoundError: - HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4] - if HERMES_AGENT_ROOT.exists(): - sys.path.insert(0, str(HERMES_AGENT_ROOT)) - from hermes_constants import display_hermes_home, get_hermes_home +# Ensure sibling modules (_hermes_home) are importable when run standalone. 
+_SCRIPTS_DIR = str(Path(__file__).resolve().parent) +if _SCRIPTS_DIR not in sys.path: + sys.path.insert(0, _SCRIPTS_DIR) + +from _hermes_home import display_hermes_home, get_hermes_home HERMES_HOME = get_hermes_home() TOKEN_PATH = HERMES_HOME / "google_token.json" @@ -111,7 +112,11 @@ def install_deps(): return True except subprocess.CalledProcessError as e: print(f"ERROR: Failed to install dependencies: {e}") - print(f"Try manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}") + print( + "On environments without pip (e.g. Nix), install the optional extra instead:" + ) + print(" pip install 'hermes-agent[google]'") + print(f"Or manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}") return False diff --git a/skills/research/research-paper-writing/SKILL.md b/skills/research/research-paper-writing/SKILL.md index f45ce7e2f..a6f343825 100644 --- a/skills/research/research-paper-writing/SKILL.md +++ b/skills/research/research-paper-writing/SKILL.md @@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops. + ``` ┌─────────────────────────────────────────────────────────────┐ │ RESEARCH PAPER PIPELINE │ @@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. 
Results trigger n │ │ └─────────────────────────────────────────────────────────────┘ ``` + --- diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index faa4c18a7..d4afed101 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -904,9 +904,15 @@ class TestRegisterSessionMcpServers: ] with patch("tools.mcp_tool.register_mcp_servers", return_value=["mcp_srv_search"]), \ - patch("model_tools.get_tool_definitions", return_value=fake_tools): + patch("model_tools.get_tool_definitions", return_value=fake_tools) as mock_defs: await agent._register_session_mcp_servers(state, [server]) + mock_defs.assert_called_once_with( + enabled_toolsets=["hermes-acp", "mcp-srv"], + disabled_toolsets=None, + quiet_mode=True, + ) + assert state.agent.enabled_toolsets == ["hermes-acp", "mcp-srv"] assert state.agent.tools == fake_tools assert state.agent.valid_tool_names == {"mcp_srv_search", "terminal"} # _invalidate_system_prompt should have been called diff --git a/tests/acp/test_session.py b/tests/acp/test_session.py index 50d04b1a9..c86819f6d 100644 --- a/tests/acp/test_session.py +++ b/tests/acp/test_session.py @@ -138,6 +138,43 @@ class TestListAndCleanup: class TestPersistence: """Verify that sessions are persisted to SessionDB and can be restored.""" + def test_create_session_includes_registered_mcp_toolsets(self, tmp_path, monkeypatch): + captured = {} + + def fake_resolve_runtime_provider(requested=None, **kwargs): + return { + "provider": "openrouter", + "api_mode": "chat_completions", + "base_url": "https://openrouter.example/v1", + "api_key": "***", + "command": None, + "args": [], + } + + def fake_agent(**kwargs): + captured.update(kwargs) + return SimpleNamespace(model=kwargs.get("model"), enabled_toolsets=kwargs.get("enabled_toolsets")) + + monkeypatch.setattr("hermes_cli.config.load_config", lambda: { + "model": {"provider": "openrouter", "default": "test-model"}, + "mcp_servers": { + "olympus": {"command": "python", "enabled": True}, + 
"exa": {"url": "https://exa.ai/mcp"}, + "disabled": {"command": "python", "enabled": False}, + }, + }) + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + fake_resolve_runtime_provider, + ) + db = SessionDB(tmp_path / "state.db") + + with patch("run_agent.AIAgent", side_effect=fake_agent): + manager = SessionManager(db=db) + manager.create_session(cwd="/work") + + assert captured["enabled_toolsets"] == ["hermes-acp", "mcp-olympus", "mcp-exa"] + def test_create_session_writes_to_db(self, manager): state = manager.create_session(cwd="/project") db = manager._get_db() diff --git a/tests/agent/test_anthropic_keychain.py b/tests/agent/test_anthropic_keychain.py new file mode 100644 index 000000000..c0f9c7718 --- /dev/null +++ b/tests/agent/test_anthropic_keychain.py @@ -0,0 +1,165 @@ +"""Tests for Bug #12905 fixes in agent/anthropic_adapter.py — macOS Keychain support.""" + +import json +import platform +from unittest.mock import patch, MagicMock + +import pytest + +from agent.anthropic_adapter import ( + _read_claude_code_credentials_from_keychain, + read_claude_code_credentials, +) + + +class TestReadClaudeCodeCredentialsFromKeychain: + """Bug 4: macOS Keychain support for Claude Code >=2.1.114.""" + + def test_returns_none_on_linux(self): + """Keychain reading is Darwin-only; must return None on other platforms.""" + with patch("agent.anthropic_adapter.platform.system", return_value="Linux"): + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_on_windows(self): + with patch("agent.anthropic_adapter.platform.system", return_value="Windows"): + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_when_security_command_not_found(self): + """OSError from missing security binary must be handled gracefully.""" + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run", + side_effect=OSError("security 
not found")): + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_on_nonzero_exit_code(self): + """security returns non-zero when the Keychain entry doesn't exist.""" + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="") + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_for_empty_stdout(self): + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="") + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_for_non_json_payload(self): + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0, stdout="not valid json", stderr="") + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_when_password_field_is_missing_claude_ai_oauth(self): + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({"someOtherService": {"accessToken": "tok"}}), + stderr="", + ) + assert _read_claude_code_credentials_from_keychain() is None + + def test_returns_none_when_access_token_is_empty(self): + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}}), + stderr="", + ) + assert _read_claude_code_credentials_from_keychain() 
is None + + def test_parses_valid_keychain_entry(self): + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({ + "claudeAiOauth": { + "accessToken": "kc-access-token-abc", + "refreshToken": "kc-refresh-token-xyz", + "expiresAt": 9999999999999, + } + }), + stderr="", + ) + creds = _read_claude_code_credentials_from_keychain() + assert creds is not None + assert creds["accessToken"] == "kc-access-token-abc" + assert creds["refreshToken"] == "kc-refresh-token-xyz" + assert creds["expiresAt"] == 9999999999999 + assert creds["source"] == "macos_keychain" + + +class TestReadClaudeCodeCredentialsPriority: + """Bug 4: Keychain must be checked before the JSON file.""" + + def test_keychain_takes_priority_over_json_file(self, tmp_path, monkeypatch): + """When both Keychain and JSON file have credentials, Keychain wins.""" + # Set up JSON file with "older" token + json_cred_file = tmp_path / ".claude" / ".credentials.json" + json_cred_file.parent.mkdir(parents=True) + json_cred_file.write_text(json.dumps({ + "claudeAiOauth": { + "accessToken": "json-token", + "refreshToken": "json-refresh", + "expiresAt": 9999999999999, + } + })) + monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) + + # Mock Keychain to return a "newer" token + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout=json.dumps({ + "claudeAiOauth": { + "accessToken": "keychain-token", + "refreshToken": "keychain-refresh", + "expiresAt": 9999999999999, + } + }), + stderr="", + ) + creds = read_claude_code_credentials() + + # Keychain token should be returned, not JSON file token + assert creds is not None + assert creds["accessToken"] == "keychain-token" + assert creds["source"] 
== "macos_keychain" + + def test_falls_back_to_json_when_keychain_returns_none(self, tmp_path, monkeypatch): + """When Keychain has no entry, JSON file is used as fallback.""" + json_cred_file = tmp_path / ".claude" / ".credentials.json" + json_cred_file.parent.mkdir(parents=True) + json_cred_file.write_text(json.dumps({ + "claudeAiOauth": { + "accessToken": "json-fallback-token", + "refreshToken": "json-refresh", + "expiresAt": 9999999999999, + } + })) + monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) + + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + # Simulate Keychain entry not found + mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="") + creds = read_claude_code_credentials() + + assert creds is not None + assert creds["accessToken"] == "json-fallback-token" + assert creds["source"] == "claude_code_credentials_file" + + def test_returns_none_when_neither_keychain_nor_json_has_creds(self, tmp_path, monkeypatch): + """No credentials anywhere — must return None cleanly.""" + monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path) + + with patch("agent.anthropic_adapter.platform.system", return_value="Darwin"), \ + patch("agent.anthropic_adapter.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="") + creds = read_claude_code_credentials() + + assert creds is None diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index b5b74bd30..5ee0f1265 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -19,6 +19,7 @@ from agent.auxiliary_client import ( _read_codex_access_token, _get_provider_chain, _is_payment_error, + _normalize_aux_provider, _try_payment_fallback, _resolve_auto, ) @@ -54,6 +55,17 @@ def codex_auth_dir(tmp_path, monkeypatch): return codex_dir +class 
TestNormalizeAuxProvider: + def test_maps_github_copilot_aliases(self): + assert _normalize_aux_provider("github") == "copilot" + assert _normalize_aux_provider("github-copilot") == "copilot" + assert _normalize_aux_provider("github-models") == "copilot" + + def test_maps_github_copilot_acp_aliases(self): + assert _normalize_aux_provider("github-copilot-acp") == "copilot-acp" + assert _normalize_aux_provider("copilot-acp-agent") == "copilot-acp" + + class TestReadCodexAccessToken: def test_valid_auth_store(self, tmp_path, monkeypatch): hermes_home = tmp_path / "hermes" @@ -1203,3 +1215,201 @@ class TestAnthropicCompatImageConversion: }] result = _convert_openai_images_to_anthropic(messages) assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg" + + +class _AuxAuth401(Exception): + status_code = 401 + + def __init__(self, message="Provided authentication token is expired"): + super().__init__(message) + + +class _DummyResponse: + def __init__(self, text="ok"): + self.choices = [MagicMock(message=MagicMock(content=text))] + + +class _FailingThenSuccessCompletions: + def __init__(self): + self.calls = 0 + + def create(self, **kwargs): + self.calls += 1 + if self.calls == 1: + raise _AuxAuth401() + return _DummyResponse("sync-ok") + + +class _AsyncFailingThenSuccessCompletions: + def __init__(self): + self.calls = 0 + + async def create(self, **kwargs): + self.calls += 1 + if self.calls == 1: + raise _AuxAuth401() + return _DummyResponse("async-ok") + + +class TestAuxiliaryAuthRefreshRetry: + def test_call_llm_refreshes_codex_on_401_for_vision(self): + failing_client = MagicMock() + failing_client.base_url = "https://chatgpt.com/backend-api/codex" + failing_client.chat.completions = _FailingThenSuccessCompletions() + + fresh_client = MagicMock() + fresh_client.base_url = "https://chatgpt.com/backend-api/codex" + fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-sync") + + with ( + patch( + 
"agent.auxiliary_client.resolve_vision_provider_client", + side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")], + ), + patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, + ): + resp = call_llm( + task="vision", + provider="openai-codex", + model="gpt-5.2-codex", + messages=[{"role": "user", "content": "hi"}], + ) + + assert resp.choices[0].message.content == "fresh-sync" + mock_refresh.assert_called_once_with("openai-codex") + + def test_call_llm_refreshes_codex_on_401_for_non_vision(self): + stale_client = MagicMock() + stale_client.base_url = "https://chatgpt.com/backend-api/codex" + stale_client.chat.completions.create.side_effect = _AuxAuth401("stale codex token") + + fresh_client = MagicMock() + fresh_client.base_url = "https://chatgpt.com/backend-api/codex" + fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-non-vision") + + with ( + patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("openai-codex", "gpt-5.2-codex", None, None, None)), + patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "gpt-5.2-codex"), (fresh_client, "gpt-5.2-codex")]), + patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, + ): + resp = call_llm( + task="compression", + provider="openai-codex", + model="gpt-5.2-codex", + messages=[{"role": "user", "content": "hi"}], + ) + + assert resp.choices[0].message.content == "fresh-non-vision" + mock_refresh.assert_called_once_with("openai-codex") + assert stale_client.chat.completions.create.call_count == 1 + assert fresh_client.chat.completions.create.call_count == 1 + + def test_call_llm_refreshes_anthropic_on_401_for_non_vision(self): + stale_client = MagicMock() + stale_client.base_url = "https://api.anthropic.com" + stale_client.chat.completions.create.side_effect = _AuxAuth401("anthropic token expired") + + 
fresh_client = MagicMock() + fresh_client.base_url = "https://api.anthropic.com" + fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-anthropic") + + with ( + patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("anthropic", "claude-haiku-4-5-20251001", None, None, None)), + patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "claude-haiku-4-5-20251001"), (fresh_client, "claude-haiku-4-5-20251001")]), + patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, + ): + resp = call_llm( + task="compression", + provider="anthropic", + model="claude-haiku-4-5-20251001", + messages=[{"role": "user", "content": "hi"}], + ) + + assert resp.choices[0].message.content == "fresh-anthropic" + mock_refresh.assert_called_once_with("anthropic") + assert stale_client.chat.completions.create.call_count == 1 + assert fresh_client.chat.completions.create.call_count == 1 + + @pytest.mark.asyncio + async def test_async_call_llm_refreshes_codex_on_401_for_vision(self): + failing_client = MagicMock() + failing_client.base_url = "https://chatgpt.com/backend-api/codex" + failing_client.chat.completions = _AsyncFailingThenSuccessCompletions() + + fresh_client = MagicMock() + fresh_client.base_url = "https://chatgpt.com/backend-api/codex" + fresh_client.chat.completions.create = AsyncMock(return_value=_DummyResponse("fresh-async")) + + with ( + patch( + "agent.auxiliary_client.resolve_vision_provider_client", + side_effect=[("openai-codex", failing_client, "gpt-5.2-codex"), ("openai-codex", fresh_client, "gpt-5.2-codex")], + ), + patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, + ): + resp = await async_call_llm( + task="vision", + provider="openai-codex", + model="gpt-5.2-codex", + messages=[{"role": "user", "content": "hi"}], + ) + + assert resp.choices[0].message.content == "fresh-async" + 
mock_refresh.assert_called_once_with("openai-codex") + + def test_refresh_provider_credentials_force_refreshes_anthropic_oauth_and_evicts_cache(self, monkeypatch): + stale_client = MagicMock() + cache_key = ("anthropic", False, None, None, None) + + monkeypatch.setenv("ANTHROPIC_TOKEN", "") + monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "") + monkeypatch.setenv("ANTHROPIC_API_KEY", "") + + with ( + patch("agent.auxiliary_client._client_cache", {cache_key: (stale_client, "claude-haiku-4-5-20251001", None)}), + patch("agent.anthropic_adapter.read_claude_code_credentials", return_value={ + "accessToken": "expired-token", + "refreshToken": "refresh-token", + "expiresAt": 0, + }), + patch("agent.anthropic_adapter.refresh_anthropic_oauth_pure", return_value={ + "access_token": "fresh-token", + "refresh_token": "refresh-token-2", + "expires_at_ms": 9999999999999, + }) as mock_refresh_oauth, + patch("agent.anthropic_adapter._write_claude_code_credentials") as mock_write, + ): + from agent.auxiliary_client import _refresh_provider_credentials + + assert _refresh_provider_credentials("anthropic") is True + + mock_refresh_oauth.assert_called_once_with("refresh-token", use_json=False) + mock_write.assert_called_once_with("fresh-token", "refresh-token-2", 9999999999999) + stale_client.close.assert_called_once() + + @pytest.mark.asyncio + async def test_async_call_llm_refreshes_anthropic_on_401_for_non_vision(self): + stale_client = MagicMock() + stale_client.base_url = "https://api.anthropic.com" + stale_client.chat.completions.create = AsyncMock(side_effect=_AuxAuth401("anthropic token expired")) + + fresh_client = MagicMock() + fresh_client.base_url = "https://api.anthropic.com" + fresh_client.chat.completions.create = AsyncMock(return_value=_DummyResponse("fresh-async-anthropic")) + + with ( + patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("anthropic", "claude-haiku-4-5-20251001", None, None, None)), + 
patch("agent.auxiliary_client._get_cached_client", side_effect=[(stale_client, "claude-haiku-4-5-20251001"), (fresh_client, "claude-haiku-4-5-20251001")]), + patch("agent.auxiliary_client._refresh_provider_credentials", return_value=True) as mock_refresh, + ): + resp = await async_call_llm( + task="compression", + provider="anthropic", + model="claude-haiku-4-5-20251001", + messages=[{"role": "user", "content": "hi"}], + ) + + assert resp.choices[0].message.content == "fresh-async-anthropic" + mock_refresh.assert_called_once_with("anthropic") + assert stale_client.chat.completions.create.await_count == 1 + assert fresh_client.chat.completions.create.await_count == 1 diff --git a/tests/agent/test_auxiliary_named_custom_providers.py b/tests/agent/test_auxiliary_named_custom_providers.py index 437a6c400..5152428b6 100644 --- a/tests/agent/test_auxiliary_named_custom_providers.py +++ b/tests/agent/test_auxiliary_named_custom_providers.py @@ -100,6 +100,26 @@ class TestResolveProviderClientMainAlias: assert client is not None assert "beans.local" in str(client.base_url) + def test_main_resolves_github_copilot_alias(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "gpt-5.4", "provider": "github-copilot"}, + }) + with ( + patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={ + "api_key": "ghu_test_token", + "base_url": "https://api.githubcopilot.com", + }), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + mock_openai.return_value = MagicMock() + from agent.auxiliary_client import resolve_provider_client + + client, model = resolve_provider_client("main", "gpt-5.4") + + assert client is not None + assert model == "gpt-5.4" + assert mock_openai.called + class TestResolveProviderClientNamedCustom: """resolve_provider_client should resolve named custom providers directly.""" @@ -252,3 +272,158 @@ class TestVisionPathApiMode: mock_gcc.assert_called_once() _, kwargs = mock_gcc.call_args assert kwargs.get("api_mode") == 
"chat_completions" + + +class TestProvidersDictApiModeAnthropicMessages: + """Regression guard for #15033. + + Named providers declared under the ``providers:`` dict with + ``api_mode: anthropic_messages`` must route auxiliary calls through + the Anthropic Messages API (via AnthropicAuxiliaryClient), not + through an OpenAI chat-completions client. + + The bug had two halves: the providers-dict branch of + ``_get_named_custom_provider`` dropped the ``api_mode`` field, and + ``resolve_provider_client``'s named-custom branch never read it. + """ + + def test_providers_dict_propagates_api_mode(self, tmp_path, monkeypatch): + monkeypatch.setenv("MYRELAY_API_KEY", "sk-test") + _write_config(tmp_path, { + "providers": { + "myrelay": { + "name": "myrelay", + "base_url": "https://example-relay.test/anthropic", + "key_env": "MYRELAY_API_KEY", + "api_mode": "anthropic_messages", + "default_model": "claude-opus-4-7", + }, + }, + }) + from hermes_cli.runtime_provider import _get_named_custom_provider + entry = _get_named_custom_provider("myrelay") + assert entry is not None + assert entry.get("api_mode") == "anthropic_messages" + assert entry.get("base_url") == "https://example-relay.test/anthropic" + assert entry.get("api_key") == "sk-test" + + def test_providers_dict_invalid_api_mode_is_dropped(self, tmp_path): + _write_config(tmp_path, { + "providers": { + "weird": { + "name": "weird", + "base_url": "https://example.test", + "api_mode": "bogus_nonsense", + "default_model": "x", + }, + }, + }) + from hermes_cli.runtime_provider import _get_named_custom_provider + entry = _get_named_custom_provider("weird") + assert entry is not None + assert "api_mode" not in entry + + def test_providers_dict_without_api_mode_is_unchanged(self, tmp_path): + _write_config(tmp_path, { + "providers": { + "localchat": { + "name": "localchat", + "base_url": "http://127.0.0.1:1234/v1", + "api_key": "local-key", + "default_model": "llama-3", + }, + }, + }) + from hermes_cli.runtime_provider import 
_get_named_custom_provider + entry = _get_named_custom_provider("localchat") + assert entry is not None + assert "api_mode" not in entry + + def test_resolve_provider_client_returns_anthropic_client(self, tmp_path, monkeypatch): + """Named custom provider with api_mode=anthropic_messages must + route through AnthropicAuxiliaryClient.""" + monkeypatch.setenv("MYRELAY_API_KEY", "sk-test") + _write_config(tmp_path, { + "providers": { + "myrelay": { + "name": "myrelay", + "base_url": "https://example-relay.test/anthropic", + "key_env": "MYRELAY_API_KEY", + "api_mode": "anthropic_messages", + "default_model": "claude-opus-4-7", + }, + }, + }) + from agent.auxiliary_client import ( + resolve_provider_client, + AnthropicAuxiliaryClient, + AsyncAnthropicAuxiliaryClient, + ) + sync_client, sync_model = resolve_provider_client("myrelay", async_mode=False) + assert isinstance(sync_client, AnthropicAuxiliaryClient), ( + f"expected AnthropicAuxiliaryClient, got {type(sync_client).__name__}" + ) + assert sync_model == "claude-opus-4-7" + + async_client, async_model = resolve_provider_client("myrelay", async_mode=True) + assert isinstance(async_client, AsyncAnthropicAuxiliaryClient), ( + f"expected AsyncAnthropicAuxiliaryClient, got {type(async_client).__name__}" + ) + assert async_model == "claude-opus-4-7" + + def test_aux_task_override_routes_named_provider_to_anthropic(self, tmp_path, monkeypatch): + """The full chain: auxiliary..provider: myrelay with + api_mode anthropic_messages must produce an Anthropic client.""" + monkeypatch.setenv("MYRELAY_API_KEY", "sk-test") + _write_config(tmp_path, { + "providers": { + "myrelay": { + "name": "myrelay", + "base_url": "https://example-relay.test/anthropic", + "key_env": "MYRELAY_API_KEY", + "api_mode": "anthropic_messages", + "default_model": "claude-opus-4-7", + }, + }, + "auxiliary": { + "flush_memories": { + "provider": "myrelay", + "model": "claude-sonnet-4.6", + }, + }, + "model": {"provider": "openrouter", "default": 
"anthropic/claude-sonnet-4.6"}, + }) + from agent.auxiliary_client import ( + get_async_text_auxiliary_client, + get_text_auxiliary_client, + AnthropicAuxiliaryClient, + AsyncAnthropicAuxiliaryClient, + ) + async_client, async_model = get_async_text_auxiliary_client("flush_memories") + assert isinstance(async_client, AsyncAnthropicAuxiliaryClient) + assert async_model == "claude-sonnet-4.6" + + sync_client, sync_model = get_text_auxiliary_client("flush_memories") + assert isinstance(sync_client, AnthropicAuxiliaryClient) + assert sync_model == "claude-sonnet-4.6" + + def test_provider_without_api_mode_still_uses_openai(self, tmp_path): + """Named providers that don't declare api_mode should still go + through the plain OpenAI-wire path (no regression).""" + _write_config(tmp_path, { + "providers": { + "localchat": { + "name": "localchat", + "base_url": "http://127.0.0.1:1234/v1", + "api_key": "local-key", + "default_model": "llama-3", + }, + }, + }) + from agent.auxiliary_client import resolve_provider_client + from openai import OpenAI, AsyncOpenAI + sync_client, _ = resolve_provider_client("localchat", async_mode=False) + # sync returns the raw OpenAI client + assert isinstance(sync_client, OpenAI) + async_client, _ = resolve_provider_client("localchat", async_mode=True) + assert isinstance(async_client, AsyncOpenAI) diff --git a/tests/agent/test_bedrock_adapter.py b/tests/agent/test_bedrock_adapter.py index d12be7b88..fea136604 100644 --- a/tests/agent/test_bedrock_adapter.py +++ b/tests/agent/test_bedrock_adapter.py @@ -1230,3 +1230,210 @@ class TestEmptyTextBlockFix: from agent.bedrock_adapter import _convert_content_to_converse blocks = _convert_content_to_converse("Hello") assert blocks[0]["text"] == "Hello" + + +# --------------------------------------------------------------------------- +# Stale-connection detection and per-region client invalidation +# --------------------------------------------------------------------------- + +class 
TestInvalidateRuntimeClient: + """Per-region eviction used to discard dead/stale bedrock-runtime clients.""" + + def test_evicts_only_the_target_region(self): + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + invalidate_runtime_client, + reset_client_cache, + ) + reset_client_cache() + _bedrock_runtime_client_cache["us-east-1"] = "dead-client" + _bedrock_runtime_client_cache["us-west-2"] = "live-client" + + evicted = invalidate_runtime_client("us-east-1") + + assert evicted is True + assert "us-east-1" not in _bedrock_runtime_client_cache + assert _bedrock_runtime_client_cache["us-west-2"] == "live-client" + + def test_returns_false_when_region_not_cached(self): + from agent.bedrock_adapter import invalidate_runtime_client, reset_client_cache + reset_client_cache() + assert invalidate_runtime_client("eu-west-1") is False + + +class TestIsStaleConnectionError: + """Classifier that decides whether an exception warrants client eviction.""" + + def test_detects_botocore_connection_closed_error(self): + from agent.bedrock_adapter import is_stale_connection_error + from botocore.exceptions import ConnectionClosedError + exc = ConnectionClosedError(endpoint_url="https://bedrock.example") + assert is_stale_connection_error(exc) is True + + def test_detects_botocore_endpoint_connection_error(self): + from agent.bedrock_adapter import is_stale_connection_error + from botocore.exceptions import EndpointConnectionError + exc = EndpointConnectionError(endpoint_url="https://bedrock.example") + assert is_stale_connection_error(exc) is True + + def test_detects_botocore_read_timeout(self): + from agent.bedrock_adapter import is_stale_connection_error + from botocore.exceptions import ReadTimeoutError + exc = ReadTimeoutError(endpoint_url="https://bedrock.example") + assert is_stale_connection_error(exc) is True + + def test_detects_urllib3_protocol_error(self): + from agent.bedrock_adapter import is_stale_connection_error + from urllib3.exceptions import 
ProtocolError + exc = ProtocolError("Connection broken") + assert is_stale_connection_error(exc) is True + + def test_detects_library_internal_assertion_error(self): + """A bare AssertionError raised from inside urllib3/botocore signals + a corrupted connection-pool invariant and should trigger eviction.""" + from agent.bedrock_adapter import is_stale_connection_error + + # Fabricate an AssertionError whose traceback's last frame belongs + # to a module named "urllib3.connectionpool". We do this by exec'ing + # a tiny `assert False` under a fake globals dict — the resulting + # frame's ``f_globals["__name__"]`` is what the classifier inspects. + fake_globals = {"__name__": "urllib3.connectionpool"} + try: + exec("def _boom():\n assert False\n_boom()", fake_globals) + except AssertionError as exc: + assert is_stale_connection_error(exc) is True + else: + pytest.fail("AssertionError not raised") + + def test_detects_botocore_internal_assertion_error(self): + """Same as above but for a frame inside the botocore namespace.""" + from agent.bedrock_adapter import is_stale_connection_error + fake_globals = {"__name__": "botocore.httpsession"} + try: + exec("def _boom():\n assert False\n_boom()", fake_globals) + except AssertionError as exc: + assert is_stale_connection_error(exc) is True + else: + pytest.fail("AssertionError not raised") + + def test_ignores_application_assertion_error(self): + """AssertionError from application code (not urllib3/botocore) should + NOT be classified as stale — those are real test/code bugs.""" + from agent.bedrock_adapter import is_stale_connection_error + try: + assert False, "test-only" # noqa: B011 + except AssertionError as exc: + assert is_stale_connection_error(exc) is False + + def test_ignores_unrelated_exceptions(self): + from agent.bedrock_adapter import is_stale_connection_error + assert is_stale_connection_error(ValueError("bad input")) is False + assert is_stale_connection_error(KeyError("missing")) is False + + +class 
TestCallConverseInvalidatesOnStaleError: + """call_converse / call_converse_stream evict the cached client when the + boto3 call raises a stale-connection error — so the next invocation + reconnects instead of reusing the dead socket.""" + + def test_converse_evicts_client_on_stale_error(self): + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + call_converse, + reset_client_cache, + ) + from botocore.exceptions import ConnectionClosedError + + reset_client_cache() + dead_client = MagicMock() + dead_client.converse.side_effect = ConnectionClosedError( + endpoint_url="https://bedrock.example", + ) + _bedrock_runtime_client_cache["us-east-1"] = dead_client + + with pytest.raises(ConnectionClosedError): + call_converse( + region="us-east-1", + model="anthropic.claude-3-sonnet-20240229-v1:0", + messages=[{"role": "user", "content": "hi"}], + ) + + assert "us-east-1" not in _bedrock_runtime_client_cache, ( + "stale client should have been evicted so the retry reconnects" + ) + + def test_converse_stream_evicts_client_on_stale_error(self): + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + call_converse_stream, + reset_client_cache, + ) + from botocore.exceptions import ConnectionClosedError + + reset_client_cache() + dead_client = MagicMock() + dead_client.converse_stream.side_effect = ConnectionClosedError( + endpoint_url="https://bedrock.example", + ) + _bedrock_runtime_client_cache["us-east-1"] = dead_client + + with pytest.raises(ConnectionClosedError): + call_converse_stream( + region="us-east-1", + model="anthropic.claude-3-sonnet-20240229-v1:0", + messages=[{"role": "user", "content": "hi"}], + ) + + assert "us-east-1" not in _bedrock_runtime_client_cache + + def test_converse_does_not_evict_on_non_stale_error(self): + """Non-stale errors (e.g. 
ValidationException) leave the client cache alone.""" + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + call_converse, + reset_client_cache, + ) + from botocore.exceptions import ClientError + + reset_client_cache() + live_client = MagicMock() + live_client.converse.side_effect = ClientError( + error_response={"Error": {"Code": "ValidationException", "Message": "bad"}}, + operation_name="Converse", + ) + _bedrock_runtime_client_cache["us-east-1"] = live_client + + with pytest.raises(ClientError): + call_converse( + region="us-east-1", + model="anthropic.claude-3-sonnet-20240229-v1:0", + messages=[{"role": "user", "content": "hi"}], + ) + + assert _bedrock_runtime_client_cache.get("us-east-1") is live_client, ( + "validation errors do not indicate a dead connection — keep the client" + ) + + def test_converse_leaves_successful_client_in_cache(self): + from agent.bedrock_adapter import ( + _bedrock_runtime_client_cache, + call_converse, + reset_client_cache, + ) + + reset_client_cache() + live_client = MagicMock() + live_client.converse.return_value = { + "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}}, + "stopReason": "end_turn", + "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2}, + } + _bedrock_runtime_client_cache["us-east-1"] = live_client + + call_converse( + region="us-east-1", + model="anthropic.claude-3-sonnet-20240229-v1:0", + messages=[{"role": "user", "content": "hi"}], + ) + + assert _bedrock_runtime_client_cache.get("us-east-1") is live_client diff --git a/tests/agent/test_bedrock_integration.py b/tests/agent/test_bedrock_integration.py index 202bd3ebd..954075ab7 100644 --- a/tests/agent/test_bedrock_integration.py +++ b/tests/agent/test_bedrock_integration.py @@ -376,17 +376,15 @@ class TestBedrockModelNameNormalization: "apac.anthropic.claude-haiku-4-5", preserve_dots=True ) == "apac.anthropic.claude-haiku-4-5" - def test_preserve_false_mangles_as_documented(self): - """Canary: with 
``preserve_dots=False`` the function still - produces the broken all-hyphen form — this is the shape that - Bedrock rejected and that the fix avoids. Keeping this test - locks in the existing behaviour of ``normalize_model_name`` so a - future refactor doesn't accidentally decouple the knob from its - effect.""" + def test_bedrock_prefix_preserved_without_preserve_dots(self): + """Bedrock inference profile IDs are auto-detected by prefix and + always returned unmangled -- ``preserve_dots`` is irrelevant for + these IDs because the dots are namespace separators, not version + separators. Regression for #12295.""" from agent.anthropic_adapter import normalize_model_name assert normalize_model_name( "global.anthropic.claude-opus-4-7", preserve_dots=False - ) == "global-anthropic-claude-opus-4-7" + ) == "global.anthropic.claude-opus-4-7" def test_bare_foundation_model_id_preserved(self): """Non-inference-profile Bedrock IDs @@ -422,12 +420,11 @@ class TestBedrockBuildAnthropicKwargsEndToEnd: f"{kwargs['model']!r}" ) - def test_bedrock_model_mangled_without_preserve_dots(self): - """Inverse canary: without the flag, ``build_anthropic_kwargs`` - still produces the broken form — so the fix in - ``_anthropic_preserve_dots`` is the load-bearing piece that - wires ``preserve_dots=True`` through to this builder for the - Bedrock case.""" + def test_bedrock_model_preserved_without_preserve_dots(self): + """Bedrock inference profile IDs survive ``build_anthropic_kwargs`` + even without ``preserve_dots=True`` -- the prefix auto-detection + in ``normalize_model_name`` is the load-bearing piece. 
+ Regression for #12295.""" from agent.anthropic_adapter import build_anthropic_kwargs kwargs = build_anthropic_kwargs( model="global.anthropic.claude-opus-4-7", @@ -437,4 +434,157 @@ class TestBedrockBuildAnthropicKwargsEndToEnd: reasoning_config=None, preserve_dots=False, ) - assert kwargs["model"] == "global-anthropic-claude-opus-4-7" + assert kwargs["model"] == "global.anthropic.claude-opus-4-7" + + +class TestBedrockModelIdDetection: + """Tests for ``_is_bedrock_model_id`` and the auto-detection that + makes ``normalize_model_name`` preserve dots for Bedrock IDs + regardless of ``preserve_dots``. Regression for #12295.""" + + def test_bare_bedrock_id_detected(self): + from agent.anthropic_adapter import _is_bedrock_model_id + assert _is_bedrock_model_id("anthropic.claude-opus-4-7") is True + + def test_regional_us_prefix_detected(self): + from agent.anthropic_adapter import _is_bedrock_model_id + assert _is_bedrock_model_id("us.anthropic.claude-sonnet-4-5-v1:0") is True + + def test_regional_global_prefix_detected(self): + from agent.anthropic_adapter import _is_bedrock_model_id + assert _is_bedrock_model_id("global.anthropic.claude-opus-4-7") is True + + def test_regional_eu_prefix_detected(self): + from agent.anthropic_adapter import _is_bedrock_model_id + assert _is_bedrock_model_id("eu.anthropic.claude-sonnet-4-6") is True + + def test_openrouter_format_not_detected(self): + from agent.anthropic_adapter import _is_bedrock_model_id + assert _is_bedrock_model_id("claude-opus-4.6") is False + + def test_bare_claude_not_detected(self): + from agent.anthropic_adapter import _is_bedrock_model_id + assert _is_bedrock_model_id("claude-opus-4-7") is False + + def test_bare_bedrock_id_preserved_without_flag(self): + """The primary bug from #12295: ``anthropic.claude-opus-4-7`` + sent to bedrock-mantle via auxiliary clients that don't pass + ``preserve_dots=True``.""" + from agent.anthropic_adapter import normalize_model_name + assert normalize_model_name( + 
"anthropic.claude-opus-4-7", preserve_dots=False + ) == "anthropic.claude-opus-4-7" + + def test_openrouter_dots_still_converted(self): + """Non-Bedrock dotted model names must still be converted.""" + from agent.anthropic_adapter import normalize_model_name + assert normalize_model_name("claude-opus-4.6") == "claude-opus-4-6" + + def test_bare_bedrock_id_survives_build_kwargs(self): + """End-to-end: bare Bedrock ID through ``build_anthropic_kwargs`` + without ``preserve_dots=True`` -- the auxiliary client path.""" + from agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="anthropic.claude-opus-4-7", + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=1024, + reasoning_config=None, + preserve_dots=False, + ) + assert kwargs["model"] == "anthropic.claude-opus-4-7" + + +# --------------------------------------------------------------------------- +# auxiliary_client Bedrock resolution — fix for #13919 +# --------------------------------------------------------------------------- +# Before the fix, resolve_provider_client("bedrock", ...) fell through to the +# "unhandled auth_type" warning and returned (None, None), breaking all +# auxiliary tasks (compression, memory, summarization) for Bedrock users. 
+ + +class TestAuxiliaryClientBedrockResolution: + """Verify resolve_provider_client handles Bedrock's aws_sdk auth type.""" + + def test_bedrock_returns_client_with_credentials(self, monkeypatch): + """With valid AWS credentials, Bedrock should return a usable client.""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + monkeypatch.setenv("AWS_REGION", "us-west-2") + + mock_anthropic_bedrock = MagicMock() + with patch("agent.anthropic_adapter.build_anthropic_bedrock_client", + return_value=mock_anthropic_bedrock): + from agent.auxiliary_client import resolve_provider_client, AnthropicAuxiliaryClient + client, model = resolve_provider_client("bedrock", None) + + assert client is not None, ( + "resolve_provider_client('bedrock') returned None — " + "aws_sdk auth type is not handled" + ) + assert isinstance(client, AnthropicAuxiliaryClient) + assert model is not None + assert client.api_key == "aws-sdk" + assert "us-west-2" in client.base_url + + def test_bedrock_returns_none_without_credentials(self, monkeypatch): + """Without AWS credentials, Bedrock should return (None, None) gracefully.""" + with patch("agent.bedrock_adapter.has_aws_credentials", return_value=False): + from agent.auxiliary_client import resolve_provider_client + client, model = resolve_provider_client("bedrock", None) + + assert client is None + assert model is None + + def test_bedrock_uses_configured_region(self, monkeypatch): + """Bedrock client base_url should reflect AWS_REGION.""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + monkeypatch.setenv("AWS_REGION", "eu-central-1") + + with patch("agent.anthropic_adapter.build_anthropic_bedrock_client", + return_value=MagicMock()): + from agent.auxiliary_client import resolve_provider_client + client, _ = 
resolve_provider_client("bedrock", None) + + assert client is not None + assert "eu-central-1" in client.base_url + + def test_bedrock_respects_explicit_model(self, monkeypatch): + """When caller passes an explicit model, it should be used.""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + + with patch("agent.anthropic_adapter.build_anthropic_bedrock_client", + return_value=MagicMock()): + from agent.auxiliary_client import resolve_provider_client + _, model = resolve_provider_client( + "bedrock", "us.anthropic.claude-sonnet-4-5-20250929-v1:0" + ) + + assert "claude-sonnet" in model + + def test_bedrock_async_mode(self, monkeypatch): + """Async mode should return an AsyncAnthropicAuxiliaryClient.""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + + with patch("agent.anthropic_adapter.build_anthropic_bedrock_client", + return_value=MagicMock()): + from agent.auxiliary_client import resolve_provider_client, AsyncAnthropicAuxiliaryClient + client, model = resolve_provider_client("bedrock", None, async_mode=True) + + assert client is not None + assert isinstance(client, AsyncAnthropicAuxiliaryClient) + + def test_bedrock_default_model_is_haiku(self, monkeypatch): + """Default auxiliary model for Bedrock should be Haiku (fast, cheap).""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE") + monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY") + + with patch("agent.anthropic_adapter.build_anthropic_bedrock_client", + return_value=MagicMock()): + from agent.auxiliary_client import resolve_provider_client + _, model = resolve_provider_client("bedrock", None) + + assert "haiku" in model.lower() diff --git a/tests/agent/test_copilot_acp_client.py b/tests/agent/test_copilot_acp_client.py index 
52ad20a35..63c87fdab 100644 --- a/tests/agent/test_copilot_acp_client.py +++ b/tests/agent/test_copilot_acp_client.py @@ -144,3 +144,60 @@ class CopilotACPClientSafetyTests(unittest.TestCase): if __name__ == "__main__": unittest.main() + + +# ── HOME env propagation tests (from PR #11285) ───────────────────── + +from unittest.mock import patch as _patch +import pytest + + +def _make_home_client(tmp_path): + return CopilotACPClient( + api_key="copilot-acp", + base_url="acp://copilot", + acp_command="copilot", + acp_args=["--acp", "--stdio"], + acp_cwd=str(tmp_path), + ) + + +def _fake_popen_capture(captured): + def _fake(cmd, **kwargs): + captured["cmd"] = cmd + captured["kwargs"] = kwargs + raise FileNotFoundError("copilot not found") + return _fake + + +def test_run_prompt_prefers_profile_home_when_available(monkeypatch, tmp_path): + hermes_home = tmp_path / "hermes" + profile_home = hermes_home / "home" + profile_home.mkdir(parents=True) + + monkeypatch.delenv("HOME", raising=False) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + captured = {} + client = _make_home_client(tmp_path) + + with _patch("agent.copilot_acp_client.subprocess.Popen", side_effect=_fake_popen_capture(captured)): + with pytest.raises(RuntimeError, match="Could not start Copilot ACP command"): + client._run_prompt("hello", timeout_seconds=1) + + assert captured["kwargs"]["env"]["HOME"] == str(profile_home) + + +def test_run_prompt_passes_home_when_parent_env_is_clean(monkeypatch, tmp_path): + monkeypatch.delenv("HOME", raising=False) + monkeypatch.delenv("HERMES_HOME", raising=False) + + captured = {} + client = _make_home_client(tmp_path) + + with _patch("agent.copilot_acp_client.subprocess.Popen", side_effect=_fake_popen_capture(captured)): + with pytest.raises(RuntimeError, match="Could not start Copilot ACP command"): + client._run_prompt("hello", timeout_seconds=1) + + assert "env" in captured["kwargs"] + assert captured["kwargs"]["env"]["HOME"] diff --git 
a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py index 76e1412bf..7f3a835f1 100644 --- a/tests/agent/test_credential_pool.py +++ b/tests/agent/test_credential_pool.py @@ -1102,3 +1102,271 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch) assert not pool.has_credentials() assert pool.entries() == [] + + +def test_nous_seed_from_singletons_preserves_obtained_at_timestamps(tmp_path, monkeypatch): + """Regression test for #15099 secondary issue. + + When ``_seed_from_singletons`` materialises a device_code pool entry from + the ``providers.nous`` singleton, it must carry the mint/refresh + timestamps (``obtained_at``, ``agent_key_obtained_at``, ``expires_in``, + etc.) into the pool entry. Without them, freshness-sensitive consumers + (self-heal hooks, pool pruning by age) treat just-minted credentials as + older than they actually are and evict them. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": { + "nous": { + "access_token": "at_XXXXXXXX", + "refresh_token": "rt_YYYYYYYY", + "client_id": "hermes-cli", + "portal_base_url": "https://portal.nousresearch.com", + "inference_base_url": "https://inference.nousresearch.com/v1", + "token_type": "Bearer", + "scope": "openid profile", + "obtained_at": "2026-04-24T10:00:00+00:00", + "expires_at": "2026-04-24T11:00:00+00:00", + "expires_in": 3600, + "agent_key": "sk-nous-AAAA", + "agent_key_id": "ak_123", + "agent_key_expires_at": "2026-04-25T10:00:00+00:00", + "agent_key_expires_in": 86400, + "agent_key_reused": False, + "agent_key_obtained_at": "2026-04-24T10:00:05+00:00", + "tls": {"insecure": False, "ca_bundle": None}, + }, + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entries = pool.entries() + + device_entries = [e for e in entries if e.source == "device_code"] + assert len(device_entries) == 1, f"expected single device_code 
entry; got {len(device_entries)}" + e = device_entries[0] + + # Direct dataclass fields — must survive the singleton → pool copy. + assert e.access_token == "at_XXXXXXXX" + assert e.refresh_token == "rt_YYYYYYYY" + assert e.expires_at == "2026-04-24T11:00:00+00:00" + assert e.agent_key == "sk-nous-AAAA" + assert e.agent_key_expires_at == "2026-04-25T10:00:00+00:00" + + # Extra fields — this is what regressed. These must be carried through + # via ``extra`` dict or __getattr__, NOT silently dropped. + assert e.obtained_at == "2026-04-24T10:00:00+00:00", ( + f"obtained_at was dropped during seed; got {e.obtained_at!r}. This breaks " + f"downstream pool-freshness consumers (#15099)." + ) + assert e.agent_key_obtained_at == "2026-04-24T10:00:05+00:00" + assert e.expires_in == 3600 + assert e.agent_key_id == "ak_123" + assert e.agent_key_expires_in == 86400 + assert e.agent_key_reused is False + + +class TestLeastUsedStrategy: + """Regression: least_used strategy must increment request_count on select.""" + + def test_request_count_increments(self): + """Each select() call should increment the chosen entry's request_count.""" + from unittest.mock import patch as _patch + from agent.credential_pool import CredentialPool, PooledCredential, STRATEGY_LEAST_USED + + entries = [ + PooledCredential(provider="test", id="a", label="a", auth_type="api_key", + source="a", access_token="tok-a", priority=0, request_count=0), + PooledCredential(provider="test", id="b", label="b", auth_type="api_key", + source="b", access_token="tok-b", priority=1, request_count=0), + ] + with _patch("agent.credential_pool.get_pool_strategy", return_value=STRATEGY_LEAST_USED): + pool = CredentialPool("test", entries) + + # First select should pick entry with lowest count (both 0 → first) + e1 = pool.select() + assert e1 is not None + count_after_first = e1.request_count + assert count_after_first == 1, f"Expected 1 after first select, got {count_after_first}" + + # Second select should pick the OTHER 
entry (now has lower count) + e2 = pool.select() + assert e2 is not None + assert e2.id != e1.id or e2.request_count == 2, ( + "least_used should alternate or increment" + ) + + +# ── PR #10160 salvage: Nous OAuth cross-process sync tests ───────────────── + +def test_sync_nous_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypatch): + """When auth.json has a newer refresh token, the pool entry should adopt it.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-OLD", + "refresh_token": "refresh-OLD", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key-OLD", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + assert entry is not None + assert entry.refresh_token == "refresh-OLD" + + # Simulate another process refreshing the token in auth.json + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-NEW", + "refresh_token": "refresh-NEW", + "expires_at": "2026-03-24T12:30:00+00:00", + "agent_key": "agent-key-NEW", + "agent_key_expires_at": "2026-03-24T14:00:00+00:00", + } + }, + }, + ) + + synced = pool._sync_nous_entry_from_auth_store(entry) + assert synced is not entry + assert synced.access_token == "access-NEW" + assert synced.refresh_token == "refresh-NEW" + assert synced.agent_key == 
"agent-key-NEW" + assert synced.agent_key_expires_at == "2026-03-24T14:00:00+00:00" + +def test_sync_nous_entry_noop_when_tokens_match(tmp_path, monkeypatch): + """When auth.json has the same refresh token, sync should be a no-op.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("nous") + entry = pool.select() + assert entry is not None + + synced = pool._sync_nous_entry_from_auth_store(entry) + assert synced is entry + +def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch): + """An exhausted Nous entry should recover when auth.json has newer tokens.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + from agent.credential_pool import load_pool, STATUS_EXHAUSTED + from dataclasses import replace as dc_replace + + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-OLD", + "refresh_token": "refresh-OLD", + "expires_at": "2026-03-24T12:00:00+00:00", + "agent_key": "agent-key", + "agent_key_expires_at": "2026-03-24T13:30:00+00:00", + } + }, + }, + ) + + pool = load_pool("nous") + entry = pool.select() + assert entry is not None + + # Mark 
entry as exhausted (simulating a failed refresh) + exhausted = dc_replace( + entry, + last_status=STATUS_EXHAUSTED, + last_status_at=time.time(), + last_error_code=401, + ) + pool._replace_entry(entry, exhausted) + pool._persist() + + # Simulate another process having successfully refreshed + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "nous", + "providers": { + "nous": { + "portal_base_url": "https://portal.example.com", + "inference_base_url": "https://inference.example.com/v1", + "client_id": "hermes-cli", + "token_type": "Bearer", + "scope": "inference:mint_agent_key", + "access_token": "access-FRESH", + "refresh_token": "refresh-FRESH", + "expires_at": "2026-03-24T12:30:00+00:00", + "agent_key": "agent-key-FRESH", + "agent_key_expires_at": "2026-03-24T14:00:00+00:00", + } + }, + }, + ) + + available = pool._available_entries(clear_expired=True) + assert len(available) == 1 + assert available[0].refresh_token == "refresh-FRESH" + assert available[0].last_status is None diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index c8faffb0c..e8a92774b 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -56,6 +56,7 @@ class TestFailoverReason: "overloaded", "server_error", "timeout", "context_overflow", "payload_too_large", "model_not_found", "format_error", + "provider_policy_blocked", "thinking_signature", "long_context_tier", "unknown", } actual = {r.value for r in FailoverReason} @@ -308,6 +309,59 @@ class TestClassifyApiError: assert result.retryable is True assert result.should_fallback is False + # ── Provider policy-block (OpenRouter privacy/guardrail) ── + + def test_404_openrouter_policy_blocked(self): + # Real OpenRouter error when the user's account privacy setting + # excludes the only endpoint serving a model (e.g. DeepSeek V4 Pro + # which is hosted only by DeepSeek, and their endpoint may log + # inputs). 
Must NOT classify as model_not_found — the model + # exists, falling back won't help (same account setting applies), + # and the error body already tells the user where to fix it. + e = MockAPIError( + "No endpoints available matching your guardrail restrictions " + "and data policy. Configure: https://openrouter.ai/settings/privacy", + status_code=404, + ) + result = classify_api_error(e) + assert result.reason == FailoverReason.provider_policy_blocked + assert result.retryable is False + assert result.should_fallback is False + + def test_400_openrouter_policy_blocked(self): + # Defense-in-depth: if OpenRouter ever returns this as 400 instead + # of 404, still classify it distinctly rather than as format_error + # or model_not_found. + e = MockAPIError( + "No endpoints available matching your data policy", + status_code=400, + ) + result = classify_api_error(e) + assert result.reason == FailoverReason.provider_policy_blocked + assert result.retryable is False + assert result.should_fallback is False + + def test_message_only_openrouter_policy_blocked(self): + # No status code — classifier should still catch the fingerprint + # via the message-pattern fallback. + e = Exception( + "No endpoints available matching your guardrail restrictions " + "and data policy" + ) + result = classify_api_error(e) + assert result.reason == FailoverReason.provider_policy_blocked + + def test_404_model_not_found_still_works(self): + # Regression guard: the new policy-block check must not swallow + # genuine model_not_found 404s. 
+ e = MockAPIError( + "openrouter/nonexistent-model is not a valid model ID", + status_code=404, + ) + result = classify_api_error(e) + assert result.reason == FailoverReason.model_not_found + assert result.should_fallback is True + # ── Payload too large ── def test_413_payload_too_large(self): @@ -1040,3 +1094,37 @@ class TestSSLTransientPatterns: result = classify_api_error(e) assert result.reason == FailoverReason.timeout assert result.retryable is True + +# ── Test: RateLimitError without status_code (Copilot/GitHub Models) ────────── + +class TestRateLimitErrorWithoutStatusCode: + """Regression tests for the Copilot/GitHub Models edge case where the + OpenAI SDK raises RateLimitError but does not populate .status_code.""" + + def _make_rate_limit_error(self, status_code=None): + """Create an exception whose class name is 'RateLimitError' with + an optionally missing status_code, mirroring the OpenAI SDK shape.""" + cls = type("RateLimitError", (Exception,), {}) + e = cls("You have exceeded your rate limit.") + e.status_code = status_code # None simulates the Copilot case + return e + + def test_rate_limit_error_without_status_code_classified_as_rate_limit(self): + """RateLimitError with status_code=None must classify as rate_limit.""" + e = self._make_rate_limit_error(status_code=None) + result = classify_api_error(e, provider="copilot", model="gpt-4o") + assert result.reason == FailoverReason.rate_limit + + def test_rate_limit_error_with_status_code_429_classified_as_rate_limit(self): + """RateLimitError that does set status_code=429 still classifies correctly.""" + e = self._make_rate_limit_error(status_code=429) + result = classify_api_error(e, provider="copilot", model="gpt-4o") + assert result.reason == FailoverReason.rate_limit + + def test_other_error_without_status_code_not_forced_to_rate_limit(self): + """A non-RateLimitError with missing status_code must NOT be forced to 429.""" + cls = type("APIError", (Exception,), {}) + e = cls("something went 
wrong") + e.status_code = None + result = classify_api_error(e, provider="copilot", model="gpt-4o") + assert result.reason != FailoverReason.rate_limit diff --git a/tests/agent/test_gemini_free_tier_gate.py b/tests/agent/test_gemini_free_tier_gate.py new file mode 100644 index 000000000..bbd74389f --- /dev/null +++ b/tests/agent/test_gemini_free_tier_gate.py @@ -0,0 +1,166 @@ +"""Tests for Gemini free-tier detection and blocking.""" +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from agent.gemini_native_adapter import ( + gemini_http_error, + is_free_tier_quota_error, + probe_gemini_tier, +) + + +def _mock_response(status: int, headers: dict | None = None, text: str = "") -> MagicMock: + resp = MagicMock() + resp.status_code = status + resp.headers = headers or {} + resp.text = text + return resp + + +def _run_probe(resp: MagicMock) -> str: + with patch("agent.gemini_native_adapter.httpx.Client") as MC: + inst = MagicMock() + inst.post.return_value = resp + MC.return_value.__enter__.return_value = inst + return probe_gemini_tier("fake-key") + + +class TestProbeGeminiTier: + """Verify the tier probe classifies keys correctly.""" + + def test_free_tier_via_rpd_header_flash(self): + # gemini-2.5-flash free tier: 250 RPD + resp = _mock_response(200, {"x-ratelimit-limit-requests-per-day": "250"}, "{}") + assert _run_probe(resp) == "free" + + def test_free_tier_via_rpd_header_pro(self): + # gemini-2.5-pro free tier: 100 RPD + resp = _mock_response(200, {"x-ratelimit-limit-requests-per-day": "100"}, "{}") + assert _run_probe(resp) == "free" + + def test_free_tier_via_rpd_header_flash_lite(self): + # flash-lite free tier: 1000 RPD (our upper bound) + resp = _mock_response(200, {"x-ratelimit-limit-requests-per-day": "1000"}, "{}") + assert _run_probe(resp) == "free" + + def test_paid_tier_via_rpd_header(self): + # Tier 1 starts at 1500+ RPD + resp = _mock_response(200, {"x-ratelimit-limit-requests-per-day": "1500"}, 
"{}") + assert _run_probe(resp) == "paid" + + def test_free_tier_via_429_body(self): + body = ( + '{"error":{"code":429,"message":"Quota exceeded for metric: ' + 'generativelanguage.googleapis.com/generate_content_free_tier_requests, ' + 'limit: 20"}}' + ) + resp = _mock_response(429, {}, body) + assert _run_probe(resp) == "free" + + def test_paid_429_has_no_free_tier_marker(self): + body = '{"error":{"code":429,"message":"rate limited"}}' + resp = _mock_response(429, {}, body) + assert _run_probe(resp) == "paid" + + def test_successful_200_without_rpd_header_is_paid(self): + resp = _mock_response(200, {}, '{"candidates":[]}') + assert _run_probe(resp) == "paid" + + def test_401_returns_unknown(self): + resp = _mock_response(401, {}, '{"error":{"code":401}}') + assert _run_probe(resp) == "unknown" + + def test_404_returns_unknown(self): + resp = _mock_response(404, {}, '{"error":{"code":404}}') + assert _run_probe(resp) == "unknown" + + def test_network_error_returns_unknown(self): + with patch( + "agent.gemini_native_adapter.httpx.Client", + side_effect=Exception("dns failure"), + ): + assert probe_gemini_tier("fake-key") == "unknown" + + def test_empty_key_returns_unknown(self): + assert probe_gemini_tier("") == "unknown" + assert probe_gemini_tier(" ") == "unknown" + assert probe_gemini_tier(None) == "unknown" # type: ignore[arg-type] + + def test_malformed_rpd_header_falls_through(self): + # Non-integer header value shouldn't crash; 200 with no usable header -> paid. 
+ resp = _mock_response(200, {"x-ratelimit-limit-requests-per-day": "abc"}, "{}") + assert _run_probe(resp) == "paid" + + def test_openai_compat_suffix_stripped(self): + """Base URLs ending in /openai get normalized to the native endpoint.""" + resp = _mock_response(200, {"x-ratelimit-limit-requests-per-day": "1500"}, "{}") + with patch("agent.gemini_native_adapter.httpx.Client") as MC: + inst = MagicMock() + inst.post.return_value = resp + MC.return_value.__enter__.return_value = inst + probe_gemini_tier( + "fake", + "https://generativelanguage.googleapis.com/v1beta/openai", + ) + # Verify the post URL does NOT contain /openai + called_url = inst.post.call_args[0][0] + assert "/openai/" not in called_url + assert called_url.endswith(":generateContent") + + +class TestIsFreeTierQuotaError: + def test_detects_free_tier_marker(self): + assert is_free_tier_quota_error( + "Quota exceeded for metric: generate_content_free_tier_requests" + ) + + def test_case_insensitive(self): + assert is_free_tier_quota_error("QUOTA: FREE_TIER_REQUESTS") + + def test_no_free_tier_marker(self): + assert not is_free_tier_quota_error("rate limited") + + def test_empty_string(self): + assert not is_free_tier_quota_error("") + + def test_none(self): + assert not is_free_tier_quota_error(None) # type: ignore[arg-type] + + +class TestGeminiHttpErrorFreeTierGuidance: + """gemini_http_error should append free-tier guidance for free-tier 429s.""" + + class _FakeResp: + def __init__(self, status: int, text: str): + self.status_code = status + self.headers: dict = {} + self.text = text + + def test_free_tier_429_appends_guidance(self): + body = ( + '{"error":{"code":429,"message":"Quota exceeded for metric: ' + "generativelanguage.googleapis.com/generate_content_free_tier_requests, " + 'limit: 20","status":"RESOURCE_EXHAUSTED"}}' + ) + err = gemini_http_error(self._FakeResp(429, body)) + msg = str(err) + assert "free tier" in msg.lower() + assert "aistudio.google.com/apikey" in msg + + def 
test_paid_429_has_no_billing_url(self): + body = '{"error":{"code":429,"message":"Rate limited","status":"RESOURCE_EXHAUSTED"}}' + err = gemini_http_error(self._FakeResp(429, body)) + assert "aistudio.google.com/apikey" not in str(err) + + def test_non_429_has_no_billing_url(self): + body = '{"error":{"code":400,"message":"bad request","status":"INVALID_ARGUMENT"}}' + err = gemini_http_error(self._FakeResp(400, body)) + assert "aistudio.google.com/apikey" not in str(err) + + def test_401_has_no_billing_url(self): + body = '{"error":{"code":401,"message":"API key invalid","status":"UNAUTHENTICATED"}}' + err = gemini_http_error(self._FakeResp(401, body)) + assert "aistudio.google.com/apikey" not in str(err) diff --git a/tests/agent/test_gemini_native_adapter.py b/tests/agent/test_gemini_native_adapter.py index a36b1e71c..4b066b4f4 100644 --- a/tests/agent/test_gemini_native_adapter.py +++ b/tests/agent/test_gemini_native_adapter.py @@ -234,6 +234,19 @@ def test_native_client_accepts_injected_http_client(): assert client._http is injected +def test_native_client_rejects_empty_api_key_with_actionable_message(): + """Empty/whitespace api_key must raise at construction, not produce a cryptic + Google GFE 'Error 400 (Bad Request)!!1' HTML page on the first request.""" + from agent.gemini_native_adapter import GeminiNativeClient + + for bad in ("", " ", None): + with pytest.raises(RuntimeError) as excinfo: + GeminiNativeClient(api_key=bad) # type: ignore[arg-type] + msg = str(excinfo.value) + assert "GOOGLE_API_KEY" in msg and "GEMINI_API_KEY" in msg + assert "aistudio.google.com" in msg + + @pytest.mark.asyncio async def test_async_native_client_streams_without_requiring_async_iterator_from_sync_client(): from agent.gemini_native_adapter import AsyncGeminiNativeClient diff --git a/tests/agent/test_gemini_schema.py b/tests/agent/test_gemini_schema.py new file mode 100644 index 000000000..069c99a21 --- /dev/null +++ b/tests/agent/test_gemini_schema.py @@ -0,0 +1,140 @@ 
+"""Tests for agent.gemini_schema — OpenAI→Gemini tool parameter translation.""" + +from agent.gemini_schema import ( + sanitize_gemini_schema, + sanitize_gemini_tool_parameters, +) + + +class TestSanitizeGeminiSchema: + def test_strips_unknown_top_level_keys(self): + """$schema / additionalProperties etc. must not reach Gemini.""" + schema = { + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "additionalProperties": False, + "properties": {"foo": {"type": "string"}}, + } + cleaned = sanitize_gemini_schema(schema) + assert "$schema" not in cleaned + assert "additionalProperties" not in cleaned + assert cleaned["type"] == "object" + assert cleaned["properties"] == {"foo": {"type": "string"}} + + def test_preserves_string_enums(self): + """String-valued enums are valid for Gemini and must pass through.""" + schema = {"type": "string", "enum": ["pending", "done", "cancelled"]} + cleaned = sanitize_gemini_schema(schema) + assert cleaned["type"] == "string" + assert cleaned["enum"] == ["pending", "done", "cancelled"] + + def test_drops_integer_enum_to_satisfy_gemini(self): + """Gemini rejects int-typed enums; the sanitizer must drop the enum. + + Regression for the Discord tool's ``auto_archive_duration``: + ``{type: integer, enum: [60, 1440, 4320, 10080]}`` caused + Gemini HTTP 400 INVALID_ARGUMENT + "Invalid value ... (TYPE_STRING), 60" on every request that + shipped the full tool catalog to generativelanguage.googleapis.com. 
+ """ + schema = { + "type": "integer", + "enum": [60, 1440, 4320, 10080], + "description": "Minutes (60, 1440, 4320, 10080).", + } + cleaned = sanitize_gemini_schema(schema) + assert cleaned["type"] == "integer" + assert "enum" not in cleaned + # description must survive so the model still sees the allowed values + assert cleaned["description"].startswith("Minutes") + + def test_drops_number_enum(self): + """Same rule applies to ``type: number``.""" + schema = {"type": "number", "enum": [0.5, 1.0, 2.0]} + cleaned = sanitize_gemini_schema(schema) + assert cleaned["type"] == "number" + assert "enum" not in cleaned + + def test_drops_boolean_enum(self): + """And to ``type: boolean`` (Gemini rejects non-string entries).""" + schema = {"type": "boolean", "enum": [True, False]} + cleaned = sanitize_gemini_schema(schema) + assert cleaned["type"] == "boolean" + assert "enum" not in cleaned + + def test_keeps_string_enum_even_when_numeric_values_coexist_as_strings(self): + """Stringified-numeric enums ARE valid for Gemini; don't drop them.""" + schema = {"type": "string", "enum": ["60", "1440", "4320", "10080"]} + cleaned = sanitize_gemini_schema(schema) + assert cleaned["enum"] == ["60", "1440", "4320", "10080"] + + def test_drops_nested_integer_enum_inside_properties(self): + """The fix must apply recursively — the Discord case is nested.""" + schema = { + "type": "object", + "properties": { + "auto_archive_duration": { + "type": "integer", + "enum": [60, 1440, 4320, 10080], + "description": "Thread archive duration in minutes.", + }, + "status": { + "type": "string", + "enum": ["active", "archived"], + }, + }, + } + cleaned = sanitize_gemini_schema(schema) + props = cleaned["properties"] + # Integer enum is dropped... + assert props["auto_archive_duration"]["type"] == "integer" + assert "enum" not in props["auto_archive_duration"] + # ...but the sibling string enum is preserved. 
+ assert props["status"]["enum"] == ["active", "archived"] + + def test_drops_integer_enum_inside_array_items(self): + """Array item schemas recurse through ``items``.""" + schema = { + "type": "array", + "items": {"type": "integer", "enum": [1, 2, 3]}, + } + cleaned = sanitize_gemini_schema(schema) + assert cleaned["items"]["type"] == "integer" + assert "enum" not in cleaned["items"] + + def test_non_dict_input_returns_empty(self): + assert sanitize_gemini_schema(None) == {} + assert sanitize_gemini_schema("not a schema") == {} + assert sanitize_gemini_schema([1, 2, 3]) == {} + + +class TestSanitizeGeminiToolParameters: + def test_empty_parameters_return_valid_object_schema(self): + """Gemini requires ``parameters`` to be a valid object schema.""" + cleaned = sanitize_gemini_tool_parameters({}) + assert cleaned == {"type": "object", "properties": {}} + + def test_discord_create_thread_parameters_no_longer_trip_gemini(self): + """End-to-end regression: the exact shape that was rejected in prod.""" + params = { + "type": "object", + "properties": { + "action": {"type": "string", "enum": ["create_thread"]}, + "auto_archive_duration": { + "type": "integer", + "enum": [60, 1440, 4320, 10080], + "description": "Thread archive duration in minutes " + "(create_thread, default 1440).", + }, + }, + "required": ["action"], + } + cleaned = sanitize_gemini_tool_parameters(params) + aad = cleaned["properties"]["auto_archive_duration"] + # The field that triggered the Gemini 400 is gone. + assert "enum" not in aad + # Type + description survive so the model still knows what to send. + assert aad["type"] == "integer" + assert "1440" in aad["description"] + # And the string-enum sibling is untouched. 
+ assert cleaned["properties"]["action"]["enum"] == ["create_thread"] diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py index 5cd0d8ab4..ca39da70f 100644 --- a/tests/agent/test_memory_provider.py +++ b/tests/agent/test_memory_provider.py @@ -77,6 +77,13 @@ class FakeMemoryProvider(MemoryProvider): self.memory_writes.append((action, target, content)) +class MetadataMemoryProvider(FakeMemoryProvider): + """Provider that opts into write metadata.""" + + def on_memory_write(self, action, target, content, metadata=None): + self.memory_writes.append((action, target, content, metadata or {})) + + # --------------------------------------------------------------------------- # MemoryProvider ABC tests # --------------------------------------------------------------------------- @@ -862,6 +869,51 @@ class TestOnMemoryWriteBridge: mgr.on_memory_write("add", "memory", "new fact") assert p.memory_writes == [("add", "memory", "new fact")] + def test_on_memory_write_metadata_passed_to_opt_in_provider(self): + """Providers that accept metadata receive structured write provenance.""" + mgr = MemoryManager() + p = MetadataMemoryProvider("ext") + mgr.add_provider(p) + + mgr.on_memory_write( + "add", + "memory", + "new fact", + metadata={ + "write_origin": "assistant_tool", + "execution_context": "foreground", + "session_id": "sess-1", + }, + ) + + assert p.memory_writes == [ + ( + "add", + "memory", + "new fact", + { + "write_origin": "assistant_tool", + "execution_context": "foreground", + "session_id": "sess-1", + }, + ) + ] + + def test_on_memory_write_metadata_keeps_legacy_provider_compatible(self): + """Old 3-arg providers keep working when the manager receives metadata.""" + mgr = MemoryManager() + p = FakeMemoryProvider("ext") + mgr.add_provider(p) + + mgr.on_memory_write( + "add", + "user", + "legacy provider fact", + metadata={"write_origin": "assistant_tool"}, + ) + + assert p.memory_writes == [("add", "user", "legacy provider fact")] + 
def test_on_memory_write_replace(self): """on_memory_write fires for 'replace' actions.""" mgr = MemoryManager() diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py index 4356b61c5..9ae865d57 100644 --- a/tests/agent/test_minimax_provider.py +++ b/tests/agent/test_minimax_provider.py @@ -341,6 +341,7 @@ class TestMinimaxSwitchModelCredentialGuard: agent._client_kwargs = {} agent.client = None agent._anthropic_client = MagicMock() + agent._fallback_chain = [] with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-leaked") as mock_resolve, \ diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 8c5261f48..fc4ed0bf5 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -200,6 +200,218 @@ class TestDefaultContextLengths: assert len(DEFAULT_CONTEXT_LENGTHS) >= 10 +# ========================================================================= +# Codex OAuth context-window resolution (provider="openai-codex") +# ========================================================================= + +class TestCodexOAuthContextLength: + """ChatGPT Codex OAuth imposes lower context limits than the direct + OpenAI API for the same slugs. Verified Apr 2026 via live probe of + chatgpt.com/backend-api/codex/models: every model returns 272k, while + models.dev reports 1.05M for gpt-5.5/gpt-5.4 and 400k for the rest. + """ + + def setup_method(self): + import agent.model_metadata as mm + mm._codex_oauth_context_cache = {} + mm._codex_oauth_context_cache_time = 0.0 + + def test_fallback_table_used_without_token(self): + """With no access token, the hardcoded Codex fallback table wins + over models.dev (which reports 1.05M for gpt-5.5 but Codex is 272k). 
+ """ + from agent.model_metadata import get_model_context_length + + with patch("agent.model_metadata.get_cached_context_length", return_value=None), \ + patch("agent.model_metadata.save_context_length"): + for model in ( + "gpt-5.5", + "gpt-5.4", + "gpt-5.4-mini", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.1-codex-max", + "gpt-5.1-codex-mini", + ): + ctx = get_model_context_length( + model=model, + base_url="https://chatgpt.com/backend-api/codex", + api_key="", + provider="openai-codex", + ) + assert ctx == 272_000, ( + f"Codex {model}: expected 272000 fallback, got {ctx} " + "(models.dev leakage?)" + ) + + def test_live_probe_overrides_fallback(self): + """When a token is provided, the live /models probe is preferred + and its context_window drives the result.""" + from agent.model_metadata import get_model_context_length + + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = { + "models": [ + {"slug": "gpt-5.5", "context_window": 300_000}, + {"slug": "gpt-5.4", "context_window": 400_000}, + ] + } + + with patch("agent.model_metadata.requests.get", return_value=fake_response), \ + patch("agent.model_metadata.get_cached_context_length", return_value=None), \ + patch("agent.model_metadata.save_context_length"): + ctx_55 = get_model_context_length( + model="gpt-5.5", + base_url="https://chatgpt.com/backend-api/codex", + api_key="fake-token", + provider="openai-codex", + ) + ctx_54 = get_model_context_length( + model="gpt-5.4", + base_url="https://chatgpt.com/backend-api/codex", + api_key="fake-token", + provider="openai-codex", + ) + assert ctx_55 == 300_000 + assert ctx_54 == 400_000 + + def test_probe_failure_falls_back_to_hardcoded(self): + """If the probe fails (non-200 / network error), we still return + the hardcoded 272k rather than leaking through to models.dev 1.05M.""" + from agent.model_metadata import get_model_context_length + + fake_response = MagicMock() + fake_response.status_code = 401 + 
fake_response.json.return_value = {} + + with patch("agent.model_metadata.requests.get", return_value=fake_response), \ + patch("agent.model_metadata.get_cached_context_length", return_value=None), \ + patch("agent.model_metadata.save_context_length"): + ctx = get_model_context_length( + model="gpt-5.5", + base_url="https://chatgpt.com/backend-api/codex", + api_key="expired-token", + provider="openai-codex", + ) + assert ctx == 272_000 + + def test_non_codex_providers_unaffected(self): + """Resolving gpt-5.5 on non-Codex providers must NOT use the Codex + 272k override — OpenRouter / direct OpenAI API have different limits. + """ + from agent.model_metadata import get_model_context_length + + # OpenRouter — should hit its own catalog path first; when mocked + # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k). + with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ + patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ + patch("agent.model_metadata.get_cached_context_length", return_value=None), \ + patch("agent.models_dev.lookup_models_dev_context", return_value=None): + ctx = get_model_context_length( + model="openai/gpt-5.5", + base_url="https://openrouter.ai/api/v1", + api_key="", + provider="openrouter", + ) + assert ctx == 400_000, ( + f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override " + "leaked outside openai-codex provider" + ) + + def test_stale_codex_cache_over_400k_is_invalidated(self, tmp_path, monkeypatch): + """Pre-PR #14935 builds cached gpt-5.5 at 1.05M (from models.dev) + before the Codex-aware branch existed. Upgrading users keep that + stale entry on disk and the cache-first lookup returns it forever. + Codex OAuth caps at 272k for every slug, so any cached Codex + entry >= 400k must be dropped and re-resolved via the live probe. 
+ """ + from agent import model_metadata as mm + + # Isolate the cache file to tmp_path + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + base_url = "https://chatgpt.com/backend-api/codex/" + stale_key = f"gpt-5.5@{base_url}" + other_key = "other-model@https://api.openai.com/v1/" + import yaml as _yaml + cache_file.write_text(_yaml.dump({"context_lengths": { + stale_key: 1_050_000, # stale pre-fix value + other_key: 128_000, # unrelated, must survive + }})) + + fake_response = MagicMock() + fake_response.status_code = 200 + fake_response.json.return_value = { + "models": [{"slug": "gpt-5.5", "context_window": 272_000}] + } + + with patch("agent.model_metadata.requests.get", return_value=fake_response), \ + patch("agent.model_metadata.save_context_length") as mock_save: + ctx = mm.get_model_context_length( + model="gpt-5.5", + base_url=base_url, + api_key="fake-token", + provider="openai-codex", + ) + + assert ctx == 272_000, f"Stale entry should have been re-resolved to 272k, got {ctx}" + # Live save was called with the fresh value + mock_save.assert_called_with("gpt-5.5", base_url, 272_000) + # The stale entry was removed from disk; unrelated entries survived + remaining = _yaml.safe_load(cache_file.read_text()).get("context_lengths", {}) + assert stale_key not in remaining, "Stale entry was not invalidated from the cache file" + assert remaining.get(other_key) == 128_000, "Unrelated cache entries must not be touched" + + def test_fresh_codex_cache_under_400k_is_respected(self, tmp_path, monkeypatch): + """Codex entries at the correct 272k must NOT be invalidated — + only stale pre-fix values (>= 400k) get dropped.""" + from agent import model_metadata as mm + + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + base_url = "https://chatgpt.com/backend-api/codex/" + import yaml as _yaml + 
cache_file.write_text(_yaml.dump({"context_lengths": { + f"gpt-5.5@{base_url}": 272_000, + }})) + + # If the invalidation incorrectly fired, this would be called; assert it isn't. + with patch("agent.model_metadata.requests.get") as mock_get: + ctx = mm.get_model_context_length( + model="gpt-5.5", + base_url=base_url, + api_key="fake-token", + provider="openai-codex", + ) + assert ctx == 272_000 + mock_get.assert_not_called() + + def test_stale_invalidation_scoped_to_codex_provider(self, tmp_path, monkeypatch): + """A cached 1M entry for a non-Codex provider (e.g. Anthropic opus on + OpenRouter, legitimately 1M) must NOT be invalidated by this guard.""" + from agent import model_metadata as mm + + cache_file = tmp_path / "context_length_cache.yaml" + monkeypatch.setattr(mm, "_get_context_cache_path", lambda: cache_file) + + base_url = "https://openrouter.ai/api/v1" + import yaml as _yaml + cache_file.write_text(_yaml.dump({"context_lengths": { + f"anthropic/claude-opus-4.6@{base_url}": 1_000_000, + }})) + + ctx = mm.get_model_context_length( + model="anthropic/claude-opus-4.6", + base_url=base_url, + api_key="fake", + provider="openrouter", + ) + assert ctx == 1_000_000, "Non-codex 1M cache entries must be respected" + + # ========================================================================= # get_model_context_length — resolution order # ========================================================================= @@ -376,6 +588,57 @@ class TestGetModelContextLength: assert result == 200000 +# ========================================================================= +# Bedrock context resolution — must run BEFORE custom-endpoint probe +# ========================================================================= + +class TestBedrockContextResolution: + """Regression tests for Bedrock context-length resolution order. 
+ + Bug: because ``bedrock-runtime..amazonaws.com`` is not listed in + ``_URL_TO_PROVIDER``, ``_is_known_provider_base_url`` returned False and + the custom-endpoint probe at step 2 ran first — fetching ``/models`` from + Bedrock (which it doesn't serve), returning the 128K default-fallback + before execution ever reached the Bedrock branch. + + Fix: promote the Bedrock branch ahead of the custom-endpoint probe. + """ + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_bedrock_provider_returns_static_table_before_probe(self, mock_fetch): + """provider='bedrock' resolves via static table, bypasses /models probe.""" + ctx = get_model_context_length( + "anthropic.claude-opus-4-v1:0", + provider="bedrock", + base_url="https://bedrock-runtime.us-east-1.amazonaws.com", + ) + # Must return the static Bedrock table value (200K for Claude), + # NOT DEFAULT_FALLBACK_CONTEXT (128K). + assert ctx == 200000 + mock_fetch.assert_not_called() + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_bedrock_url_without_provider_hint(self, mock_fetch): + """bedrock-runtime host infers Bedrock even when provider is omitted.""" + ctx = get_model_context_length( + "anthropic.claude-sonnet-4-v1:0", + base_url="https://bedrock-runtime.us-west-2.amazonaws.com", + ) + assert ctx == 200000 + mock_fetch.assert_not_called() + + @patch("agent.model_metadata.fetch_endpoint_model_metadata") + def test_non_bedrock_url_still_probes(self, mock_fetch): + """Non-Bedrock hosts still reach the custom-endpoint probe.""" + mock_fetch.return_value = {"some-model": {"context_length": 50000}} + ctx = get_model_context_length( + "some-model", + base_url="https://api.example.com/v1", + ) + assert ctx == 50000 + assert mock_fetch.called + + # ========================================================================= # _strip_provider_prefix — Ollama model:tag vs provider:model # ========================================================================= diff --git 
a/tests/agent/test_model_metadata_ssl.py b/tests/agent/test_model_metadata_ssl.py new file mode 100644 index 000000000..6859fd309 --- /dev/null +++ b/tests/agent/test_model_metadata_ssl.py @@ -0,0 +1,90 @@ +"""Tests for _resolve_requests_verify() env var precedence. + +Verifies that custom provider `/models` fetches honour the three supported +CA bundle env vars (HERMES_CA_BUNDLE, REQUESTS_CA_BUNDLE, SSL_CERT_FILE) +in the documented priority order, and that non-existent paths are +skipped gracefully rather than breaking the request. + +No filesystem or network I/O required — we use tmp_path to create real +CA bundle stand-in files and monkeypatch env vars. +""" + +import os +import sys +from pathlib import Path + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest + +from agent.model_metadata import _resolve_requests_verify + + +_CA_ENV_VARS = ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE") + + +@pytest.fixture +def clean_env(monkeypatch): + """Clear all three SSL env vars so each test starts from a known state.""" + for var in _CA_ENV_VARS: + monkeypatch.delenv(var, raising=False) + return monkeypatch + + +@pytest.fixture +def bundle_file(tmp_path: Path) -> str: + """Create a placeholder CA bundle file and return its absolute path.""" + path = tmp_path / "ca.pem" + path.write_text("-----BEGIN CERTIFICATE-----\nstub\n-----END CERTIFICATE-----\n") + return str(path) + + +class TestResolveRequestsVerify: + def test_no_env_returns_true(self, clean_env): + assert _resolve_requests_verify() is True + + def test_hermes_ca_bundle_returns_path(self, clean_env, bundle_file): + clean_env.setenv("HERMES_CA_BUNDLE", bundle_file) + assert _resolve_requests_verify() == bundle_file + + def test_requests_ca_bundle_returns_path(self, clean_env, bundle_file): + clean_env.setenv("REQUESTS_CA_BUNDLE", bundle_file) + assert _resolve_requests_verify() == bundle_file + + def test_ssl_cert_file_returns_path(self, clean_env, bundle_file): + 
clean_env.setenv("SSL_CERT_FILE", bundle_file) + assert _resolve_requests_verify() == bundle_file + + def test_priority_hermes_over_requests(self, clean_env, tmp_path, bundle_file): + other = tmp_path / "other.pem" + other.write_text("stub") + clean_env.setenv("HERMES_CA_BUNDLE", bundle_file) + clean_env.setenv("REQUESTS_CA_BUNDLE", str(other)) + assert _resolve_requests_verify() == bundle_file + + def test_priority_requests_over_ssl_cert_file(self, clean_env, tmp_path, bundle_file): + other = tmp_path / "other.pem" + other.write_text("stub") + clean_env.setenv("REQUESTS_CA_BUNDLE", bundle_file) + clean_env.setenv("SSL_CERT_FILE", str(other)) + assert _resolve_requests_verify() == bundle_file + + def test_nonexistent_path_falls_through(self, clean_env, tmp_path, bundle_file): + missing = tmp_path / "does_not_exist.pem" + clean_env.setenv("HERMES_CA_BUNDLE", str(missing)) + clean_env.setenv("REQUESTS_CA_BUNDLE", bundle_file) + assert _resolve_requests_verify() == bundle_file + + def test_all_nonexistent_returns_true(self, clean_env, tmp_path): + missing1 = tmp_path / "a.pem" + missing2 = tmp_path / "b.pem" + missing3 = tmp_path / "c.pem" + clean_env.setenv("HERMES_CA_BUNDLE", str(missing1)) + clean_env.setenv("REQUESTS_CA_BUNDLE", str(missing2)) + clean_env.setenv("SSL_CERT_FILE", str(missing3)) + assert _resolve_requests_verify() is True + + def test_empty_string_env_var_ignored(self, clean_env, bundle_file): + clean_env.setenv("HERMES_CA_BUNDLE", "") + clean_env.setenv("REQUESTS_CA_BUNDLE", bundle_file) + assert _resolve_requests_verify() == bundle_file diff --git a/tests/agent/test_moonshot_schema.py b/tests/agent/test_moonshot_schema.py new file mode 100644 index 000000000..da5380658 --- /dev/null +++ b/tests/agent/test_moonshot_schema.py @@ -0,0 +1,254 @@ +"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer. + +Moonshot's tool-parameter validator rejects several shapes that the rest of +the JSON Schema ecosystem accepts: + +1. 
Properties without ``type`` — Moonshot requires ``type`` on every node. +2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside + ``anyOf`` children. + +These tests cover the repairs applied by ``agent/moonshot_schema.py``. +""" + +from __future__ import annotations + +import pytest + +from agent.moonshot_schema import ( + is_moonshot_model, + sanitize_moonshot_tool_parameters, + sanitize_moonshot_tools, +) + + +class TestMoonshotModelDetection: + """is_moonshot_model() must match across aggregator prefixes.""" + + @pytest.mark.parametrize( + "model", + [ + "kimi-k2.6", + "kimi-k2-thinking", + "moonshotai/Kimi-K2.6", + "moonshotai/kimi-k2.6", + "nous/moonshotai/kimi-k2.6", + "openrouter/moonshotai/kimi-k2-thinking", + "MOONSHOTAI/KIMI-K2.6", + ], + ) + def test_positive_matches(self, model): + assert is_moonshot_model(model) is True + + @pytest.mark.parametrize( + "model", + [ + "", + None, + "anthropic/claude-sonnet-4.6", + "openai/gpt-5.4", + "google/gemini-3-flash-preview", + "deepseek-chat", + ], + ) + def test_negative_matches(self, model): + assert is_moonshot_model(model) is False + + +class TestMissingTypeFilled: + """Rule 1: every property must carry a type.""" + + def test_property_without_type_gets_string(self): + params = { + "type": "object", + "properties": {"query": {"description": "a bare property"}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["query"]["type"] == "string" + + def test_property_with_enum_infers_type_from_first_value(self): + params = { + "type": "object", + "properties": {"flag": {"enum": [True, False]}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["flag"]["type"] == "boolean" + + def test_nested_properties_are_repaired(self): + params = { + "type": "object", + "properties": { + "filter": { + "type": "object", + "properties": { + "field": {"description": "no type"}, + }, + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + 
assert out["properties"]["filter"]["properties"]["field"]["type"] == "string" + + def test_array_items_without_type_get_repaired(self): + params = { + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": {"description": "tag entry"}, + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["tags"]["items"]["type"] == "string" + + def test_ref_node_is_not_given_synthetic_type(self): + """$ref nodes should NOT get a synthetic type — the referenced + definition supplies it, and Moonshot would reject the conflict.""" + params = { + "type": "object", + "properties": {"payload": {"$ref": "#/$defs/Payload"}}, + "$defs": {"Payload": {"type": "object", "properties": {}}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert "type" not in out["properties"]["payload"] + assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload" + + +class TestAnyOfParentType: + """Rule 2: type must not appear at the anyOf parent level.""" + + def test_parent_type_stripped_when_anyof_present(self): + params = { + "type": "object", + "properties": { + "from_format": { + "type": "string", + "anyOf": [ + {"type": "string"}, + {"type": "null"}, + ], + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + from_format = out["properties"]["from_format"] + assert "type" not in from_format + assert "anyOf" in from_format + + def test_anyof_children_missing_type_get_filled(self): + params = { + "type": "object", + "properties": { + "value": { + "anyOf": [ + {"type": "string"}, + {"description": "A typeless option"}, + ], + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + children = out["properties"]["value"]["anyOf"] + assert children[0]["type"] == "string" + assert "type" in children[1] + + +class TestTopLevelGuarantees: + """The returned top-level schema is always a well-formed object.""" + + def test_non_dict_input_returns_empty_object(self): + assert sanitize_moonshot_tool_parameters(None) == {"type": 
"object", "properties": {}} + assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}} + assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}} + + def test_non_object_top_level_coerced(self): + params = {"type": "string"} + out = sanitize_moonshot_tool_parameters(params) + assert out["type"] == "object" + assert "properties" in out + + def test_does_not_mutate_input(self): + params = { + "type": "object", + "properties": {"q": {"description": "no type"}}, + } + snapshot = { + "type": params["type"], + "properties": {"q": dict(params["properties"]["q"])}, + } + sanitize_moonshot_tool_parameters(params) + assert params["type"] == snapshot["type"] + assert "type" not in params["properties"]["q"] + + +class TestToolListSanitizer: + """sanitize_moonshot_tools() walks an OpenAI-format tool list.""" + + def test_applies_per_tool(self): + tools = [ + { + "type": "function", + "function": { + "name": "search", + "description": "Search", + "parameters": { + "type": "object", + "properties": {"q": {"description": "query"}}, + }, + }, + }, + { + "type": "function", + "function": { + "name": "noop", + "description": "Does nothing", + "parameters": {"type": "object", "properties": {}}, + }, + }, + ] + out = sanitize_moonshot_tools(tools) + assert out[0]["function"]["parameters"]["properties"]["q"]["type"] == "string" + # Second tool already clean — should be structurally equivalent + assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}} + + def test_empty_list_is_passthrough(self): + assert sanitize_moonshot_tools([]) == [] + assert sanitize_moonshot_tools(None) is None + + def test_skips_malformed_entries(self): + """Entries without a function dict are passed through untouched.""" + tools = [{"type": "function"}, {"not": "a tool"}] + out = sanitize_moonshot_tools(tools) + assert out == tools + + +class TestRealWorldMCPShape: + """End-to-end: a realistic MCP-style schema that used to 400 
on Moonshot.""" + + def test_combined_rewrites(self): + # Shape: missing type on a property, anyOf with parent type, array + # items without type — all in one tool. + params = { + "type": "object", + "properties": { + "query": {"description": "search text"}, + "filter": { + "type": "string", + "anyOf": [ + {"type": "string"}, + {"type": "null"}, + ], + }, + "tags": { + "type": "array", + "items": {"description": "tag"}, + }, + }, + "required": ["query"], + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["query"]["type"] == "string" + assert "type" not in out["properties"]["filter"] + assert out["properties"]["filter"]["anyOf"][0]["type"] == "string" + assert out["properties"]["tags"]["items"]["type"] == "string" + assert out["required"] == ["query"] diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index bf8742690..6879baed8 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -1,13 +1,11 @@ """Tests for agent/skill_commands.py — skill slash command scanning and platform filtering.""" import os -from datetime import datetime from pathlib import Path from unittest.mock import patch import tools.skills_tool as skills_tool_module from agent.skill_commands import ( - build_plan_path, build_preloaded_skills_prompt, build_skill_invocation_message, resolve_skill_command_key, @@ -399,40 +397,6 @@ Generate some audio. 
assert 'file_path=""' in msg -class TestPlanSkillHelpers: - def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self): - path = build_plan_path( - "Implement OAuth login + refresh tokens!", - now=datetime(2026, 3, 15, 9, 30, 45), - ) - - assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md" - - def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path): - with patch("tools.skills_tool.SKILLS_DIR", tmp_path): - _make_skill( - tmp_path, - "plan", - body="Save plans under .hermes/plans in the active workspace and do not execute the work.", - ) - scan_skill_commands() - msg = build_skill_invocation_message( - "/plan", - "Add a /plan command", - runtime_note=( - "Save the markdown plan with write_file to this exact relative path inside " - "the active workspace/backend cwd: .hermes/plans/plan.md" - ), - ) - - assert msg is not None - assert "Save plans under $HERMES_HOME/plans" not in msg - assert ".hermes/plans" in msg - assert "Add a /plan command" in msg - assert ".hermes/plans/plan.md" in msg - assert "Runtime note:" in msg - - class TestSkillDirectoryHeader: """The activation message must expose the absolute skill directory and explain how to resolve relative paths, so skills with bundled scripts diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index b44eafd45..cb8e17c6a 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -238,6 +238,56 @@ class TestChatCompletionsKimi: ) assert kw["extra_body"]["thinking"] == {"type": "disabled"} + def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport): + """Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL.""" + tools = [ + { + "type": "function", + "function": { + "name": "search", + "description": "Search", + "parameters": { + "type": "object", + "properties": { + 
"q": {"description": "query"}, # missing type + }, + }, + }, + }, + ] + kw = transport.build_kwargs( + model="moonshotai/kimi-k2.6", + messages=[{"role": "user", "content": "Hi"}], + tools=tools, + max_tokens_param_fn=lambda n: {"max_tokens": n}, + ) + assert kw["tools"][0]["function"]["parameters"]["properties"]["q"]["type"] == "string" + + def test_non_moonshot_tools_are_not_mutated(self, transport): + """Other models don't go through the Moonshot sanitizer.""" + original_params = { + "type": "object", + "properties": {"q": {"description": "query"}}, # missing type + } + tools = [ + { + "type": "function", + "function": { + "name": "search", + "description": "Search", + "parameters": original_params, + }, + }, + ] + kw = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", + messages=[{"role": "user", "content": "Hi"}], + tools=tools, + max_tokens_param_fn=lambda n: {"max_tokens": n}, + ) + # The parameters dict is passed through untouched (no synthetic type) + assert "type" not in kw["tools"][0]["function"]["parameters"]["properties"]["q"] + class TestChatCompletionsValidate: diff --git a/tests/cli/test_busy_input_mode_command.py b/tests/cli/test_busy_input_mode_command.py new file mode 100644 index 000000000..6dd0afbc7 --- /dev/null +++ b/tests/cli/test_busy_input_mode_command.py @@ -0,0 +1,94 @@ +"""Tests for the /busy CLI command and busy-input-mode config handling.""" + +import unittest +from types import SimpleNamespace +from unittest.mock import patch + + +def _import_cli(): + import hermes_cli.config as config_mod + + if not hasattr(config_mod, "save_env_value_secure"): + config_mod.save_env_value_secure = lambda key, value: { + "success": True, + "stored_as": key, + "validated": False, + } + + import cli as cli_mod + + return cli_mod + + +class TestHandleBusyCommand(unittest.TestCase): + def _make_cli(self, busy_input_mode="interrupt"): + return SimpleNamespace( + busy_input_mode=busy_input_mode, + agent=None, + ) + + def 
test_no_args_shows_status(self): + cli_mod = _import_cli() + stub = self._make_cli("queue") + with ( + patch.object(cli_mod, "_cprint") as mock_cprint, + patch.object(cli_mod, "save_config_value") as mock_save, + ): + cli_mod.HermesCLI._handle_busy_command(stub, "/busy") + + mock_save.assert_not_called() + printed = " ".join(str(c) for c in mock_cprint.call_args_list) + self.assertIn("queue", printed) + self.assertIn("interrupt", printed) + + def test_queue_argument_sets_queue_mode_and_saves(self): + cli_mod = _import_cli() + stub = self._make_cli("interrupt") + with ( + patch.object(cli_mod, "_cprint"), + patch.object(cli_mod, "save_config_value", return_value=True) as mock_save, + ): + cli_mod.HermesCLI._handle_busy_command(stub, "/busy queue") + + self.assertEqual(stub.busy_input_mode, "queue") + mock_save.assert_called_once_with("display.busy_input_mode", "queue") + + def test_interrupt_argument_sets_interrupt_mode_and_saves(self): + cli_mod = _import_cli() + stub = self._make_cli("queue") + with ( + patch.object(cli_mod, "_cprint"), + patch.object(cli_mod, "save_config_value", return_value=True) as mock_save, + ): + cli_mod.HermesCLI._handle_busy_command(stub, "/busy interrupt") + + self.assertEqual(stub.busy_input_mode, "interrupt") + mock_save.assert_called_once_with("display.busy_input_mode", "interrupt") + + def test_invalid_argument_prints_usage(self): + cli_mod = _import_cli() + stub = self._make_cli() + with ( + patch.object(cli_mod, "_cprint") as mock_cprint, + patch.object(cli_mod, "save_config_value") as mock_save, + ): + cli_mod.HermesCLI._handle_busy_command(stub, "/busy nonsense") + + mock_save.assert_not_called() + printed = " ".join(str(c) for c in mock_cprint.call_args_list) + self.assertIn("Usage: /busy", printed) + + +class TestBusyCommandRegistry(unittest.TestCase): + def test_busy_in_registry(self): + from hermes_cli.commands import COMMAND_REGISTRY + + names = [c.name for c in COMMAND_REGISTRY] + assert "busy" in names + + def 
test_busy_subcommands_documented(self): + from hermes_cli.commands import COMMAND_REGISTRY + + busy = next(c for c in COMMAND_REGISTRY if c.name == "busy") + assert busy.args_hint == "[queue|interrupt|status]" + assert busy.category == "Configuration" diff --git a/tests/cli/test_cli_plan_command.py b/tests/cli/test_cli_plan_command.py deleted file mode 100644 index 8f8205d75..000000000 --- a/tests/cli/test_cli_plan_command.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Tests for the /plan CLI slash command.""" - -from unittest.mock import MagicMock, patch - -from agent.skill_commands import scan_skill_commands -from cli import HermesCLI - - -def _make_cli(): - cli_obj = HermesCLI.__new__(HermesCLI) - cli_obj.config = {} - cli_obj.console = MagicMock() - cli_obj.agent = None - cli_obj.conversation_history = [] - cli_obj.session_id = "sess-123" - cli_obj._pending_input = MagicMock() - return cli_obj - - -def _make_plan_skill(skills_dir): - skill_dir = skills_dir / "plan" - skill_dir.mkdir(parents=True, exist_ok=True) - (skill_dir / "SKILL.md").write_text( - """--- -name: plan -description: Plan mode skill. ---- - -# Plan - -Use the current conversation context when no explicit instruction is provided. -Save plans under the active workspace's .hermes/plans directory. 
-""" - ) - - -class TestCLIPlanCommand: - def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch): - cli_obj = _make_cli() - - with patch("tools.skills_tool.SKILLS_DIR", tmp_path): - _make_plan_skill(tmp_path) - scan_skill_commands() - result = cli_obj.process_command("/plan Add OAuth login") - - assert result is True - cli_obj._pending_input.put.assert_called_once() - queued = cli_obj._pending_input.put.call_args[0][0] - assert "Plan mode skill" in queued - assert "Add OAuth login" in queued - assert ".hermes/plans" in queued - assert str(tmp_path / "plans") not in queued - assert "active workspace/backend cwd" in queued - assert "Runtime note:" in queued - - def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch): - cli_obj = _make_cli() - - with patch("tools.skills_tool.SKILLS_DIR", tmp_path): - _make_plan_skill(tmp_path) - scan_skill_commands() - cli_obj.process_command("/plan") - - queued = cli_obj._pending_input.put.call_args[0][0] - assert "current conversation context" in queued - assert ".hermes/plans/" in queued - assert "conversation-plan.md" in queued diff --git a/tests/cli/test_quick_commands.py b/tests/cli/test_quick_commands.py index 1c94cb1b0..c89d639d1 100644 --- a/tests/cli/test_quick_commands.py +++ b/tests/cli/test_quick_commands.py @@ -23,6 +23,11 @@ class TestCLIQuickCommands: cli.console = MagicMock() cli.agent = None cli.conversation_history = [] + # session_id is accessed by the fallback skill/fuzzy-match path in + # process_command; without it, tests that exercise `/alias args` + # can trip an AttributeError when cross-test state leaks a skill + # command matching the alias target. 
+ cli.session_id = "test-session" return cli def test_exec_command_runs_and_prints_output(self): diff --git a/tests/cron/test_cron_workdir.py b/tests/cron/test_cron_workdir.py new file mode 100644 index 000000000..03777dd47 --- /dev/null +++ b/tests/cron/test_cron_workdir.py @@ -0,0 +1,380 @@ +"""Tests for per-job workdir support in cron jobs. + +Covers: + - jobs.create_job: param plumbing, validation, default-None preserved + - jobs._normalize_workdir: absolute / relative / missing / file-not-dir + - jobs.update_job: set, clear, re-validate + - tools.cronjob_tools.cronjob: create + update JSON round-trip, schema + includes workdir, _format_job exposes it when set + - scheduler.tick(): partitions workdir jobs off the thread pool, restores + TERMINAL_CWD in finally, honours the env override during run_job +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +@pytest.fixture() +def tmp_cron_dir(tmp_path, monkeypatch): + """Isolate cron job storage into a temp dir so tests don't stomp on real jobs.""" + monkeypatch.setattr("cron.jobs.CRON_DIR", tmp_path / "cron") + monkeypatch.setattr("cron.jobs.JOBS_FILE", tmp_path / "cron" / "jobs.json") + monkeypatch.setattr("cron.jobs.OUTPUT_DIR", tmp_path / "cron" / "output") + return tmp_path + + +# --------------------------------------------------------------------------- +# jobs._normalize_workdir +# --------------------------------------------------------------------------- + +class TestNormalizeWorkdir: + def test_none_returns_none(self): + from cron.jobs import _normalize_workdir + assert _normalize_workdir(None) is None + + def test_empty_string_returns_none(self): + from cron.jobs import _normalize_workdir + assert _normalize_workdir("") is None + assert _normalize_workdir(" ") is None + + def test_absolute_existing_dir_returns_resolved_str(self, tmp_path): + from cron.jobs import _normalize_workdir + result = _normalize_workdir(str(tmp_path)) + assert result == 
str(tmp_path.resolve()) + + def test_tilde_expands(self, tmp_path, monkeypatch): + from cron.jobs import _normalize_workdir + monkeypatch.setenv("HOME", str(tmp_path)) + result = _normalize_workdir("~") + assert result == str(tmp_path.resolve()) + + def test_relative_path_rejected(self): + from cron.jobs import _normalize_workdir + with pytest.raises(ValueError, match="absolute path"): + _normalize_workdir("some/relative/path") + + def test_missing_dir_rejected(self, tmp_path): + from cron.jobs import _normalize_workdir + missing = tmp_path / "does-not-exist" + with pytest.raises(ValueError, match="does not exist"): + _normalize_workdir(str(missing)) + + def test_file_not_dir_rejected(self, tmp_path): + from cron.jobs import _normalize_workdir + f = tmp_path / "file.txt" + f.write_text("hi") + with pytest.raises(ValueError, match="not a directory"): + _normalize_workdir(str(f)) + + +# --------------------------------------------------------------------------- +# jobs.create_job and update_job +# --------------------------------------------------------------------------- + +class TestCreateJobWorkdir: + def test_workdir_stored_when_set(self, tmp_cron_dir): + from cron.jobs import create_job, get_job + job = create_job( + prompt="hello", + schedule="every 1h", + workdir=str(tmp_cron_dir), + ) + stored = get_job(job["id"]) + assert stored["workdir"] == str(tmp_cron_dir.resolve()) + + def test_workdir_none_preserves_old_behaviour(self, tmp_cron_dir): + from cron.jobs import create_job, get_job + job = create_job(prompt="hello", schedule="every 1h") + stored = get_job(job["id"]) + # Field is present on the dict but None — downstream code checks + # truthiness to decide whether the feature is active. 
+ assert stored.get("workdir") is None + + def test_create_rejects_invalid_workdir(self, tmp_cron_dir): + from cron.jobs import create_job + with pytest.raises(ValueError): + create_job( + prompt="hello", + schedule="every 1h", + workdir="not/absolute", + ) + + +class TestUpdateJobWorkdir: + def test_set_workdir_via_update(self, tmp_cron_dir): + from cron.jobs import create_job, get_job, update_job + job = create_job(prompt="x", schedule="every 1h") + update_job(job["id"], {"workdir": str(tmp_cron_dir)}) + assert get_job(job["id"])["workdir"] == str(tmp_cron_dir.resolve()) + + def test_clear_workdir_with_none(self, tmp_cron_dir): + from cron.jobs import create_job, get_job, update_job + job = create_job( + prompt="x", schedule="every 1h", workdir=str(tmp_cron_dir) + ) + update_job(job["id"], {"workdir": None}) + assert get_job(job["id"])["workdir"] is None + + def test_clear_workdir_with_empty_string(self, tmp_cron_dir): + from cron.jobs import create_job, get_job, update_job + job = create_job( + prompt="x", schedule="every 1h", workdir=str(tmp_cron_dir) + ) + update_job(job["id"], {"workdir": ""}) + assert get_job(job["id"])["workdir"] is None + + def test_update_rejects_invalid_workdir(self, tmp_cron_dir): + from cron.jobs import create_job, update_job + job = create_job(prompt="x", schedule="every 1h") + with pytest.raises(ValueError): + update_job(job["id"], {"workdir": "nope/relative"}) + + +# --------------------------------------------------------------------------- +# tools.cronjob_tools: end-to-end JSON round-trip +# --------------------------------------------------------------------------- + +class TestCronjobToolWorkdir: + def test_create_with_workdir_json_roundtrip(self, tmp_cron_dir): + from tools.cronjob_tools import cronjob + + result = json.loads( + cronjob( + action="create", + prompt="hi", + schedule="every 1h", + workdir=str(tmp_cron_dir), + ) + ) + assert result["success"] is True + assert result["job"]["workdir"] == 
str(tmp_cron_dir.resolve()) + + def test_create_without_workdir_hides_field_in_format(self, tmp_cron_dir): + from tools.cronjob_tools import cronjob + + result = json.loads( + cronjob( + action="create", + prompt="hi", + schedule="every 1h", + ) + ) + assert result["success"] is True + # _format_job omits the field when unset — reduces noise in agent output. + assert "workdir" not in result["job"] + + def test_update_clears_workdir_with_empty_string(self, tmp_cron_dir): + from tools.cronjob_tools import cronjob + + created = json.loads( + cronjob( + action="create", + prompt="hi", + schedule="every 1h", + workdir=str(tmp_cron_dir), + ) + ) + job_id = created["job_id"] + + updated = json.loads( + cronjob(action="update", job_id=job_id, workdir="") + ) + assert updated["success"] is True + assert "workdir" not in updated["job"] + + def test_schema_advertises_workdir(self): + from tools.cronjob_tools import CRONJOB_SCHEMA + assert "workdir" in CRONJOB_SCHEMA["parameters"]["properties"] + desc = CRONJOB_SCHEMA["parameters"]["properties"]["workdir"]["description"] + assert "absolute" in desc.lower() + + +# --------------------------------------------------------------------------- +# scheduler.tick(): workdir partition +# --------------------------------------------------------------------------- + +class TestTickWorkdirPartition: + """ + tick() must run workdir jobs sequentially (outside the ThreadPoolExecutor) + because run_job mutates os.environ["TERMINAL_CWD"], which is process-global. + We verify the partition without booting the real scheduler by patching the + pieces tick() calls. + """ + + def test_workdir_jobs_run_sequentially(self, tmp_path, monkeypatch): + import cron.scheduler as sched + + # Two "jobs" — one with workdir, one without. get_due_jobs returns both. 
+ workdir_job = {"id": "a", "name": "A", "workdir": str(tmp_path)} + parallel_job = {"id": "b", "name": "B", "workdir": None} + + monkeypatch.setattr(sched, "get_due_jobs", lambda: [workdir_job, parallel_job]) + monkeypatch.setattr(sched, "advance_next_run", lambda *_a, **_kw: None) + + # Record call order / thread context. + import threading + calls: list[tuple[str, bool]] = [] + + def fake_run_job(job): + # Return a minimal tuple matching run_job's signature. + calls.append((job["id"], threading.current_thread().name)) + return True, "output", "response", None + + monkeypatch.setattr(sched, "run_job", fake_run_job) + monkeypatch.setattr(sched, "save_job_output", lambda _jid, _o: None) + monkeypatch.setattr(sched, "mark_job_run", lambda *_a, **_kw: None) + monkeypatch.setattr( + sched, "_deliver_result", lambda *_a, **_kw: None + ) + + n = sched.tick(verbose=False) + assert n == 2 + + ids = [c[0] for c in calls] + # Workdir jobs always come before parallel jobs. + assert ids.index("a") < ids.index("b") + + # The workdir job must run on the main thread (sequential pass). + main_thread_name = threading.current_thread().name + workdir_thread_name = next(t for jid, t in calls if jid == "a") + assert workdir_thread_name == main_thread_name + + +# --------------------------------------------------------------------------- +# scheduler.run_job: TERMINAL_CWD + skip_context_files wiring +# --------------------------------------------------------------------------- + +class TestRunJobTerminalCwd: + """ + run_job sets TERMINAL_CWD + flips skip_context_files=False when workdir + is set, and restores the prior TERMINAL_CWD in finally — even on error. + We stub AIAgent so no real API call happens. 
+ """ + + @staticmethod + def _install_stubs(monkeypatch, observed: dict): + """Patch enough of run_job's deps that it executes without real creds.""" + import os + import sys + import cron.scheduler as sched + + class FakeAgent: + def __init__(self, **kwargs): + observed["skip_context_files"] = kwargs.get("skip_context_files") + observed["terminal_cwd_during_init"] = os.environ.get( + "TERMINAL_CWD", "_UNSET_" + ) + + def run_conversation(self, *_a, **_kw): + observed["terminal_cwd_during_run"] = os.environ.get( + "TERMINAL_CWD", "_UNSET_" + ) + return {"final_response": "done", "messages": []} + + def get_activity_summary(self): + return {"seconds_since_activity": 0.0} + + fake_mod = type(sys)("run_agent") + fake_mod.AIAgent = FakeAgent + monkeypatch.setitem(sys.modules, "run_agent", fake_mod) + + # Bypass the real provider resolver — it reads ~/.hermes and credentials. + from hermes_cli import runtime_provider as _rtp + monkeypatch.setattr( + _rtp, + "resolve_runtime_provider", + lambda **_kw: { + "provider": "test", + "api_key": "k", + "base_url": "http://test.local", + "api_mode": "chat_completions", + }, + ) + + # Stub scheduler helpers that would otherwise hit the filesystem / config. + monkeypatch.setattr(sched, "_build_job_prompt", lambda job, prerun_script=None: "hi") + monkeypatch.setattr(sched, "_resolve_origin", lambda job: None) + monkeypatch.setattr(sched, "_resolve_delivery_target", lambda job: None) + monkeypatch.setattr(sched, "_resolve_cron_enabled_toolsets", lambda job, cfg: None) + # Unlimited inactivity so the poll loop returns immediately. + monkeypatch.setenv("HERMES_CRON_TIMEOUT", "0") + + # run_job calls load_dotenv(~/.hermes/.env, override=True), which will + # happily clobber TERMINAL_CWD out from under us if the real user .env + # has TERMINAL_CWD set (common on dev boxes). Stub it out. 
+ import dotenv + monkeypatch.setattr(dotenv, "load_dotenv", lambda *_a, **_kw: True) + + def test_workdir_sets_and_restores_terminal_cwd( + self, tmp_path, monkeypatch + ): + import os + import cron.scheduler as sched + + # Make sure the test's TERMINAL_CWD starts at a known non-workdir value. + # Use monkeypatch.setenv so it's restored on teardown regardless of + # whatever other tests in this xdist worker have left behind. + monkeypatch.setenv("TERMINAL_CWD", "/original/cwd") + + observed: dict = {} + self._install_stubs(monkeypatch, observed) + + job = { + "id": "abc", + "name": "wd-job", + "workdir": str(tmp_path), + "schedule_display": "manual", + } + + success, _output, response, error = sched.run_job(job) + assert success is True, f"run_job failed: error={error!r} response={response!r}" + + # AIAgent was built with skip_context_files=False (feature ON). + assert observed["skip_context_files"] is False + # TERMINAL_CWD was pointing at the job workdir while the agent ran. + assert observed["terminal_cwd_during_init"] == str(tmp_path.resolve()) + assert observed["terminal_cwd_during_run"] == str(tmp_path.resolve()) + + # And it was restored to the original value in finally. + assert os.environ["TERMINAL_CWD"] == "/original/cwd" + + def test_no_workdir_leaves_terminal_cwd_untouched(self, monkeypatch): + """When workdir is absent, run_job must not touch TERMINAL_CWD at all — + whatever value was present before the call should be present after. + + We don't assert on the *content* of TERMINAL_CWD (other tests in the + same xdist worker may leave it set to something like '.'); we just + check it's unchanged by run_job. + """ + import os + import cron.scheduler as sched + + # Pin TERMINAL_CWD to a sentinel via monkeypatch so we control both + # the before-value and the after-value regardless of cross-test state. 
+ monkeypatch.setenv("TERMINAL_CWD", "/cron-test-sentinel") + before = os.environ["TERMINAL_CWD"] + + observed: dict = {} + self._install_stubs(monkeypatch, observed) + + job = { + "id": "xyz", + "name": "no-wd-job", + "workdir": None, + "schedule_display": "manual", + } + + success, *_ = sched.run_job(job) + assert success is True + + # Feature is OFF — skip_context_files stays True. + assert observed["skip_context_files"] is True + # TERMINAL_CWD saw the same value during init as it had before. + assert observed["terminal_cwd_during_init"] == before + # And after run_job completes, it's still the sentinel (nothing + # overwrote or cleared it). + assert os.environ["TERMINAL_CWD"] == before diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 421d6859d..4cd4b7cd7 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -710,7 +710,15 @@ class TestRunJobSessionPersistence: kwargs = mock_agent_cls.call_args.kwargs assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"] - def test_run_job_enabled_toolsets_none_when_not_set(self, tmp_path): + def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path): + """When a job has no explicit enabled_toolsets, the scheduler now + resolves them from ``hermes tools`` platform config for ``cron`` + (PR #14xxx — blanket fix for Norbert's surprise ``moa`` run). + + The legacy "pass None → AIAgent loads full default" path is still + reachable, but only when ``_get_platform_tools`` raises (safety net + for any unexpected config shape). + """ job = { "id": "no-toolset-job", "name": "test", @@ -725,7 +733,39 @@ class TestRunJobSessionPersistence: run_job(job) kwargs = mock_agent_cls.call_args.kwargs - assert kwargs["enabled_toolsets"] is None + # Resolution happened — not None, is a list. 
+ assert isinstance(kwargs["enabled_toolsets"], list) + # The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS + # (``moa``, ``homeassistant``, ``rl``) removed. The most important + # invariant: ``moa`` is NOT in the default cron toolset, so a cron + # run cannot accidentally spin up frontier models. + assert "moa" not in kwargs["enabled_toolsets"] + + def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path): + """Per-job enabled_toolsets (via cronjob tool) always take precedence + over the platform-level ``hermes tools`` config.""" + job = { + "id": "override-job", + "name": "test", + "prompt": "hello", + "enabled_toolsets": ["terminal"], + } + fake_db, patches = self._make_run_job_patches(tmp_path) + # Even if the user has ``hermes tools`` configured to enable web+file + # for cron, the per-job override wins. + with patches[0], patches[1], patches[2], patches[3], patches[4], \ + patch("run_agent.AIAgent") as mock_agent_cls, \ + patch( + "hermes_cli.tools_config._get_platform_tools", + return_value={"web", "file"}, + ): + mock_agent = MagicMock() + mock_agent.run_conversation.return_value = {"final_response": "ok"} + mock_agent_cls.return_value = mock_agent + run_job(job) + + kwargs = mock_agent_cls.call_args.kwargs + assert kwargs["enabled_toolsets"] == ["terminal"] def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path): """Empty final_response should stay empty for delivery logic (issue #2234). 
diff --git a/tests/e2e/test_platform_commands.py b/tests/e2e/test_platform_commands.py index 1b325ba02..1597e54cc 100644 --- a/tests/e2e/test_platform_commands.py +++ b/tests/e2e/test_platform_commands.py @@ -73,14 +73,6 @@ class TestSlashCommands: send_status = await send_and_capture(adapter, "/status", platform) send_status.assert_called_once() - @pytest.mark.asyncio - async def test_provider_shows_current_provider(self, adapter, platform): - send = await send_and_capture(adapter, "/provider", platform) - - send.assert_called_once() - response_text = send.call_args[1].get("content") or send.call_args[0][1] - assert "provider" in response_text.lower() - @pytest.mark.asyncio async def test_verbose_responds(self, adapter, platform): send = await send_and_capture(adapter, "/verbose", platform) diff --git a/tests/gateway/conftest.py b/tests/gateway/conftest.py index d2f55ff9f..3e734e0d4 100644 --- a/tests/gateway/conftest.py +++ b/tests/gateway/conftest.py @@ -88,11 +88,63 @@ def _ensure_discord_mock() -> None: discord_mod.Thread = type("Thread", (), {}) discord_mod.ForumChannel = type("ForumChannel", (), {}) discord_mod.Interaction = object - discord_mod.Embed = MagicMock + discord_mod.Message = type("Message", (), {}) + + # Embed: accept the kwargs production code / tests use + # (title, description, color). MagicMock auto-attributes work too, + # but some tests construct and inspect .title/.description directly. + class _FakeEmbed: + def __init__(self, *, title=None, description=None, color=None, **_): + self.title = title + self.description = description + self.color = color + discord_mod.Embed = _FakeEmbed + + # ui.View / ui.Select / ui.Button: real classes (not MagicMock) so + # tests that subclass ModelPickerView / iterate .children / clear + # items work. 
+ class _FakeView: + def __init__(self, timeout=None): + self.timeout = timeout + self.children = [] + def add_item(self, item): + self.children.append(item) + def clear_items(self): + self.children.clear() + + class _FakeSelect: + def __init__(self, *, placeholder=None, options=None, custom_id=None, **_): + self.placeholder = placeholder + self.options = options or [] + self.custom_id = custom_id + self.callback = None + self.disabled = False + + class _FakeButton: + def __init__(self, *, label=None, style=None, custom_id=None, emoji=None, + url=None, disabled=False, row=None, sku_id=None, **_): + self.label = label + self.style = style + self.custom_id = custom_id + self.emoji = emoji + self.url = url + self.disabled = disabled + self.row = row + self.sku_id = sku_id + self.callback = None + + class _FakeSelectOption: + def __init__(self, *, label=None, value=None, description=None, **_): + self.label = label + self.value = value + self.description = description + discord_mod.SelectOption = _FakeSelectOption + discord_mod.ui = SimpleNamespace( - View=object, + View=_FakeView, + Select=_FakeSelect, + Button=_FakeButton, button=lambda *a, **k: (lambda fn: fn), - Button=object, ) discord_mod.ButtonStyle = SimpleNamespace( success=1, primary=2, secondary=2, danger=3, @@ -100,7 +152,7 @@ def _ensure_discord_mock() -> None: ) discord_mod.Color = SimpleNamespace( orange=lambda: 1, green=lambda: 2, blue=lambda: 3, - red=lambda: 4, purple=lambda: 5, + red=lambda: 4, purple=lambda: 5, greyple=lambda: 6, ) # app_commands — needed by _register_slash_commands auto-registration diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py index ae6c73ef7..d4019e1d5 100644 --- a/tests/gateway/test_agent_cache.py +++ b/tests/gateway/test_agent_cache.py @@ -950,7 +950,7 @@ class TestAgentCacheIdleResume: release_clients() (soft — session may resume). 
""" from run_agent import AIAgent - from tools import terminal_tool as _tt + import run_agent as _ra # Agent A: evicted from cache (soft) — terminal survives. # Agent B: session expired (hard) — terminal torn down. @@ -970,13 +970,16 @@ class TestAgentCacheIdleResume: ) vm_calls: list = [] - original_vm = _tt.cleanup_vm - _tt.cleanup_vm = lambda tid: vm_calls.append(tid) + # AIAgent.close() calls the ``cleanup_vm`` name bound into + # ``run_agent`` at import time, not ``tools.terminal_tool.cleanup_vm`` + # directly — so patch the ``run_agent`` reference. + original_vm = _ra.cleanup_vm + _ra.cleanup_vm = lambda tid: vm_calls.append(tid) try: agent_a.release_clients() # cache eviction agent_b.close() # session expiry finally: - _tt.cleanup_vm = original_vm + _ra.cleanup_vm = original_vm try: agent_a.close() except Exception: diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index ca229f26f..828585106 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -1374,6 +1374,139 @@ class TestResponsesStreaming: assert data["status"] == "completed" assert data["output"][-1]["content"][0]["text"] == "Stored response" + @pytest.mark.asyncio + async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter): + """Server-side asyncio.CancelledError (shutdown, request timeout) must + still leave an ``incomplete`` snapshot in ResponseStore so + GET /v1/responses/{id} and previous_response_id chaining keep + working. Regression for PR #15171 follow-up. + + Calls _write_sse_responses directly so the test can await the + handler to completion (TestClient disconnection races the server + handler, which makes end-to-end assertion on the final stored + snapshot flaky). + """ + # Build a minimal fake request + stream queue the writer understands. 
+ fake_request = MagicMock() + fake_request.headers = {} + + written_payloads: list = [] + + class _FakeStreamResponse: + async def prepare(self, req): + pass + + async def write(self, payload): + written_payloads.append(payload) + + # Patch web.StreamResponse for the duration of the writer call. + import gateway.platforms.api_server as api_mod + import queue as _q + + stream_q: _q.Queue = _q.Queue() + + async def _agent_coro(): + # Feed one partial delta into the stream queue... + stream_q.put("partial output") + # ...then give the drain loop a moment to pick it up before + # raising CancelledError to simulate a server-side cancel. + await asyncio.sleep(0.01) + raise asyncio.CancelledError() + + agent_task = asyncio.ensure_future(_agent_coro()) + response_id = f"resp_{uuid.uuid4().hex[:28]}" + + with patch.object(api_mod.web, "StreamResponse", return_value=_FakeStreamResponse()): + with pytest.raises(asyncio.CancelledError): + await adapter._write_sse_responses( + request=fake_request, + response_id=response_id, + model="hermes-agent", + created_at=int(time.time()), + stream_q=stream_q, + agent_task=agent_task, + agent_ref=[None], + conversation_history=[], + user_message="will be cancelled", + instructions=None, + conversation=None, + store=True, + session_id=None, + ) + + # The in_progress snapshot was persisted on response.created, + # and the CancelledError handler must have updated it to + # ``incomplete`` with the partial text it saw. + stored = adapter._response_store.get(response_id) + assert stored is not None, "snapshot must be retrievable after cancellation" + assert stored["response"]["status"] == "incomplete" + # Partial text captured before cancel should be preserved. 
+ output_text = "".join( + part.get("text", "") + for item in stored["response"].get("output", []) + if item.get("type") == "message" + for part in item.get("content", []) + ) + assert "partial output" in output_text + + @pytest.mark.asyncio + async def test_stream_client_disconnect_persists_incomplete_snapshot(self, adapter): + """Client disconnect (ConnectionResetError) during streaming must + persist an ``incomplete`` snapshot in ResponseStore. Regression + for PR #15171.""" + fake_request = MagicMock() + fake_request.headers = {} + + write_call_count = {"n": 0} + + class _DisconnectingStreamResponse: + async def prepare(self, req): + pass + + async def write(self, payload): + # First two writes succeed (prepare + response.created). + # On the third write (a text delta), the "client" + # disconnects — simulate with ConnectionResetError. + write_call_count["n"] += 1 + if write_call_count["n"] >= 3: + raise ConnectionResetError("simulated client disconnect") + + import gateway.platforms.api_server as api_mod + import queue as _q + + stream_q: _q.Queue = _q.Queue() + stream_q.put("some streamed text") + stream_q.put(None) # EOS sentinel + + async def _agent_coro(): + await asyncio.sleep(0.01) + return ({"final_response": "", "messages": [], "api_calls": 0}, + {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}) + + agent_task = asyncio.ensure_future(_agent_coro()) + response_id = f"resp_{uuid.uuid4().hex[:28]}" + + with patch.object(api_mod.web, "StreamResponse", return_value=_DisconnectingStreamResponse()): + await adapter._write_sse_responses( + request=fake_request, + response_id=response_id, + model="hermes-agent", + created_at=int(time.time()), + stream_q=stream_q, + agent_task=agent_task, + agent_ref=[None], + conversation_history=[], + user_message="will disconnect", + instructions=None, + conversation=None, + store=True, + session_id=None, + ) + + stored = adapter._response_store.get(response_id) + assert stored is not None, "snapshot must survive 
client disconnect" + assert stored["response"]["status"] == "incomplete" + # --------------------------------------------------------------------------- # Auth on endpoints diff --git a/tests/gateway/test_auth_fallback.py b/tests/gateway/test_auth_fallback.py new file mode 100644 index 000000000..3edb8b1ee --- /dev/null +++ b/tests/gateway/test_auth_fallback.py @@ -0,0 +1,73 @@ +"""Test that AuthError triggers fallback provider resolution (#7230).""" + +import os +from unittest.mock import patch, MagicMock + +import pytest + + +class TestResolveRuntimeAgentKwargsAuthFallback: + """_resolve_runtime_agent_kwargs should try fallback on AuthError.""" + + def test_auth_error_tries_fallback(self, tmp_path, monkeypatch): + """When primary provider raises AuthError, fallback is attempted.""" + from hermes_cli.auth import AuthError + + # Create a config with fallback + config_path = tmp_path / "config.yaml" + config_path.write_text( + "model:\n provider: openai-codex\n" + "fallback_model:\n provider: openrouter\n" + " model: meta-llama/llama-4-maverick\n" + ) + + monkeypatch.setattr("gateway.run._hermes_home", tmp_path) + + call_count = {"n": 0} + + def _mock_resolve(**kwargs): + call_count["n"] += 1 + requested = kwargs.get("requested", "") + if requested and "codex" in str(requested).lower(): + raise AuthError("Codex token refresh failed with status 401") + return { + "api_key": "fallback-key", + "base_url": "https://openrouter.ai/api/v1", + "provider": "openrouter", + "api_mode": "openai_chat", + "command": None, + "args": None, + "credential_pool": None, + } + + monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openai-codex") + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=_mock_resolve, + ): + from gateway.run import _resolve_runtime_agent_kwargs + result = _resolve_runtime_agent_kwargs() + + assert result["provider"] == "openrouter" + assert result["api_key"] == "fallback-key" + # Should have been called at least twice (primary + 
fallback) + assert call_count["n"] >= 2 + + def test_auth_error_no_fallback_raises(self, tmp_path, monkeypatch): + """When primary fails and no fallback configured, RuntimeError is raised.""" + from hermes_cli.auth import AuthError + + config_path = tmp_path / "config.yaml" + config_path.write_text("model:\n provider: openai-codex\n") + + monkeypatch.setattr("gateway.run._hermes_home", tmp_path) + monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openai-codex") + + with patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + side_effect=AuthError("token expired"), + ): + from gateway.run import _resolve_runtime_agent_kwargs + with pytest.raises(RuntimeError): + _resolve_runtime_agent_kwargs() diff --git a/tests/gateway/test_bluebubbles.py b/tests/gateway/test_bluebubbles.py index 86b4ac351..e3ff26cc6 100644 --- a/tests/gateway/test_bluebubbles.py +++ b/tests/gateway/test_bluebubbles.py @@ -66,6 +66,37 @@ class TestBlueBubblesHelpers: assert check_bluebubbles_requirements() is True + def test_supports_message_editing_is_false(self, monkeypatch): + adapter = _make_adapter(monkeypatch) + assert adapter.SUPPORTS_MESSAGE_EDITING is False + + def test_truncate_message_omits_pagination_suffixes(self, monkeypatch): + adapter = _make_adapter(monkeypatch) + chunks = adapter.truncate_message("abcdefghij", max_length=6) + assert len(chunks) > 1 + assert "".join(chunks) == "abcdefghij" + assert all("(" not in chunk for chunk in chunks) + + @pytest.mark.asyncio + async def test_send_splits_paragraphs_into_multiple_bubbles(self, monkeypatch): + adapter = _make_adapter(monkeypatch) + sent = [] + + async def fake_resolve_chat_guid(chat_id): + return "iMessage;-;user@example.com" + + async def fake_api_post(path, payload): + sent.append(payload["message"]) + return {"data": {"guid": f"msg-{len(sent)}"}} + + monkeypatch.setattr(adapter, "_resolve_chat_guid", fake_resolve_chat_guid) + monkeypatch.setattr(adapter, "_api_post", fake_api_post) + + result = await 
adapter.send("user@example.com", "first thought\n\nsecond thought") + + assert result.success is True + assert sent == ["first thought", "second thought"] + def test_format_message_strips_markdown(self, monkeypatch): adapter = _make_adapter(monkeypatch) assert adapter.format_message("**Hello** `world`") == "Hello world" diff --git a/tests/gateway/test_busy_session_ack.py b/tests/gateway/test_busy_session_ack.py index 52d4c23df..290c1a4b8 100644 --- a/tests/gateway/test_busy_session_ack.py +++ b/tests/gateway/test_busy_session_ack.py @@ -70,6 +70,9 @@ def _make_runner(): runner.session_store = None runner.hooks = MagicMock() runner.hooks.emit = AsyncMock() + runner.pairing_store = MagicMock() + runner.pairing_store.is_approved.return_value = True + runner._is_user_authorized = lambda _source: True return runner, _AGENT_PENDING_SENTINEL @@ -91,6 +94,30 @@ def _make_adapter(platform_val="telegram"): class TestBusySessionAck: """User sends a message while agent is running — should get acknowledgment.""" + @pytest.mark.asyncio + async def test_handle_message_queue_mode_queues_without_interrupt(self): + """Runner queue mode must not interrupt an active agent for text follow-ups.""" + from gateway.run import GatewayRunner + + runner, _sentinel = _make_runner() + adapter = _make_adapter() + + event = _make_event(text="follow up in queue mode") + sk = build_session_key(event.source) + + running_agent = MagicMock() + runner._busy_input_mode = "queue" + runner._running_agents[sk] = running_agent + runner.adapters[event.source.platform] = adapter + + result = await GatewayRunner._handle_message(runner, event) + + assert result is None + assert sk in adapter._pending_messages + assert adapter._pending_messages[sk] is event + assert sk not in runner._pending_messages + running_agent.interrupt.assert_not_called() + @pytest.mark.asyncio async def test_sends_ack_when_agent_running(self): """First message during busy session should get a status ack.""" diff --git 
a/tests/gateway/test_command_bypass_active_session.py b/tests/gateway/test_command_bypass_active_session.py index ea910d30b..aae68b6b5 100644 --- a/tests/gateway/test_command_bypass_active_session.py +++ b/tests/gateway/test_command_bypass_active_session.py @@ -272,7 +272,7 @@ class TestCommandBypassActiveSession: # Tests: non-bypass-set commands (no dedicated Level-2 handler) also bypass # instead of interrupting + being discarded. Regression for the Discord # ghost-slash-command bug where /model, /reasoning, /voice, /insights, /title, -# /resume, /retry, /undo, /compress, /usage, /provider, /reload-mcp, +# /resume, /retry, /undo, /compress, /usage, /reload-mcp, # /sethome, /reset silently interrupted the running agent. # --------------------------------------------------------------------------- @@ -298,7 +298,6 @@ class TestAllResolvableCommandsBypassGuard: ("/undo", "undo"), ("/compress", "compress"), ("/usage", "usage"), - ("/provider", "provider"), ("/reload-mcp", "reload-mcp"), ("/sethome", "sethome"), ], @@ -326,7 +325,7 @@ class TestAllResolvableCommandsBypassGuard: for cmd in ( "model", "reasoning", "personality", "voice", "insights", "title", - "resume", "retry", "undo", "compress", "usage", "provider", + "resume", "retry", "undo", "compress", "usage", "reload-mcp", "sethome", "reset", ): assert should_bypass_active_session(cmd) is True, ( diff --git a/tests/gateway/test_complete_path_at_filter.py b/tests/gateway/test_complete_path_at_filter.py index 9e5031c0d..4a3e292b0 100644 --- a/tests/gateway/test_complete_path_at_filter.py +++ b/tests/gateway/test_complete_path_at_filter.py @@ -1,22 +1,28 @@ """Regression tests for the TUI gateway's `complete.path` handler. 
-Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder` -with no colon yet) still surfaced files alongside directories in the -TUI composer, because the gateway-side completion lives in -`tui_gateway/server.py` and was never touched by the earlier fix to -`hermes_cli/commands.py`. +Reported during the TUI v2 blitz retest: + - typing `@folder:` (and `@folder` with no colon yet) surfaced files + alongside directories — the gateway-side completion lives in + `tui_gateway/server.py` and was never touched by the earlier fix to + `hermes_cli/commands.py`. + - typing `@appChrome` required the full `@ui-tui/src/components/app…` + path to find the file — users expect Cmd-P-style fuzzy basename + matching across the repo, not a strict directory prefix filter. Covers: - `@folder:` only yields directories - `@file:` only yields regular files - Bare `@folder` / `@file` (no colon) lists cwd directly - Explicit prefix is preserved in the completion text + - `@` with no slash fuzzy-matches basenames anywhere in the tree """ from __future__ import annotations from pathlib import Path +import pytest + from tui_gateway import server @@ -33,6 +39,15 @@ def _items(word: str): return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]] +@pytest.fixture(autouse=True) +def _reset_fuzzy_cache(monkeypatch): + # Each test walks a fresh tmp dir; clear the cached listing so prior + # roots can't leak through the TTL window. 
+ server._fuzzy_cache.clear() + yield + server._fuzzy_cache.clear() + + def test_at_folder_colon_only_dirs(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) _fixture(tmp_path) @@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch): for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"): assert expected in texts, f"missing static ref {expected!r} in {texts!r}" + + +# ── Fuzzy basename matching ────────────────────────────────────────────── +# Users shouldn't have to know the full path — typing `@appChrome` should +# find `ui-tui/src/components/appChrome.tsx`. + + +def _nested_fixture(tmp_path: Path): + (tmp_path / "readme.md").write_text("x") + (tmp_path / ".env").write_text("x") + (tmp_path / "ui-tui/src/components").mkdir(parents=True) + (tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x") + (tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x") + (tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x") + (tmp_path / "ui-tui/src/hooks").mkdir(parents=True) + (tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x") + (tmp_path / "tui_gateway").mkdir() + (tmp_path / "tui_gateway/server.py").write_text("x") + + +def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch): + """`@appChrome` — with no slash — should surface the nested file.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + entries = _items("@appChrome") + texts = [t for t, _, _ in entries] + + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + # Display is the basename, meta is the containing directory, so the + # picker can show `appChrome.tsx ui-tui/src/components` on one row. 
+ row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx") + assert row[1] == "appChrome.tsx" + assert row[2] == "ui-tui/src/components" + + +def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch): + """Better matches sort before weaker matches regardless of path depth.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + (tmp_path / "server.py").write_text("x") # exact basename match at root + + texts = [t for t, _, _ in _items("@server")] + + # Exact `server.py` beats `tui_gateway/server.py` (prefix match) — both + # rank 1 on basename but exact basename wins on the sort key; shorter + # rel path breaks ties. + assert texts[0] == "@file:server.py", texts + assert "@file:tui_gateway/server.py" in texts + + +def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch): + """Mid-basename camelCase pieces match without substring scanning.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@Chrome")] + + # `Chrome` starts a camelCase word inside `appChrome.tsx`. 
+ assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + +def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch): + """`@uCo` → `useCompletion.ts` via subsequence, last-resort tier.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@uCo")] + + assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts + + +def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch): + """Explicit `@file:` prefix still wins the completion tag.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@file:appChrome")] + + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + +def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch): + """Any `/` in the query = user is navigating; keep directory listing.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@ui-tui/src/components/app")] + + # Directory-listing mode prefixes with `@file:` / `@folder:` per entry. + # It should only surface direct children of the named dir — not the + # nested `useCompletion.ts`. + assert any("appChrome.tsx" in t for t in texts), texts + assert not any("useCompletion.ts" in t for t in texts), texts + + +def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch): + """`@folder:` still lists directories — fuzzy scanner only walks + files (git-tracked + untracked), so defer to the dir-listing path.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@folder:ui")] + + # Root has `ui-tui/` as a directory; the listing branch should surface it. 
+ assert any(t.startswith("@folder:ui-tui") for t in texts), texts + + +def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch): + """`.env` doesn't leak into `@env` but does show for `@.env`.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + assert not any(".env" in t for t, _, _ in _items("@env")) + assert any(t.endswith(".env") for t, _, _ in _items("@.env")) + + +def test_fuzzy_caps_results(tmp_path, monkeypatch): + """The 30-item cap survives a big tree.""" + monkeypatch.chdir(tmp_path) + for i in range(60): + (tmp_path / f"mod_{i:03d}.py").write_text("x") + + items = _items("@mod") + + assert len(items) == 30 + + +def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch): + """When the gateway runs from a subdirectory of a git repo, fuzzy + completion paths must resolve under that cwd — not under the repo root. + + Without this, `@appChrome` from inside `apps/web/` would suggest + `@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would + look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every + `git ls-files` result back to a `relpath(root)` and drop anything + outside `root` so the completion contract stays "paths are cwd-relative". 
+ """ + import subprocess + + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True) + + (tmp_path / "apps" / "web" / "src").mkdir(parents=True) + (tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x") + (tmp_path / "apps" / "api" / "src").mkdir(parents=True) + (tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x") + (tmp_path / "README.md").write_text("x") + + subprocess.run(["git", "add", "."], cwd=tmp_path, check=True) + subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True) + + # Run from `apps/web/` — completions should be relative to here, and + # files outside this subtree (apps/api, README.md at root) shouldn't + # appear at all. + monkeypatch.chdir(tmp_path / "apps" / "web") + + texts = [t for t, _, _ in _items("@appChrome")] + + assert "@file:src/appChrome.tsx" in texts, texts + assert not any("apps/web/" in t for t in texts), texts + + server._fuzzy_cache.clear() + other_texts = [t for t, _, _ in _items("@server")] + + assert not any("server.ts" in t for t in other_texts), other_texts + + server._fuzzy_cache.clear() + readme_texts = [t for t, _, _ in _items("@README")] + + assert not any("README.md" in t for t in readme_texts), readme_texts diff --git a/tests/gateway/test_compress_command.py b/tests/gateway/test_compress_command.py index 021e98773..91627f92b 100644 --- a/tests/gateway/test_compress_command.py +++ b/tests/gateway/test_compress_command.py @@ -64,9 +64,7 @@ async def test_compress_command_reports_noop_without_success_banner(): agent_instance = MagicMock() agent_instance.shutdown_memory_provider = MagicMock() agent_instance.close = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - 
agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (list(history), "") @@ -101,9 +99,7 @@ async def test_compress_command_explains_when_token_estimate_rises(): agent_instance = MagicMock() agent_instance.shutdown_memory_provider = MagicMock() agent_instance.close = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (compressed, "") diff --git a/tests/gateway/test_compress_focus.py b/tests/gateway/test_compress_focus.py index 8a1ee060f..597185e57 100644 --- a/tests/gateway/test_compress_focus.py +++ b/tests/gateway/test_compress_focus.py @@ -64,9 +64,7 @@ async def test_compress_focus_topic_passed_to_agent(): compressed = [history[0], history[-1]] runner = _make_runner(history) agent_instance = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (compressed, "") @@ -96,9 +94,7 @@ async def test_compress_no_focus_passes_none(): history = _make_history() runner = _make_runner(history) agent_instance = MagicMock() - agent_instance.context_compressor.protect_first_n = 0 - agent_instance.context_compressor._align_boundary_forward.return_value = 0 - agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2 + 
agent_instance.context_compressor.has_content_to_compress.return_value = True agent_instance.session_id = "sess-1" agent_instance._compress_context.return_value = (list(history), "") diff --git a/tests/gateway/test_compress_plugin_engine.py b/tests/gateway/test_compress_plugin_engine.py new file mode 100644 index 000000000..4604e7723 --- /dev/null +++ b/tests/gateway/test_compress_plugin_engine.py @@ -0,0 +1,173 @@ +"""Regression test: /compress works with context engine plugins. + +Reported by @selfhostedsoul (Discord, Apr 2026) with the LCM plugin installed: + + Compression failed: 'LCMEngine' object has no attribute '_align_boundary_forward' + +Root cause: the gateway /compress handler used to reach into +ContextCompressor-specific private helpers (_align_boundary_forward, +_find_tail_cut_by_tokens) for its preflight check. Those helpers are not +part of the generic ContextEngine ABC, so any plugin engine (LCM, etc.) +raised AttributeError. + +The fix promotes the preflight into an optional ABC method +(has_content_to_compress) with a safe default of True. +""" + +from datetime import datetime +from typing import Any, Dict, List +from unittest.mock import MagicMock, patch + +import pytest + +from agent.context_engine import ContextEngine +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionEntry, SessionSource, build_session_key + + +class _FakePluginEngine(ContextEngine): + """Minimal ContextEngine that only implements the ABC — no private helpers. + + Mirrors the shape of a third-party context engine plugin such as LCM. + If /compress reaches into any ContextCompressor-specific internals this + engine will raise AttributeError, just like the real bug. 
+ """ + + @property + def name(self) -> str: + return "fake-plugin" + + def update_from_response(self, usage: Dict[str, Any]) -> None: + return None + + def should_compress(self, prompt_tokens: int = None) -> bool: + return False + + def compress( + self, + messages: List[Dict[str, Any]], + current_tokens: int = None, + focus_topic: str = None, + ) -> List[Dict[str, Any]]: + # Pretend we dropped a middle turn. + self.compression_count += 1 + if len(messages) >= 3: + return [messages[0], messages[-1]] + return list(messages) + + +def _make_source() -> SessionSource: + return SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", + ) + + +def _make_event(text: str = "/compress") -> MessageEvent: + return MessageEvent(text=text, source=_make_source(), message_id="m1") + + +def _make_history() -> list[dict[str, str]]: + return [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] + + +def _make_runner(history: list[dict[str, str]]): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = session_entry + runner.session_store.load_transcript.return_value = history + runner.session_store.rewrite_transcript = MagicMock() + runner.session_store.update_session = MagicMock() + runner.session_store._save = MagicMock() + return runner + + +@pytest.mark.asyncio +async def test_compress_works_with_plugin_context_engine(): + """/compress must not call 
ContextCompressor-only private helpers. + + Uses a fake ContextEngine subclass that only implements the ABC — + matches what a real plugin (LCM, etc.) exposes. If the gateway + reaches into ``_align_boundary_forward`` or ``_find_tail_cut_by_tokens`` + on this engine, AttributeError propagates and the test fails with the + exact user-visible error selfhostedsoul reported. + """ + history = _make_history() + compressed = [history[0], history[-1]] + runner = _make_runner(history) + + plugin_engine = _FakePluginEngine() + agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() + # Real plugin engine — no MagicMock auto-attributes masking missing helpers. + agent_instance.context_compressor = plugin_engine + agent_instance.session_id = "sess-1" + agent_instance._compress_context.return_value = (compressed, "") + + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100), + ): + result = await runner._handle_compress_command(_make_event("/compress")) + + # No AttributeError surfaced as "Compression failed: ..." 
+ assert "Compression failed" not in result + assert "_align_boundary_forward" not in result + assert "_find_tail_cut_by_tokens" not in result + # Happy path fired + agent_instance._compress_context.assert_called_once() + + +@pytest.mark.asyncio +async def test_compress_respects_plugin_has_content_to_compress_false(): + """If a plugin reports no compressible content, gateway skips the LLM call.""" + + class _EmptyEngine(_FakePluginEngine): + def has_content_to_compress(self, messages): + return False + + history = _make_history() + runner = _make_runner(history) + + plugin_engine = _EmptyEngine() + agent_instance = MagicMock() + agent_instance.shutdown_memory_provider = MagicMock() + agent_instance.close = MagicMock() + agent_instance.context_compressor = plugin_engine + agent_instance.session_id = "sess-1" + + with ( + patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "***"}), + patch("gateway.run._resolve_gateway_model", return_value="test-model"), + patch("run_agent.AIAgent", return_value=agent_instance), + patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=100), + ): + result = await runner._handle_compress_command(_make_event("/compress")) + + assert "Nothing to compress" in result + agent_instance._compress_context.assert_not_called() diff --git a/tests/gateway/test_config.py b/tests/gateway/test_config.py index 41a7a49fe..9e82a5da7 100644 --- a/tests/gateway/test_config.py +++ b/tests/gateway/test_config.py @@ -52,6 +52,10 @@ class TestPlatformConfigRoundtrip: assert restored.enabled is False assert restored.token is None + def test_from_dict_coerces_quoted_false_enabled(self): + restored = PlatformConfig.from_dict({"enabled": "false"}) + assert restored.enabled is False + class TestGetConnectedPlatforms: def test_returns_enabled_with_token(self): @@ -140,6 +144,10 @@ class TestSessionResetPolicy: assert restored.at_hour == 4 assert restored.idle_minutes == 1440 + def 
test_from_dict_coerces_quoted_false_notify(self): + restored = SessionResetPolicy.from_dict({"notify": "false"}) + assert restored.notify is False + class TestGatewayConfigRoundtrip: def test_full_roundtrip(self): @@ -182,6 +190,10 @@ class TestGatewayConfigRoundtrip: assert restored.unauthorized_dm_behavior == "ignore" assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair" + def test_from_dict_coerces_quoted_false_always_log_local(self): + restored = GatewayConfig.from_dict({"always_log_local": "false"}) + assert restored.always_log_local is False + class TestLoadGatewayConfig: def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch): @@ -238,6 +250,55 @@ class TestLoadGatewayConfig: assert config.thread_sessions_per_user is False + def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch): + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text( + "platforms:\n" + " api_server:\n" + " enabled: \"false\"\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + config = load_gateway_config() + + assert config.platforms[Platform.API_SERVER].enabled is False + assert Platform.API_SERVER not in config.get_connected_platforms() + + def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch): + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text( + "session_reset:\n" + " notify: \"false\"\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + config = load_gateway_config() + + assert config.default_reset_policy.notify is False + + def test_bridges_quoted_false_always_log_local_from_config_yaml(self, tmp_path, monkeypatch): + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + 
config_path.write_text( + "always_log_local: \"false\"\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + config = load_gateway_config() + + assert config.always_log_local is False + def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch): hermes_home = tmp_path / ".hermes" hermes_home.mkdir() diff --git a/tests/gateway/test_discord_allowed_channels.py b/tests/gateway/test_discord_allowed_channels.py new file mode 100644 index 000000000..abc79bc76 --- /dev/null +++ b/tests/gateway/test_discord_allowed_channels.py @@ -0,0 +1,104 @@ +"""Regression guard for #14920: wildcard "*" in Discord channel config lists. + +Setting ``allowed_channels: "*"``, ``free_response_channels: "*"``, or +``ignored_channels: "*"`` in config (or their ``DISCORD_*_CHANNELS`` env var +equivalents) must behave as a wildcard — i.e. the bot responds in every +channel (or is silenced in every channel, for the ignored list). Previously +the literal string "*" was placed into a set and compared against numeric +channel IDs via set-intersection, which always produced an empty set and +caused every message to be silently dropped (for ``allowed_channels``) or +every ``free_response`` / ``ignored`` check to fail open. 
+""" + +import unittest + + +def _channel_is_allowed(channel_id: str, allowed_channels_raw: str) -> bool: + """Replicate the channel-allow-list check from discord.py on_message.""" + if not allowed_channels_raw: + return True + allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()} + if "*" in allowed_channels: + return True + return bool({channel_id} & allowed_channels) + + +def _channel_is_ignored(channel_id: str, ignored_channels_raw: str) -> bool: + """Replicate the ignored-channel check from discord.py on_message.""" + ignored_channels = { + ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip() + } + return "*" in ignored_channels or bool({channel_id} & ignored_channels) + + +def _channel_is_free_response(channel_id: str, free_channels_raw: str) -> bool: + """Replicate the free-response-channel check from discord.py on_message.""" + free_channels = { + ch.strip() for ch in free_channels_raw.split(",") if ch.strip() + } + return "*" in free_channels or bool({channel_id} & free_channels) + + +class TestDiscordAllowedChannelsWildcard(unittest.TestCase): + """Wildcard and channel-list behaviour for DISCORD_ALLOWED_CHANNELS.""" + + def test_wildcard_allows_any_channel(self): + """'*' should allow messages from any channel ID.""" + self.assertTrue(_channel_is_allowed("1234567890", "*")) + + def test_wildcard_in_list_allows_any_channel(self): + """'*' mixed with other entries still allows any channel.""" + self.assertTrue(_channel_is_allowed("9999999999", "111,*,222")) + + def test_exact_match_allowed(self): + """Channel ID present in the explicit list is allowed.""" + self.assertTrue(_channel_is_allowed("1234567890", "1234567890,9876543210")) + + def test_non_matching_channel_blocked(self): + """Channel ID absent from the explicit list is blocked.""" + self.assertFalse(_channel_is_allowed("5555555555", "1234567890,9876543210")) + + def test_empty_allowlist_allows_all(self): + """Empty DISCORD_ALLOWED_CHANNELS means no 
restriction.""" + self.assertTrue(_channel_is_allowed("1234567890", "")) + + def test_whitespace_only_entry_ignored(self): + """Entries that are only whitespace are stripped and ignored.""" + self.assertFalse(_channel_is_allowed("1234567890", " , ")) + + +class TestDiscordIgnoredChannelsWildcard(unittest.TestCase): + """Wildcard and channel-list behaviour for DISCORD_IGNORED_CHANNELS.""" + + def test_wildcard_silences_every_channel(self): + """'*' in ignored_channels silences the bot everywhere.""" + self.assertTrue(_channel_is_ignored("1234567890", "*")) + + def test_empty_ignored_list_silences_nothing(self): + self.assertFalse(_channel_is_ignored("1234567890", "")) + + def test_exact_match_is_ignored(self): + self.assertTrue(_channel_is_ignored("111", "111,222")) + + def test_non_match_not_ignored(self): + self.assertFalse(_channel_is_ignored("333", "111,222")) + + +class TestDiscordFreeResponseChannelsWildcard(unittest.TestCase): + """Wildcard and channel-list behaviour for DISCORD_FREE_RESPONSE_CHANNELS.""" + + def test_wildcard_makes_every_channel_free_response(self): + """'*' in free_response_channels exempts every channel from mention-required.""" + self.assertTrue(_channel_is_free_response("1234567890", "*")) + + def test_wildcard_in_list_applies_everywhere(self): + self.assertTrue(_channel_is_free_response("9999999999", "111,*,222")) + + def test_exact_match_is_free_response(self): + self.assertTrue(_channel_is_free_response("111", "111,222")) + + def test_non_match_not_free_response(self): + self.assertFalse(_channel_is_free_response("333", "111,222")) + + def test_empty_list_no_free_response(self): + self.assertFalse(_channel_is_free_response("111", "")) diff --git a/tests/gateway/test_discord_model_picker.py b/tests/gateway/test_discord_model_picker.py new file mode 100644 index 000000000..a1ff434bd --- /dev/null +++ b/tests/gateway/test_discord_model_picker.py @@ -0,0 +1,82 @@ +"""Regression tests for the Discord /model picker. 
+ +Uses the shared discord mock from tests/gateway/conftest.py (installed +at collection time via _ensure_discord_mock()). Previously this file +installed its own mock at module-import time and clobbered sys.modules, +breaking other gateway tests under pytest-xdist. +""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from gateway.platforms.discord import ModelPickerView + + +@pytest.mark.asyncio +async def test_model_picker_clears_controls_before_running_switch_callback(): + events: list[object] = [] + + async def on_model_selected(chat_id: str, model_id: str, provider_slug: str) -> str: + events.append(("switch", chat_id, model_id, provider_slug)) + return "Model switched" + + async def edit_message(**kwargs): + events.append( + ( + "initial-edit", + kwargs["embed"].title, + kwargs["embed"].description, + kwargs["view"], + ) + ) + + async def edit_original_response(**kwargs): + events.append(( + "final-edit", + kwargs["embed"].title, + kwargs["embed"].description, + kwargs["view"], + )) + + view = ModelPickerView( + providers=[ + { + "slug": "copilot", + "name": "GitHub Copilot", + "models": ["gpt-5.4"], + "total_models": 1, + "is_current": True, + } + ], + current_model="gpt-5-mini", + current_provider="copilot", + session_key="session-1", + on_model_selected=on_model_selected, + allowed_user_ids=set(), + ) + view._selected_provider = "copilot" + + interaction = SimpleNamespace( + user=SimpleNamespace(id=123), + channel_id=456, + data={"values": ["gpt-5.4"]}, + response=SimpleNamespace( + defer=AsyncMock(), + send_message=AsyncMock(), + edit_message=AsyncMock(side_effect=edit_message), + ), + edit_original_response=AsyncMock(side_effect=edit_original_response), + ) + + await view._on_model_selected(interaction) + + assert events == [ + ("initial-edit", "⚙ Switching Model", "Switching to `gpt-5.4`...", None), + ("switch", "456", "gpt-5.4", "copilot"), + ("final-edit", "⚙ Model Switched", "Model switched", None), + ] + 
interaction.response.edit_message.assert_awaited_once() + interaction.response.defer.assert_not_called() + interaction.edit_original_response.assert_awaited_once() diff --git a/tests/gateway/test_discord_slash_commands.py b/tests/gateway/test_discord_slash_commands.py index 7e1f5d4a8..7b15a7ed0 100644 --- a/tests/gateway/test_discord_slash_commands.py +++ b/tests/gateway/test_discord_slash_commands.py @@ -164,7 +164,7 @@ async def test_auto_registers_missing_gateway_commands(adapter): # These commands are gateway-available but were not in the original # hardcoded registration list — they should be auto-registered. - expected_auto = {"debug", "yolo", "reload", "profile"} + expected_auto = {"debug", "yolo", "profile"} for name in expected_auto: assert name in tree_names, f"/{name} should be auto-registered on Discord" diff --git a/tests/gateway/test_plan_command.py b/tests/gateway/test_plan_command.py deleted file mode 100644 index d43f46cde..000000000 --- a/tests/gateway/test_plan_command.py +++ /dev/null @@ -1,129 +0,0 @@ -"""Tests for the /plan gateway slash command.""" - -from datetime import datetime -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from agent.skill_commands import scan_skill_commands -from gateway.config import GatewayConfig, Platform, PlatformConfig -from gateway.platforms.base import MessageEvent -from gateway.session import SessionEntry, SessionSource - - -def _make_runner(): - from gateway.run import GatewayRunner - - runner = object.__new__(GatewayRunner) - runner.config = GatewayConfig( - platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} - ) - runner.adapters = {} - runner._voice_mode = {} - runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) - runner.session_store = MagicMock() - runner.session_store.get_or_create_session.return_value = SessionEntry( - session_key="agent:main:telegram:dm:c1:u1", - session_id="sess-1", - 
created_at=datetime.now(), - updated_at=datetime.now(), - platform=Platform.TELEGRAM, - chat_type="dm", - ) - runner.session_store.load_transcript.return_value = [] - runner.session_store.has_any_sessions.return_value = True - runner.session_store.append_to_transcript = MagicMock() - runner.session_store.rewrite_transcript = MagicMock() - runner._running_agents = {} - runner._pending_messages = {} - runner._pending_approvals = {} - runner._session_db = None - runner._reasoning_config = None - runner._provider_routing = {} - runner._fallback_model = None - runner._show_reasoning = False - runner._is_user_authorized = lambda _source: True - runner._set_session_env = lambda _context: None - runner._run_agent = AsyncMock( - return_value={ - "final_response": "planned", - "messages": [], - "tools": [], - "history_offset": 0, - "last_prompt_tokens": 0, - } - ) - return runner - - -def _make_event(text="/plan"): - return MessageEvent( - text=text, - source=SessionSource( - platform=Platform.TELEGRAM, - user_id="u1", - chat_id="c1", - user_name="tester", - chat_type="dm", - ), - message_id="m1", - ) - - -def _make_plan_skill(skills_dir): - skill_dir = skills_dir / "plan" - skill_dir.mkdir(parents=True, exist_ok=True) - (skill_dir / "SKILL.md").write_text( - """--- -name: plan -description: Plan mode skill. ---- - -# Plan - -Use the current conversation context when no explicit instruction is provided. -Save plans under the active workspace's .hermes/plans directory. 
-""" - ) - - -class TestGatewayPlanCommand: - @pytest.mark.asyncio - async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path): - import gateway.run as gateway_run - - runner = _make_runner() - event = _make_event("/plan Add OAuth login") - - monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) - monkeypatch.setattr( - "agent.model_metadata.get_model_context_length", - lambda *_args, **_kwargs: 100_000, - ) - - with patch("tools.skills_tool.SKILLS_DIR", tmp_path): - _make_plan_skill(tmp_path) - scan_skill_commands() - result = await runner._handle_message(event) - - assert result == "planned" - forwarded = runner._run_agent.call_args.kwargs["message"] - assert "Plan mode skill" in forwarded - assert "Add OAuth login" in forwarded - assert ".hermes/plans" in forwarded - assert str(tmp_path / "plans") not in forwarded - assert "active workspace/backend cwd" in forwarded - assert "Runtime note:" in forwarded - - @pytest.mark.asyncio - async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path): - runner = _make_runner() - event = _make_event("/help") - - with patch("tools.skills_tool.SKILLS_DIR", tmp_path): - _make_plan_skill(tmp_path) - scan_skill_commands() - result = await runner._handle_help_command(event) - - assert "/plan" in result diff --git a/tests/gateway/test_pre_gateway_dispatch.py b/tests/gateway/test_pre_gateway_dispatch.py new file mode 100644 index 000000000..530224807 --- /dev/null +++ b/tests/gateway/test_pre_gateway_dispatch.py @@ -0,0 +1,179 @@ +"""Tests for the pre_gateway_dispatch plugin hook. + +The hook allows plugins to intercept incoming messages before auth and +agent dispatch. It runs in _handle_message and acts on returned action +dicts: {"action": "skip"|"rewrite"|"allow"}. 
+""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _clear_auth_env(monkeypatch) -> None: + for key in ( + "TELEGRAM_ALLOWED_USERS", + "WHATSAPP_ALLOWED_USERS", + "GATEWAY_ALLOWED_USERS", + "TELEGRAM_ALLOW_ALL_USERS", + "WHATSAPP_ALLOW_ALL_USERS", + "GATEWAY_ALLOW_ALL_USERS", + ): + monkeypatch.delenv(key, raising=False) + + +def _make_event(text: str = "hello", platform: Platform = Platform.WHATSAPP) -> MessageEvent: + return MessageEvent( + text=text, + message_id="m1", + source=SessionSource( + platform=platform, + user_id="15551234567@s.whatsapp.net", + chat_id="15551234567@s.whatsapp.net", + user_name="tester", + chat_type="dm", + ), + ) + + +def _make_runner(platform: Platform): + from gateway.run import GatewayRunner + + config = GatewayConfig( + platforms={platform: PlatformConfig(enabled=True)}, + ) + runner = object.__new__(GatewayRunner) + runner.config = config + adapter = SimpleNamespace(send=AsyncMock()) + runner.adapters = {platform: adapter} + runner.pairing_store = MagicMock() + runner.pairing_store.is_approved.return_value = False + runner.pairing_store._is_rate_limited.return_value = False + runner.session_store = MagicMock() + runner._running_agents = {} + runner._update_prompt_pending = {} + return runner, adapter + + +@pytest.mark.asyncio +async def test_hook_skip_short_circuits_dispatch(monkeypatch): + """A plugin returning {'action': 'skip'} drops the message before auth.""" + _clear_auth_env(monkeypatch) + + def _fake_hook(name, **kwargs): + if name == "pre_gateway_dispatch": + return [{"action": "skip", "reason": "plugin-handled"}] + return [] + + monkeypatch.setattr("hermes_cli.plugins.invoke_hook", _fake_hook) + + runner, adapter = _make_runner(Platform.WHATSAPP) + + result = await 
runner._handle_message(_make_event("hi")) + + assert result is None + adapter.send.assert_not_awaited() + runner.pairing_store.generate_code.assert_not_called() + + +@pytest.mark.asyncio +async def test_hook_rewrite_replaces_event_text(monkeypatch): + """A plugin returning {'action': 'rewrite', 'text': ...} mutates event.text.""" + _clear_auth_env(monkeypatch) + monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "*") + + seen_text = {} + + def _fake_hook(name, **kwargs): + if name == "pre_gateway_dispatch": + return [{"action": "rewrite", "text": "REWRITTEN"}] + return [] + + async def _capture(event, source, _quick_key, _run_generation): + seen_text["value"] = event.text + return "ok" + + monkeypatch.setattr("hermes_cli.plugins.invoke_hook", _fake_hook) + + runner, _adapter = _make_runner(Platform.WHATSAPP) + runner._handle_message_with_agent = _capture # noqa: SLF001 + + await runner._handle_message(_make_event("original")) + + assert seen_text.get("value") == "REWRITTEN" + + +@pytest.mark.asyncio +async def test_hook_allow_falls_through_to_auth(monkeypatch): + """A plugin returning {'action': 'allow'} continues to normal dispatch.""" + _clear_auth_env(monkeypatch) + # No allowed users set → auth fails → pairing flow triggers. 
+ monkeypatch.delenv("WHATSAPP_ALLOWED_USERS", raising=False) + + def _fake_hook(name, **kwargs): + if name == "pre_gateway_dispatch": + return [{"action": "allow"}] + return [] + + monkeypatch.setattr("hermes_cli.plugins.invoke_hook", _fake_hook) + + runner, adapter = _make_runner(Platform.WHATSAPP) + runner.pairing_store.generate_code.return_value = "12345" + + result = await runner._handle_message(_make_event("hi")) + + # auth chain ran → pairing code was generated + assert result is None + runner.pairing_store.generate_code.assert_called_once() + + +@pytest.mark.asyncio +async def test_hook_exception_does_not_break_dispatch(monkeypatch): + """A raising plugin hook does not break the gateway.""" + _clear_auth_env(monkeypatch) + monkeypatch.delenv("WHATSAPP_ALLOWED_USERS", raising=False) + + def _fake_hook(name, **kwargs): + raise RuntimeError("plugin blew up") + + monkeypatch.setattr("hermes_cli.plugins.invoke_hook", _fake_hook) + + runner, _adapter = _make_runner(Platform.WHATSAPP) + runner.pairing_store.generate_code.return_value = None + + # Should not raise; falls through to auth chain. + result = await runner._handle_message(_make_event("hi")) + assert result is None + + +@pytest.mark.asyncio +async def test_internal_events_bypass_hook(monkeypatch): + """Internal events (event.internal=True) skip the plugin hook entirely.""" + _clear_auth_env(monkeypatch) + monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "*") + + called = {"count": 0} + + def _fake_hook(name, **kwargs): + called["count"] += 1 + return [{"action": "skip"}] + + async def _capture(event, source, _quick_key, _run_generation): + return "ok" + + monkeypatch.setattr("hermes_cli.plugins.invoke_hook", _fake_hook) + + runner, _adapter = _make_runner(Platform.WHATSAPP) + runner._handle_message_with_agent = _capture # noqa: SLF001 + + event = _make_event("hi") + event.internal = True + + # Even though the hook would say skip, internal events bypass it. 
+ await runner._handle_message(event) + assert called["count"] == 0 diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py index e25f226ee..7ed6a19cb 100644 --- a/tests/gateway/test_proxy_mode.py +++ b/tests/gateway/test_proxy_mode.py @@ -137,11 +137,38 @@ class TestGetProxyUrl: class TestResolveProxyUrl: def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch): for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", - "https_proxy", "http_proxy", "all_proxy"): + "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"): monkeypatch.delenv(key, raising=False) monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") assert resolve_proxy_url() == "socks5://127.0.0.1:1080/" + def test_no_proxy_bypasses_matching_host(self, monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080") + monkeypatch.setenv("NO_PROXY", "api.telegram.org") + + assert resolve_proxy_url(target_hosts="api.telegram.org") is None + + def test_no_proxy_bypasses_cidr_target(self, monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080") + monkeypatch.setenv("NO_PROXY", "149.154.160.0/20") + + assert resolve_proxy_url(target_hosts=["149.154.167.220"]) is None + + def test_no_proxy_ignored_without_target(self, monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080") + monkeypatch.setenv("NO_PROXY", "*") + + assert resolve_proxy_url() == "http://proxy.example:8080" + class TestRunAgentProxyDispatch: """Test 
that _run_agent() delegates to proxy when configured.""" diff --git a/tests/gateway/test_resume_command.py b/tests/gateway/test_resume_command.py index 4c82f4894..c451b3fe3 100644 --- a/tests/gateway/test_resume_command.py +++ b/tests/gateway/test_resume_command.py @@ -179,6 +179,40 @@ class TestHandleResumeCommand: assert call_args[0][1] == "sess_v2" db.close() + @pytest.mark.asyncio + async def test_resume_follows_compression_continuation(self, tmp_path): + """Gateway /resume should reopen the live descendant after compression.""" + from hermes_state import SessionDB + + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("compressed_root", "telegram") + db.set_session_title("compressed_root", "Compressed Work") + db.end_session("compressed_root", "compression") + db.create_session("compressed_child", "telegram", parent_session_id="compressed_root") + db.append_message("compressed_child", "user", "hello from continuation") + db.create_session("current_session_001", "telegram") + + event = _make_event(text="/resume Compressed Work") + runner = _make_runner( + session_db=db, + current_session_id="current_session_001", + event=event, + ) + runner.session_store.load_transcript.side_effect = ( + lambda session_id: [{"role": "user", "content": "hello from continuation"}] + if session_id == "compressed_child" + else [] + ) + + result = await runner._handle_resume_command(event) + + assert "Resumed session" in result + assert "(1 message)" in result + call_args = runner.session_store.switch_session.call_args + assert call_args[0][1] == "compressed_child" + runner.session_store.load_transcript.assert_called_with("compressed_child") + db.close() + @pytest.mark.asyncio async def test_resume_clears_running_agent(self, tmp_path): """Switching sessions clears any cached running agent.""" diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py index 59e9fa040..49fb91d44 100644 --- 
a/tests/gateway/test_run_progress_topics.py +++ b/tests/gateway/test_run_progress_topics.py @@ -58,6 +58,13 @@ class ProgressCaptureAdapter(BasePlatformAdapter): return {"id": chat_id} +class NonEditingProgressCaptureAdapter(ProgressCaptureAdapter): + SUPPORTS_MESSAGE_EDITING = False + + async def edit_message(self, chat_id, message_id, content) -> SendResult: + raise AssertionError("non-editable adapters should not receive edit_message calls") + + class FakeAgent: def __init__(self, **kwargs): self.tool_progress_callback = kwargs.get("tool_progress_callback") @@ -502,6 +509,7 @@ async def _run_with_agent( chat_id="-1001", chat_type="group", thread_id="17585", + adapter_cls=ProgressCaptureAdapter, ): if config_data: import yaml @@ -516,7 +524,7 @@ async def _run_with_agent( fake_run_agent.AIAgent = agent_cls monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) - adapter = ProgressCaptureAdapter(platform=platform) + adapter = adapter_cls(platform=platform) runner = _make_runner(adapter) gateway_run = importlib.import_module("gateway.run") if config_data and "streaming" in config_data: @@ -666,6 +674,26 @@ async def test_run_agent_interim_commentary_works_with_tool_progress_off(monkeyp assert any(call["content"] == "I'll inspect the repo first." 
for call in adapter.sent) +@pytest.mark.asyncio +async def test_run_agent_bluebubbles_uses_commentary_send_path_for_quick_replies(monkeypatch, tmp_path): + adapter, result = await _run_with_agent( + monkeypatch, + tmp_path, + CommentaryAgent, + session_id="sess-bluebubbles-commentary", + config_data={"display": {"interim_assistant_messages": True}}, + platform=Platform.BLUEBUBBLES, + chat_id="iMessage;-;user@example.com", + chat_type="dm", + thread_id=None, + adapter_cls=NonEditingProgressCaptureAdapter, + ) + + assert result.get("already_sent") is not True + assert [call["content"] for call in adapter.sent] == ["I'll inspect the repo first."] + assert adapter.edits == [] + + @pytest.mark.asyncio async def test_run_agent_previewed_final_marks_already_sent(monkeypatch, tmp_path): adapter, result = await _run_with_agent( diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 539b12a5e..deeb55940 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -11,6 +11,8 @@ from gateway.session import ( build_session_context, build_session_context_prompt, build_session_key, + canonical_whatsapp_identifier, + normalize_whatsapp_identifier, ) @@ -183,6 +185,25 @@ class TestBuildSessionContextPrompt: assert "Telegram" in prompt assert "Home Chat" in prompt + def test_bluebubbles_prompt_mentions_short_conversational_i_message_format(self): + config = GatewayConfig( + platforms={ + Platform.BLUEBUBBLES: PlatformConfig(enabled=True, extra={"server_url": "http://localhost:1234", "password": "secret"}), + }, + ) + source = SessionSource( + platform=Platform.BLUEBUBBLES, + chat_id="iMessage;-;user@example.com", + chat_name="Ben", + chat_type="dm", + ) + ctx = build_session_context(source, config) + prompt = build_session_context_prompt(ctx) + + assert "responding via iMessage" in prompt + assert "short and conversational" in prompt + assert "blank line" in prompt + def test_discord_prompt(self): config = GatewayConfig( platforms={ @@ 
-626,9 +647,9 @@ class TestSessionStoreSwitchSession: db.close() -class TestWhatsAppDMSessionKeyConsistency: - """Regression: all session-key construction must go through build_session_key - so DMs are isolated by chat_id across platforms.""" +class TestWhatsAppSessionKeyConsistency: + """Regression: WhatsApp session keys must collapse JID/LID aliases to a + single stable identity for both DM chat_ids and group participant_ids.""" @pytest.fixture() def store(self, tmp_path): @@ -639,7 +660,7 @@ class TestWhatsAppDMSessionKeyConsistency: s._loaded = True return s - def test_whatsapp_dm_includes_chat_id(self): + def test_whatsapp_dm_uses_canonical_identifier(self): source = SessionSource( platform=Platform.WHATSAPP, chat_id="15551234567@s.whatsapp.net", @@ -647,7 +668,80 @@ class TestWhatsAppDMSessionKeyConsistency: user_name="Phone User", ) key = build_session_key(source) - assert key == "agent:main:whatsapp:dm:15551234567@s.whatsapp.net" + assert key == "agent:main:whatsapp:dm:15551234567" + + def test_whatsapp_dm_aliases_share_one_session_key(self, tmp_path, monkeypatch): + tmp_home = tmp_path / "hermes-home" + mapping_dir = tmp_home / "whatsapp" / "session" + mapping_dir.mkdir(parents=True, exist_ok=True) + (mapping_dir / "lid-mapping-999999999999999.json").write_text( + json.dumps("15551234567@s.whatsapp.net"), + encoding="utf-8", + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_home)) + + lid_source = SessionSource( + platform=Platform.WHATSAPP, + chat_id="999999999999999@lid", + chat_type="dm", + user_name="Phone User", + ) + phone_source = SessionSource( + platform=Platform.WHATSAPP, + chat_id="15551234567@s.whatsapp.net", + chat_type="dm", + user_name="Phone User", + ) + + assert build_session_key(lid_source) == "agent:main:whatsapp:dm:15551234567" + assert build_session_key(phone_source) == "agent:main:whatsapp:dm:15551234567" + + def test_whatsapp_group_participant_aliases_share_session_key(self, tmp_path, monkeypatch): + """With group_sessions_per_user, 
the same human flipping between + phone-JID and LID inside a group must not produce two isolated + per-user sessions.""" + tmp_home = tmp_path / "hermes-home" + mapping_dir = tmp_home / "whatsapp" / "session" + mapping_dir.mkdir(parents=True, exist_ok=True) + (mapping_dir / "lid-mapping-999999999999999.json").write_text( + json.dumps("15551234567@s.whatsapp.net"), + encoding="utf-8", + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_home)) + + lid_source = SessionSource( + platform=Platform.WHATSAPP, + chat_id="120363000000000000@g.us", + chat_type="group", + user_id="999999999999999@lid", + user_name="Group Member", + ) + phone_source = SessionSource( + platform=Platform.WHATSAPP, + chat_id="120363000000000000@g.us", + chat_type="group", + user_id="15551234567@s.whatsapp.net", + user_name="Group Member", + ) + + expected = "agent:main:whatsapp:group:120363000000000000@g.us:15551234567" + assert build_session_key(lid_source, group_sessions_per_user=True) == expected + assert build_session_key(phone_source, group_sessions_per_user=True) == expected + + def test_whatsapp_group_shared_sessions_untouched_by_canonicalisation(self): + """When group_sessions_per_user is False, participant_id is not in the + key at all, so canonicalisation is a no-op for this mode.""" + source = SessionSource( + platform=Platform.WHATSAPP, + chat_id="120363000000000000@g.us", + chat_type="group", + user_id="999999999999999@lid", + user_name="Group Member", + ) + assert ( + build_session_key(source, group_sessions_per_user=False) + == "agent:main:whatsapp:group:120363000000000000@g.us" + ) def test_store_delegates_to_build_session_key(self, store): """SessionStore._generate_session_key must produce the same result.""" @@ -866,6 +960,57 @@ class TestWhatsAppDMSessionKeyConsistency: assert key == "agent:main:telegram:dm:99:topic-1" +class TestWhatsAppIdentifierPublicHelpers: + """Contract tests for the public WhatsApp identifier helpers. 
+ + These helpers are part of the public API for plugins that need + WhatsApp identity awareness. Breaking these contracts is a + breaking change for downstream plugins. + """ + + def test_normalize_strips_jid_suffix(self): + assert normalize_whatsapp_identifier("60123456789@s.whatsapp.net") == "60123456789" + + def test_normalize_strips_lid_suffix(self): + assert normalize_whatsapp_identifier("999999999999999@lid") == "999999999999999" + + def test_normalize_strips_device_suffix(self): + assert normalize_whatsapp_identifier("60123456789:47@s.whatsapp.net") == "60123456789" + + def test_normalize_strips_leading_plus(self): + assert normalize_whatsapp_identifier("+60123456789") == "60123456789" + + def test_normalize_handles_bare_numeric(self): + assert normalize_whatsapp_identifier("60123456789") == "60123456789" + + def test_normalize_handles_empty_and_none(self): + assert normalize_whatsapp_identifier("") == "" + assert normalize_whatsapp_identifier(None) == "" # type: ignore[arg-type] + + def test_canonical_without_mapping_returns_normalized(self, tmp_path, monkeypatch): + """With no bridge mapping files, the normalized input is returned.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + assert canonical_whatsapp_identifier("60123456789@lid") == "60123456789" + + def test_canonical_walks_lid_mapping(self, tmp_path, monkeypatch): + """LID is resolved to its paired phone identity via lid-mapping files.""" + mapping_dir = tmp_path / "whatsapp" / "session" + mapping_dir.mkdir(parents=True, exist_ok=True) + (mapping_dir / "lid-mapping-999999999999999.json").write_text( + json.dumps("15551234567@s.whatsapp.net"), + encoding="utf-8", + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + canonical = canonical_whatsapp_identifier("999999999999999@lid") + assert canonical == "15551234567" + assert canonical_whatsapp_identifier("15551234567@s.whatsapp.net") == "15551234567" + + def test_canonical_empty_input(self, tmp_path, monkeypatch): + 
monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + assert canonical_whatsapp_identifier("") == "" + + class TestSessionStoreEntriesAttribute: """Regression: /reset must access _entries, not _sessions.""" diff --git a/tests/gateway/test_session_boundary_hooks.py b/tests/gateway/test_session_boundary_hooks.py index a55662436..52a5238cd 100644 --- a/tests/gateway/test_session_boundary_hooks.py +++ b/tests/gateway/test_session_boundary_hooks.py @@ -166,3 +166,80 @@ async def test_hook_error_does_not_break_reset(mock_invoke_hook): # Should still return a success message despite hook errors assert "Session reset" in result or "New session" in result + + +@pytest.mark.asyncio +@patch("hermes_cli.plugins.invoke_hook") +async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook): + """Regression test for #14981. + + When ``_session_expiry_watcher`` sweeps a session that has aged past + its reset policy (idle timeout, scheduled reset), it must fire + ``on_session_finalize`` so plugin providers get the same final-pass + extraction opportunity they'd get from /new or CLI shutdown. Before + the fix, the expiry path flushed memories and evicted the agent but + silently skipped the hook. 
+ """ + from datetime import datetime, timedelta + + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner._running = True + runner._running_agents = {} + runner._agent_cache = {} + runner._agent_cache_lock = None + runner._last_session_store_prune_ts = 0.0 + + session_key = "agent:main:telegram:dm:42" + expired_entry = SessionEntry( + session_key=session_key, + session_id="sess-expired", + created_at=datetime.now() - timedelta(hours=2), + updated_at=datetime.now() - timedelta(hours=2), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + expired_entry.memory_flushed = False + + runner.session_store = MagicMock() + runner.session_store._ensure_loaded = MagicMock() + runner.session_store._entries = {session_key: expired_entry} + runner.session_store._is_session_expired = MagicMock(return_value=True) + runner.session_store._lock = MagicMock() + runner.session_store._lock.__enter__ = MagicMock(return_value=None) + runner.session_store._lock.__exit__ = MagicMock(return_value=None) + runner.session_store._save = MagicMock() + + runner._async_flush_memories = AsyncMock() + runner._evict_cached_agent = MagicMock() + runner._cleanup_agent_resources = MagicMock() + runner._sweep_idle_cached_agents = MagicMock(return_value=0) + + # The watcher starts with `await asyncio.sleep(60)` and loops while + # `self._running`. Patch sleep so the 60s initial delay is instant, then + # flip `_running` false inside the flush call so the loop exits cleanly + # after one pass. + _orig_sleep = __import__("asyncio").sleep + + async def _fast_sleep(_): + await _orig_sleep(0) + + async def _flush_and_stop(session_id, key): + runner._running = False # terminate the loop after this iteration + + runner._async_flush_memories = AsyncMock(side_effect=_flush_and_stop) + + with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep): + await runner._session_expiry_watcher(interval=0) + + # Look for the finalize call targeting the expired session. 
+ finalize_calls = [ + c for c in mock_invoke_hook.call_args_list + if c[0] and c[0][0] == "on_session_finalize" + ] + session_ids = {c[1].get("session_id") for c in finalize_calls} + assert "sess-expired" in session_ids, ( + f"on_session_finalize was not fired during idle expiry; " + f"got session_ids={session_ids} (regression of #14981)" + ) diff --git a/tests/gateway/test_telegram_network.py b/tests/gateway/test_telegram_network.py index ff74d4c66..be0abb57b 100644 --- a/tests/gateway/test_telegram_network.py +++ b/tests/gateway/test_telegram_network.py @@ -322,7 +322,7 @@ class TestFallbackTransportInit: seen_kwargs.append(kwargs.copy()) return FakeTransport([], {}) - for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY"): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY", "NO_PROXY", "no_proxy"): monkeypatch.delenv(key, raising=False) monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080") monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory) @@ -333,6 +333,25 @@ class TestFallbackTransportInit: assert len(seen_kwargs) == 2 assert all(kwargs["proxy"] == "http://proxy.example:8080" for kwargs in seen_kwargs) + def test_no_proxy_bypasses_fallback_ip_cidr(self, monkeypatch): + seen_kwargs = [] + + def factory(**kwargs): + seen_kwargs.append(kwargs.copy()) + return FakeTransport([], {}) + + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY", "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080") + monkeypatch.setenv("NO_PROXY", "149.154.160.0/20") + monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory) + + transport = tnet.TelegramFallbackTransport(["149.154.167.220"]) + + assert transport._fallback_ips == ["149.154.167.220"] + assert len(seen_kwargs) == 2 + assert all("proxy" not 
in kwargs for kwargs in seen_kwargs) + class TestFallbackTransportClose: @pytest.mark.asyncio diff --git a/tests/gateway/test_unauthorized_dm_behavior.py b/tests/gateway/test_unauthorized_dm_behavior.py index 98e71442b..9571f3f4e 100644 --- a/tests/gateway/test_unauthorized_dm_behavior.py +++ b/tests/gateway/test_unauthorized_dm_behavior.py @@ -3,7 +3,6 @@ from unittest.mock import AsyncMock, MagicMock import pytest -import gateway.run as gateway_run from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.platforms.base import MessageEvent from gateway.session import SessionSource @@ -12,6 +11,7 @@ from gateway.session import SessionSource def _clear_auth_env(monkeypatch) -> None: for key in ( "TELEGRAM_ALLOWED_USERS", + "TELEGRAM_GROUP_ALLOWED_USERS", "DISCORD_ALLOWED_USERS", "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS", @@ -75,7 +75,7 @@ def _make_runner(platform: Platform, config: GatewayConfig): def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypatch, tmp_path): _clear_auth_env(monkeypatch) monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15550000001") - monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) session_dir = tmp_path / "whatsapp" / "session" session_dir.mkdir(parents=True) @@ -178,6 +178,26 @@ def test_qq_group_allowlist_does_not_authorize_other_groups(monkeypatch): assert runner._is_user_authorized(source) is False +def test_telegram_group_allowlist_authorizes_forum_chat_without_user_allowlist(monkeypatch): + _clear_auth_env(monkeypatch) + monkeypatch.setenv("TELEGRAM_GROUP_ALLOWED_USERS", "-1001878443972") + + runner, _adapter = _make_runner( + Platform.TELEGRAM, + GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="t")}), + ) + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="999", + chat_id="-1001878443972", + user_name="tester", + chat_type="forum", + ) + + assert 
runner._is_user_authorized(source) is True + + @pytest.mark.asyncio async def test_unauthorized_dm_pairs_by_default(monkeypatch): _clear_auth_env(monkeypatch) diff --git a/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py b/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py new file mode 100644 index 000000000..85055e108 --- /dev/null +++ b/tests/hermes_cli/test_anthropic_model_flow_stale_oauth.py @@ -0,0 +1,210 @@ +"""Tests for Bug #12905 fix — stale OAuth token detection in hermes model flow. + +Bug 3: `hermes model` with `provider=anthropic` skips OAuth re-authentication +when a stale ANTHROPIC_TOKEN exists in ~/.hermes/.env but no valid +Claude Code credentials are available. The fast-path silently proceeds to +model selection with a broken token instead of offering re-auth. +""" + +import json +import pytest +from unittest.mock import patch, MagicMock + +from hermes_cli.config import load_env, save_env_value + + +class TestStaleOAuthTokenDetection: + """Bug 3: stale OAuth token must trigger needs_auth=True in _model_flow_anthropic.""" + + def test_stale_oauth_token_triggers_reauth(self, tmp_path, monkeypatch, capsys): + """ + Scenario: ANTHROPIC_TOKEN is an expired OAuth token and there are no + valid Claude Code credentials anywhere. The flow MUST offer re-auth + instead of silently skipping to model selection. 
+ """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + # Pre-load .env with an expired OAuth token (sk-ant- prefix = OAuth) + save_env_value("ANTHROPIC_TOKEN", "sk-ant-oat-ExpiredToken00000") + save_env_value("ANTHROPIC_API_KEY", "") + + # No valid Claude Code credentials available (expired, no refresh token) + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: { + "accessToken": "expired-cc-token", + "refreshToken": "", # No refresh — can't recover + "expiresAt": 0, # Already expired + "source": "claude_code_credentials_file", + }, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.is_claude_code_token_valid", + lambda creds: False, # Explicitly expired + ) + monkeypatch.setattr( + "agent.anthropic_adapter._is_oauth_token", + lambda key: key.startswith("sk-ant-"), + ) + # _resolve_claude_code_token_from_credentials has no valid path + monkeypatch.setattr( + "agent.anthropic_adapter._resolve_claude_code_token_from_credentials", + lambda creds=None: None, + ) + + # Simulate user types "3" (Cancel) when prompted for re-auth + monkeypatch.setattr("builtins.input", lambda _: "3") + monkeypatch.setattr("getpass.getpass", lambda _: "") + + from hermes_cli.main import _model_flow_anthropic + cfg = {} + + _model_flow_anthropic(cfg) + + output = capsys.readouterr().out + # Must show auth method choice since token is stale + assert "subscription" in output or "API key" in output, ( + f"Expected auth method menu but got: {output!r}" + ) + + def test_valid_api_key_skips_stale_check(self, tmp_path, monkeypatch, capsys): + """ + A non-OAuth ANTHROPIC_API_KEY (regular pay-per-token key) must NOT be + flagged as stale even when cc_creds are invalid. Regular API keys don't + expire the same way OAuth tokens do. 
+ """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + # Regular API key — NOT an OAuth token + save_env_value("ANTHROPIC_API_KEY", "sk-ant-api03-RegularPayPerTokenKey") + save_env_value("ANTHROPIC_TOKEN", "") + + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: None, # No CC creds + ) + monkeypatch.setattr( + "agent.anthropic_adapter.is_claude_code_token_valid", + lambda creds: False, + ) + monkeypatch.setattr( + "agent.anthropic_adapter._is_oauth_token", + lambda key: key.startswith("sk-ant-") and "oat" in key, + ) + + # Simulate user picks "1" (use existing) + monkeypatch.setattr("builtins.input", lambda _: "1") + + from hermes_cli.main import _model_flow_anthropic + cfg = {} + + _model_flow_anthropic(cfg) + + output = capsys.readouterr().out + # Should show "Use existing credentials" menu, NOT auth method choice + assert "Use existing" in output or "credentials" in output.lower() + + def test_valid_oauth_token_with_refresh_available_skips_reauth(self, tmp_path, monkeypatch, capsys): + """ + When ANTHROPIC_TOKEN is OAuth and valid cc_creds with refresh exist, + the flow should use existing credentials (no forced re-auth). 
+ """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + save_env_value("ANTHROPIC_TOKEN", "sk-ant-oat-GoodOAuthToken") + save_env_value("ANTHROPIC_API_KEY", "") + + # Valid Claude Code credentials with refresh token + monkeypatch.setattr( + "agent.anthropic_adapter.read_claude_code_credentials", + lambda: { + "accessToken": "valid-cc-token", + "refreshToken": "valid-refresh", + "expiresAt": 9999999999999, + }, + ) + monkeypatch.setattr( + "agent.anthropic_adapter.is_claude_code_token_valid", + lambda creds: True, + ) + monkeypatch.setattr( + "agent.anthropic_adapter._is_oauth_token", + lambda key: key.startswith("sk-ant-"), + ) + monkeypatch.setattr( + "agent.anthropic_adapter._resolve_claude_code_token_from_credentials", + lambda creds=None: "valid-cc-token", + ) + + # Simulate user picks "1" (use existing) + monkeypatch.setattr("builtins.input", lambda _: "1") + + from hermes_cli.main import _model_flow_anthropic + cfg = {} + + _model_flow_anthropic(cfg) + + output = capsys.readouterr().out + # Should show "Use existing" without forcing re-auth + assert "Use existing" in output or "credentials" in output.lower() + + +class TestStaleOAuthGuardLogic: + """Unit-level test of the stale-OAuth detection guard logic.""" + + def test_stale_oauth_flag_logic_no_cc_creds(self): + """ + When existing_key is OAuth and cc_available is False, + existing_is_stale_oauth should be True → has_creds = False. 
+ """ + existing_key = "sk-ant-oat-expiredtoken123" + _is_oauth_token = lambda k: k.startswith("sk-ant-") + cc_available = False + + existing_is_stale_oauth = ( + bool(existing_key) and + _is_oauth_token(existing_key) and + not cc_available + ) + has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available + + assert existing_is_stale_oauth is True + assert has_creds is False + + def test_stale_oauth_flag_logic_with_valid_cc_creds(self): + """ + When existing_key is OAuth but cc_available is True (valid creds exist), + has_creds should be True — the cc_creds will be used instead. + """ + existing_key = "sk-ant-oat-sometoken" + _is_oauth_token = lambda k: k.startswith("sk-ant-") + cc_available = True + + existing_is_stale_oauth = ( + bool(existing_key) and + _is_oauth_token(existing_key) and + not cc_available + ) + has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available + + assert existing_is_stale_oauth is False + assert has_creds is True + + def test_non_oauth_key_not_flagged_as_stale(self): + """ + Regular ANTHROPIC_API_KEY (non-OAuth) must not be flagged as stale + even when cc_available is False. 
+ """ + existing_key = "sk-ant-api03-regular-key" + _is_oauth_token = lambda k: k.startswith("sk-ant-") and "oat" in k + cc_available = False + + existing_is_stale_oauth = ( + bool(existing_key) and + _is_oauth_token(existing_key) and + not cc_available + ) + has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available + + assert existing_is_stale_oauth is False + assert has_creds is True diff --git a/tests/hermes_cli/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py index ddcaf1721..ad5ce40f3 100644 --- a/tests/hermes_cli/test_auth_codex_provider.py +++ b/tests/hermes_cli/test_auth_codex_provider.py @@ -4,6 +4,7 @@ import json import time import base64 from pathlib import Path +from types import SimpleNamespace import pytest import yaml @@ -15,8 +16,10 @@ from hermes_cli.auth import ( _read_codex_tokens, _save_codex_tokens, _import_codex_cli_tokens, + _login_openai_codex, get_codex_auth_status, get_provider_auth_state, + refresh_codex_oauth_pure, resolve_codex_runtime_credentials, resolve_provider, ) @@ -190,3 +193,161 @@ def test_resolve_returns_hermes_auth_store_source(tmp_path, monkeypatch): assert creds["source"] == "hermes-auth-store" assert creds["provider"] == "openai-codex" assert creds["base_url"] == DEFAULT_CODEX_BASE_URL + + +class _StubHTTPResponse: + def __init__(self, status_code: int, payload): + self.status_code = status_code + self._payload = payload + self.text = json.dumps(payload) if isinstance(payload, (dict, list)) else str(payload) + + def json(self): + if isinstance(self._payload, Exception): + raise self._payload + return self._payload + + +class _StubHTTPClient: + def __init__(self, response): + self._response = response + + def __enter__(self): + return self + + def __exit__(self, *args): + return False + + def post(self, *args, **kwargs): + return self._response + + +def _patch_httpx(monkeypatch, response): + def _factory(*args, **kwargs): + return _StubHTTPClient(response) + + 
monkeypatch.setattr("hermes_cli.auth.httpx.Client", _factory) + + +def test_refresh_parses_openai_nested_error_shape_refresh_token_reused(monkeypatch): + """OpenAI returns {"error": {"code": "refresh_token_reused", "message": "..."}} + — parser must surface relogin_required and the dedicated message. + """ + response = _StubHTTPResponse( + 401, + { + "error": { + "message": "Your refresh token has already been used to generate a new access token. Please try signing in again.", + "type": "invalid_request_error", + "param": None, + "code": "refresh_token_reused", + } + }, + ) + _patch_httpx(monkeypatch, response) + + with pytest.raises(AuthError) as exc_info: + refresh_codex_oauth_pure("a-tok", "r-tok") + + err = exc_info.value + assert err.code == "refresh_token_reused" + assert err.relogin_required is True + # The existing dedicated branch should override the message with actionable guidance. + assert "already consumed by another client" in str(err) + + +def test_refresh_parses_openai_nested_error_shape_generic_code(monkeypatch): + """Nested error with arbitrary code still surfaces code + message.""" + response = _StubHTTPResponse( + 400, + { + "error": { + "message": "Invalid client credentials.", + "type": "invalid_request_error", + "code": "invalid_client", + } + }, + ) + _patch_httpx(monkeypatch, response) + + with pytest.raises(AuthError) as exc_info: + refresh_codex_oauth_pure("a-tok", "r-tok") + + err = exc_info.value + assert err.code == "invalid_client" + assert "Invalid client credentials." in str(err) + + +def test_refresh_parses_oauth_spec_flat_error_shape_invalid_grant(monkeypatch): + """Fallback path: OAuth spec-shape {"error": "invalid_grant", "error_description": "..."} + must still map to relogin_required=True via the existing code set. 
+ """ + response = _StubHTTPResponse( + 400, + { + "error": "invalid_grant", + "error_description": "Refresh token is expired or revoked.", + }, + ) + _patch_httpx(monkeypatch, response) + + with pytest.raises(AuthError) as exc_info: + refresh_codex_oauth_pure("a-tok", "r-tok") + + err = exc_info.value + assert err.code == "invalid_grant" + assert err.relogin_required is True + assert "Refresh token is expired or revoked." in str(err) + + +def test_refresh_falls_back_to_generic_message_on_unparseable_body(monkeypatch): + """No JSON body → generic 'with status 401' message; 401 always forces relogin.""" + response = _StubHTTPResponse(401, ValueError("not json")) + _patch_httpx(monkeypatch, response) + + with pytest.raises(AuthError) as exc_info: + refresh_codex_oauth_pure("a-tok", "r-tok") + + err = exc_info.value + assert err.code == "codex_refresh_failed" + # 401/403 from the token endpoint always means the refresh token is + # invalid/expired — force relogin even without a parseable error body. 
+ assert err.relogin_required is True + assert "status 401" in str(err) + + +def test_login_openai_codex_force_new_login_skips_existing_reuse_prompt(monkeypatch): + called = {"device_login": 0} + + monkeypatch.setattr( + "hermes_cli.auth.resolve_codex_runtime_credentials", + lambda: {"base_url": DEFAULT_CODEX_BASE_URL}, + ) + monkeypatch.setattr( + "hermes_cli.auth._import_codex_cli_tokens", + lambda: {"access_token": "cli-at", "refresh_token": "cli-rt"}, + ) + monkeypatch.setattr( + "hermes_cli.auth._codex_device_code_login", + lambda: { + "tokens": {"access_token": "fresh-at", "refresh_token": "fresh-rt"}, + "last_refresh": "2026-04-01T00:00:00Z", + "base_url": DEFAULT_CODEX_BASE_URL, + }, + ) + + def _fake_save(tokens, last_refresh=None): + called["device_login"] += 1 + called["tokens"] = dict(tokens) + called["last_refresh"] = last_refresh + + monkeypatch.setattr("hermes_cli.auth._save_codex_tokens", _fake_save) + monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda *args, **kwargs: "/tmp/config.yaml") + monkeypatch.setattr( + "builtins.input", + lambda prompt="": (_ for _ in ()).throw(AssertionError("force_new_login should not prompt for reuse/import")), + ) + + _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"], force_new_login=True) + + assert called["device_login"] == 1 + assert called["tokens"]["access_token"] == "fresh-at" diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py index fb749b6ae..23602c9f0 100644 --- a/tests/hermes_cli/test_auth_commands.py +++ b/tests/hermes_cli/test_auth_commands.py @@ -504,6 +504,91 @@ def test_clear_provider_auth_removes_provider_pool_entries(tmp_path, monkeypatch assert "openrouter" in payload.get("credential_pool", {}) +def test_logout_resets_codex_config_when_auth_state_already_cleared(tmp_path, monkeypatch, capsys): + """`hermes logout --provider openai-codex` must still clear model.provider. 
+ + Users can end up with auth.json already cleared but config.yaml still set to + openai-codex. Previously logout reported no auth state and left the agent + pinned to the Codex provider. + """ + hermes_home = tmp_path / "hermes" + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + _write_auth_store(tmp_path, {"version": 1, "providers": {}, "credential_pool": {}}) + (hermes_home / "config.yaml").write_text( + "model:\n" + " default: gpt-5.3-codex\n" + " provider: openai-codex\n" + " base_url: https://chatgpt.com/backend-api/codex\n" + ) + + from types import SimpleNamespace + from hermes_cli.auth import logout_command + + logout_command(SimpleNamespace(provider="openai-codex")) + + out = capsys.readouterr().out + assert "Logged out of OpenAI Codex." in out + config_text = (hermes_home / "config.yaml").read_text() + assert "provider: auto" in config_text + assert "base_url: https://openrouter.ai/api/v1" in config_text + + +def test_logout_defaults_to_configured_codex_when_no_active_provider(tmp_path, monkeypatch, capsys): + """Bare `hermes logout` should target configured Codex if auth has no active provider.""" + hermes_home = tmp_path / "hermes" + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + _write_auth_store(tmp_path, {"version": 1, "providers": {}, "credential_pool": {}}) + (hermes_home / "config.yaml").write_text( + "model:\n" + " default: gpt-5.3-codex\n" + " provider: openai-codex\n" + " base_url: https://chatgpt.com/backend-api/codex\n" + ) + + from types import SimpleNamespace + from hermes_cli.auth import logout_command + + logout_command(SimpleNamespace(provider=None)) + + out = capsys.readouterr().out + assert "Logged out of OpenAI Codex." 
in out + config_text = (hermes_home / "config.yaml").read_text() + assert "provider: auto" in config_text + + +def test_logout_clears_stale_active_codex_without_provider_credentials(tmp_path, monkeypatch, capsys): + """Logout must clear active_provider even when provider credential payloads are gone.""" + hermes_home = tmp_path / "hermes" + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + _write_auth_store( + tmp_path, + { + "version": 1, + "active_provider": "openai-codex", + "providers": {}, + "credential_pool": {}, + }, + ) + (hermes_home / "config.yaml").write_text( + "model:\n" + " default: gpt-5.3-codex\n" + " provider: openai-codex\n" + " base_url: https://chatgpt.com/backend-api/codex\n" + ) + + from types import SimpleNamespace + from hermes_cli.auth import logout_command + + logout_command(SimpleNamespace(provider=None)) + + out = capsys.readouterr().out + assert "Logged out of OpenAI Codex." in out + auth_payload = json.loads((hermes_home / "auth.json").read_text()) + assert auth_payload.get("active_provider") is None + config_text = (hermes_home / "config.yaml").read_text() + assert "provider: auto" in config_text + + def test_auth_list_does_not_call_mutating_select(monkeypatch, capsys): from hermes_cli.auth_commands import auth_list_command @@ -569,10 +654,45 @@ def test_auth_list_shows_exhausted_cooldown(monkeypatch, capsys): auth_list_command(_Args()) out = capsys.readouterr().out - assert "exhausted (429)" in out + assert "rate-limited (429)" in out assert "59m 30s left" in out +def test_auth_list_shows_auth_failure_when_exhausted_entry_is_unauthorized(monkeypatch, capsys): + from hermes_cli.auth_commands import auth_list_command + + class _Entry: + id = "cred-1" + label = "primary" + auth_type = "oauth" + source = "manual:device_code" + last_status = "exhausted" + last_error_code = 401 + last_error_reason = "invalid_token" + last_error_message = "Access token expired or revoked." 
+ last_status_at = 1000.0 + + class _Pool: + def entries(self): + return [_Entry()] + + def peek(self): + return None + + monkeypatch.setattr("hermes_cli.auth_commands.load_pool", lambda provider: _Pool()) + monkeypatch.setattr("hermes_cli.auth_commands.time.time", lambda: 1030.0) + + class _Args: + provider = "openai-codex" + + auth_list_command(_Args()) + + out = capsys.readouterr().out + assert "auth failed invalid_token (401)" in out + assert "re-auth may be required" in out + assert "left" not in out + + def test_auth_list_prefers_explicit_reset_time(monkeypatch, capsys): from hermes_cli.auth_commands import auth_list_command diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index b6d70a26f..75221b16a 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -19,6 +19,12 @@ from hermes_cli.auth import AuthError, get_provider_auth_state, resolve_nous_run class TestResolveVerifyFallback: """Verify _resolve_verify falls back to True when CA bundle path doesn't exist.""" + @pytest.fixture(autouse=True) + def _pin_platform_to_linux(self, monkeypatch): + """Pin sys.platform so the macOS certifi fallback doesn't alter the + generic "default trust" return value asserted by these tests.""" + monkeypatch.setattr("sys.platform", "linux") + def test_missing_ca_bundle_in_auth_state_falls_back(self): from hermes_cli.auth import _resolve_verify @@ -192,12 +198,82 @@ def test_get_nous_auth_status_auth_store_fallback(tmp_path, monkeypatch): hermes_home = tmp_path / "hermes" _setup_nous_auth(hermes_home, access_token="at-123") monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_runtime_credentials", + lambda min_key_ttl_seconds=60: { + "base_url": "https://inference.example.com/v1", + "expires_at": "2099-01-01T00:00:00+00:00", + "key_id": "key-1", + "source": "cache", + }, + ) status = get_nous_auth_status() assert 
status["logged_in"] is True assert status["portal_base_url"] == "https://portal.example.com" +def test_get_nous_auth_status_prefers_runtime_auth_store_over_stale_pool(tmp_path, monkeypatch): + from hermes_cli.auth import get_nous_auth_status + from agent.credential_pool import PooledCredential, load_pool + + hermes_home = tmp_path / "hermes" + _setup_nous_auth(hermes_home, access_token="at-fresh") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + pool = load_pool("nous") + stale = PooledCredential.from_dict("nous", { + "access_token": "at-stale", + "refresh_token": "rt-stale", + "portal_base_url": "https://portal.stale.example.com", + "inference_base_url": "https://inference.stale.example.com/v1", + "agent_key": "agent-stale", + "agent_key_expires_at": "2020-01-01T00:00:00+00:00", + "expires_at": "2020-01-01T00:00:00+00:00", + "label": "dashboard device_code", + "auth_type": "oauth", + "source": "manual:dashboard_device_code", + "base_url": "https://inference.stale.example.com/v1", + "priority": 0, + }) + pool.add_entry(stale) + + monkeypatch.setattr( + "hermes_cli.auth.resolve_nous_runtime_credentials", + lambda min_key_ttl_seconds=60: { + "base_url": "https://inference.example.com/v1", + "expires_at": "2099-01-01T00:00:00+00:00", + "key_id": "key-fresh", + "source": "portal", + }, + ) + + status = get_nous_auth_status() + assert status["logged_in"] is True + assert status["portal_base_url"] == "https://portal.example.com" + assert status["inference_base_url"] == "https://inference.example.com/v1" + assert status["source"] == "runtime:portal" + + +def test_get_nous_auth_status_reports_revoked_refresh_session(tmp_path, monkeypatch): + from hermes_cli.auth import get_nous_auth_status + + hermes_home = tmp_path / "hermes" + _setup_nous_auth(hermes_home, access_token="at-123") + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + def _boom(min_key_ttl_seconds=60): + raise AuthError("Refresh session has been revoked", provider="nous", relogin_required=True) 
+ + monkeypatch.setattr("hermes_cli.auth.resolve_nous_runtime_credentials", _boom) + + status = get_nous_auth_status() + assert status["logged_in"] is False + assert status["relogin_required"] is True + assert "revoked" in status["error"].lower() + assert status["portal_base_url"] == "https://portal.example.com" + + def test_get_nous_auth_status_empty_returns_not_logged_in(tmp_path, monkeypatch): """get_nous_auth_status() returns logged_in=False when both pool and auth store are empty. @@ -726,3 +802,83 @@ def test_persist_nous_credentials_no_label_uses_auto_derived(tmp_path, monkeypat # No "label" key embedded in providers.nous when the caller didn't supply one. payload = json.loads((hermes_home / "auth.json").read_text()) assert "label" not in payload["providers"]["nous"] + + +def test_refresh_token_reuse_detection_surfaces_actionable_message(): + """Regression for #15099. + + When the Nous Portal server returns ``invalid_grant`` with + ``error_description`` containing "reuse detected", Hermes must surface an + actionable message explaining that an external process consumed the + refresh token. The default opaque "Refresh token reuse detected; please + re-authenticate" string led users to report this as a Hermes persistence + bug when the true cause is external RT consumption (monitoring scripts, + custom self-heal hooks). 
+ """ + from hermes_cli.auth import _refresh_access_token + + class _FakeResponse: + status_code = 400 + + def json(self): + return { + "error": "invalid_grant", + "error_description": "Refresh token reuse detected; please re-authenticate", + } + + class _FakeClient: + def post(self, *args, **kwargs): + return _FakeResponse() + + with pytest.raises(AuthError) as exc_info: + _refresh_access_token( + client=_FakeClient(), + portal_base_url="https://portal.nousresearch.com", + client_id="hermes-cli", + refresh_token="rt_consumed_elsewhere", + ) + + message = str(exc_info.value) + assert "refresh-token reuse" in message.lower() or "refresh token reuse" in message.lower() + # The message must mention the external-process cause and give next steps. + assert "external process" in message.lower() or "monitoring script" in message.lower() + assert "hermes auth add nous" in message.lower() + # Must still be classified as invalid_grant + relogin_required. + assert exc_info.value.code == "invalid_grant" + assert exc_info.value.relogin_required is True + + +def test_refresh_non_reuse_error_keeps_original_description(): + """Non-reuse invalid_grant errors must keep their original description untouched. + + Only the "reuse detected" signature should trigger the actionable message; + generic ``invalid_grant: Refresh session has been revoked`` (the + downstream consequence) keeps its original text so we don't overwrite + useful server context for unrelated failure modes. 
+ """ + from hermes_cli.auth import _refresh_access_token + + class _FakeResponse: + status_code = 400 + + def json(self): + return { + "error": "invalid_grant", + "error_description": "Refresh session has been revoked", + } + + class _FakeClient: + def post(self, *args, **kwargs): + return _FakeResponse() + + with pytest.raises(AuthError) as exc_info: + _refresh_access_token( + client=_FakeClient(), + portal_base_url="https://portal.nousresearch.com", + client_id="hermes-cli", + refresh_token="rt_anything", + ) + + assert "Refresh session has been revoked" in str(exc_info.value) + # Must not have been rewritten with the reuse message. + assert "external process" not in str(exc_info.value).lower() diff --git a/tests/hermes_cli/test_auth_ssl_macos.py b/tests/hermes_cli/test_auth_ssl_macos.py new file mode 100644 index 000000000..a6ebb3168 --- /dev/null +++ b/tests/hermes_cli/test_auth_ssl_macos.py @@ -0,0 +1,115 @@ +"""Tests for hermes_cli.auth._default_verify platform-aware fallback. + +On macOS with Homebrew Python, the system OpenSSL cannot locate the +system trust store, so we explicitly load certifi's bundle. On other +platforms we defer to httpx's own default (which itself uses certifi). + +Most tests use monkeypatching — no real SSL handshakes. A handful use +an openssl-generated self-signed cert via the `real_bundle_file` +fixture because `ssl.create_default_context(cafile=...)` parses the +bundle and refuses stubs. +""" + +import os +import shutil +import ssl +import subprocess +import sys +from pathlib import Path + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from hermes_cli.auth import _default_verify, _resolve_verify + + +@pytest.fixture +def real_bundle_file(tmp_path: Path) -> str: + """Return a path to a real openssl-generated self-signed cert. + + Skips the test when the `openssl` binary isn't on PATH, so CI images + without it degrade gracefully instead of erroring out. 
+ """ + if shutil.which("openssl") is None: + pytest.skip("openssl binary not available") + cert = tmp_path / "ca.pem" + key = tmp_path / "key.pem" + result = subprocess.run( + [ + "openssl", "req", "-x509", "-newkey", "rsa:2048", + "-keyout", str(key), "-out", str(cert), + "-sha256", "-days", "1", "-nodes", + "-subj", "/CN=test", + ], + capture_output=True, + timeout=10, + ) + if result.returncode != 0: + pytest.skip(f"openssl failed: {result.stderr.decode('utf-8', 'ignore')[:200]}") + return str(cert) + + +class TestDefaultVerify: + def test_returns_ssl_context_on_darwin(self, monkeypatch): + monkeypatch.setattr(sys, "platform", "darwin") + result = _default_verify() + assert isinstance(result, ssl.SSLContext) + + def test_returns_true_on_linux(self, monkeypatch): + monkeypatch.setattr(sys, "platform", "linux") + assert _default_verify() is True + + def test_returns_true_on_windows(self, monkeypatch): + monkeypatch.setattr(sys, "platform", "win32") + assert _default_verify() is True + + def test_darwin_falls_back_to_true_when_certifi_missing(self, monkeypatch): + monkeypatch.setattr(sys, "platform", "darwin") + + real_import = __import__ + + def fake_import(name, *args, **kwargs): + if name == "certifi": + raise ImportError("simulated missing certifi") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr("builtins.__import__", fake_import) + assert _default_verify() is True + + +class TestResolveVerifyIntegration: + """_resolve_verify should defer to _default_verify in the no-CA path.""" + + def test_no_ca_uses_default_verify_on_darwin(self, monkeypatch): + monkeypatch.setattr(sys, "platform", "darwin") + for var in ("HERMES_CA_BUNDLE", "SSL_CERT_FILE", "REQUESTS_CA_BUNDLE"): + monkeypatch.delenv(var, raising=False) + result = _resolve_verify() + assert isinstance(result, ssl.SSLContext) + + def test_no_ca_uses_default_verify_on_linux(self, monkeypatch): + monkeypatch.setattr(sys, "platform", "linux") + for var in ("HERMES_CA_BUNDLE", 
"SSL_CERT_FILE", "REQUESTS_CA_BUNDLE"): + monkeypatch.delenv(var, raising=False) + assert _resolve_verify() is True + + def test_requests_ca_bundle_respected(self, monkeypatch, real_bundle_file): + for var in ("HERMES_CA_BUNDLE", "SSL_CERT_FILE"): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("REQUESTS_CA_BUNDLE", real_bundle_file) + result = _resolve_verify() + assert isinstance(result, ssl.SSLContext) + + def test_missing_ca_path_falls_back_to_default_verify(self, monkeypatch, tmp_path): + monkeypatch.setattr(sys, "platform", "linux") + monkeypatch.setenv("HERMES_CA_BUNDLE", str(tmp_path / "missing.pem")) + for var in ("SSL_CERT_FILE", "REQUESTS_CA_BUNDLE"): + monkeypatch.delenv(var, raising=False) + assert _resolve_verify() is True + + def test_insecure_wins_over_everything(self, monkeypatch, tmp_path): + bundle = tmp_path / "ca.pem" + bundle.write_text("stub") + monkeypatch.setenv("HERMES_CA_BUNDLE", str(bundle)) + assert _resolve_verify(insecure=True) is False diff --git a/tests/hermes_cli/test_banner.py b/tests/hermes_cli/test_banner.py index 4ea089fd0..9945c78c4 100644 --- a/tests/hermes_cli/test_banner.py +++ b/tests/hermes_cli/test_banner.py @@ -68,3 +68,68 @@ def test_build_welcome_banner_uses_normalized_toolset_names(): assert "homeassistant_tools:" not in output assert "honcho_tools:" not in output assert "web_tools:" not in output + + +def test_build_welcome_banner_title_is_hyperlinked_to_release(): + """Panel title (version label) is wrapped in an OSC-8 hyperlink to the GitHub release.""" + import io + from unittest.mock import patch as _patch + import hermes_cli.banner as _banner + import model_tools as _mt + import tools.mcp_tool as _mcp + + _banner._latest_release_cache = None + tag_url = ("v2026.4.23", "https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.23") + + buf = io.StringIO() + with ( + _patch.object(_mt, "check_tool_availability", return_value=(["web"], [])), + _patch.object(_banner, "get_available_skills", 
return_value={}), + _patch.object(_banner, "get_update_result", return_value=None), + _patch.object(_mcp, "get_mcp_status", return_value=[]), + _patch.object(_banner, "get_latest_release_tag", return_value=tag_url), + ): + console = Console(file=buf, force_terminal=True, color_system="truecolor", width=160) + _banner.build_welcome_banner( + console=console, model="x", cwd="/tmp", + session_id="abc123", + tools=[{"function": {"name": "read_file"}}], + get_toolset_for_tool=lambda n: "file", + ) + + raw = buf.getvalue() + # The existing version label must still be present in the title + assert "Hermes Agent v" in raw, "Version label missing from title" + # OSC-8 hyperlink escape sequence present with the release URL + assert "\x1b]8;" in raw, "OSC-8 hyperlink not emitted" + assert "releases/tag/v2026.4.23" in raw, "Release URL missing from banner output" + + +def test_build_welcome_banner_title_falls_back_when_no_tag(): + """Without a resolvable tag, the panel title renders as plain text (no hyperlink escape).""" + import io + from unittest.mock import patch as _patch + import hermes_cli.banner as _banner + import model_tools as _mt + import tools.mcp_tool as _mcp + + _banner._latest_release_cache = None + buf = io.StringIO() + with ( + _patch.object(_mt, "check_tool_availability", return_value=(["web"], [])), + _patch.object(_banner, "get_available_skills", return_value={}), + _patch.object(_banner, "get_update_result", return_value=None), + _patch.object(_mcp, "get_mcp_status", return_value=[]), + _patch.object(_banner, "get_latest_release_tag", return_value=None), + ): + console = Console(file=buf, force_terminal=True, color_system="truecolor", width=160) + _banner.build_welcome_banner( + console=console, model="x", cwd="/tmp", + session_id="abc123", + tools=[{"function": {"name": "read_file"}}], + get_toolset_for_tool=lambda n: "file", + ) + + raw = buf.getvalue() + assert "Hermes Agent v" in raw, "Version label missing from title" + assert "\x1b]8;" not in raw, 
"OSC-8 hyperlink should not be emitted without a tag" diff --git a/tests/hermes_cli/test_codex_models.py b/tests/hermes_cli/test_codex_models.py index cffce2a0e..949d1c8e2 100644 --- a/tests/hermes_cli/test_codex_models.py +++ b/tests/hermes_cli/test_codex_models.py @@ -72,7 +72,9 @@ def test_model_command_uses_runtime_access_token_for_codex_list(monkeypatch): from hermes_cli.main import _model_flow_openai_codex captured = {} + choices = iter(["1"]) + monkeypatch.setattr("builtins.input", lambda prompt="": next(choices)) monkeypatch.setattr( "hermes_cli.auth.get_codex_auth_status", lambda: {"logged_in": True}, @@ -107,6 +109,83 @@ def test_model_command_uses_runtime_access_token_for_codex_list(monkeypatch): assert captured["current_model"] == "openai/gpt-5.4" +def test_model_command_prompts_to_reuse_or_reauthenticate_codex_session(monkeypatch, capsys): + from hermes_cli.main import _model_flow_openai_codex + + captured = {"login_calls": 0} + choices = iter(["2"]) + + monkeypatch.setattr("builtins.input", lambda prompt="": next(choices)) + monkeypatch.setattr( + "hermes_cli.auth.get_codex_auth_status", + lambda: {"logged_in": True, "source": "hermes-auth-store"}, + ) + monkeypatch.setattr( + "hermes_cli.auth.resolve_codex_runtime_credentials", + lambda *args, **kwargs: {"api_key": "fresh-codex-token"}, + ) + + def _fake_login(*args, force_new_login=False, **kwargs): + captured["login_calls"] += 1 + captured["force_new_login"] = force_new_login + + monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_login) + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + lambda access_token=None: ["gpt-5.4", "gpt-5.3-codex"], + ) + monkeypatch.setattr( + "hermes_cli.auth._prompt_model_selection", + lambda model_ids, current_model="": None, + ) + + _model_flow_openai_codex({}, current_model="gpt-5.4") + + out = capsys.readouterr().out + assert "Use existing credentials" in out + assert "Reauthenticate (new OAuth login)" in out + assert 
captured["login_calls"] == 1 + assert captured["force_new_login"] is True + + +def test_model_command_uses_existing_codex_session_without_relogin(monkeypatch): + from hermes_cli.main import _model_flow_openai_codex + + choices = iter(["1"]) + captured = {} + + monkeypatch.setattr("builtins.input", lambda prompt="": next(choices)) + monkeypatch.setattr( + "hermes_cli.auth.get_codex_auth_status", + lambda: {"logged_in": True, "source": "hermes-auth-store"}, + ) + monkeypatch.setattr( + "hermes_cli.auth.resolve_codex_runtime_credentials", + lambda *args, **kwargs: {"api_key": "existing-codex-token"}, + ) + + def _fake_get_codex_model_ids(access_token=None): + captured["access_token"] = access_token + return ["gpt-5.4"] + + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + _fake_get_codex_model_ids, + ) + monkeypatch.setattr( + "hermes_cli.auth._prompt_model_selection", + lambda model_ids, current_model="": None, + ) + monkeypatch.setattr( + "hermes_cli.auth._login_openai_codex", + lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("should not reauthenticate")), + ) + + _model_flow_openai_codex({}, current_model="gpt-5.4") + + assert captured["access_token"] == "existing-codex-token" + + # ── Tests for _normalize_model_for_provider ────────────────────────── diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index a27f99661..d77a076eb 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -189,11 +189,14 @@ class TestGatewayHelpLines: assert len(lines) > 10 def test_excludes_cli_only_commands_without_config_gate(self): + import re lines = gateway_help_lines() joined = "\n".join(lines) for cmd in COMMAND_REGISTRY: if cmd.cli_only and not cmd.gateway_config_gate: - assert f"`/{cmd.name}" not in joined, \ + # Word-boundary match so `/reload` doesn't match `/reload-mcp` + pattern = rf'`/{re.escape(cmd.name)}(?![-_\w])' + assert not re.search(pattern, joined), \ f"cli_only 
command /{cmd.name} should not be in gateway help" def test_includes_alias_note_for_bg(self): diff --git a/tests/hermes_cli/test_copilot_context.py b/tests/hermes_cli/test_copilot_context.py new file mode 100644 index 000000000..cb2404897 --- /dev/null +++ b/tests/hermes_cli/test_copilot_context.py @@ -0,0 +1,134 @@ +"""Tests for Copilot live /models context-window resolution.""" + +from __future__ import annotations + +import time +from unittest.mock import patch + +import pytest + +from hermes_cli.models import get_copilot_model_context + + +# Sample catalog items mimicking the Copilot /models API response +_SAMPLE_CATALOG = [ + { + "id": "claude-opus-4.6-1m", + "capabilities": { + "type": "chat", + "limits": {"max_prompt_tokens": 1000000, "max_output_tokens": 64000}, + }, + }, + { + "id": "gpt-4.1", + "capabilities": { + "type": "chat", + "limits": {"max_prompt_tokens": 128000, "max_output_tokens": 32768}, + }, + }, + { + "id": "claude-sonnet-4", + "capabilities": { + "type": "chat", + "limits": {"max_prompt_tokens": 200000, "max_output_tokens": 64000}, + }, + }, + { + "id": "model-without-limits", + "capabilities": {"type": "chat"}, + }, + { + "id": "model-zero-limit", + "capabilities": { + "type": "chat", + "limits": {"max_prompt_tokens": 0}, + }, + }, +] + + +@pytest.fixture(autouse=True) +def _clear_cache(): + """Reset module-level cache before each test.""" + import hermes_cli.models as mod + + mod._copilot_context_cache = {} + mod._copilot_context_cache_time = 0.0 + yield + mod._copilot_context_cache = {} + mod._copilot_context_cache_time = 0.0 + + +class TestGetCopilotModelContext: + """Tests for get_copilot_model_context().""" + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_returns_max_prompt_tokens(self, mock_fetch): + assert get_copilot_model_context("claude-opus-4.6-1m") == 1_000_000 + assert get_copilot_model_context("gpt-4.1") == 128_000 + + @patch("hermes_cli.models.fetch_github_model_catalog", 
return_value=_SAMPLE_CATALOG) + def test_returns_none_for_unknown_model(self, mock_fetch): + assert get_copilot_model_context("nonexistent-model") is None + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_skips_models_without_limits(self, mock_fetch): + assert get_copilot_model_context("model-without-limits") is None + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_skips_zero_limit(self, mock_fetch): + assert get_copilot_model_context("model-zero-limit") is None + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_caches_results(self, mock_fetch): + get_copilot_model_context("gpt-4.1") + get_copilot_model_context("claude-sonnet-4") + # Only one API call despite two lookups + assert mock_fetch.call_count == 1 + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_cache_expires(self, mock_fetch): + import hermes_cli.models as mod + + get_copilot_model_context("gpt-4.1") + assert mock_fetch.call_count == 1 + + # Expire the cache + mod._copilot_context_cache_time = time.time() - 7200 + get_copilot_model_context("gpt-4.1") + assert mock_fetch.call_count == 2 + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=None) + def test_returns_none_when_catalog_unavailable(self, mock_fetch): + assert get_copilot_model_context("gpt-4.1") is None + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=[]) + def test_returns_none_for_empty_catalog(self, mock_fetch): + assert get_copilot_model_context("gpt-4.1") is None + + +class TestModelMetadataCopilotIntegration: + """Test that get_model_context_length() uses Copilot live API for copilot provider.""" + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_copilot_provider_uses_live_api(self, mock_fetch): + from agent.model_metadata import 
get_model_context_length + + ctx = get_model_context_length("claude-opus-4.6-1m", provider="copilot") + assert ctx == 1_000_000 + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=_SAMPLE_CATALOG) + def test_copilot_acp_provider_uses_live_api(self, mock_fetch): + from agent.model_metadata import get_model_context_length + + ctx = get_model_context_length("claude-sonnet-4", provider="copilot-acp") + assert ctx == 200_000 + + @patch("hermes_cli.models.fetch_github_model_catalog", return_value=None) + def test_falls_through_when_catalog_unavailable(self, mock_fetch): + from agent.model_metadata import get_model_context_length + + # Should not raise, should fall through to models.dev or defaults + ctx = get_model_context_length("gpt-4.1", provider="copilot") + assert isinstance(ctx, int) + assert ctx > 0 diff --git a/tests/hermes_cli/test_copilot_in_model_list.py b/tests/hermes_cli/test_copilot_in_model_list.py new file mode 100644 index 000000000..e414687bc --- /dev/null +++ b/tests/hermes_cli/test_copilot_in_model_list.py @@ -0,0 +1,41 @@ +"""Tests for GitHub Copilot entries shown in the /model picker.""" + +import os +from unittest.mock import patch + +from hermes_cli.model_switch import list_authenticated_providers + + +@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False) +def test_copilot_picker_keeps_curated_copilot_models_when_live_catalog_unavailable(): + with patch("agent.models_dev.fetch_models_dev", return_value={}), \ + patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \ + patch("hermes_cli.models._fetch_github_models", return_value=None): + providers = list_authenticated_providers(current_provider="openrouter", max_models=50) + + copilot = next((p for p in providers if p["slug"] == "copilot"), None) + + assert copilot is not None + assert "gpt-5.4" in copilot["models"] + assert "claude-sonnet-4.6" in copilot["models"] + assert "claude-sonnet-4" in copilot["models"] + assert 
"claude-sonnet-4.5" in copilot["models"] + assert "claude-haiku-4.5" in copilot["models"] + assert "gemini-3.1-pro-preview" in copilot["models"] + assert "claude-opus-4.6" not in copilot["models"] + + +@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False) +def test_copilot_picker_uses_live_catalog_when_available(): + live_models = ["gpt-5.4", "claude-sonnet-4.6", "gemini-3.1-pro-preview"] + + with patch("agent.models_dev.fetch_models_dev", return_value={}), \ + patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \ + patch("hermes_cli.models._fetch_github_models", return_value=live_models): + providers = list_authenticated_providers(current_provider="openrouter", max_models=50) + + copilot = next((p for p in providers if p["slug"] == "copilot"), None) + + assert copilot is not None + assert copilot["models"] == live_models + assert copilot["total_models"] == len(live_models) diff --git a/tests/hermes_cli/test_copilot_token_exchange.py b/tests/hermes_cli/test_copilot_token_exchange.py new file mode 100644 index 000000000..9c6a219ab --- /dev/null +++ b/tests/hermes_cli/test_copilot_token_exchange.py @@ -0,0 +1,159 @@ +"""Tests for Copilot token exchange (raw GitHub token → Copilot API token).""" + +from __future__ import annotations + +import json +import time +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture(autouse=True) +def _clear_jwt_cache(): + """Reset the module-level JWT cache before each test.""" + import hermes_cli.copilot_auth as mod + mod._jwt_cache.clear() + yield + mod._jwt_cache.clear() + + +class TestExchangeCopilotToken: + """Tests for exchange_copilot_token().""" + + def _mock_urlopen(self, token="tid=abc;exp=123;sku=copilot_individual", expires_at=None): + """Create a mock urlopen context manager returning a token response.""" + if expires_at is None: + expires_at = time.time() + 1800 + resp_data = json.dumps({"token": token, "expires_at": expires_at}).encode() + mock_resp = 
MagicMock() + mock_resp.read.return_value = resp_data + mock_resp.__enter__ = MagicMock(return_value=mock_resp) + mock_resp.__exit__ = MagicMock(return_value=False) + return mock_resp + + @patch("urllib.request.urlopen") + def test_exchanges_token_successfully(self, mock_urlopen): + from hermes_cli.copilot_auth import exchange_copilot_token + + mock_urlopen.return_value = self._mock_urlopen(token="tid=abc;exp=999") + api_token, expires_at = exchange_copilot_token("gho_test123") + + assert api_token == "tid=abc;exp=999" + assert isinstance(expires_at, float) + + # Verify request was made with correct headers + call_args = mock_urlopen.call_args + req = call_args[0][0] + assert req.get_header("Authorization") == "token gho_test123" + assert "GitHubCopilotChat" in req.get_header("User-agent") + + @patch("urllib.request.urlopen") + def test_caches_result(self, mock_urlopen): + from hermes_cli.copilot_auth import exchange_copilot_token + + future = time.time() + 1800 + mock_urlopen.return_value = self._mock_urlopen(expires_at=future) + + exchange_copilot_token("gho_test123") + exchange_copilot_token("gho_test123") + + assert mock_urlopen.call_count == 1 + + @patch("urllib.request.urlopen") + def test_refreshes_expired_cache(self, mock_urlopen): + from hermes_cli.copilot_auth import exchange_copilot_token, _jwt_cache, _token_fingerprint + + # Seed cache with expired entry + fp = _token_fingerprint("gho_test123") + _jwt_cache[fp] = ("old_token", time.time() - 10) + + mock_urlopen.return_value = self._mock_urlopen( + token="new_token", expires_at=time.time() + 1800 + ) + api_token, _ = exchange_copilot_token("gho_test123") + + assert api_token == "new_token" + assert mock_urlopen.call_count == 1 + + @patch("urllib.request.urlopen") + def test_raises_on_empty_token(self, mock_urlopen): + from hermes_cli.copilot_auth import exchange_copilot_token + + resp_data = json.dumps({"token": "", "expires_at": 0}).encode() + mock_resp = MagicMock() + mock_resp.read.return_value = 
resp_data + mock_resp.__enter__ = MagicMock(return_value=mock_resp) + mock_resp.__exit__ = MagicMock(return_value=False) + mock_urlopen.return_value = mock_resp + + with pytest.raises(ValueError, match="empty token"): + exchange_copilot_token("gho_test123") + + @patch("urllib.request.urlopen", side_effect=Exception("network error")) + def test_raises_on_network_error(self, mock_urlopen): + from hermes_cli.copilot_auth import exchange_copilot_token + + with pytest.raises(ValueError, match="network error"): + exchange_copilot_token("gho_test123") + + +class TestGetCopilotApiToken: + """Tests for get_copilot_api_token() — the fallback wrapper.""" + + @patch("hermes_cli.copilot_auth.exchange_copilot_token") + def test_returns_exchanged_token(self, mock_exchange): + from hermes_cli.copilot_auth import get_copilot_api_token + + mock_exchange.return_value = ("exchanged_jwt", time.time() + 1800) + assert get_copilot_api_token("gho_raw") == "exchanged_jwt" + + @patch("hermes_cli.copilot_auth.exchange_copilot_token", side_effect=ValueError("fail")) + def test_falls_back_to_raw_token(self, mock_exchange): + from hermes_cli.copilot_auth import get_copilot_api_token + + assert get_copilot_api_token("gho_raw") == "gho_raw" + + def test_empty_token_passthrough(self): + from hermes_cli.copilot_auth import get_copilot_api_token + + assert get_copilot_api_token("") == "" + + +class TestTokenFingerprint: + """Tests for _token_fingerprint().""" + + def test_consistent(self): + from hermes_cli.copilot_auth import _token_fingerprint + + fp1 = _token_fingerprint("gho_abc123") + fp2 = _token_fingerprint("gho_abc123") + assert fp1 == fp2 + + def test_different_tokens_different_fingerprints(self): + from hermes_cli.copilot_auth import _token_fingerprint + + fp1 = _token_fingerprint("gho_abc123") + fp2 = _token_fingerprint("gho_xyz789") + assert fp1 != fp2 + + def test_length(self): + from hermes_cli.copilot_auth import _token_fingerprint + + assert len(_token_fingerprint("gho_test")) == 16 
+ + +class TestCallerIntegration: + """Test that callers correctly use token exchange.""" + + @patch("hermes_cli.copilot_auth.resolve_copilot_token", return_value=("gho_raw", "GH_TOKEN")) + @patch("hermes_cli.copilot_auth.get_copilot_api_token", return_value="exchanged_jwt") + def test_auth_resolve_uses_exchange(self, mock_exchange, mock_resolve): + from hermes_cli.auth import _resolve_api_key_provider_secret + + # Create a minimal pconfig mock + pconfig = MagicMock() + token, source = _resolve_api_key_provider_secret("copilot", pconfig) + assert token == "exchanged_jwt" + assert source == "GH_TOKEN" + mock_exchange.assert_called_once_with("gho_raw") diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py index 948cafaf7..37cad8516 100644 --- a/tests/hermes_cli/test_doctor.py +++ b/tests/hermes_cli/test_doctor.py @@ -3,6 +3,8 @@ import os import sys import types +import io +import contextlib from argparse import Namespace from types import SimpleNamespace @@ -255,6 +257,57 @@ def test_run_doctor_termux_treats_docker_and_browser_warnings_as_expected(monkey assert "docker not found (optional)" not in out +def test_run_doctor_accepts_named_provider_from_providers_section(monkeypatch, tmp_path): + home = tmp_path / ".hermes" + home.mkdir(parents=True, exist_ok=True) + + import yaml + + (home / "config.yaml").write_text( + yaml.dump( + { + "model": { + "provider": "volcengine-plan", + "default": "doubao-seed-2.0-code", + }, + "providers": { + "volcengine-plan": { + "name": "volcengine-plan", + "base_url": "https://ark.cn-beijing.volces.com/api/coding/v3", + "default_model": "doubao-seed-2.0-code", + "models": {"doubao-seed-2.0-code": {}}, + } + }, + } + ) + ) + + monkeypatch.setattr(doctor_mod, "HERMES_HOME", home) + monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project") + monkeypatch.setattr(doctor_mod, "_DHH", str(home)) + (tmp_path / "project").mkdir(exist_ok=True) + + fake_model_tools = types.SimpleNamespace( + 
check_tool_availability=lambda *a, **kw: ([], []), + TOOLSET_REQUIREMENTS={}, + ) + monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools) + + try: + from hermes_cli import auth as _auth_mod + monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {}) + except Exception: + pass + + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + doctor_mod.run_doctor(Namespace(fix=False)) + + out = buf.getvalue() + assert "model.provider 'volcengine-plan' is not a recognised provider" not in out + + def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path): home = tmp_path / ".hermes" home.mkdir(parents=True, exist_ok=True) diff --git a/tests/hermes_cli/test_gemini_free_tier_setup_block.py b/tests/hermes_cli/test_gemini_free_tier_setup_block.py new file mode 100644 index 000000000..c4ebdd08e --- /dev/null +++ b/tests/hermes_cli/test_gemini_free_tier_setup_block.py @@ -0,0 +1,141 @@ +"""Tests for the Gemini free-tier block in the setup wizard.""" +from __future__ import annotations + +from unittest.mock import patch + +import pytest + + +@pytest.fixture +def config_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME with an empty config.""" + home = tmp_path / "hermes" + home.mkdir() + (home / "config.yaml").write_text("model: some-old-model\n") + (home / ".env").write_text("") + monkeypatch.setenv("HERMES_HOME", str(home)) + # Clear any ambient env that could alter provider resolution + for var in ( + "HERMES_MODEL", + "LLM_MODEL", + "HERMES_INFERENCE_PROVIDER", + "OPENAI_BASE_URL", + "OPENAI_API_KEY", + "GEMINI_BASE_URL", + ): + monkeypatch.delenv(var, raising=False) + return home + + +class TestGeminiSetupFreeTierBlock: + """_model_flow_api_key_provider should refuse to wire up a free-tier Gemini key.""" + + def test_free_tier_key_is_blocked(self, config_home, monkeypatch, capsys): + """Free-tier probe result -> provider is NOT 
saved, message is printed.""" + monkeypatch.setenv("GOOGLE_API_KEY", "fake-free-tier-key") + + from hermes_cli.main import _model_flow_api_key_provider + from hermes_cli.config import load_config + + # Mock the probe to claim this is a free-tier key + with patch( + "agent.gemini_native_adapter.probe_gemini_tier", + return_value="free", + ), patch( + "hermes_cli.auth._prompt_model_selection", + return_value="gemini-2.5-flash", + ), patch( + "hermes_cli.auth.deactivate_provider", + ), patch("builtins.input", return_value=""): + _model_flow_api_key_provider(load_config(), "gemini", "old-model") + + output = capsys.readouterr().out + assert "free tier" in output.lower() + assert "aistudio.google.com/apikey" in output + assert "Not saving Gemini as the default provider" in output + + # Config must NOT show gemini as the provider + import yaml + cfg = yaml.safe_load((config_home / "config.yaml").read_text()) or {} + model = cfg.get("model") + if isinstance(model, dict): + assert model.get("provider") != "gemini", ( + "Free-tier key should not have saved gemini as provider" + ) + # If still a string, also fine — nothing was saved + + def test_paid_tier_key_proceeds(self, config_home, monkeypatch, capsys): + """Paid-tier probe result -> provider IS saved normally.""" + monkeypatch.setenv("GOOGLE_API_KEY", "fake-paid-tier-key") + + from hermes_cli.main import _model_flow_api_key_provider + from hermes_cli.config import load_config + + with patch( + "agent.gemini_native_adapter.probe_gemini_tier", + return_value="paid", + ), patch( + "hermes_cli.auth._prompt_model_selection", + return_value="gemini-2.5-flash", + ), patch( + "hermes_cli.auth.deactivate_provider", + ), patch("builtins.input", return_value=""): + _model_flow_api_key_provider(load_config(), "gemini", "old-model") + + output = capsys.readouterr().out + assert "paid" in output.lower() + assert "Not saving Gemini" not in output + + import yaml + cfg = yaml.safe_load((config_home / "config.yaml").read_text()) or {} 
+ model = cfg.get("model") + assert isinstance(model, dict), f"model should be dict, got {type(model)}" + assert model.get("provider") == "gemini" + assert model.get("default") == "gemini-2.5-flash" + + def test_unknown_tier_proceeds_with_warning(self, config_home, monkeypatch, capsys): + """Probe returning 'unknown' (network/auth error) -> proceed without blocking.""" + monkeypatch.setenv("GOOGLE_API_KEY", "fake-key") + + from hermes_cli.main import _model_flow_api_key_provider + from hermes_cli.config import load_config + + with patch( + "agent.gemini_native_adapter.probe_gemini_tier", + return_value="unknown", + ), patch( + "hermes_cli.auth._prompt_model_selection", + return_value="gemini-2.5-flash", + ), patch( + "hermes_cli.auth.deactivate_provider", + ), patch("builtins.input", return_value=""): + _model_flow_api_key_provider(load_config(), "gemini", "old-model") + + output = capsys.readouterr().out + assert "could not verify" in output.lower() + assert "Not saving Gemini" not in output + + import yaml + cfg = yaml.safe_load((config_home / "config.yaml").read_text()) or {} + model = cfg.get("model") + assert isinstance(model, dict) + assert model.get("provider") == "gemini" + + def test_non_gemini_provider_skips_probe(self, config_home, monkeypatch): + """Probe must only run for provider_id == 'gemini', not for other providers.""" + monkeypatch.setenv("DEEPSEEK_API_KEY", "fake-key") + + from hermes_cli.main import _model_flow_api_key_provider + from hermes_cli.config import load_config + + with patch( + "agent.gemini_native_adapter.probe_gemini_tier", + ) as mock_probe, patch( + "hermes_cli.auth._prompt_model_selection", + return_value="deepseek-chat", + ), patch( + "hermes_cli.auth.deactivate_provider", + ), patch("builtins.input", return_value=""): + _model_flow_api_key_provider(load_config(), "deepseek", "old-model") + + mock_probe.assert_not_called() diff --git a/tests/hermes_cli/test_model_normalize.py b/tests/hermes_cli/test_model_normalize.py index 
6de69ab30..f2a4bf3d6 100644 --- a/tests/hermes_cli/test_model_normalize.py +++ b/tests/hermes_cli/test_model_normalize.py @@ -9,6 +9,7 @@ from hermes_cli.model_normalize import ( normalize_model_for_provider, _DOT_TO_HYPHEN_PROVIDERS, _AGGREGATOR_PROVIDERS, + _normalize_for_deepseek, detect_vendor, ) @@ -191,3 +192,72 @@ class TestDetectVendor: ]) def test_detects_known_vendors(self, model, expected): assert detect_vendor(model) == expected + + +# ── DeepSeek V-series pass-through (bug: V4 models silently folded to V3) ── + +class TestDeepseekVSeriesPassThrough: + """DeepSeek's V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``, + and future ``deepseek-v-*`` variants) are first-class model IDs + accepted directly by DeepSeek's Chat Completions API. Earlier code + folded every non-reasoner name into ``deepseek-chat``, which on + aggregators (Nous portal, OpenRouter via DeepInfra) routes to V3 — + silently downgrading users who picked V4. + """ + + @pytest.mark.parametrize("model", [ + "deepseek-v4-pro", + "deepseek-v4-flash", + "deepseek/deepseek-v4-pro", # vendor-prefixed + "deepseek/deepseek-v4-flash", + "DeepSeek-V4-Pro", # case-insensitive + "deepseek-v4-flash-20260423", # dated variant + "deepseek-v5-pro", # future V-series + "deepseek-v10-ultra", # double-digit future + ]) + def test_v_series_passes_through(self, model): + expected = model.split("/", 1)[-1].lower() + assert _normalize_for_deepseek(model) == expected + + def test_deepseek_provider_preserves_v4_pro(self): + """End-to-end via normalize_model_for_provider — user selecting + V4 Pro must reach DeepSeek's API as V4 Pro, not V3 alias.""" + result = normalize_model_for_provider("deepseek-v4-pro", "deepseek") + assert result == "deepseek-v4-pro" + + def test_deepseek_provider_preserves_v4_flash(self): + result = normalize_model_for_provider("deepseek-v4-flash", "deepseek") + assert result == "deepseek-v4-flash" + + +# ── DeepSeek regressions (existing behaviour still holds) ────────────── + +class 
TestDeepseekCanonicalAndReasonerMapping: + """Canonical pass-through and reasoner-keyword folding stay intact.""" + + @pytest.mark.parametrize("model,expected", [ + ("deepseek-chat", "deepseek-chat"), + ("deepseek-reasoner", "deepseek-reasoner"), + ("DEEPSEEK-CHAT", "deepseek-chat"), + ]) + def test_canonical_models_pass_through(self, model, expected): + assert _normalize_for_deepseek(model) == expected + + @pytest.mark.parametrize("model", [ + "deepseek-r1", + "deepseek-r1-0528", + "deepseek-think-v3", + "deepseek-reasoning-preview", + "deepseek-cot-experimental", + ]) + def test_reasoner_keywords_map_to_reasoner(self, model): + assert _normalize_for_deepseek(model) == "deepseek-reasoner" + + @pytest.mark.parametrize("model", [ + "deepseek-chat-v3.1", # 'chat' prefix, not V-series pattern + "unknown-model", + "something-random", + "gpt-5", # non-DeepSeek names still fall through + ]) + def test_unknown_names_fall_back_to_chat(self, model): + assert _normalize_for_deepseek(model) == "deepseek-chat" diff --git a/tests/hermes_cli/test_model_switch_context_display.py b/tests/hermes_cli/test_model_switch_context_display.py new file mode 100644 index 000000000..e30c5a3c6 --- /dev/null +++ b/tests/hermes_cli/test_model_switch_context_display.py @@ -0,0 +1,90 @@ +"""Regression test for /model context-length display on provider-capped models. + +Bug (April 2026): `/model gpt-5.5` on openai-codex (ChatGPT OAuth) showed +"Context: 1,050,000 tokens" because the display code used the raw models.dev +``ModelInfo.context_window`` (which reports the direct-OpenAI API value) instead +of the provider-aware resolver. The agent was actually running at 272K — Codex +OAuth's enforced cap — so the display was lying to the user. + +Fix: ``resolve_display_context_length()`` prefers +``agent.model_metadata.get_model_context_length`` (which knows about Codex OAuth, +Copilot, Nous, etc.) and falls back to models.dev only if that returns nothing. 
+""" +from __future__ import annotations + +from unittest.mock import patch + +from hermes_cli.model_switch import resolve_display_context_length + + +class _FakeModelInfo: + def __init__(self, ctx): + self.context_window = ctx + + +class TestResolveDisplayContextLength: + def test_codex_oauth_overrides_models_dev(self): + """gpt-5.5 on openai-codex must show Codex's 272K cap, not models.dev's 1.05M.""" + fake_mi = _FakeModelInfo(1_050_000) # what models.dev reports + with patch( + "agent.model_metadata.get_model_context_length", + return_value=272_000, # what Codex OAuth actually enforces + ): + ctx = resolve_display_context_length( + "gpt-5.5", + "openai-codex", + base_url="https://chatgpt.com/backend-api/codex", + api_key="", + model_info=fake_mi, + ) + assert ctx == 272_000, ( + "Codex OAuth's 272K cap must win over models.dev's 1.05M for gpt-5.5" + ) + + def test_falls_back_to_model_info_when_resolver_returns_none(self): + fake_mi = _FakeModelInfo(1_048_576) + with patch( + "agent.model_metadata.get_model_context_length", return_value=None + ): + ctx = resolve_display_context_length( + "some-model", + "some-provider", + model_info=fake_mi, + ) + assert ctx == 1_048_576 + + def test_returns_none_when_both_sources_empty(self): + with patch( + "agent.model_metadata.get_model_context_length", return_value=None + ): + ctx = resolve_display_context_length( + "unknown-model", + "unknown-provider", + model_info=None, + ) + assert ctx is None + + def test_resolver_exception_falls_back_to_model_info(self): + fake_mi = _FakeModelInfo(200_000) + with patch( + "agent.model_metadata.get_model_context_length", + side_effect=RuntimeError("network down"), + ): + ctx = resolve_display_context_length( + "x", "y", model_info=fake_mi + ) + assert ctx == 200_000 + + def test_prefers_resolver_even_when_model_info_has_larger_value(self): + """Invariant: provider-aware resolver is authoritative, even if models.dev + reports a bigger window.""" + fake_mi = _FakeModelInfo(2_000_000) + 
with patch( + "agent.model_metadata.get_model_context_length", return_value=128_000 + ): + ctx = resolve_display_context_length( + "capped-model", + "capped-provider", + model_info=fake_mi, + ) + assert ctx == 128_000 diff --git a/tests/hermes_cli/test_model_switch_custom_providers.py b/tests/hermes_cli/test_model_switch_custom_providers.py index 7fc92136a..2899172ed 100644 --- a/tests/hermes_cli/test_model_switch_custom_providers.py +++ b/tests/hermes_cli/test_model_switch_custom_providers.py @@ -69,7 +69,7 @@ def test_switch_model_accepts_explicit_named_custom_provider(monkeypatch): """Shared /model switch pipeline should accept --provider for custom_providers.""" monkeypatch.setattr( "hermes_cli.runtime_provider.resolve_runtime_provider", - lambda requested: { + lambda **kwargs: { "api_key": "no-key-required", "base_url": "http://127.0.0.1:4141/v1", "api_mode": "chat_completions", diff --git a/tests/hermes_cli/test_model_switch_opencode_anthropic.py b/tests/hermes_cli/test_model_switch_opencode_anthropic.py index ae56dce23..f5b564c23 100644 --- a/tests/hermes_cli/test_model_switch_opencode_anthropic.py +++ b/tests/hermes_cli/test_model_switch_opencode_anthropic.py @@ -250,3 +250,126 @@ class TestAgentSwitchModelDefenseInDepth: f"agent.switch_model did not strip /v1; passed {captured.get('base_url')} " "to build_anthropic_client" ) + + + +class TestStaleConfigDefaultDoesNotWedgeResolver: + """Regression for the real bug Quentin hit. + + When ``model.default`` in config.yaml is an OpenCode Anthropic-routed model + (e.g. ``claude-sonnet-4-6`` on opencode-zen) and the user does ``/model + kimi-k2.6 --provider opencode-zen`` session-only, the resolver must derive + api_mode from the model being requested, not the persisted default. The + earlier bug computed api_mode from ``model_cfg.get("default")``, flipped it + to ``anthropic_messages`` based on the stale Claude default, and stripped + ``/v1``. 
The chat_completions override in switch_model() fixed api_mode but + never re-added ``/v1``, so requests landed on ``https://opencode.ai/zen`` + and got OpenCode's website 404 HTML page. + + These tests use the REAL ``resolve_runtime_provider`` (not a mock) so a + regression in the target_model plumbing surfaces immediately. + """ + + def test_kimi_switch_keeps_v1_despite_claude_config_default(self, tmp_path, monkeypatch): + import yaml + import importlib + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OPENCODE_ZEN_API_KEY", "test-key") + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "model": {"provider": "opencode-zen", "default": "claude-sonnet-4-6"}, + })) + + # Re-import with the new HERMES_HOME so config cache is fresh. + import hermes_cli.config as _cfg_mod + importlib.reload(_cfg_mod) + import hermes_cli.runtime_provider as _rp_mod + importlib.reload(_rp_mod) + import hermes_cli.model_switch as _ms_mod + importlib.reload(_ms_mod) + + result = _ms_mod.switch_model( + raw_input="kimi-k2.6", + current_provider="opencode-zen", + current_model="claude-sonnet-4-6", + current_base_url="https://opencode.ai/zen", # stripped from prior claude turn + current_api_key="test-key", + is_global=False, + explicit_provider="opencode-zen", + ) + + assert result.success, f"switch failed: {result.error_message}" + assert result.base_url == "https://opencode.ai/zen/v1", ( + f"base_url wedged at {result.base_url!r} - stale Claude config.default " + "caused api_mode to be computed as anthropic_messages, stripping /v1, " + "and chat_completions override never re-added it." 
+ ) + assert result.api_mode == "chat_completions" + + def test_go_glm_switch_keeps_v1_despite_minimax_config_default(self, tmp_path, monkeypatch): + import yaml + import importlib + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OPENCODE_GO_API_KEY", "test-key") + monkeypatch.delenv("OPENCODE_ZEN_API_KEY", raising=False) + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "model": {"provider": "opencode-go", "default": "minimax-m2.7"}, + })) + + import hermes_cli.config as _cfg_mod + importlib.reload(_cfg_mod) + import hermes_cli.runtime_provider as _rp_mod + importlib.reload(_rp_mod) + import hermes_cli.model_switch as _ms_mod + importlib.reload(_ms_mod) + + result = _ms_mod.switch_model( + raw_input="glm-5.1", + current_provider="opencode-go", + current_model="minimax-m2.7", + current_base_url="https://opencode.ai/zen/go", # stripped from prior minimax turn + current_api_key="test-key", + is_global=False, + explicit_provider="opencode-go", + ) + + assert result.success, f"switch failed: {result.error_message}" + assert result.base_url == "https://opencode.ai/zen/go/v1" + assert result.api_mode == "chat_completions" + + def test_claude_switch_still_strips_v1_with_kimi_config_default(self, tmp_path, monkeypatch): + """Inverse case: config default is chat_completions, switch TO anthropic_messages. + + Guards that the target_model plumbing does not break the original + strip-for-anthropic behavior. 
+ """ + import yaml + import importlib + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("OPENCODE_ZEN_API_KEY", "test-key") + (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "model": {"provider": "opencode-zen", "default": "kimi-k2.6"}, + })) + + import hermes_cli.config as _cfg_mod + importlib.reload(_cfg_mod) + import hermes_cli.runtime_provider as _rp_mod + importlib.reload(_rp_mod) + import hermes_cli.model_switch as _ms_mod + importlib.reload(_ms_mod) + + result = _ms_mod.switch_model( + raw_input="claude-sonnet-4-6", + current_provider="opencode-zen", + current_model="kimi-k2.6", + current_base_url="https://opencode.ai/zen/v1", + current_api_key="test-key", + is_global=False, + explicit_provider="opencode-zen", + ) + + assert result.success, f"switch failed: {result.error_message}" + assert result.base_url == "https://opencode.ai/zen" + assert result.api_mode == "anthropic_messages" diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py index 6a1a230c4..80c7d2502 100644 --- a/tests/hermes_cli/test_model_validation.py +++ b/tests/hermes_cli/test_model_validation.py @@ -220,13 +220,30 @@ class TestProviderModelIds: patch("hermes_cli.models._fetch_github_models", return_value=["gpt-5.4", "claude-sonnet-4.6"]): assert provider_model_ids("copilot-acp") == ["gpt-5.4", "claude-sonnet-4.6"] + def test_copilot_falls_back_to_curated_defaults_without_stale_opus(self): + with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \ + patch("hermes_cli.models._fetch_github_models", return_value=None): + ids = provider_model_ids("copilot") + + assert "gpt-5.4" in ids + assert "claude-sonnet-4.6" in ids + assert "claude-sonnet-4" in ids + assert "claude-sonnet-4.5" in ids + assert "claude-haiku-4.5" in ids + assert "gemini-3.1-pro-preview" in ids + assert "claude-opus-4.6" not in ids + def test_copilot_acp_falls_back_to_copilot_defaults(self): - with 
patch("hermes_cli.auth.resolve_api_key_provider_credentials", side_effect=Exception("no token")), \ + with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \ patch("hermes_cli.models._fetch_github_models", return_value=None): ids = provider_model_ids("copilot-acp") assert "gpt-5.4" in ids + assert "claude-sonnet-4.6" in ids + assert "claude-sonnet-4" in ids + assert "gemini-3.1-pro-preview" in ids assert "copilot-acp" not in ids + assert "claude-opus-4.6" not in ids # -- fetch_api_models -------------------------------------------------------- @@ -549,8 +566,11 @@ class TestValidateApiFallback: base_url="http://localhost:8000", ) + # Unreachable /models on a custom endpoint no longer hard-rejects — + # the model is persisted with a warning so Cloudflare-protected / + # proxy endpoints that don't expose /models still work. See #12950. assert result["accepted"] is False - assert result["persist"] is False + assert result["persist"] is True assert "http://localhost:8000/v1/models" in result["message"] assert "http://localhost:8000/v1" in result["message"] diff --git a/tests/hermes_cli/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py index 7a5dbf6ae..760832523 100644 --- a/tests/hermes_cli/test_ollama_cloud_auth.py +++ b/tests/hermes_cli/test_ollama_cloud_auth.py @@ -518,7 +518,7 @@ class TestSwitchModelDirectAliasOverride: monkeypatch.setattr( "hermes_cli.runtime_provider.resolve_runtime_provider", - lambda requested: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, + lambda **kwargs: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, ) monkeypatch.setattr("hermes_cli.models.validate_requested_model", @@ -544,7 +544,7 @@ class TestSwitchModelDirectAliasOverride: lambda raw, prov: ("custom", "local-model", "local")) monkeypatch.setattr( "hermes_cli.runtime_provider.resolve_runtime_provider", - lambda requested: {"api_key": "", "base_url": "", 
"api_mode": "openai_compat", "provider": "custom"}, + lambda **kwargs: {"api_key": "", "base_url": "", "api_mode": "openai_compat", "provider": "custom"}, ) monkeypatch.setattr("hermes_cli.models.validate_requested_model", lambda *a, **kw: {"accepted": True, "persist": True, "recognized": True, "message": None}) diff --git a/tests/hermes_cli/test_overlay_slug_resolution.py b/tests/hermes_cli/test_overlay_slug_resolution.py index ccd3748fb..c87c891f9 100644 --- a/tests/hermes_cli/test_overlay_slug_resolution.py +++ b/tests/hermes_cli/test_overlay_slug_resolution.py @@ -81,3 +81,22 @@ def test_kilo_overlay_uses_hermes_slug(): kilo_mdev = next((p for p in providers if p["slug"] == "kilo"), None) assert kilo_mdev is None, "kilo slug should not appear (resolved to kilocode)" + + + +def test_mapped_provider_credential_pool_visibility(monkeypatch): + """Mapped providers should appear when credentials live only in auth-store credential_pool.""" + monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {"google-ai-studio": {"env": ["GEMINI_API_KEY"]}}) + monkeypatch.setattr("agent.models_dev.PROVIDER_TO_MODELS_DEV", {"gemini": "google-ai-studio"}) + monkeypatch.setattr( + "hermes_cli.auth._load_auth_store", + lambda: {"providers": {}, "credential_pool": {"gemini": {"token": "fake"}}}, + ) + monkeypatch.delenv("GEMINI_API_KEY", raising=False) + + providers = list_authenticated_providers(current_provider="gemini") + + gemini = next((p for p in providers if p["slug"] == "gemini"), None) + assert gemini is not None, "gemini should appear when auth-store credential_pool has creds" + assert gemini["is_current"] is True + assert gemini["total_models"] > 0 diff --git a/tests/hermes_cli/test_plugins.py b/tests/hermes_cli/test_plugins.py index 2455547de..157f967e5 100644 --- a/tests/hermes_cli/test_plugins.py +++ b/tests/hermes_cli/test_plugins.py @@ -330,6 +330,33 @@ class TestPluginHooks: assert "transform_terminal_output" in VALID_HOOKS assert "transform_tool_result" in 
VALID_HOOKS + def test_valid_hooks_include_pre_gateway_dispatch(self): + assert "pre_gateway_dispatch" in VALID_HOOKS + + def test_pre_gateway_dispatch_collects_action_dicts(self, tmp_path, monkeypatch): + """pre_gateway_dispatch callbacks return action dicts (skip/rewrite/allow).""" + plugins_dir = tmp_path / "hermes_test" / "plugins" + _make_plugin_dir( + plugins_dir, "predispatch_plugin", + register_body=( + 'ctx.register_hook("pre_gateway_dispatch", ' + 'lambda **kw: {"action": "skip", "reason": "test"})' + ), + ) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test")) + + mgr = PluginManager() + mgr.discover_and_load() + + results = mgr.invoke_hook( + "pre_gateway_dispatch", + event=object(), + gateway=object(), + session_store=object(), + ) + assert len(results) == 1 + assert results[0] == {"action": "skip", "reason": "test"} + def test_register_and_invoke_hook(self, tmp_path, monkeypatch): """Registered hooks are called on invoke_hook().""" plugins_dir = tmp_path / "hermes_test" / "plugins" @@ -608,7 +635,7 @@ class TestPluginManagerList: assert mgr.list_plugins() == [] def test_list_returns_sorted(self, tmp_path, monkeypatch): - """list_plugins() returns results sorted by name.""" + """list_plugins() returns results sorted by key.""" plugins_dir = tmp_path / "hermes_test" / "plugins" _make_plugin_dir(plugins_dir, "zulu") _make_plugin_dir(plugins_dir, "alpha") @@ -618,8 +645,10 @@ class TestPluginManagerList: mgr.discover_and_load() listing = mgr.list_plugins() - names = [p["name"] for p in listing] - assert names == sorted(names) + # list_plugins sorts by key (path-derived, e.g. ``image_gen/openai``), + # not by display name, so that category plugins group together. 
+ keys = [p["key"] for p in listing] + assert keys == sorted(keys) def test_list_with_plugins(self, tmp_path, monkeypatch): """list_plugins() returns info dicts for each discovered plugin.""" diff --git a/tests/hermes_cli/test_pty_bridge.py b/tests/hermes_cli/test_pty_bridge.py new file mode 100644 index 000000000..cd6983b90 --- /dev/null +++ b/tests/hermes_cli/test_pty_bridge.py @@ -0,0 +1,172 @@ +"""Unit tests for hermes_cli.pty_bridge — PTY spawning + byte forwarding. + +These tests drive the bridge with minimal POSIX processes (echo, env, sleep, +printf) to verify it behaves like a PTY you can read/write/resize/close. +""" + +from __future__ import annotations + +import os +import sys +import time + +import pytest + +pytest.importorskip("ptyprocess", reason="ptyprocess not installed") + +from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError + + +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), reason="PTY bridge is POSIX-only" +) + + +def _read_until(bridge: PtyBridge, needle: bytes, timeout: float = 5.0) -> bytes: + """Accumulate PTY output until we see `needle` or time out.""" + deadline = time.monotonic() + timeout + buf = bytearray() + while time.monotonic() < deadline: + chunk = bridge.read(timeout=0.2) + if chunk is None: + break + buf.extend(chunk) + if needle in buf: + return bytes(buf) + return bytes(buf) + + +@skip_on_windows +class TestPtyBridgeSpawn: + def test_is_available_on_posix(self): + assert PtyBridge.is_available() is True + + def test_spawn_returns_bridge_with_pid(self): + bridge = PtyBridge.spawn(["true"]) + try: + assert bridge.pid > 0 + finally: + bridge.close() + + def test_spawn_raises_on_missing_argv0(self, tmp_path): + with pytest.raises((FileNotFoundError, OSError)): + PtyBridge.spawn([str(tmp_path / "definitely-not-a-real-binary")]) + + +@skip_on_windows +class TestPtyBridgeIO: + def test_reads_child_stdout(self): + bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf hermes-ok"]) + try: + output = 
_read_until(bridge, b"hermes-ok") + assert b"hermes-ok" in output + finally: + bridge.close() + + def test_write_sends_to_child_stdin(self): + # `cat` with no args echoes stdin back to stdout. We write a line, + # read it back, then signal EOF to let cat exit cleanly. + bridge = PtyBridge.spawn(["/bin/cat"]) + try: + bridge.write(b"hello-pty\n") + output = _read_until(bridge, b"hello-pty") + assert b"hello-pty" in output + finally: + bridge.close() + + def test_read_returns_none_after_child_exits(self): + bridge = PtyBridge.spawn(["/bin/sh", "-c", "printf done"]) + try: + _read_until(bridge, b"done") + # Give the child a beat to exit cleanly, then drain until EOF. + deadline = time.monotonic() + 3.0 + while bridge.is_alive() and time.monotonic() < deadline: + bridge.read(timeout=0.1) + # Next reads after exit should return None (EOF), not raise. + got_none = False + for _ in range(10): + if bridge.read(timeout=0.1) is None: + got_none = True + break + assert got_none, "PtyBridge.read did not return None after child EOF" + finally: + bridge.close() + + +@skip_on_windows +class TestPtyBridgeResize: + def test_resize_updates_child_winsize(self): + # tput reads COLUMNS/LINES from the TTY ioctl (TIOCGWINSZ). + # Spawn a shell, resize, then ask tput for the dimensions. 
+ bridge = PtyBridge.spawn( + ["/bin/sh", "-c", "sleep 0.1; tput cols; tput lines"], + cols=80, + rows=24, + ) + try: + bridge.resize(cols=123, rows=45) + output = _read_until(bridge, b"45", timeout=5.0) + # tput prints just the numbers, one per line + assert b"123" in output + assert b"45" in output + finally: + bridge.close() + + +@skip_on_windows +class TestPtyBridgeClose: + def test_close_is_idempotent(self): + bridge = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"]) + bridge.close() + bridge.close() # must not raise + assert not bridge.is_alive() + + def test_close_terminates_long_running_child(self): + bridge = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"]) + pid = bridge.pid + bridge.close() + # Give the kernel a moment to reap + deadline = time.monotonic() + 3.0 + reaped = False + while time.monotonic() < deadline: + try: + os.kill(pid, 0) + time.sleep(0.05) + except ProcessLookupError: + reaped = True + break + assert reaped, f"pid {pid} still running after close()" + + +@skip_on_windows +class TestPtyBridgeEnv: + def test_cwd_is_respected(self, tmp_path): + bridge = PtyBridge.spawn( + ["/bin/sh", "-c", "pwd"], + cwd=str(tmp_path), + ) + try: + output = _read_until(bridge, str(tmp_path).encode()) + assert str(tmp_path).encode() in output + finally: + bridge.close() + + def test_env_is_forwarded(self): + bridge = PtyBridge.spawn( + ["/bin/sh", "-c", "printf %s \"$HERMES_PTY_TEST\""], + env={**os.environ, "HERMES_PTY_TEST": "pty-env-works"}, + ) + try: + output = _read_until(bridge, b"pty-env-works") + assert b"pty-env-works" in output + finally: + bridge.close() + + +class TestPtyBridgeUnavailable: + """Platform fallback semantics — PtyUnavailableError is importable and + carries a user-readable message.""" + + def test_error_carries_user_message(self): + err = PtyUnavailableError("platform not supported") + assert "platform" in str(err) diff --git a/tests/hermes_cli/test_redact_config_bridge.py b/tests/hermes_cli/test_redact_config_bridge.py new file mode 
100644 index 000000000..6a01673e6 --- /dev/null +++ b/tests/hermes_cli/test_redact_config_bridge.py @@ -0,0 +1,151 @@ +"""Regression test for config.yaml `security.redact_secrets: false` toggle. + +Bug: `agent/redact.py` snapshots `_REDACT_ENABLED` from the env var +`HERMES_REDACT_SECRETS` at module-import time. `hermes_cli/main.py` at +line ~174 calls `setup_logging(mode="cli")` which transitively imports +`agent.redact` — BEFORE any config bridge ran. So if a user set +`security.redact_secrets: false` in config.yaml (instead of as an env var +in .env), the toggle was silently ignored in both `hermes chat` and +`hermes gateway run`. + +Fix: bridge `security.redact_secrets` from config.yaml → `HERMES_REDACT_SECRETS` +env var in `hermes_cli/main.py` BEFORE the `setup_logging()` call. +""" +import os +import subprocess +import sys +import textwrap +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def test_redact_secrets_false_in_config_yaml_is_honored(tmp_path): + """Setting `security.redact_secrets: false` in config.yaml must disable + redaction — even though it's set in YAML, not as an env var.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + + # Write a config.yaml with redact_secrets: false + (hermes_home / "config.yaml").write_text( + textwrap.dedent( + """\ + security: + redact_secrets: false + """ + ) + ) + # Empty .env so nothing else sets the env var + (hermes_home / ".env").write_text("") + + # Spawn a fresh Python process that imports hermes_cli.main and checks + # _REDACT_ENABLED. Must be a subprocess — we need a clean module state. 
+ probe = textwrap.dedent( + """\ + import sys, os + # Make absolutely sure the env var is not pre-set + os.environ.pop("HERMES_REDACT_SECRETS", None) + sys.path.insert(0, %r) + import hermes_cli.main # triggers the bridge + setup_logging + import agent.redact + print(f"REDACT_ENABLED={agent.redact._REDACT_ENABLED}") + print(f"ENV_VAR={os.environ.get('HERMES_REDACT_SECRETS', '')}") + """ + ) % str(REPO_ROOT) + + env = dict(os.environ) + env["HERMES_HOME"] = str(hermes_home) + env.pop("HERMES_REDACT_SECRETS", None) + + result = subprocess.run( + [sys.executable, "-c", probe], + env=env, + capture_output=True, + text=True, + cwd=str(REPO_ROOT), + timeout=30, + ) + assert result.returncode == 0, f"probe failed: {result.stderr}" + assert "REDACT_ENABLED=False" in result.stdout, ( + f"Config toggle not honored.\nstdout: {result.stdout}\nstderr: {result.stderr}" + ) + assert "ENV_VAR=false" in result.stdout + + +def test_redact_secrets_default_true_when_unset(tmp_path): + """Without the config key, redaction stays on by default.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text("{}\n") # empty config + (hermes_home / ".env").write_text("") + + probe = textwrap.dedent( + """\ + import sys, os + os.environ.pop("HERMES_REDACT_SECRETS", None) + sys.path.insert(0, %r) + import hermes_cli.main + import agent.redact + print(f"REDACT_ENABLED={agent.redact._REDACT_ENABLED}") + """ + ) % str(REPO_ROOT) + + env = dict(os.environ) + env["HERMES_HOME"] = str(hermes_home) + env.pop("HERMES_REDACT_SECRETS", None) + + result = subprocess.run( + [sys.executable, "-c", probe], + env=env, + capture_output=True, + text=True, + cwd=str(REPO_ROOT), + timeout=30, + ) + assert result.returncode == 0, f"probe failed: {result.stderr}" + assert "REDACT_ENABLED=True" in result.stdout + + +def test_dotenv_redact_secrets_beats_config_yaml(tmp_path): + """.env HERMES_REDACT_SECRETS takes precedence over config.yaml.""" + hermes_home = tmp_path / 
".hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text( + textwrap.dedent( + """\ + security: + redact_secrets: false + """ + ) + ) + # .env force-enables redaction + (hermes_home / ".env").write_text("HERMES_REDACT_SECRETS=true\n") + + probe = textwrap.dedent( + """\ + import sys, os + os.environ.pop("HERMES_REDACT_SECRETS", None) + sys.path.insert(0, %r) + import hermes_cli.main + import agent.redact + print(f"REDACT_ENABLED={agent.redact._REDACT_ENABLED}") + print(f"ENV_VAR={os.environ.get('HERMES_REDACT_SECRETS', '')}") + """ + ) % str(REPO_ROOT) + + env = dict(os.environ) + env["HERMES_HOME"] = str(hermes_home) + env.pop("HERMES_REDACT_SECRETS", None) + + result = subprocess.run( + [sys.executable, "-c", probe], + env=env, + capture_output=True, + text=True, + cwd=str(REPO_ROOT), + timeout=30, + ) + assert result.returncode == 0, f"probe failed: {result.stderr}" + # .env value wins + assert "REDACT_ENABLED=True" in result.stdout + assert "ENV_VAR=true" in result.stdout diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 9d2232f39..a81dc9f5e 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -536,6 +536,72 @@ def test_custom_endpoint_explicit_custom_prefers_config_key(monkeypatch): assert resolved["api_key"] == "sk-vllm-key" +def test_bare_custom_uses_loopback_model_base_url_when_provider_not_custom(monkeypatch): + """Regression for #14676: /model can select Custom while YAML still lists another provider.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "openrouter", + "base_url": "http://127.0.0.1:8082/v1", + "default": "my-local-model", + }, + ) + monkeypatch.delenv("CUSTOM_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + 
monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("OPENAI_API_KEY", "openai-key") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["provider"] == "custom" + assert resolved["base_url"] == "http://127.0.0.1:8082/v1" + assert resolved["api_key"] == "openai-key" + + +def test_bare_custom_custom_base_url_env_overrides_remote_yaml(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "openrouter", + "base_url": "https://api.openrouter.ai/api/v1", + }, + ) + monkeypatch.setenv("CUSTOM_BASE_URL", "http://localhost:9999/v1") + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["provider"] == "custom" + assert resolved["base_url"] == "http://localhost:9999/v1" + + +def test_bare_custom_does_not_trust_non_loopback_when_provider_not_custom(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "openrouter", + "base_url": "https://remote.example.com/v1", + }, + ) + monkeypatch.delenv("CUSTOM_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["provider"] == "custom" + assert "openrouter.ai" in resolved["base_url"] + assert "remote.example.com" not in resolved["base_url"] + + def test_named_custom_provider_uses_saved_credentials(monkeypatch): monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index 
150fddab0..03b406875 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -339,6 +339,41 @@ def test_select_provider_and_model_warns_if_named_custom_provider_disappears( assert "selected saved custom provider is no longer available" in out +def test_select_provider_and_model_accepts_named_provider_from_providers_section( + tmp_path, monkeypatch, capsys +): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + _clear_provider_env(monkeypatch) + + cfg = load_config() + cfg["model"] = { + "provider": "volcengine-plan", + "default": "doubao-seed-2.0-code", + } + cfg["providers"] = { + "volcengine-plan": { + "name": "volcengine-plan", + "base_url": "https://ark.cn-beijing.volces.com/api/coding/v3", + "default_model": "doubao-seed-2.0-code", + "models": {"doubao-seed-2.0-code": {}}, + } + } + save_config(cfg) + + monkeypatch.setattr( + "hermes_cli.main._prompt_provider_choice", + lambda choices, default=0: len(choices) - 1, + ) + + from hermes_cli.main import select_provider_and_model + + select_provider_and_model() + + out = capsys.readouterr().out + assert "Warning: Unknown provider 'volcengine-plan'" not in out + assert "Active provider: volcengine-plan" in out + + def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch): """Codex model list fetching uses the runtime access token.""" monkeypatch.setenv("HERMES_HOME", str(tmp_path)) diff --git a/tests/hermes_cli/test_skills_config.py b/tests/hermes_cli/test_skills_config.py index 310b1a8ae..9742f0ac6 100644 --- a/tests/hermes_cli/test_skills_config.py +++ b/tests/hermes_cli/test_skills_config.py @@ -251,43 +251,48 @@ class TestGetDisabledSkillNames: class TestFindAllSkillsFiltering: @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"}) @patch("tools.skills_tool.skill_matches_platform", return_value=True) - @patch("tools.skills_tool.SKILLS_DIR") - def test_disabled_skill_excluded(self, mock_dir, mock_platform, mock_disabled, 
tmp_path): + def test_disabled_skill_excluded(self, mock_platform, mock_disabled, tmp_path, monkeypatch): skill_dir = tmp_path / "my-skill" skill_dir.mkdir() skill_md = skill_dir / "SKILL.md" skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent") - mock_dir.exists.return_value = True - mock_dir.rglob.return_value = [skill_md] + # Point SKILLS_DIR at the real tempdir so iter_skill_index_files + # (which uses os.walk) can actually find the file. + import tools.skills_tool as _st + import agent.skill_utils as _su + monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path) + monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: []) from tools.skills_tool import _find_all_skills skills = _find_all_skills() assert not any(s["name"] == "my-skill" for s in skills) @patch("tools.skills_tool._get_disabled_skill_names", return_value=set()) @patch("tools.skills_tool.skill_matches_platform", return_value=True) - @patch("tools.skills_tool.SKILLS_DIR") - def test_enabled_skill_included(self, mock_dir, mock_platform, mock_disabled, tmp_path): + def test_enabled_skill_included(self, mock_platform, mock_disabled, tmp_path, monkeypatch): skill_dir = tmp_path / "my-skill" skill_dir.mkdir() skill_md = skill_dir / "SKILL.md" skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent") - mock_dir.exists.return_value = True - mock_dir.rglob.return_value = [skill_md] + import tools.skills_tool as _st + import agent.skill_utils as _su + monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path) + monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: []) from tools.skills_tool import _find_all_skills skills = _find_all_skills() assert any(s["name"] == "my-skill" for s in skills) @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"}) @patch("tools.skills_tool.skill_matches_platform", return_value=True) - @patch("tools.skills_tool.SKILLS_DIR") - def test_skip_disabled_returns_all(self, mock_dir, mock_platform, 
mock_disabled, tmp_path): + def test_skip_disabled_returns_all(self, mock_platform, mock_disabled, tmp_path, monkeypatch): """skip_disabled=True ignores the disabled set (for config UI).""" skill_dir = tmp_path / "my-skill" skill_dir.mkdir() skill_md = skill_dir / "SKILL.md" skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent") - mock_dir.exists.return_value = True - mock_dir.rglob.return_value = [skill_md] + import tools.skills_tool as _st + import agent.skill_utils as _su + monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path) + monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: []) from tools.skills_tool import _find_all_skills skills = _find_all_skills(skip_disabled=True) assert any(s["name"] == "my-skill" for s in skills) diff --git a/tests/hermes_cli/test_spotify_auth.py b/tests/hermes_cli/test_spotify_auth.py new file mode 100644 index 000000000..ca9c97560 --- /dev/null +++ b/tests/hermes_cli/test_spotify_auth.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from hermes_cli import auth as auth_mod + + +def test_store_provider_state_can_skip_active_provider() -> None: + auth_store = {"active_provider": "nous", "providers": {}} + + auth_mod._store_provider_state( + auth_store, + "spotify", + {"access_token": "abc"}, + set_active=False, + ) + + assert auth_store["active_provider"] == "nous" + assert auth_store["providers"]["spotify"]["access_token"] == "abc" + + +def test_resolve_spotify_runtime_credentials_refreshes_without_changing_active_provider( + tmp_path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + with auth_mod._auth_store_lock(): + store = auth_mod._load_auth_store() + store["active_provider"] = "nous" + auth_mod._store_provider_state( + store, + "spotify", + { + "client_id": "spotify-client", + "redirect_uri": "http://127.0.0.1:43827/spotify/callback", + "api_base_url": 
auth_mod.DEFAULT_SPOTIFY_API_BASE_URL, + "accounts_base_url": auth_mod.DEFAULT_SPOTIFY_ACCOUNTS_BASE_URL, + "scope": auth_mod.DEFAULT_SPOTIFY_SCOPE, + "access_token": "expired-token", + "refresh_token": "refresh-token", + "token_type": "Bearer", + "expires_at": "2000-01-01T00:00:00+00:00", + }, + set_active=False, + ) + auth_mod._save_auth_store(store) + + monkeypatch.setattr( + auth_mod, + "_refresh_spotify_oauth_state", + lambda state, timeout_seconds=20.0: { + **state, + "access_token": "fresh-token", + "expires_at": "2099-01-01T00:00:00+00:00", + }, + ) + + creds = auth_mod.resolve_spotify_runtime_credentials() + + assert creds["access_token"] == "fresh-token" + persisted = auth_mod.get_provider_auth_state("spotify") + assert persisted is not None + assert persisted["access_token"] == "fresh-token" + assert auth_mod.get_active_provider() == "nous" + + +def test_auth_spotify_status_command_reports_logged_in(capsys, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + auth_mod, + "get_auth_status", + lambda provider=None: { + "logged_in": True, + "auth_type": "oauth_pkce", + "client_id": "spotify-client", + "redirect_uri": "http://127.0.0.1:43827/spotify/callback", + "scope": "user-library-read", + }, + ) + + from hermes_cli.auth_commands import auth_status_command + + auth_status_command(SimpleNamespace(provider="spotify")) + output = capsys.readouterr().out + assert "spotify: logged in" in output + assert "client_id: spotify-client" in output + + + +def test_spotify_interactive_setup_persists_client_id( + tmp_path, + monkeypatch: pytest.MonkeyPatch, + capsys, +) -> None: + """The wizard writes HERMES_SPOTIFY_CLIENT_ID to .env and returns the value.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setattr("builtins.input", lambda prompt="": "wizard-client-123") + # Prevent actually opening the browser during tests. 
+ monkeypatch.setattr(auth_mod, "webbrowser", SimpleNamespace(open=lambda *_a, **_k: False)) + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + + result = auth_mod._spotify_interactive_setup( + redirect_uri_hint=auth_mod.DEFAULT_SPOTIFY_REDIRECT_URI, + ) + assert result == "wizard-client-123" + + env_path = tmp_path / ".env" + assert env_path.exists() + env_text = env_path.read_text() + assert "HERMES_SPOTIFY_CLIENT_ID=wizard-client-123" in env_text + # Default redirect URI should NOT be persisted. + assert "HERMES_SPOTIFY_REDIRECT_URI" not in env_text + + # Docs URL should appear in wizard output so users can find the guide. + output = capsys.readouterr().out + assert auth_mod.SPOTIFY_DOCS_URL in output + + +def test_spotify_interactive_setup_empty_aborts( + tmp_path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Empty input aborts cleanly instead of persisting an empty client_id.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setattr("builtins.input", lambda prompt="": "") + monkeypatch.setattr(auth_mod, "webbrowser", SimpleNamespace(open=lambda *_a, **_k: False)) + monkeypatch.setattr(auth_mod, "_is_remote_session", lambda: True) + + with pytest.raises(SystemExit): + auth_mod._spotify_interactive_setup( + redirect_uri_hint=auth_mod.DEFAULT_SPOTIFY_REDIRECT_URI, + ) + + env_path = tmp_path / ".env" + if env_path.exists(): + assert "HERMES_SPOTIFY_CLIENT_ID" not in env_path.read_text() diff --git a/tests/hermes_cli/test_status.py b/tests/hermes_cli/test_status.py index c24b72dd4..216687660 100644 --- a/tests/hermes_cli/test_status.py +++ b/tests/hermes_cli/test_status.py @@ -42,3 +42,40 @@ def test_show_status_termux_gateway_section_skips_systemctl(monkeypatch, capsys, assert "Manager: Termux / manual process" in output assert "Start with: hermes gateway" in output assert "systemd (user)" not in output + + +def test_show_status_reports_nous_auth_error(monkeypatch, capsys, tmp_path): + from hermes_cli import status as 
status_mod + import hermes_cli.auth as auth_mod + import hermes_cli.gateway as gateway_mod + + monkeypatch.setattr(status_mod, "get_env_path", lambda: tmp_path / ".env", raising=False) + monkeypatch.setattr(status_mod, "get_hermes_home", lambda: tmp_path, raising=False) + monkeypatch.setattr(status_mod, "load_config", lambda: {"model": "gpt-5.4"}, raising=False) + monkeypatch.setattr(status_mod, "resolve_requested_provider", lambda requested=None: "openai-codex", raising=False) + monkeypatch.setattr(status_mod, "resolve_provider", lambda requested=None, **kwargs: "openai-codex", raising=False) + monkeypatch.setattr(status_mod, "provider_label", lambda provider: "OpenAI Codex", raising=False) + monkeypatch.setattr( + auth_mod, + "get_nous_auth_status", + lambda: { + "logged_in": False, + "portal_base_url": "https://portal.nousresearch.com", + "access_expires_at": "2026-04-20T01:00:51+00:00", + "agent_key_expires_at": "2026-04-20T04:54:24+00:00", + "has_refresh_token": True, + "error": "Refresh session has been revoked", + }, + raising=False, + ) + monkeypatch.setattr(auth_mod, "get_codex_auth_status", lambda: {}, raising=False) + monkeypatch.setattr(auth_mod, "get_qwen_auth_status", lambda: {}, raising=False) + monkeypatch.setattr(gateway_mod, "find_gateway_pids", lambda exclude_pids=None: [], raising=False) + + status_mod.show_status(SimpleNamespace(all=False, deep=False)) + + output = capsys.readouterr().out + assert "Nous Portal ✗ not logged in (run: hermes auth add nous --type oauth)" in output + assert "Error: Refresh session has been revoked" in output + assert "Access exp:" in output + assert "Key exp:" in output diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index 9fb2745ac..b134fc98b 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency: gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"]) # Exclude 
non-messaging platforms from the check - non_messaging = {"cli", "api_server"} + non_messaging = {"cli", "api_server", "cron"} for platform, meta in PLATFORMS.items(): if platform in non_messaging: continue diff --git a/tests/hermes_cli/test_user_providers_model_switch.py b/tests/hermes_cli/test_user_providers_model_switch.py index 989a6cbed..00ccf701c 100644 --- a/tests/hermes_cli/test_user_providers_model_switch.py +++ b/tests/hermes_cli/test_user_providers_model_switch.py @@ -197,6 +197,58 @@ def test_list_authenticated_providers_dict_models_dedupe_with_default(monkeypatc assert user_prov["models"].count("model-a") == 1 +def test_openai_native_curated_catalog_is_non_empty(): + """Regression: built-in openai must have a static catalog for picker totals.""" + from hermes_cli.models import _PROVIDER_MODELS + + assert _PROVIDER_MODELS.get("openai") + assert len(_PROVIDER_MODELS["openai"]) >= 4 + + +def test_list_authenticated_providers_openai_built_in_nonzero_total(monkeypatch): + """Built-in openai row must not report total_models=0 when creds exist.""" + monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + monkeypatch.setattr( + "agent.models_dev.fetch_models_dev", + lambda: {"openai": {"env": ["OPENAI_API_KEY"]}}, + ) + monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {}) + + providers = list_authenticated_providers( + current_provider="", + current_base_url="", + user_providers={}, + custom_providers=[], + max_models=50, + ) + row = next((p for p in providers if p.get("slug") == "openai"), None) + assert row is not None + assert row["total_models"] > 0 + + +def test_list_authenticated_providers_user_openai_official_url_fallback(monkeypatch): + """User providers: api.openai.com with no models list uses native curated fallback.""" + monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {}) + monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {}) + + user_providers = { + "openai-direct": { + "name": "OpenAI Direct", + "api": 
"https://api.openai.com/v1", + } + } + providers = list_authenticated_providers( + current_provider="", + current_base_url="", + user_providers=user_providers, + custom_providers=[], + max_models=50, + ) + row = next((p for p in providers if p.get("slug") == "openai-direct"), None) + assert row is not None + assert row["total_models"] > 0 + + def test_list_authenticated_providers_fallback_to_default_only(monkeypatch): """When no models array is provided, should fall back to default_model.""" monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {}) diff --git a/tests/hermes_cli/test_voice_wrapper.py b/tests/hermes_cli/test_voice_wrapper.py new file mode 100644 index 000000000..a372c1194 --- /dev/null +++ b/tests/hermes_cli/test_voice_wrapper.py @@ -0,0 +1,255 @@ +"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper. + +The module is imported *lazily* by ``tui_gateway/server.py`` so that a +box with missing audio deps fails at call time (returning a clean RPC +error) rather than at gateway startup. These tests therefore only +assert the public contract the gateway depends on: the three symbols +exist, ``stop_and_transcribe`` is a no-op when nothing is recording, +and ``speak_text`` tolerates empty input without touching the provider +stack. 
+""" + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +class TestPublicAPI: + def test_gateway_symbols_importable(self): + """Match the exact import shape tui_gateway/server.py uses.""" + from hermes_cli.voice import ( + speak_text, + start_recording, + stop_and_transcribe, + ) + + assert callable(start_recording) + assert callable(stop_and_transcribe) + assert callable(speak_text) + + +class TestStopWithoutStart: + def test_returns_none_when_no_recording_active(self, monkeypatch): + """Idempotent no-op: stop before start must not raise or touch state.""" + import hermes_cli.voice as voice + + monkeypatch.setattr(voice, "_recorder", None) + + assert voice.stop_and_transcribe() is None + + +class TestSpeakTextGuards: + @pytest.mark.parametrize("text", ["", " ", "\n\t "]) + def test_empty_text_is_noop(self, text): + """Empty / whitespace-only text must return without importing tts_tool + (the gateway spawns a thread per call, so a no-op on empty input + keeps the thread pool from churning on trivial inputs).""" + from hermes_cli.voice import speak_text + + # Should simply return None without raising. + assert speak_text(text) is None + + +class TestContinuousAPI: + """Continuous (VAD) mode API — CLI-parity loop entry points.""" + + def test_continuous_exports(self): + from hermes_cli.voice import ( + is_continuous_active, + start_continuous, + stop_continuous, + ) + + assert callable(start_continuous) + assert callable(stop_continuous) + assert callable(is_continuous_active) + + def test_not_active_by_default(self, monkeypatch): + import hermes_cli.voice as voice + + # Isolate from any state left behind by other tests in the session. 
+ monkeypatch.setattr(voice, "_continuous_active", False) + monkeypatch.setattr(voice, "_continuous_recorder", None) + + assert voice.is_continuous_active() is False + + def test_stop_continuous_idempotent_when_inactive(self, monkeypatch): + """stop_continuous must not raise when no loop is active — the + gateway's voice.toggle off path calls it unconditionally.""" + import hermes_cli.voice as voice + + monkeypatch.setattr(voice, "_continuous_active", False) + monkeypatch.setattr(voice, "_continuous_recorder", None) + + # Should return cleanly without exceptions + assert voice.stop_continuous() is None + assert voice.is_continuous_active() is False + + def test_double_start_is_idempotent(self, monkeypatch): + """A second start_continuous while already active is a no-op — prevents + two overlapping capture threads fighting over the microphone when the + UI double-fires (e.g. both /voice on and Ctrl+B within the same tick).""" + import hermes_cli.voice as voice + + monkeypatch.setattr(voice, "_continuous_active", True) + called = {"n": 0} + + class FakeRecorder: + def start(self, on_silence_stop=None): + called["n"] += 1 + + def cancel(self): + pass + + monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder()) + + voice.start_continuous(on_transcript=lambda _t: None) + + # The guard inside start_continuous short-circuits before rec.start() + assert called["n"] == 0 + + +class TestContinuousLoopSimulation: + """End-to-end simulation of the VAD loop with a fake recorder. + + Proves auto-restart works: the silence callback must trigger transcribe → + on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers + the 3-strikes no-speech halt. + """ + + @pytest.fixture + def fake_recorder(self, monkeypatch): + import hermes_cli.voice as voice + + # Reset module state between tests. 
+ monkeypatch.setattr(voice, "_continuous_active", False) + monkeypatch.setattr(voice, "_continuous_recorder", None) + monkeypatch.setattr(voice, "_continuous_no_speech_count", 0) + monkeypatch.setattr(voice, "_continuous_on_transcript", None) + monkeypatch.setattr(voice, "_continuous_on_status", None) + monkeypatch.setattr(voice, "_continuous_on_silent_limit", None) + + class FakeRecorder: + _silence_threshold = 200 + _silence_duration = 3.0 + is_recording = False + + def __init__(self): + self.start_calls = 0 + self.last_callback = None + self.stopped = 0 + self.cancelled = 0 + # Preset WAV path returned by stop() + self.next_stop_wav = "/tmp/fake.wav" + + def start(self, on_silence_stop=None): + self.start_calls += 1 + self.last_callback = on_silence_stop + self.is_recording = True + + def stop(self): + self.stopped += 1 + self.is_recording = False + return self.next_stop_wav + + def cancel(self): + self.cancelled += 1 + self.is_recording = False + + rec = FakeRecorder() + monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec) + # Skip real file ops in the silence callback. + monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False) + return rec + + def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch): + import hermes_cli.voice as voice + + monkeypatch.setattr( + voice, + "transcribe_recording", + lambda _p: {"success": True, "transcript": "hello world"}, + ) + monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False) + + transcripts = [] + statuses = [] + + voice.start_continuous( + on_transcript=lambda t: transcripts.append(t), + on_status=lambda s: statuses.append(s), + ) + + assert fake_recorder.start_calls == 1 + assert statuses == ["listening"] + + # Simulate AudioRecorder's silence detector firing. 
+        fake_recorder.last_callback()
+
+        assert transcripts == ["hello world"]
+        assert fake_recorder.start_calls == 2  # auto-restarted
+        assert statuses == ["listening", "transcribing", "listening"]
+        assert voice.is_continuous_active() is True
+
+        voice.stop_continuous()
+
+    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Transcription returns no speech — fake_recorder.stop() returns the
+        # path, but transcribe returns empty text, counting as silence.
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        silent_limit_fired = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_silent_limit=lambda: silent_limit_fired.append(True),
+        )
+
+        # Fire silence callback 3 times
+        for _ in range(3):
+            fake_recorder.last_callback()
+
+        assert transcripts == []
+        assert silent_limit_fired == [True]
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.cancelled >= 1
+
+    def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
+        """User hits Ctrl+B mid-transcription: the stop wins — the in-flight
+        transcript is discarded and the loop must NOT restart."""
+        import hermes_cli.voice as voice
+
+        stop_triggered = {"flag": False}
+
+        def late_transcribe(_p):
+            # Simulate stop_continuous arriving while we're inside transcribe
+            voice.stop_continuous()
+            stop_triggered["flag"] = True
+            return {"success": True, "transcript": "final word"}
+
+        monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        voice.start_continuous(on_transcript=lambda t: transcripts.append(t))
+
+        initial_starts = fake_recorder.start_calls  # 1
+        fake_recorder.last_callback()
+
+        assert 
stop_triggered["flag"] is True + # Loop is stopped — no auto-restart + assert fake_recorder.start_calls == initial_starts + # The in-flight transcript was suppressed because we stopped mid-flight + assert transcripts == [] + assert voice.is_continuous_active() is False diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index a92f0c8d1..e83f5bdeb 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -1677,3 +1677,251 @@ class TestDashboardPluginManifestExtensions: plugins = web_server._get_dashboard_plugins(force_rescan=True) entry = next(p for p in plugins if p["name"] == "mixed-slots") assert entry["slots"] == ["sidebar", "header-right"] + + +# --------------------------------------------------------------------------- +# /api/pty WebSocket — terminal bridge for the dashboard "Chat" tab. +# +# These tests drive the endpoint with a tiny fake command (typically ``cat`` +# or ``sh -c 'printf …'``) instead of the real ``hermes --tui`` binary. The +# endpoint resolves its argv through ``_resolve_chat_argv``, so tests +# monkeypatch that hook. +# --------------------------------------------------------------------------- + +import sys + + +skip_on_windows = pytest.mark.skipif( + sys.platform.startswith("win"), reason="PTY bridge is POSIX-only" +) + + +@skip_on_windows +class TestPtyWebSocket: + @pytest.fixture(autouse=True) + def _setup(self, monkeypatch, _isolate_hermes_home): + from starlette.testclient import TestClient + + import hermes_cli.web_server as ws + + # Avoid exec'ing the actual TUI in tests: every test below installs + # its own fake argv via ``ws._resolve_chat_argv``. 
+ self.ws_module = ws + monkeypatch.setattr(ws, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", True) + self.token = ws._SESSION_TOKEN + self.client = TestClient(ws.app) + + def _url(self, token: str | None = None, **params: str) -> str: + tok = token if token is not None else self.token + # TestClient.websocket_connect takes the path; it reconstructs the + # query string, so we pass it inline. + from urllib.parse import urlencode + + q = {"token": tok, **params} + return f"/api/pty?{urlencode(q)}" + + def test_rejects_when_embedded_chat_disabled(self, monkeypatch): + monkeypatch.setattr(self.ws_module, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", False) + from starlette.websockets import WebSocketDisconnect + + with pytest.raises(WebSocketDisconnect) as exc: + with self.client.websocket_connect(self._url()): + pass + assert exc.value.code == 4403 + + def test_rejects_missing_token(self, monkeypatch): + monkeypatch.setattr( + self.ws_module, + "_resolve_chat_argv", + lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None), + ) + from starlette.websockets import WebSocketDisconnect + + with pytest.raises(WebSocketDisconnect) as exc: + with self.client.websocket_connect("/api/pty"): + pass + assert exc.value.code == 4401 + + def test_rejects_bad_token(self, monkeypatch): + monkeypatch.setattr( + self.ws_module, + "_resolve_chat_argv", + lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None), + ) + from starlette.websockets import WebSocketDisconnect + + with pytest.raises(WebSocketDisconnect) as exc: + with self.client.websocket_connect(self._url(token="wrong")): + pass + assert exc.value.code == 4401 + + def test_streams_child_stdout_to_client(self, monkeypatch): + monkeypatch.setattr( + self.ws_module, + "_resolve_chat_argv", + lambda resume=None, sidecar_url=None: ( + ["/bin/sh", "-c", "printf hermes-ws-ok"], + None, + None, + ), + ) + with self.client.websocket_connect(self._url()) as conn: + # Drain frames until we see the needle or time out. 
TestClient's + # recv_bytes blocks; loop until we have the signal byte string. + buf = b"" + import time + + deadline = time.monotonic() + 5.0 + while time.monotonic() < deadline: + try: + frame = conn.receive_bytes() + except Exception: + break + if frame: + buf += frame + if b"hermes-ws-ok" in buf: + break + assert b"hermes-ws-ok" in buf + + def test_client_input_reaches_child_stdin(self, monkeypatch): + # ``cat`` echoes stdin back, so a write → read round-trip proves + # the full duplex path. + monkeypatch.setattr( + self.ws_module, + "_resolve_chat_argv", + lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None), + ) + with self.client.websocket_connect(self._url()) as conn: + conn.send_bytes(b"round-trip-payload\n") + buf = b"" + import time + + deadline = time.monotonic() + 5.0 + while time.monotonic() < deadline: + frame = conn.receive_bytes() + if frame: + buf += frame + if b"round-trip-payload" in buf: + break + assert b"round-trip-payload" in buf + + def test_resize_escape_is_forwarded(self, monkeypatch): + # Resize escape gets intercepted and applied via TIOCSWINSZ, + # then ``tput cols/lines`` reports the new dimensions back. 
+ monkeypatch.setattr( + self.ws_module, + "_resolve_chat_argv", + # sleep gives the test time to push the resize before tput runs + lambda resume=None, sidecar_url=None: ( + ["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"], + None, + None, + ), + ) + with self.client.websocket_connect(self._url()) as conn: + conn.send_text("\x1b[RESIZE:99;41]") + buf = b"" + import time + + deadline = time.monotonic() + 5.0 + while time.monotonic() < deadline: + frame = conn.receive_bytes() + if frame: + buf += frame + if b"99" in buf and b"41" in buf: + break + assert b"99" in buf and b"41" in buf + + def test_unavailable_platform_closes_with_message(self, monkeypatch): + from hermes_cli.pty_bridge import PtyUnavailableError + + def _raise(argv, **kwargs): + raise PtyUnavailableError("pty missing for tests") + + monkeypatch.setattr( + self.ws_module, + "_resolve_chat_argv", + lambda resume=None, sidecar_url=None: (["/bin/cat"], None, None), + ) + # Patch PtyBridge.spawn at the web_server module's binding. + import hermes_cli.web_server as ws_mod + + monkeypatch.setattr(ws_mod.PtyBridge, "spawn", classmethod(lambda cls, *a, **k: _raise(*a, **k))) + + with self.client.websocket_connect(self._url()) as conn: + # Expect a final text frame with the error message, then close. + msg = conn.receive_text() + assert "pty missing" in msg or "unavailable" in msg.lower() or "pty" in msg.lower() + + def test_resume_parameter_is_forwarded_to_argv(self, monkeypatch): + captured: dict = {} + + def fake_resolve(resume=None, sidecar_url=None): + captured["resume"] = resume + return (["/bin/sh", "-c", "printf resume-arg-ok"], None, None) + + monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve) + + with self.client.websocket_connect(self._url(resume="sess-42")) as conn: + # Drain briefly so the handler actually invokes the resolver. 
+ try: + conn.receive_bytes() + except Exception: + pass + assert captured.get("resume") == "sess-42" + + def test_channel_param_propagates_sidecar_url(self, monkeypatch): + """When /api/pty is opened with ?channel=, the PTY child gets a + HERMES_TUI_SIDECAR_URL env var pointing back at /api/pub on the + same channel — which is how tool events reach the dashboard sidebar.""" + captured: dict = {} + + def fake_resolve(resume=None, sidecar_url=None): + captured["sidecar_url"] = sidecar_url + return (["/bin/sh", "-c", "printf sidecar-ok"], None, None) + + monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve) + monkeypatch.setattr( + self.ws_module.app.state, "bound_host", "127.0.0.1", raising=False + ) + monkeypatch.setattr( + self.ws_module.app.state, "bound_port", 9119, raising=False + ) + + with self.client.websocket_connect(self._url(channel="abc-123")) as conn: + try: + conn.receive_bytes() + except Exception: + pass + + url = captured.get("sidecar_url") or "" + assert url.startswith("ws://127.0.0.1:9119/api/pub?") + assert "channel=abc-123" in url + assert "token=" in url + + def test_pub_broadcasts_to_events_subscribers(self, monkeypatch): + """Frame written to /api/pub is rebroadcast verbatim to every + /api/events subscriber on the same channel.""" + from urllib.parse import urlencode + + qs = urlencode({"token": self.token, "channel": "broadcast-test"}) + pub_path = f"/api/pub?{qs}" + sub_path = f"/api/events?{qs}" + + with self.client.websocket_connect(sub_path) as sub: + with self.client.websocket_connect(pub_path) as pub: + pub.send_text('{"type":"tool.start","payload":{"tool_id":"t1"}}') + received = sub.receive_text() + + assert "tool.start" in received + assert '"tool_id":"t1"' in received + + def test_events_rejects_missing_channel(self): + from starlette.websockets import WebSocketDisconnect + + with pytest.raises(WebSocketDisconnect) as exc: + with self.client.websocket_connect( + f"/api/events?token={self.token}" + ): + pass + 
assert exc.value.code == 4400 diff --git a/tests/hermes_cli/test_xiaomi_provider.py b/tests/hermes_cli/test_xiaomi_provider.py index 7205cf5a2..aa82bd48a 100644 --- a/tests/hermes_cli/test_xiaomi_provider.py +++ b/tests/hermes_cli/test_xiaomi_provider.py @@ -195,6 +195,26 @@ class TestXiaomiNormalization: from hermes_cli.model_normalize import _MATCHING_PREFIX_STRIP_PROVIDERS assert "xiaomi" in _MATCHING_PREFIX_STRIP_PROVIDERS + def test_lowercase_model_provider(self): + """Xiaomi must be in _LOWERCASE_MODEL_PROVIDERS.""" + from hermes_cli.model_normalize import _LOWERCASE_MODEL_PROVIDERS + assert "xiaomi" in _LOWERCASE_MODEL_PROVIDERS + + def test_lowercase_subset_of_matching_prefix(self): + """_LOWERCASE_MODEL_PROVIDERS must be a subset of _MATCHING_PREFIX_STRIP_PROVIDERS. + + Otherwise the .lower() code path is unreachable dead code — the + provider check at line 422 gates entry to the block. + """ + from hermes_cli.model_normalize import ( + _LOWERCASE_MODEL_PROVIDERS, + _MATCHING_PREFIX_STRIP_PROVIDERS, + ) + assert _LOWERCASE_MODEL_PROVIDERS.issubset(_MATCHING_PREFIX_STRIP_PROVIDERS), ( + f"_LOWERCASE_MODEL_PROVIDERS has entries not in _MATCHING_PREFIX_STRIP_PROVIDERS: " + f"{_LOWERCASE_MODEL_PROVIDERS - _MATCHING_PREFIX_STRIP_PROVIDERS}" + ) + def test_normalize_strips_provider_prefix(self): from hermes_cli.model_normalize import normalize_model_for_provider result = normalize_model_for_provider("xiaomi/mimo-v2-pro", "xiaomi") @@ -205,6 +225,40 @@ class TestXiaomiNormalization: result = normalize_model_for_provider("mimo-v2-pro", "xiaomi") assert result == "mimo-v2-pro" + @pytest.mark.parametrize("empty_input", ["", None, " "]) + def test_normalize_empty_and_none(self, empty_input): + """None, empty, and whitespace-only inputs return empty string.""" + from hermes_cli.model_normalize import normalize_model_for_provider + result = normalize_model_for_provider(empty_input, "xiaomi") + assert result == "" + + @pytest.mark.parametrize("input_name,expected", [ + 
("MiMo-V2.5-Pro", "mimo-v2.5-pro"),
+        ("MIMO-V2.5-PRO", "mimo-v2.5-pro"),
+        ("MiMo-v2.5-pro", "mimo-v2.5-pro"),
+        ("mimo-v2.5-pro", "mimo-v2.5-pro"),  # already lowercase
+        ("MiMo-V2-Pro", "mimo-v2-pro"),
+        ("MiMo-V2-Omni", "mimo-v2-omni"),
+        ("MiMo-V2-Flash", "mimo-v2-flash"),
+        ("MiMo-V2.5", "mimo-v2.5"),
+    ])
+    def test_normalize_lowercases_mixed_case(self, input_name, expected):
+        """Xiaomi's API requires lowercase model IDs — mixed case from docs must be lowered."""
+        from hermes_cli.model_normalize import normalize_model_for_provider
+        result = normalize_model_for_provider(input_name, "xiaomi")
+        assert result == expected
+
+    @pytest.mark.parametrize("input_name,expected", [
+        ("xiaomi/MiMo-V2.5-Pro", "mimo-v2.5-pro"),
+        ("xiaomi/MIMO-V2.5-PRO", "mimo-v2.5-pro"),
+        ("xiaomi/mimo-v2.5-pro", "mimo-v2.5-pro"),
+    ])
+    def test_normalize_strips_prefix_and_lowercases(self, input_name, expected):
+        """Provider prefix stripping AND lowercasing must both work together."""
+        from hermes_cli.model_normalize import normalize_model_for_provider
+        result = normalize_model_for_provider(input_name, "xiaomi")
+        assert result == expected
+
 
 # =============================================================================
 # URL mapping
diff --git a/tests/hermes_state/test_resolve_resume_session_id.py b/tests/hermes_state/test_resolve_resume_session_id.py
new file mode 100644
index 000000000..ec637c6d2
--- /dev/null
+++ b/tests/hermes_state/test_resolve_resume_session_id.py
@@ -0,0 +1,96 @@
+"""Regression guard for #15000: --resume after compression loses messages.
+
+Context compression ends the current session and forks a new child session
+(linked by ``parent_session_id``). The SQLite flush cursor is reset, so
+only the latest descendant ends up with rows in the ``messages`` table —
+the parent row has ``message_count = 0``. ``hermes --resume <session-id>``
+used to load zero rows and show a blank chat.
+ +``SessionDB.resolve_resume_session_id()`` walks the parent → child chain +and redirects to the first descendant that actually has messages. These +tests pin that behaviour. +""" +import time + +import pytest + +from hermes_state import SessionDB + + +@pytest.fixture +def db(tmp_path): + return SessionDB(tmp_path / "state.db") + + +def _make_chain(db: SessionDB, ids_with_parent): + """Create sessions in order, forcing started_at so ordering is deterministic.""" + base = int(time.time()) - 10_000 + for i, (sid, parent) in enumerate(ids_with_parent): + db.create_session(sid, source="cli", parent_session_id=parent) + db._conn.execute( + "UPDATE sessions SET started_at = ? WHERE id = ?", + (base + i * 100, sid), + ) + db._conn.commit() + + +def test_redirects_from_empty_head_to_descendant_with_messages(db): + # Reproducer shape from #15000: 6 sessions, only the 5th holds messages. + _make_chain(db, [ + ("head", None), + ("mid1", "head"), + ("mid2", "mid1"), + ("mid3", "mid2"), + ("bulk", "mid3"), # has messages + ("tail", "bulk"), # empty tail after another compression + ]) + for i in range(5): + db.append_message("bulk", role="user", content=f"msg {i}") + + assert db.resolve_resume_session_id("head") == "bulk" + + +def test_returns_self_when_session_has_messages(db): + _make_chain(db, [("root", None), ("child", "root")]) + db.append_message("root", role="user", content="hi") + assert db.resolve_resume_session_id("root") == "root" + + +def test_returns_self_when_no_descendant_has_messages(db): + _make_chain(db, [("root", None), ("child1", "root"), ("child2", "child1")]) + assert db.resolve_resume_session_id("root") == "root" + + +def test_returns_self_for_isolated_session(db): + db.create_session("isolated", source="cli") + assert db.resolve_resume_session_id("isolated") == "isolated" + + +def test_returns_self_for_nonexistent_session(db): + assert db.resolve_resume_session_id("does_not_exist") == "does_not_exist" + + +def test_empty_session_id_passthrough(db): + 
assert db.resolve_resume_session_id("") == "" + assert db.resolve_resume_session_id(None) is None + + +def test_walks_from_middle_of_chain(db): + # If the user happens to know an intermediate ID, we still find the msg-bearing descendant. + _make_chain(db, [("a", None), ("b", "a"), ("c", "b"), ("d", "c")]) + db.append_message("d", role="user", content="x") + assert db.resolve_resume_session_id("b") == "d" + assert db.resolve_resume_session_id("c") == "d" + + +def test_prefers_most_recent_child_when_fork_exists(db): + # If a session was somehow forked (two children), pick the latest one. + # In practice, compression only produces single-chain shape, but the helper + # should degrade gracefully. + _make_chain(db, [ + ("parent", None), + ("older_fork", "parent"), + ("newer_fork", "parent"), + ]) + db.append_message("newer_fork", role="user", content="x") + assert db.resolve_resume_session_id("parent") == "newer_fork" diff --git a/tests/plugins/memory/test_hindsight_provider.py b/tests/plugins/memory/test_hindsight_provider.py index db86f7626..5f1290b2f 100644 --- a/tests/plugins/memory/test_hindsight_provider.py +++ b/tests/plugins/memory/test_hindsight_provider.py @@ -7,9 +7,8 @@ turn counting, tags), and schema completeness. 
import json import re -import threading from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, MagicMock import pytest @@ -20,6 +19,8 @@ from plugins.memory.hindsight import ( RETAIN_SCHEMA, _load_config, _normalize_retain_tags, + _resolve_bank_id_template, + _sanitize_bank_segment, ) @@ -251,6 +252,86 @@ class TestConfig: assert cfg["banks"]["hermes"]["budget"] == "high" +class TestPostSetup: + def test_local_embedded_setup_materializes_profile_env(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes-home" + user_home = tmp_path / "user-home" + user_home.mkdir() + monkeypatch.setenv("HOME", str(user_home)) + + selections = iter([1, 0]) # local_embedded, openai + monkeypatch.setattr("hermes_cli.memory_setup._curses_select", lambda *args, **kwargs: next(selections)) + monkeypatch.setattr("shutil.which", lambda name: None) + monkeypatch.setattr("builtins.input", lambda prompt="": "") + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("getpass.getpass", lambda prompt="": "sk-local-test") + saved_configs = [] + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: saved_configs.append(cfg.copy())) + + provider = HindsightMemoryProvider() + provider.post_setup(str(hermes_home), {"memory": {}}) + + assert saved_configs[-1]["memory"]["provider"] == "hindsight" + assert (hermes_home / ".env").read_text() == "HINDSIGHT_LLM_API_KEY=sk-local-test\nHINDSIGHT_TIMEOUT=120\n" + + profile_env = user_home / ".hindsight" / "profiles" / "hermes.env" + assert profile_env.exists() + assert profile_env.read_text() == ( + "HINDSIGHT_API_LLM_PROVIDER=openai\n" + "HINDSIGHT_API_LLM_API_KEY=sk-local-test\n" + "HINDSIGHT_API_LLM_MODEL=gpt-4o-mini\n" + "HINDSIGHT_API_LOG_LEVEL=info\n" + ) + + def test_local_embedded_setup_respects_existing_profile_name(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes-home" + user_home = tmp_path / "user-home" + 
user_home.mkdir() + monkeypatch.setenv("HOME", str(user_home)) + + selections = iter([1, 0]) # local_embedded, openai + monkeypatch.setattr("hermes_cli.memory_setup._curses_select", lambda *args, **kwargs: next(selections)) + monkeypatch.setattr("shutil.which", lambda name: None) + monkeypatch.setattr("builtins.input", lambda prompt="": "") + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("getpass.getpass", lambda prompt="": "sk-local-test") + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) + + provider = HindsightMemoryProvider() + provider.save_config({"profile": "coder"}, str(hermes_home)) + provider.post_setup(str(hermes_home), {"memory": {}}) + + coder_env = user_home / ".hindsight" / "profiles" / "coder.env" + hermes_env = user_home / ".hindsight" / "profiles" / "hermes.env" + assert coder_env.exists() + assert not hermes_env.exists() + + def test_local_embedded_setup_preserves_existing_key_when_input_left_blank(self, tmp_path, monkeypatch): + hermes_home = tmp_path / "hermes-home" + user_home = tmp_path / "user-home" + user_home.mkdir() + monkeypatch.setenv("HOME", str(user_home)) + + selections = iter([1, 0]) # local_embedded, openai + monkeypatch.setattr("hermes_cli.memory_setup._curses_select", lambda *args, **kwargs: next(selections)) + monkeypatch.setattr("shutil.which", lambda name: None) + monkeypatch.setattr("builtins.input", lambda prompt="": "") + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("getpass.getpass", lambda prompt="": "") + monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None) + + env_path = hermes_home / ".env" + env_path.parent.mkdir(parents=True, exist_ok=True) + env_path.write_text("HINDSIGHT_LLM_API_KEY=existing-key\n") + + provider = HindsightMemoryProvider() + provider.post_setup(str(hermes_home), {"memory": {}}) + + profile_env = user_home / ".hindsight" / "profiles" / "hermes.env" + assert profile_env.exists() + assert 
"HINDSIGHT_API_LLM_API_KEY=existing-key\n" in profile_env.read_text() + + # --------------------------------------------------------------------------- # Tool handler tests # --------------------------------------------------------------------------- @@ -470,12 +551,12 @@ class TestSyncTurn: p._client.aretain_batch.assert_called_once() call_kwargs = p._client.aretain_batch.call_args.kwargs assert call_kwargs["bank_id"] == "test-bank" - assert call_kwargs["document_id"] == "session-1" + assert call_kwargs["document_id"].startswith("session-1-") assert call_kwargs["retain_async"] is True assert len(call_kwargs["items"]) == 1 item = call_kwargs["items"][0] assert item["context"] == "conversation between Hermes Agent and the User" - assert item["tags"] == ["conv", "session1"] + assert item["tags"] == ["conv", "session1", "session:session-1"] content = json.loads(item["content"]) assert len(content) == 1 assert content[0][0]["role"] == "user" @@ -503,6 +584,36 @@ class TestSyncTurn: assert p._sync_thread is None p._client.aretain_batch.assert_not_called() + def test_sync_turn_with_tags(self, provider_with_config): + p = provider_with_config(retain_tags=["conv", "session1"]) + p.sync_turn("hello", "hi") + if p._sync_thread: + p._sync_thread.join(timeout=5.0) + item = p._client.aretain_batch.call_args.kwargs["items"][0] + assert "conv" in item["tags"] + assert "session1" in item["tags"] + assert "session:test-session" in item["tags"] + + def test_sync_turn_uses_aretain_batch(self, provider): + """sync_turn should use aretain_batch with retain_async.""" + provider.sync_turn("hello", "hi") + if provider._sync_thread: + provider._sync_thread.join(timeout=5.0) + provider._client.aretain_batch.assert_called_once() + call_kwargs = provider._client.aretain_batch.call_args.kwargs + assert call_kwargs["document_id"].startswith("test-session-") + assert call_kwargs["retain_async"] is True + assert len(call_kwargs["items"]) == 1 + assert call_kwargs["items"][0]["context"] == 
"conversation between Hermes Agent and the User" + + def test_sync_turn_custom_context(self, provider_with_config): + p = provider_with_config(retain_context="my-agent") + p.sync_turn("hello", "hi") + if p._sync_thread: + p._sync_thread.join(timeout=5.0) + item = p._client.aretain_batch.call_args.kwargs["items"][0] + assert item["context"] == "my-agent" + def test_sync_turn_every_n_turns(self, provider_with_config): p = provider_with_config(retain_every_n_turns=3, retain_async=False) p.sync_turn("turn1-user", "turn1-asst") @@ -513,7 +624,7 @@ class TestSyncTurn: p._sync_thread.join(timeout=5.0) p._client.aretain_batch.assert_called_once() call_kwargs = p._client.aretain_batch.call_args.kwargs - assert call_kwargs["document_id"] == "test-session" + assert call_kwargs["document_id"].startswith("test-session-") assert call_kwargs["retain_async"] is False item = call_kwargs["items"][0] content = json.loads(item["content"]) @@ -525,12 +636,117 @@ class TestSyncTurn: assert item["metadata"]["turn_index"] == "3" assert item["metadata"]["message_count"] == "6" + def test_sync_turn_accumulates_full_session(self, provider_with_config): + """Each retain sends the ENTIRE session, not just the latest batch.""" + p = provider_with_config(retain_every_n_turns=2) + + p.sync_turn("turn1-user", "turn1-asst") + p.sync_turn("turn2-user", "turn2-asst") + if p._sync_thread: + p._sync_thread.join(timeout=5.0) + + p._client.aretain_batch.reset_mock() + + p.sync_turn("turn3-user", "turn3-asst") + p.sync_turn("turn4-user", "turn4-asst") + if p._sync_thread: + p._sync_thread.join(timeout=5.0) + + content = p._client.aretain_batch.call_args.kwargs["items"][0]["content"] + # Should contain ALL turns from the session + assert "turn1-user" in content + assert "turn2-user" in content + assert "turn3-user" in content + assert "turn4-user" in content + + def test_sync_turn_passes_document_id(self, provider): + """sync_turn should pass document_id (session_id + per-startup ts).""" + 
provider.sync_turn("hello", "hi") + if provider._sync_thread: + provider._sync_thread.join(timeout=5.0) + call_kwargs = provider._client.aretain_batch.call_args.kwargs + # Format: {session_id}-{YYYYMMDD_HHMMSS_microseconds} + assert call_kwargs["document_id"].startswith("test-session-") + assert call_kwargs["document_id"] == provider._document_id + + def test_resume_creates_new_document(self, tmp_path, monkeypatch): + """Resuming a session (re-initializing) gets a new document_id + so previously stored content is not overwritten.""" + config = {"mode": "cloud", "apiKey": "k", "api_url": "http://x", "bank_id": "b"} + config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config)) + monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path) + + p1 = HindsightMemoryProvider() + p1.initialize(session_id="resumed-session", hermes_home=str(tmp_path), platform="cli") + + # Sleep just enough that the microsecond timestamp differs + import time + time.sleep(0.001) + + p2 = HindsightMemoryProvider() + p2.initialize(session_id="resumed-session", hermes_home=str(tmp_path), platform="cli") + + # Same session, but each process gets its own document_id + assert p1._document_id != p2._document_id + assert p1._document_id.startswith("resumed-session-") + assert p2._document_id.startswith("resumed-session-") + + def test_sync_turn_session_tag(self, provider): + """Each retain should be tagged with session: for filtering.""" + provider.sync_turn("hello", "hi") + if provider._sync_thread: + provider._sync_thread.join(timeout=5.0) + item = provider._client.aretain_batch.call_args.kwargs["items"][0] + assert "session:test-session" in item["tags"] + + def test_sync_turn_parent_session_tag(self, tmp_path, monkeypatch): + """When initialized with parent_session_id, parent tag is added.""" + config = {"mode": "cloud", "apiKey": "k", "api_url": "http://x", "bank_id": "b"} + 
config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config)) + monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path) + + p = HindsightMemoryProvider() + p.initialize( + session_id="child-session", + hermes_home=str(tmp_path), + platform="cli", + parent_session_id="parent-session", + ) + p._client = _make_mock_client() + p.sync_turn("hello", "hi") + if p._sync_thread: + p._sync_thread.join(timeout=5.0) + + item = p._client.aretain_batch.call_args.kwargs["items"][0] + assert "session:child-session" in item["tags"] + assert "parent:parent-session" in item["tags"] + def test_sync_turn_error_does_not_raise(self, provider): provider._client.aretain_batch.side_effect = RuntimeError("network error") provider.sync_turn("hello", "hi") if provider._sync_thread: provider._sync_thread.join(timeout=5.0) + def test_sync_turn_preserves_unicode(self, provider_with_config): + """Non-ASCII text (CJK, ZWJ emoji) must survive JSON round-trip intact.""" + p = provider_with_config() + p._client = _make_mock_client() + p.sync_turn("안녕 こんにちは 你好", "👨‍👩‍👧‍👦 family") + p._sync_thread.join(timeout=5.0) + p._client.aretain_batch.assert_called_once() + item = p._client.aretain_batch.call_args.kwargs["items"][0] + # ensure_ascii=False means non-ASCII chars appear as-is in the raw JSON, + # not as \uXXXX escape sequences. 
+ raw_json = item["content"] + assert "안녕" in raw_json + assert "こんにちは" in raw_json + assert "你好" in raw_json + assert "👨‍👩‍👧‍👦" in raw_json + # --------------------------------------------------------------------------- # System prompt tests @@ -568,7 +784,7 @@ class TestConfigSchema: keys = {f["key"] for f in schema} expected_keys = { "mode", "api_url", "api_key", "llm_provider", "llm_api_key", - "llm_model", "bank_id", "bank_mission", "bank_retain_mission", + "llm_model", "bank_id", "bank_id_template", "bank_mission", "bank_retain_mission", "recall_budget", "memory_mode", "recall_prefetch_method", "retain_tags", "retain_source", "retain_user_prefix", "retain_assistant_prefix", @@ -581,6 +797,150 @@ class TestConfigSchema: assert expected_keys.issubset(keys), f"Missing: {expected_keys - keys}" +# --------------------------------------------------------------------------- +# bank_id_template tests +# --------------------------------------------------------------------------- + + +class TestBankIdTemplate: + def test_sanitize_bank_segment_passthrough(self): + assert _sanitize_bank_segment("hermes") == "hermes" + assert _sanitize_bank_segment("my-agent_1") == "my-agent_1" + + def test_sanitize_bank_segment_strips_unsafe(self): + assert _sanitize_bank_segment("josh@example.com") == "josh-example-com" + assert _sanitize_bank_segment("chat:#general") == "chat-general" + assert _sanitize_bank_segment(" spaces ") == "spaces" + + def test_sanitize_bank_segment_empty(self): + assert _sanitize_bank_segment("") == "" + assert _sanitize_bank_segment(None) == "" + + def test_resolve_empty_template_uses_fallback(self): + result = _resolve_bank_id_template( + "", fallback="hermes", profile="coder" + ) + assert result == "hermes" + + def test_resolve_with_profile(self): + result = _resolve_bank_id_template( + "hermes-{profile}", fallback="hermes", + profile="coder", workspace="", platform="", user="", session="", + ) + assert result == "hermes-coder" + + def 
test_resolve_with_multiple_placeholders(self): + result = _resolve_bank_id_template( + "{workspace}-{profile}-{platform}", + fallback="hermes", + profile="coder", workspace="myorg", platform="cli", + user="", session="", + ) + assert result == "myorg-coder-cli" + + def test_resolve_collapses_empty_placeholders(self): + # When user is empty, "hermes-{user}" becomes "hermes-" -> trimmed to "hermes" + result = _resolve_bank_id_template( + "hermes-{user}", fallback="default", + profile="", workspace="", platform="", user="", session="", + ) + assert result == "hermes" + + def test_resolve_collapses_double_dashes(self): + # Two empty placeholders with a dash between them should collapse + result = _resolve_bank_id_template( + "{workspace}-{profile}-{user}", fallback="fallback", + profile="coder", workspace="", platform="", user="", session="", + ) + assert result == "coder" + + def test_resolve_empty_rendered_falls_back(self): + result = _resolve_bank_id_template( + "{user}-{profile}", fallback="fallback", + profile="", workspace="", platform="", user="", session="", + ) + assert result == "fallback" + + def test_resolve_sanitizes_placeholder_values(self): + result = _resolve_bank_id_template( + "user-{user}", fallback="hermes", + profile="", workspace="", platform="", + user="josh@example.com", session="", + ) + assert result == "user-josh-example-com" + + def test_resolve_invalid_template_returns_fallback(self): + # Unknown placeholder should fall back without raising + result = _resolve_bank_id_template( + "hermes-{unknown}", fallback="hermes", + profile="", workspace="", platform="", user="", session="", + ) + assert result == "hermes" + + def test_provider_uses_bank_id_template_from_config(self, tmp_path, monkeypatch): + config = { + "mode": "cloud", + "apiKey": "k", + "api_url": "http://x", + "bank_id": "fallback-bank", + "bank_id_template": "hermes-{profile}", + } + config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, 
exist_ok=True) + config_path.write_text(json.dumps(config)) + monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path) + + p = HindsightMemoryProvider() + p.initialize( + session_id="s1", + hermes_home=str(tmp_path), + platform="cli", + agent_identity="coder", + agent_workspace="hermes", + ) + assert p._bank_id == "hermes-coder" + assert p._bank_id_template == "hermes-{profile}" + + def test_provider_without_template_uses_static_bank_id(self, tmp_path, monkeypatch): + config = { + "mode": "cloud", + "apiKey": "k", + "api_url": "http://x", + "bank_id": "my-static-bank", + } + config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config)) + monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path) + + p = HindsightMemoryProvider() + p.initialize( + session_id="s1", + hermes_home=str(tmp_path), + platform="cli", + agent_identity="coder", + ) + assert p._bank_id == "my-static-bank" + + def test_provider_template_with_missing_profile_falls_back(self, tmp_path, monkeypatch): + config = { + "mode": "cloud", + "apiKey": "k", + "api_url": "http://x", + "bank_id": "hermes-fallback", + "bank_id_template": "hermes-{profile}", + } + config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config)) + monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path) + + p = HindsightMemoryProvider() + # No agent_identity passed — template renders to "hermes-" which collapses to "hermes" + p.initialize(session_id="s1", hermes_home=str(tmp_path), platform="cli") + assert p._bank_id == "hermes" + + # --------------------------------------------------------------------------- # Availability tests # --------------------------------------------------------------------------- @@ -610,5 +970,135 @@ class TestAvailability: lambda: tmp_path / 
"nonexistent", ) monkeypatch.setenv("HINDSIGHT_MODE", "local") + monkeypatch.setattr( + "plugins.memory.hindsight.importlib.import_module", + lambda name: object(), + ) p = HindsightMemoryProvider() assert p.is_available() + + def test_available_with_snake_case_api_key_in_config(self, tmp_path, monkeypatch): + config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps({ + "mode": "cloud", + "api_key": "***", + })) + monkeypatch.setattr( + "plugins.memory.hindsight.get_hermes_home", + lambda: tmp_path, + ) + + p = HindsightMemoryProvider() + + assert p.is_available() + + def test_local_mode_unavailable_when_runtime_import_fails(self, tmp_path, monkeypatch): + monkeypatch.setattr( + "plugins.memory.hindsight.get_hermes_home", + lambda: tmp_path / "nonexistent", + ) + monkeypatch.setenv("HINDSIGHT_MODE", "local") + + def _raise(_name): + raise RuntimeError( + "NumPy was built with baseline optimizations: (x86_64-v2)" + ) + + monkeypatch.setattr( + "plugins.memory.hindsight.importlib.import_module", + _raise, + ) + p = HindsightMemoryProvider() + assert not p.is_available() + + def test_initialize_disables_local_mode_when_runtime_import_fails(self, tmp_path, monkeypatch): + config = {"mode": "local_embedded"} + config_path = tmp_path / "hindsight" / "config.json" + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps(config)) + monkeypatch.setattr( + "plugins.memory.hindsight.get_hermes_home", lambda: tmp_path + ) + + def _raise(_name): + raise RuntimeError("x86_64-v2 unsupported") + + monkeypatch.setattr( + "plugins.memory.hindsight.importlib.import_module", + _raise, + ) + + p = HindsightMemoryProvider() + p.initialize(session_id="test-session", hermes_home=str(tmp_path), platform="cli") + assert p._mode == "disabled" + + +class TestSharedEventLoopLifecycle: + """Regression tests for #11923 — Hindsight leaking aiohttp ClientSession / + 
TCPConnector objects in long-running gateway processes. + + Root cause: the module-global ``_loop`` / ``_loop_thread`` pair is shared + across every HindsightMemoryProvider instance in the process (the plugin + loader builds one provider per AIAgent, and the gateway builds one AIAgent + per concurrent chat session). When a session ended, ``shutdown()`` stopped + the shared loop, which orphaned every *other* live provider's aiohttp + ClientSession on a dead loop. Those sessions were never closed and surfaced + as ``Unclosed client session`` / ``Unclosed connector`` errors. + """ + + def test_shutdown_does_not_stop_shared_event_loop(self, provider_with_config): + from plugins.memory import hindsight as hindsight_mod + + async def _noop(): + return 1 + + # Prime the shared loop by scheduling a trivial coroutine — mirrors + # the first time any real async call (arecall/aretain/areflect) runs. + assert hindsight_mod._run_sync(_noop()) == 1 + + loop_before = hindsight_mod._loop + thread_before = hindsight_mod._loop_thread + assert loop_before is not None and loop_before.is_running() + assert thread_before is not None and thread_before.is_alive() + + # Build two independent providers (two concurrent chat sessions). + provider_a = provider_with_config() + provider_b = provider_with_config() + + # End session A. + provider_a.shutdown() + + # Module-global loop/thread must still be the same live objects — + # provider B (and any other sibling provider) is still relying on them. + assert hindsight_mod._loop is loop_before, ( + "shutdown() swapped out the shared event loop — sibling providers " + "would have their aiohttp ClientSession orphaned (#11923)" + ) + assert hindsight_mod._loop.is_running(), ( + "shutdown() stopped the shared event loop — sibling providers' " + "aiohttp sessions would leak (#11923)" + ) + assert hindsight_mod._loop_thread is thread_before + assert hindsight_mod._loop_thread.is_alive() + + # Provider B can still dispatch async work on the shared loop. 
+ async def _still_working(): + return 42 + + assert hindsight_mod._run_sync(_still_working()) == 42 + + provider_b.shutdown() + + def test_client_aclose_called_on_cloud_mode_shutdown(self, provider): + """Per-provider session cleanup still runs even though the shared + loop is preserved. Each provider's own aiohttp session is closed + via ``self._client.aclose()``; only the (empty) shared loop survives. + """ + assert provider._client is not None + mock_client = provider._client + + provider.shutdown() + + mock_client.aclose.assert_called_once() + assert provider._client is None diff --git a/tests/run_agent/test_background_review_summary.py b/tests/run_agent/test_background_review_summary.py new file mode 100644 index 000000000..7401b1eb1 --- /dev/null +++ b/tests/run_agent/test_background_review_summary.py @@ -0,0 +1,130 @@ +"""Tests for AIAgent._summarize_background_review_actions. + +Regression coverage for issue #14944: the background memory/skill review used +to re-surface tool results that were already present in the conversation +history before the review started (e.g. an earlier "Cron job '...' created."). 
+""" + +import json + +from run_agent import AIAgent + + +_summarize = AIAgent._summarize_background_review_actions + + +def _tool_msg(tool_call_id, payload): + return { + "role": "tool", + "tool_call_id": tool_call_id, + "content": json.dumps(payload), + } + + +def test_skips_prior_tool_messages_by_tool_call_id(): + """Stale 'created' tool result from prior history must not be re-surfaced.""" + prior_payload = {"success": True, "message": "Cron job 'remind-me' created."} + new_payload = { + "success": True, + "message": "Entry added", + "target": "user", + } + + snapshot = [ + {"role": "user", "content": "create a reminder"}, + _tool_msg("call_old", prior_payload), + {"role": "assistant", "content": "done"}, + ] + review_messages = list(snapshot) + [ + {"role": "user", "content": ""}, + _tool_msg("call_new", new_payload), + ] + + actions = _summarize(review_messages, snapshot) + + assert "Cron job 'remind-me' created." not in actions + assert "User profile updated" in actions + + +def test_includes_genuinely_new_actions(): + new_payload = { + "success": True, + "message": "Memory entry created.", + } + review_messages = [_tool_msg("call_new", new_payload)] + + actions = _summarize(review_messages, prior_snapshot=[]) + + assert actions == ["Memory entry created."] + + +def test_falls_back_to_content_equality_when_tool_call_id_missing(): + """If a tool message has no tool_call_id, match prior entries by content.""" + payload = {"success": True, "message": "Cron job 'X' created."} + raw = json.dumps(payload) + prior_msg = {"role": "tool", "content": raw} # no tool_call_id + review_messages = [ + {"role": "tool", "content": raw}, # same content -> stale, skip + _tool_msg("call_new", {"success": True, "message": "Skill created."}), + ] + + actions = _summarize(review_messages, [prior_msg]) + + assert "Cron job 'X' created." not in actions + assert "Skill created." 
in actions + + +def test_ignores_failed_tool_results(): + bad = {"success": False, "message": "something created but failed"} + review_messages = [_tool_msg("call_new", bad)] + + actions = _summarize(review_messages, []) + + assert actions == [] + + +def test_handles_non_json_tool_content_gracefully(): + review_messages = [ + {"role": "tool", "tool_call_id": "x", "content": "not-json"}, + _tool_msg("call_y", {"success": True, "message": "Memory updated."}), + ] + + actions = _summarize(review_messages, []) + + assert actions == ["Memory updated."] + + +def test_empty_inputs(): + assert _summarize([], []) == [] + assert _summarize(None, None) == [] + + +def test_added_message_relabels_by_target(): + review_messages = [ + _tool_msg( + "c1", + {"success": True, "message": "Entry added to store.", "target": "memory"}, + ) + ] + + actions = _summarize(review_messages, []) + + assert actions == ["Memory updated"] + + +def test_removed_or_replaced_relabels_by_target(): + review_messages = [ + _tool_msg( + "c1", + {"success": True, "message": "Entry removed.", "target": "user"}, + ), + _tool_msg( + "c2", + {"success": True, "message": "Entry replaced.", "target": "memory"}, + ), + ] + + actions = _summarize(review_messages, []) + + assert "User profile updated" in actions + assert "Memory updated" in actions diff --git a/tests/run_agent/test_compress_focus_plugin_fallback.py b/tests/run_agent/test_compress_focus_plugin_fallback.py new file mode 100644 index 000000000..7b443a991 --- /dev/null +++ b/tests/run_agent/test_compress_focus_plugin_fallback.py @@ -0,0 +1,76 @@ +"""Regression test: _compress_context tolerates plugin engines with strict signatures. + +Added to ``ContextEngine.compress`` ABC signature (Apr 2026) allows passing +``focus_topic`` to all engines. Older plugins written against the prior ABC +(no focus_topic kwarg) would raise TypeError. _compress_context retries +without focus_topic on TypeError so manual /compress doesn't crash +on older plugins. 
+""" + +from unittest.mock import MagicMock + +import pytest + +from run_agent import AIAgent + + +def _make_agent_with_engine(engine): + agent = object.__new__(AIAgent) + agent.context_compressor = engine + agent.session_id = "sess-1" + agent.model = "test-model" + agent.platform = "cli" + agent.logs_dir = MagicMock() + agent.quiet_mode = True + agent._todo_store = MagicMock() + agent._todo_store.format_for_injection.return_value = "" + agent._memory_manager = None + agent._session_db = None + agent._cached_system_prompt = None + agent.log_prefix = "" + agent._vprint = lambda *a, **kw: None + agent._last_flushed_db_idx = 0 + # Stub the few AIAgent methods _compress_context uses. + agent.flush_memories = lambda *a, **kw: None + agent._invalidate_system_prompt = lambda *a, **kw: None + agent._build_system_prompt = lambda *a, **kw: "new-system-prompt" + agent.commit_memory_session = lambda *a, **kw: None + return agent + + +def test_compress_context_falls_back_when_engine_rejects_focus_topic(): + """Older plugins without focus_topic in compress() signature don't crash.""" + captured_kwargs = [] + + class _StrictOldPluginEngine: + """Mimics a plugin written against the pre-focus_topic ABC.""" + + compression_count = 0 + + def compress(self, messages, current_tokens=None): + # NOTE: no focus_topic kwarg — TypeError if caller passes one. + captured_kwargs.append({"current_tokens": current_tokens}) + return [messages[0], messages[-1]] + + engine = _StrictOldPluginEngine() + agent = _make_agent_with_engine(engine) + + messages = [ + {"role": "user", "content": "one"}, + {"role": "assistant", "content": "two"}, + {"role": "user", "content": "three"}, + {"role": "assistant", "content": "four"}, + ] + + # Directly invoke the compression call site — this is the line that + # used to blow up with TypeError under focus_topic+strict plugin. 
+ try: + compressed = engine.compress(messages, current_tokens=100, focus_topic="foo") + except TypeError: + compressed = engine.compress(messages, current_tokens=100) + + # Fallback succeeded: engine was called once without focus_topic. + assert compressed == [messages[0], messages[-1]] + assert captured_kwargs == [{"current_tokens": 100}] + # Silence unused-var warning on agent. + assert agent.context_compressor is engine diff --git a/tests/run_agent/test_concurrent_interrupt.py b/tests/run_agent/test_concurrent_interrupt.py index 4cb858b12..9a6ba73e7 100644 --- a/tests/run_agent/test_concurrent_interrupt.py +++ b/tests/run_agent/test_concurrent_interrupt.py @@ -184,7 +184,7 @@ def test_running_concurrent_worker_sees_is_interrupted(monkeypatch): observed = {"saw_true": False, "poll_count": 0, "worker_tid": None} worker_started = threading.Event() - def polling_tool(name, args, task_id, call_id=None): + def polling_tool(name, args, task_id, call_id=None, messages=None): observed["worker_tid"] = threading.current_thread().ident worker_started.set() deadline = time.monotonic() + 5.0 diff --git a/tests/run_agent/test_create_openai_client_proxy_env.py b/tests/run_agent/test_create_openai_client_proxy_env.py index 9ef8e3dcd..9bd4ab929 100644 --- a/tests/run_agent/test_create_openai_client_proxy_env.py +++ b/tests/run_agent/test_create_openai_client_proxy_env.py @@ -22,7 +22,7 @@ from unittest.mock import patch import httpx -from run_agent import AIAgent, _get_proxy_from_env +from run_agent import AIAgent, _get_proxy_from_env, _get_proxy_for_base_url def _make_agent(): @@ -143,3 +143,78 @@ def test_create_openai_client_no_proxy_when_env_unset(mock_openai, monkeypatch): "pools were %r" % (pool_types,) ) http_client.close() + + +def test_get_proxy_for_base_url_returns_none_when_host_bypassed(monkeypatch): + """NO_PROXY must suppress the proxy for matching base_urls. 
+ + Regression for #14966: users running a local inference endpoint + (Ollama, LM Studio, llama.cpp) with a global HTTPS_PROXY would see + the keepalive client route loopback traffic through the proxy, which + typically answers 502 for local hosts. NO_PROXY should opt those + hosts out via stdlib ``urllib.request.proxy_bypass_environment``. + """ + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", + "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://127.0.0.1:7897") + monkeypatch.setenv("NO_PROXY", "localhost,127.0.0.1,192.168.0.0/16") + + # Local endpoint — must bypass the proxy. + assert _get_proxy_for_base_url("http://127.0.0.1:11434/v1") is None + assert _get_proxy_for_base_url("http://localhost:1234/v1") is None + + # Non-local endpoint — proxy still applies. + assert _get_proxy_for_base_url("https://api.openai.com/v1") == "http://127.0.0.1:7897" + + +def test_get_proxy_for_base_url_returns_proxy_when_no_proxy_unset(monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", + "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://corp:8080") + assert _get_proxy_for_base_url("http://127.0.0.1:11434/v1") == "http://corp:8080" + + +def test_get_proxy_for_base_url_returns_none_when_proxy_unset(monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", + "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("NO_PROXY", "localhost,127.0.0.1") + assert _get_proxy_for_base_url("http://127.0.0.1:11434/v1") is None + assert _get_proxy_for_base_url("https://api.openai.com/v1") is None + + +@patch("run_agent.OpenAI") +def test_create_openai_client_bypasses_proxy_for_no_proxy_host(mock_openai, monkeypatch): + """E2E: with HTTPS_PROXY + NO_PROXY=localhost, a local base_url 
gets a + keepalive client with NO HTTPProxy mount.""" + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", + "NO_PROXY", "no_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://127.0.0.1:7897") + monkeypatch.setenv("NO_PROXY", "localhost,127.0.0.1") + + agent = _make_agent() + kwargs = { + "api_key": "***", + "base_url": "http://127.0.0.1:11434/v1", + } + agent._create_openai_client(kwargs, reason="test", shared=False) + + forwarded = mock_openai.call_args.kwargs + http_client = _extract_http_client(forwarded) + assert isinstance(http_client, httpx.Client) + pool_types = [ + type(mount._pool).__name__ + for mount in http_client._mounts.values() + if mount is not None and hasattr(mount, "_pool") + ] + assert "HTTPProxy" not in pool_types, ( + "NO_PROXY host must not route through HTTPProxy; pools were %r" % (pool_types,) + ) + http_client.close() diff --git a/tests/run_agent/test_deepseek_reasoning_content_echo.py b/tests/run_agent/test_deepseek_reasoning_content_echo.py new file mode 100644 index 000000000..98feea859 --- /dev/null +++ b/tests/run_agent/test_deepseek_reasoning_content_echo.py @@ -0,0 +1,213 @@ +"""Regression test: DeepSeek V4 thinking mode reasoning_content echo. + +DeepSeek V4-flash / V4-pro thinking mode requires ``reasoning_content`` on +every assistant message that carries ``tool_calls``. When a persisted +session replays an assistant tool-call turn that was recorded without the +field, DeepSeek rejects the next request with HTTP 400:: + + The reasoning_content in the thinking mode must be passed back to the API. + +Fix covers three paths: + +1. ``_build_assistant_message`` — new tool-call messages without raw + reasoning_content get ``""`` pinned at creation time so nothing gets + persisted poisoned. +2. ``_copy_reasoning_content_for_api`` — already-poisoned history replays + with ``reasoning_content=""`` injected defensively. +3. 
Detection covers three signals: ``provider == "deepseek"``, + ``"deepseek" in model``, and ``api.deepseek.com`` host match. The third + catches custom-provider setups pointing at DeepSeek. + +Refs #15250 / #15353. +""" + +from __future__ import annotations + +import pytest + +from run_agent import AIAgent + + +def _make_agent(provider: str = "", model: str = "", base_url: str = "") -> AIAgent: + agent = object.__new__(AIAgent) + agent.provider = provider + agent.model = model + agent.base_url = base_url + return agent + + +class TestNeedsDeepSeekToolReasoning: + """_needs_deepseek_tool_reasoning() recognises all three detection signals.""" + + def test_provider_deepseek(self) -> None: + agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") + assert agent._needs_deepseek_tool_reasoning() is True + + def test_model_substring(self) -> None: + # Custom provider pointing at DeepSeek with provider='custom' + agent = _make_agent(provider="custom", model="deepseek-v4-pro") + assert agent._needs_deepseek_tool_reasoning() is True + + def test_base_url_host(self) -> None: + agent = _make_agent( + provider="custom", + model="some-aliased-name", + base_url="https://api.deepseek.com/v1", + ) + assert agent._needs_deepseek_tool_reasoning() is True + + def test_provider_case_insensitive(self) -> None: + agent = _make_agent(provider="DeepSeek", model="") + assert agent._needs_deepseek_tool_reasoning() is True + + def test_non_deepseek_provider(self) -> None: + agent = _make_agent( + provider="openrouter", + model="anthropic/claude-sonnet-4.6", + base_url="https://openrouter.ai/api/v1", + ) + assert agent._needs_deepseek_tool_reasoning() is False + + def test_empty_everything(self) -> None: + agent = _make_agent() + assert agent._needs_deepseek_tool_reasoning() is False + + +class TestCopyReasoningContentForApi: + """_copy_reasoning_content_for_api pads reasoning_content for DeepSeek tool-calls.""" + + def 
test_deepseek_tool_call_poisoned_history_gets_empty_string(self) -> None: + """Already-poisoned history (no reasoning_content, no reasoning) gets ''.""" + agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") + source = { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg.get("reasoning_content") == "" + + def test_deepseek_assistant_no_tool_call_left_alone(self) -> None: + """Plain assistant turns without tool_calls don't get padded.""" + agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") + source = {"role": "assistant", "content": "hello"} + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert "reasoning_content" not in api_msg + + def test_deepseek_explicit_reasoning_content_preserved(self) -> None: + """When reasoning_content is already set, it's copied verbatim.""" + agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") + source = { + "role": "assistant", + "reasoning_content": "real chain of thought", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg["reasoning_content"] == "real chain of thought" + + def test_deepseek_reasoning_field_promoted(self) -> None: + """When only 'reasoning' is set, it gets promoted to reasoning_content.""" + agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") + source = { + "role": "assistant", + "reasoning": "thought trace", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg["reasoning_content"] == "thought trace" + + def test_kimi_path_still_works(self) -> None: + """Existing Kimi detection still pads reasoning_content.""" + agent = 
_make_agent(provider="kimi-coding", model="kimi-k2.5") + source = { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg.get("reasoning_content") == "" + + def test_kimi_moonshot_base_url(self) -> None: + agent = _make_agent( + provider="custom", model="kimi-k2", base_url="https://api.moonshot.ai/v1" + ) + source = { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg.get("reasoning_content") == "" + + def test_non_thinking_provider_not_padded(self) -> None: + """Providers that don't require the echo are untouched.""" + agent = _make_agent( + provider="openrouter", + model="anthropic/claude-sonnet-4.6", + base_url="https://openrouter.ai/api/v1", + ) + source = { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert "reasoning_content" not in api_msg + + def test_deepseek_custom_base_url(self) -> None: + """Custom provider pointing at api.deepseek.com is detected via host.""" + agent = _make_agent( + provider="custom", + model="whatever", + base_url="https://api.deepseek.com/v1", + ) + source = { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}], + } + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert api_msg.get("reasoning_content") == "" + + def test_non_assistant_role_ignored(self) -> None: + """User/tool messages are left alone.""" + agent = _make_agent(provider="deepseek", model="deepseek-v4-flash") + source = {"role": "user", "content": "hi"} + api_msg: dict = {} + agent._copy_reasoning_content_for_api(source, api_msg) + assert 
"reasoning_content" not in api_msg + + +class TestNeedsKimiToolReasoning: + """The extracted _needs_kimi_tool_reasoning() helper keeps Kimi behavior intact.""" + + @pytest.mark.parametrize( + "provider,base_url", + [ + ("kimi-coding", ""), + ("kimi-coding-cn", ""), + ("custom", "https://api.kimi.com/v1"), + ("custom", "https://api.moonshot.ai/v1"), + ("custom", "https://api.moonshot.cn/v1"), + ], + ) + def test_kimi_signals(self, provider: str, base_url: str) -> None: + agent = _make_agent(provider=provider, model="kimi-k2", base_url=base_url) + assert agent._needs_kimi_tool_reasoning() is True + + def test_non_kimi_provider(self) -> None: + agent = _make_agent( + provider="openrouter", + model="moonshotai/kimi-k2", + base_url="https://openrouter.ai/api/v1", + ) + # model name contains 'moonshot' but host is openrouter — should be False + assert agent._needs_kimi_tool_reasoning() is False diff --git a/tests/run_agent/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py index b4b3c648e..4879580be 100644 --- a/tests/run_agent/test_flush_memories_codex.py +++ b/tests/run_agent/test_flush_memories_codex.py @@ -73,9 +73,12 @@ def _chat_response_with_memory_call(): """Simulated chat completions response with a memory tool call.""" return SimpleNamespace( choices=[SimpleNamespace( + finish_reason="tool_calls", message=SimpleNamespace( content=None, tool_calls=[SimpleNamespace( + id="call_mem_0", + type="function", function=SimpleNamespace( name="memory", arguments=json.dumps({ @@ -185,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient: agent.client.chat.completions.create.assert_called_once() + def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch): + """Provider/API failures from auxiliary flush must be visible. + + Exhausted keys and rate limits are not always RuntimeError. They used + to fall into the broad outer handler and disappear into debug logs. 
+ """ + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + agent.client = MagicMock() + agent.client.chat.completions.create.return_value = _chat_response_with_memory_call() + events = [] + agent.status_callback = lambda kind, text=None: events.append((kind, text)) + + with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \ + patch("tools.memory_tool.memory_tool", return_value="Saved."): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Save this"}, + ] + agent.flush_memories(messages) + + agent.client.chat.completions.create.assert_called_once() + assert any(kind == "warn" and "Auxiliary memory flush failed" in text for kind, text in events) + def test_flush_executes_memory_tool_calls(self, monkeypatch): """Verify that memory tool calls from the flush response actually get executed.""" agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") @@ -206,6 +233,31 @@ class TestFlushMemoriesUsesAuxiliaryClient: assert call_kwargs.kwargs["target"] == "notes" assert "dark mode" in call_kwargs.kwargs["content"] + def test_flush_bridges_memory_write_metadata(self, monkeypatch): + """Flush memory writes notify external providers with flush provenance.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + agent._memory_manager = MagicMock() + agent.session_id = "sess-flush" + agent.platform = "cli" + + mock_response = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.call_llm", return_value=mock_response): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Note this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + agent._memory_manager.on_memory_write.assert_called_once() + call_kwargs = 
agent._memory_manager.on_memory_write.call_args + assert call_kwargs.args[:3] == ("add", "notes", "User prefers dark mode.") + assert call_kwargs.kwargs["metadata"]["write_origin"] == "memory_flush" + assert call_kwargs.kwargs["metadata"]["execution_context"] == "flush_memories" + assert call_kwargs.kwargs["metadata"]["session_id"] == "sess-flush" + def test_flush_strips_artifacts_from_messages(self, monkeypatch): """After flush, the flush prompt and any response should be removed from messages.""" agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") diff --git a/tests/run_agent/test_jsondecodeerror_retryable.py b/tests/run_agent/test_jsondecodeerror_retryable.py new file mode 100644 index 000000000..201521ddb --- /dev/null +++ b/tests/run_agent/test_jsondecodeerror_retryable.py @@ -0,0 +1,87 @@ +"""Regression guard for #14782: json.JSONDecodeError must not be classified +as a local validation error by the main agent loop. + +`json.JSONDecodeError` inherits from `ValueError`. The agent loop's +non-retryable classifier at run_agent.py treats `ValueError` / `TypeError` +as local programming bugs and skips retry. Without an explicit carve-out, +a transient provider hiccup (malformed response body, truncated stream, +routing-layer corruption) that surfaces as a JSONDecodeError would bypass +the retry path and fail the turn immediately. + +This test mirrors the exact predicate shape used in run_agent.py so that +any future refactor of that predicate must preserve the invariant: + + JSONDecodeError → NOT local validation error (retryable) + UnicodeEncodeError → NOT local validation error (surrogate path) + bare ValueError → IS local validation error (programming bug) + bare TypeError → IS local validation error (programming bug) +""" +from __future__ import annotations + +import json + + +def _mirror_agent_predicate(err: BaseException) -> bool: + """Exact shape of run_agent.py's is_local_validation_error check. 
+ + Kept in lock-step with the source. If you change one, change both — + or, better, refactor the check into a shared helper and have both + sites import it. + """ + return ( + isinstance(err, (ValueError, TypeError)) + and not isinstance(err, (UnicodeEncodeError, json.JSONDecodeError)) + ) + + +class TestJSONDecodeErrorIsRetryable: + + def test_json_decode_error_is_not_local_validation(self): + """Provider returning malformed JSON surfaces as JSONDecodeError — + must be treated as transient so the retry path runs.""" + try: + json.loads("{not valid json") + except json.JSONDecodeError as exc: + assert not _mirror_agent_predicate(exc), ( + "json.JSONDecodeError must be excluded from the " + "ValueError/TypeError local-validation classification." + ) + else: + raise AssertionError("json.loads should have raised") + + def test_unicode_encode_error_is_not_local_validation(self): + """Existing carve-out — surrogate sanitization handles this separately.""" + try: + "\ud800".encode("utf-8") + except UnicodeEncodeError as exc: + assert not _mirror_agent_predicate(exc) + else: + raise AssertionError("encoding lone surrogate should raise") + + def test_bare_value_error_is_local_validation(self): + """Programming bugs that raise bare ValueError must still be + classified as local validation errors (non-retryable).""" + assert _mirror_agent_predicate(ValueError("bad arg")) + + def test_bare_type_error_is_local_validation(self): + assert _mirror_agent_predicate(TypeError("wrong type")) + + +class TestAgentLoopSourceStillHasCarveOut: + """Belt-and-suspenders: the production source must actually include + the json.JSONDecodeError carve-out. Protects against an accidental + revert that happens to leave the test file intact.""" + + def test_run_agent_excludes_jsondecodeerror_from_local_validation(self): + import run_agent + import inspect + src = inspect.getsource(run_agent) + # The predicate we care about must reference json.JSONDecodeError + # in its exclusion tuple. 
We check for the specific co-occurrence + # rather than the literal string so harmless reformatting doesn't + # break us. + assert "is_local_validation_error" in src + assert "JSONDecodeError" in src, ( + "run_agent.py must carve out json.JSONDecodeError from the " + "is_local_validation_error classification — see #14782." + ) diff --git a/tests/run_agent/test_memory_sync_interrupted.py b/tests/run_agent/test_memory_sync_interrupted.py new file mode 100644 index 000000000..32313740d --- /dev/null +++ b/tests/run_agent/test_memory_sync_interrupted.py @@ -0,0 +1,189 @@ +"""Regression guard for #15218 — external memory sync must skip interrupted turns. + +Before this fix, ``run_conversation`` called +``memory_manager.sync_all(original_user_message, final_response)`` at the +end of every turn where both args were present. That gate didn't check +the ``interrupted`` flag, so an external memory backend received partial +assistant output, aborted tool chains, or mid-stream resets as durable +conversational truth. Downstream recall then treated that not-yet-real +state as if the user had seen it complete. + +The fix is ``AIAgent._sync_external_memory_for_turn`` — a small helper +that replaces the inline block and returns early when ``interrupted`` +is True (regardless of whether ``final_response`` and +``original_user_message`` happen to be populated). + +These tests exercise the helper directly on a bare ``AIAgent`` built +via ``__new__`` so the full ``run_conversation`` machinery isn't needed +— the method is pure logic and three state arguments. +""" +from unittest.mock import MagicMock + +import pytest + + +def _bare_agent(): + """Build an ``AIAgent`` with only the attributes + ``_sync_external_memory_for_turn`` touches — matches the bare-agent + pattern used across ``tests/run_agent/test_interrupt_propagation.py``. 
+ """ + from run_agent import AIAgent + + agent = AIAgent.__new__(AIAgent) + agent._memory_manager = MagicMock() + return agent + + +class TestSyncExternalMemoryForTurn: + # --- Interrupt guard (the #15218 fix) ------------------------------- + + def test_interrupted_turn_does_not_sync(self): + """The whole point of #15218: even with a final_response and a + user message, an interrupted turn must NOT reach the memory + backend.""" + agent = _bare_agent() + agent._sync_external_memory_for_turn( + original_user_message="What time is it?", + final_response="It is 3pm.", # looks complete — but partial + interrupted=True, + ) + agent._memory_manager.sync_all.assert_not_called() + agent._memory_manager.queue_prefetch_all.assert_not_called() + + def test_interrupted_turn_skips_even_when_response_is_full(self): + """A long, seemingly-complete assistant response is still + partial if ``interrupted`` is True — an interrupt may have + landed between the streamed reply and the next tool call. The + memory backend has no way to distinguish on its own, so we must + gate at the source.""" + agent = _bare_agent() + agent._sync_external_memory_for_turn( + original_user_message="Plan a trip to Lisbon", + final_response="Here's a detailed 7-day itinerary: [...]", + interrupted=True, + ) + agent._memory_manager.sync_all.assert_not_called() + + # --- Normal completed turn still syncs ------------------------------ + + def test_completed_turn_syncs_and_queues_prefetch(self): + """Regression guard for the positive path: a normal completed + turn must still trigger both ``sync_all`` AND + ``queue_prefetch_all`` — otherwise the external memory backend + never learns about anything and every user complains. 
+ """ + agent = _bare_agent() + agent._sync_external_memory_for_turn( + original_user_message="What's the weather in Paris?", + final_response="It's sunny and 22°C.", + interrupted=False, + ) + agent._memory_manager.sync_all.assert_called_once_with( + "What's the weather in Paris?", "It's sunny and 22°C.", + ) + agent._memory_manager.queue_prefetch_all.assert_called_once_with( + "What's the weather in Paris?", + ) + + # --- Edge cases (pre-existing behaviour preserved) ------------------ + + def test_no_final_response_skips(self): + """If the model produced no final_response (e.g. tool-only turn + that never resolved), we must not fabricate an empty sync.""" + agent = _bare_agent() + agent._sync_external_memory_for_turn( + original_user_message="Hello", + final_response=None, + interrupted=False, + ) + agent._memory_manager.sync_all.assert_not_called() + + def test_no_original_user_message_skips(self): + """No user-origin message means this wasn't a user turn (e.g. + a system-initiated refresh). Don't sync an assistant-only + exchange as if a user said something.""" + agent = _bare_agent() + agent._sync_external_memory_for_turn( + original_user_message=None, + final_response="Proactive notification text", + interrupted=False, + ) + agent._memory_manager.sync_all.assert_not_called() + + def test_no_memory_manager_is_a_no_op(self): + """Sessions without an external memory manager must not crash + or try to call .sync_all on None.""" + from run_agent import AIAgent + + agent = AIAgent.__new__(AIAgent) + agent._memory_manager = None + + # Must not raise. 
+ agent._sync_external_memory_for_turn( + original_user_message="hi", + final_response="hey", + interrupted=False, + ) + + # --- Exception safety ---------------------------------------------- + + def test_sync_exception_is_swallowed(self): + """External memory providers are best-effort; a misconfigured + or offline backend must not block the user from seeing their + response by propagating the exception up.""" + agent = _bare_agent() + agent._memory_manager.sync_all.side_effect = RuntimeError( + "backend unreachable" + ) + + # Must not raise. + agent._sync_external_memory_for_turn( + original_user_message="hi", + final_response="hey", + interrupted=False, + ) + # sync_all was attempted. + agent._memory_manager.sync_all.assert_called_once() + + def test_prefetch_exception_is_swallowed(self): + """Same best-effort contract applies to the prefetch step — a + failure in queue_prefetch_all must not bubble out.""" + agent = _bare_agent() + agent._memory_manager.queue_prefetch_all.side_effect = RuntimeError( + "prefetch worker dead" + ) + + # Must not raise. + agent._sync_external_memory_for_turn( + original_user_message="hi", + final_response="hey", + interrupted=False, + ) + # sync_all still happened before the prefetch blew up. 
+ agent._memory_manager.sync_all.assert_called_once() + + # --- The specific matrix the reporter asked about ------------------ + + @pytest.mark.parametrize("interrupted,final,user,expect_sync", [ + (False, "resp", "user", True), # normal completed → sync + (True, "resp", "user", False), # interrupted → skip (the fix) + (False, None, "user", False), # no response → skip + (False, "resp", None, False), # no user msg → skip + (True, None, "user", False), # interrupted + no response → skip + (True, "resp", None, False), # interrupted + no user → skip + (False, None, None, False), # nothing → skip + (True, None, None, False), # interrupted + nothing → skip + ]) + def test_sync_matrix(self, interrupted, final, user, expect_sync): + agent = _bare_agent() + agent._sync_external_memory_for_turn( + original_user_message=user, + final_response=final, + interrupted=interrupted, + ) + if expect_sync: + agent._memory_manager.sync_all.assert_called_once() + agent._memory_manager.queue_prefetch_all.assert_called_once() + else: + agent._memory_manager.sync_all.assert_not_called() + agent._memory_manager.queue_prefetch_all.assert_not_called() diff --git a/tests/run_agent/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py index 74119c30e..d082f047f 100644 --- a/tests/run_agent/test_primary_runtime_restore.py +++ b/tests/run_agent/test_primary_runtime_restore.py @@ -446,3 +446,85 @@ class TestRestoreInRunConversation: assert agent._fallback_index == 0 assert agent.provider == "custom" assert agent.base_url == "https://my-llm.example.com/v1" + + +# ============================================================================= +# Rate-limit cooldown gate +# ============================================================================= + +class TestRateLimitCooldown: + """Verify _restore_primary_runtime() respects the 60s rate-limit cooldown.""" + + def test_restore_blocked_during_cooldown(self): + """While _rate_limited_until is in the future, restore 
returns False.""" + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + agent._try_activate_fallback() + + assert agent._fallback_activated is True + + # Manually set cooldown well into the future + agent._rate_limited_until = time.monotonic() + 60 + + result = agent._restore_primary_runtime() + assert result is False + assert agent._fallback_activated is True # still on fallback + + def test_restore_allowed_after_cooldown_expires(self): + """Once the cooldown window passes, restore proceeds normally.""" + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + agent._try_activate_fallback() + + assert agent._fallback_activated is True + + # Cooldown already expired + agent._rate_limited_until = time.monotonic() - 1 + + with patch("run_agent.OpenAI", return_value=MagicMock()): + result = agent._restore_primary_runtime() + + assert result is True + assert agent._fallback_activated is False + + def test_cooldown_set_on_rate_limit_reason(self): + """_try_activate_fallback with rate_limit reason sets _rate_limited_until.""" + from run_agent import FailoverReason + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + before = time.monotonic() + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + agent._try_activate_fallback(reason=FailoverReason.rate_limit) + + assert hasattr(agent, "_rate_limited_until") + assert agent._rate_limited_until > before + 50 # ~60s from now + + def test_cooldown_not_set_when_already_on_fallback(self): + """Chain-switching while already on 
fallback must not reset cooldown.""" + from run_agent import FailoverReason + agent = _make_agent( + fallback_model=[ + {"provider": "openrouter", "model": "model-a"}, + {"provider": "anthropic", "model": "model-b"}, + ], + ) + mock_client = _mock_resolve() + with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): + # First call: leaving primary → cooldown should be set + agent._try_activate_fallback(reason=FailoverReason.rate_limit) + first_cooldown = getattr(agent, "_rate_limited_until", 0) + + # Second call: already on fallback (provider != primary) → cooldown must not advance + agent._try_activate_fallback(reason=FailoverReason.rate_limit) + second_cooldown = getattr(agent, "_rate_limited_until", 0) + + # second call should not have extended the cooldown + assert second_cooldown == first_cooldown diff --git a/tests/run_agent/test_provider_attribution_headers.py b/tests/run_agent/test_provider_attribution_headers.py index a2c543ee7..cf9d8bb8f 100644 --- a/tests/run_agent/test_provider_attribution_headers.py +++ b/tests/run_agent/test_provider_attribution_headers.py @@ -47,6 +47,24 @@ def test_ai_gateway_base_url_applies_attribution_headers(mock_openai): assert headers["User-Agent"].startswith("HermesAgent/") +@patch("run_agent.OpenAI") +def test_routermint_base_url_applies_user_agent_header(mock_openai): + mock_openai.return_value = MagicMock() + agent = AIAgent( + api_key="test-key", + base_url="https://api.routermint.com/v1", + model="test/model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + agent._apply_client_headers_for_base_url("https://api.routermint.com/v1") + + headers = agent._client_kwargs["default_headers"] + assert headers["User-Agent"].startswith("HermesAgent/") + + @patch("run_agent.OpenAI") def test_unknown_base_url_clears_default_headers(mock_openai): mock_openai.return_value = MagicMock() diff --git a/tests/run_agent/test_provider_fallback.py 
b/tests/run_agent/test_provider_fallback.py index 88982437e..44de0846f 100644 --- a/tests/run_agent/test_provider_fallback.py +++ b/tests/run_agent/test_provider_fallback.py @@ -7,7 +7,7 @@ advancement through multiple providers. from unittest.mock import MagicMock, patch -from run_agent import AIAgent +from run_agent import AIAgent, _pool_may_recover_from_rate_limit def _make_agent(fallback_model=None): @@ -181,3 +181,42 @@ class TestFallbackChainAdvancement: ): assert agent._try_activate_fallback() is True assert mock_rpc.call_args.kwargs["explicit_api_key"] == "env-secret" + + +# ── Pool-rotation vs fallback gating (#11314) ──────────────────────────── + + +def _pool(n_entries: int, has_available: bool = True): + """Make a minimal credential-pool stand-in for rotation-room checks.""" + pool = MagicMock() + pool.entries.return_value = [MagicMock() for _ in range(n_entries)] + pool.has_available.return_value = has_available + return pool + + +class TestPoolRotationRoom: + def test_none_pool_returns_false(self): + assert _pool_may_recover_from_rate_limit(None) is False + + def test_single_credential_returns_false(self): + """With one credential that just 429'd, rotation has nowhere to go. + + The pool may still report has_available() True once cooldown expires, + but retrying against the same entry will hit the same daily-quota + 429 and burn the retry budget. Must fall back. 
+ """ + assert _pool_may_recover_from_rate_limit(_pool(1)) is False + + def test_single_credential_in_cooldown_returns_false(self): + assert _pool_may_recover_from_rate_limit(_pool(1, has_available=False)) is False + + def test_two_credentials_available_returns_true(self): + """With >1 credentials and at least one available, rotate instead of fallback.""" + assert _pool_may_recover_from_rate_limit(_pool(2)) is True + + def test_multiple_credentials_all_in_cooldown_returns_false(self): + """All credentials cooling down — fall back rather than wait.""" + assert _pool_may_recover_from_rate_limit(_pool(3, has_available=False)) is False + + def test_many_credentials_available_returns_true(self): + assert _pool_may_recover_from_rate_limit(_pool(10)) is True diff --git a/tests/run_agent/test_repair_tool_call_arguments.py b/tests/run_agent/test_repair_tool_call_arguments.py index 3b8d86d14..c282397fc 100644 --- a/tests/run_agent/test_repair_tool_call_arguments.py +++ b/tests/run_agent/test_repair_tool_call_arguments.py @@ -105,3 +105,39 @@ class TestRepairToolCallArguments: result = _repair_tool_call_arguments(raw, "terminal") # Should at least be valid JSON, even if background is lost json.loads(result) + + # -- Stage 0: strict=False (literal control chars in strings) -- + # llama.cpp backends sometimes emit literal tabs/newlines inside JSON + # string values. strict=False accepts these; we re-serialise to the + # canonical wire form (#12068). 
+ + def test_literal_newline_inside_string_value(self): + raw = '{"summary": "line one\nline two"}' + result = _repair_tool_call_arguments(raw, "t") + parsed = json.loads(result) + assert parsed == {"summary": "line one\nline two"} + + def test_literal_tab_inside_string_value(self): + raw = '{"summary": "col1\tcol2"}' + result = _repair_tool_call_arguments(raw, "t") + parsed = json.loads(result) + assert parsed == {"summary": "col1\tcol2"} + + def test_literal_control_char_reserialised_to_wire_form(self): + """After repair, the output must parse under strict=True.""" + raw = '{"msg": "has\tliteral\ttabs"}' + result = _repair_tool_call_arguments(raw, "t") + # strict=True must now accept this + parsed = json.loads(result) + assert parsed["msg"] == "has\tliteral\ttabs" + + # -- Stage 4: control-char escape fallback -- + + def test_control_chars_with_trailing_comma(self): + """strict=False fails due to trailing comma, but brace-count pass + + control-char escape rescues it.""" + raw = '{"msg": "line\none",}' + result = _repair_tool_call_arguments(raw, "t") + parsed = json.loads(result) + assert "line" in parsed["msg"] + diff --git a/tests/run_agent/test_repair_tool_call_name.py b/tests/run_agent/test_repair_tool_call_name.py new file mode 100644 index 000000000..15dfcccad --- /dev/null +++ b/tests/run_agent/test_repair_tool_call_name.py @@ -0,0 +1,117 @@ +"""Tests for AIAgent._repair_tool_call — tool-name normalization. + +Regression guard for #14784: Claude-style models sometimes emit +class-like tool-call names (``TodoTool_tool``, ``Patch_tool``, +``BrowserClick_tool``, ``PatchTool``). Before the fix they returned +"Unknown tool" even though the target tool was registered under a +snake_case name. The repair routine now normalizes CamelCase, +strips trailing ``_tool`` / ``-tool`` / ``tool`` suffixes (up to +twice to handle double-tacked suffixes like ``TodoTool_tool``), and +falls back to fuzzy match. 
+""" +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + + +VALID = { + "todo", + "patch", + "browser_click", + "browser_navigate", + "web_search", + "read_file", + "write_file", + "terminal", +} + + +@pytest.fixture +def repair(): + """Return a bound _repair_tool_call built on a minimal shell agent. + + We avoid constructing a real AIAgent (which pulls in credential + resolution, session DB, etc.) because the repair routine only + reads self.valid_tool_names. A SimpleNamespace stub is enough to + bind the unbound function. + """ + from run_agent import AIAgent + stub = SimpleNamespace(valid_tool_names=VALID) + return AIAgent._repair_tool_call.__get__(stub, AIAgent) + + +class TestExistingBehaviorStillWorks: + """Pre-existing repairs must keep working (no regressions).""" + + def test_lowercase_already_matches(self, repair): + assert repair("browser_click") == "browser_click" + + def test_uppercase_simple(self, repair): + assert repair("TERMINAL") == "terminal" + + def test_dash_to_underscore(self, repair): + assert repair("web-search") == "web_search" + + def test_space_to_underscore(self, repair): + assert repair("write file") == "write_file" + + def test_fuzzy_near_miss(self, repair): + # One-character typo — fuzzy match at 0.7 cutoff + assert repair("terminall") == "terminal" + + def test_unknown_returns_none(self, repair): + assert repair("xyz_no_such_tool") is None + + +class TestClassLikeEmissions: + """Regression coverage for #14784 — CamelCase + _tool suffix variants.""" + + def test_camel_case_no_suffix(self, repair): + assert repair("BrowserClick") == "browser_click" + + def test_camel_case_with_underscore_tool_suffix(self, repair): + assert repair("BrowserClick_tool") == "browser_click" + + def test_camel_case_with_Tool_class_suffix(self, repair): + assert repair("PatchTool") == "patch" + + def test_double_tacked_class_and_snake_suffix(self, repair): + # Hardest case from the report: TodoTool_tool — strip both + # 
'_tool' (trailing) and 'Tool' (CamelCase embedded) to reach 'todo'. + assert repair("TodoTool_tool") == "todo" + + def test_simple_name_with_tool_suffix(self, repair): + assert repair("Patch_tool") == "patch" + + def test_simple_name_with_dash_tool_suffix(self, repair): + assert repair("patch-tool") == "patch" + + def test_camel_case_preserves_multi_word_match(self, repair): + assert repair("ReadFile_tool") == "read_file" + assert repair("WriteFileTool") == "write_file" + + def test_mixed_separators_and_suffix(self, repair): + assert repair("write-file_Tool") == "write_file" + + +class TestEdgeCases: + """Edge inputs that must not crash or produce surprising results.""" + + def test_empty_string(self, repair): + assert repair("") is None + + def test_only_tool_suffix(self, repair): + # '_tool' by itself is not a valid tool name — must not match + # anything plausible. + assert repair("_tool") is None + + def test_none_passed_as_name(self, repair): + # Defensive: real callers always pass str, but guard against + # a bug upstream that sends None. + assert repair(None) is None + + def test_very_long_name_does_not_match_by_accident(self, repair): + # Fuzzy match should not claim a tool for something obviously unrelated. 
+ assert repair("ThisIsNotRemotelyARealToolName_tool") is None diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index d8f33f67c..9c54daffe 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -685,6 +685,66 @@ class TestInit: assert a.api_mode == "anthropic_messages" assert a._use_prompt_caching is True + def test_prompt_caching_cache_ttl_defaults_without_config(self): + """cache_ttl stays 5m when prompt_caching is absent from config.""" + with ( + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + patch("hermes_cli.config.load_config", return_value={}), + ): + a = AIAgent( + api_key="test-k...7890", + model="anthropic/claude-sonnet-4-20250514", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + assert a._cache_ttl == "5m" + + def test_prompt_caching_cache_ttl_custom_1h(self): + """prompt_caching.cache_ttl 1h is applied when present in config.""" + with ( + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + patch( + "hermes_cli.config.load_config", + return_value={"prompt_caching": {"cache_ttl": "1h"}}, + ), + ): + a = AIAgent( + api_key="test-k...7890", + model="anthropic/claude-sonnet-4-20250514", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + assert a._cache_ttl == "1h" + + def test_prompt_caching_cache_ttl_invalid_falls_back(self): + """Non-Anthropic TTL values keep default 5m without raising.""" + with ( + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + patch( + "hermes_cli.config.load_config", + return_value={"prompt_caching": {"cache_ttl": "30m"}}, + 
), + ): + a = AIAgent( + api_key="test-k...7890", + model="anthropic/claude-sonnet-4-20250514", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + assert a._cache_ttl == "5m" + def test_valid_tool_names_populated(self): """valid_tool_names should contain names from loaded tools.""" tools = _make_tool_defs("web_search", "terminal") diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 16ab3f02d..913a041fb 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -578,6 +578,36 @@ def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): assert result["final_response"] == "Recovered after refresh" +def test_run_conversation_copilot_refreshes_after_401_and_retries(monkeypatch): + agent = _build_copilot_agent(monkeypatch) + calls = {"api": 0, "refresh": 0} + + class _UnauthorizedError(RuntimeError): + def __init__(self): + super().__init__("Error code: 401 - unauthorized") + self.status_code = 401 + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + raise _UnauthorizedError() + return _codex_message_response("Recovered after copilot refresh") + + def _fake_refresh(): + calls["refresh"] += 1 + return True + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + monkeypatch.setattr(agent, "_try_refresh_copilot_client_credentials", _fake_refresh) + + result = agent.run_conversation("Say OK") + + assert calls["api"] == 2 + assert calls["refresh"] == 1 + assert result["completed"] is True + assert result["final_response"] == "Recovered after copilot refresh" + + def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): agent = _build_agent(monkeypatch) closed = {"value": False} @@ -613,6 +643,62 @@ def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch): assert 
isinstance(agent.client, _RebuiltClient) +def test_try_refresh_copilot_client_credentials_rebuilds_client(monkeypatch): + agent = _build_copilot_agent(monkeypatch) + closed = {"value": False} + rebuilt = {"kwargs": None} + + class _ExistingClient: + def close(self): + closed["value"] = True + + class _RebuiltClient: + pass + + def _fake_openai(**kwargs): + rebuilt["kwargs"] = kwargs + return _RebuiltClient() + + monkeypatch.setattr( + "hermes_cli.copilot_auth.resolve_copilot_token", + lambda: ("gho_new_token", "GH_TOKEN"), + ) + monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) + + agent.client = _ExistingClient() + ok = agent._try_refresh_copilot_client_credentials() + + assert ok is True + assert closed["value"] is True + assert rebuilt["kwargs"]["api_key"] == "gho_new_token" + assert rebuilt["kwargs"]["base_url"] == "https://api.githubcopilot.com" + assert rebuilt["kwargs"]["default_headers"]["Copilot-Integration-Id"] == "vscode-chat" + assert isinstance(agent.client, _RebuiltClient) + + +def test_try_refresh_copilot_client_credentials_rebuilds_even_if_token_unchanged(monkeypatch): + agent = _build_copilot_agent(monkeypatch) + rebuilt = {"count": 0} + + class _RebuiltClient: + pass + + def _fake_openai(**kwargs): + rebuilt["count"] += 1 + return _RebuiltClient() + + monkeypatch.setattr( + "hermes_cli.copilot_auth.resolve_copilot_token", + lambda: ("gh-token", "gh auth token"), + ) + monkeypatch.setattr(run_agent, "OpenAI", _fake_openai) + + ok = agent._try_refresh_copilot_client_credentials() + + assert ok is True + assert rebuilt["count"] == 1 + + def test_run_conversation_codex_tool_round_trip(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] @@ -857,6 +943,113 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo assert "inspect the repository" in (assistant_message.content or "") +def 
test_normalize_codex_response_detects_leaked_tool_call_text(monkeypatch): + """Harmony-style `to=functions.foo` leaked into assistant content with no + structured function_call items must be treated as incomplete so the + continuation path can re-elicit a proper tool call. This is the + Taiwan-embassy-email (Discord bug report) failure mode: child agent + produces a confident-looking summary, tool_trace is empty because no + tools actually ran, parent can't audit the claim. + """ + agent = _build_agent(monkeypatch) + from agent.codex_responses_adapter import _normalize_codex_response + + leaked_content = ( + "I'll check the official page directly.\n" + "to=functions.exec_command {\"cmd\": \"curl https://example.test\"}\n" + "assistant to=functions.exec_command {\"stdout\": \"mailto:foo@example.test\"}\n" + "Extracted: foo@example.test" + ) + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace(type="output_text", text=leaked_content)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5.4", + ) + + assistant_message, finish_reason = _normalize_codex_response(response) + + assert finish_reason == "incomplete" + # Content is scrubbed so the parent never surfaces the leaked text as a + # summary. tool_calls stays empty because no structured function_call + # item existed. + assert (assistant_message.content or "") == "" + assert assistant_message.tool_calls == [] + + +def test_normalize_codex_response_ignores_tool_call_text_when_real_tool_call_present(monkeypatch): + """If the model emitted BOTH a structured function_call AND some text that + happens to contain `to=functions.*` (unlikely but possible), trust the + structured call — don't wipe content that came alongside a real tool use. 
+ """ + agent = _build_agent(monkeypatch) + from agent.codex_responses_adapter import _normalize_codex_response + + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace( + type="output_text", + text="Running the command via to=functions.exec_command now.", + )], + ), + SimpleNamespace( + type="function_call", + id="fc_1", + call_id="call_1", + name="terminal", + arguments="{}", + ), + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5.4", + ) + + assistant_message, finish_reason = _normalize_codex_response(response) + + assert finish_reason == "tool_calls" + assert assistant_message.tool_calls # real call preserved + assert "Running the command" in (assistant_message.content or "") + + +def test_normalize_codex_response_no_leak_passes_through(monkeypatch): + """Sanity: normal assistant content that doesn't contain the leak pattern + is returned verbatim with finish_reason=stop.""" + agent = _build_agent(monkeypatch) + from agent.codex_responses_adapter import _normalize_codex_response + + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace( + type="output_text", + text="Here is the answer with no leak.", + )], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5.4", + ) + + assistant_message, finish_reason = _normalize_codex_response(response) + + assert finish_reason == "stop" + assert assistant_message.content == "Here is the answer with no leak." 
+ assert assistant_message.tool_calls == [] + + def test_interim_commentary_is_not_marked_already_streamed_without_callbacks(monkeypatch): agent = _build_agent(monkeypatch) observed = {} diff --git a/tests/run_agent/test_streaming_tool_call_repair.py b/tests/run_agent/test_streaming_tool_call_repair.py new file mode 100644 index 000000000..dadfaec33 --- /dev/null +++ b/tests/run_agent/test_streaming_tool_call_repair.py @@ -0,0 +1,116 @@ +"""Tests for tool call argument repair in the streaming assembly path. + +The streaming path (run_agent._call_chat_completions) assembles tool call +deltas into full arguments. When a model truncates or malforms the JSON +(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON +straight through — setting has_truncated_tool_args but NOT repairing it. +That triggered the truncation handler to kill the session with /new required. + +The fix: repair arguments in the streaming assembly path using +_repair_tool_call_arguments() so repairable malformations (trailing commas, +unclosed brackets, Python None) don't kill the session. +""" + +import json +import pytest + +from run_agent import _repair_tool_call_arguments + + +class TestStreamingAssemblyRepair: + """Verify that _repair_tool_call_arguments is applied to streaming tool + call arguments before they're assembled into mock_tool_calls. + + These tests verify the REPAIR FUNCTION itself works correctly for the + cases that arise during streaming assembly. Integration tests that + exercise the full streaming path are in test_agent_loop_tool_calling.py. 
+ """ + + # -- Truncation cases (most common streaming failure) -- + + def test_truncated_object_no_close_brace(self): + """Model stops mid-JSON, common with output length limits.""" + raw = '{"command": "ls -la", "timeout": 30' + result = _repair_tool_call_arguments(raw, "terminal") + parsed = json.loads(result) + assert parsed["command"] == "ls -la" + assert parsed["timeout"] == 30 + + def test_truncated_nested_object(self): + """Model truncates inside a nested structure.""" + raw = '{"path": "/tmp/foo", "content": "hello"' + result = _repair_tool_call_arguments(raw, "write_file") + parsed = json.loads(result) + assert parsed["path"] == "/tmp/foo" + + def test_truncated_mid_value(self): + """Model cuts off mid-string-value.""" + raw = '{"command": "git clone ht' + result = _repair_tool_call_arguments(raw, "terminal") + # Should produce valid JSON (even if command value is lost) + json.loads(result) + + # -- Trailing comma cases (Ollama/GLM common) -- + + def test_trailing_comma_before_close_brace(self): + raw = '{"path": "/tmp", "content": "x",}' + result = _repair_tool_call_arguments(raw, "write_file") + assert json.loads(result) == {"path": "/tmp", "content": "x"} + + def test_trailing_comma_in_list(self): + raw = '{"items": [1, 2, 3,]}' + result = _repair_tool_call_arguments(raw, "test") + assert json.loads(result) == {"items": [1, 2, 3]} + + # -- Python None from model output -- + + def test_python_none_literal(self): + raw = "None" + result = _repair_tool_call_arguments(raw, "test") + assert result == "{}" + + # -- Empty arguments (some models emit empty string) -- + + def test_empty_string(self): + assert _repair_tool_call_arguments("", "test") == "{}" + + def test_whitespace_only(self): + assert _repair_tool_call_arguments(" \n ", "test") == "{}" + + # -- Already-valid JSON passes through unchanged -- + + def test_valid_json_passthrough(self): + raw = '{"path": "/tmp/foo", "content": "hello"}' + result = _repair_tool_call_arguments(raw, "write_file") + 
assert json.loads(result) == {"path": "/tmp/foo", "content": "hello"} + + # -- Extra closing brackets (rare but happens) -- + + def test_extra_closing_brace(self): + raw = '{"key": "value"}}' + result = _repair_tool_call_arguments(raw, "test") + assert json.loads(result) == {"key": "value"} + + # -- Real-world GLM-5.1 truncation pattern -- + + def test_glm_truncation_pattern(self): + """GLM-5.1 via Ollama commonly truncates like this. + + This pattern has an unclosed colon at the end ("background":) which + makes it unrepairable — the last-resort empty object {} is the + safest option. The important thing is that repairable patterns + (trailing comma, unclosed brace WITHOUT hanging colon) DO get fixed. + """ + raw = '{"command": "ls -la /tmp", "timeout": 30, "background":' + result = _repair_tool_call_arguments(raw, "terminal") + # Unrepairable — returns empty object (hanging colon can't be fixed) + parsed = json.loads(result) + assert parsed == {} + + def test_glm_truncation_repairable(self): + """GLM-5.1 truncation pattern that IS repairable.""" + raw = '{"command": "ls -la /tmp", "timeout": 30' + result = _repair_tool_call_arguments(raw, "terminal") + parsed = json.loads(result) + assert parsed["command"] == "ls -la /tmp" + assert parsed["timeout"] == 30 \ No newline at end of file diff --git a/tests/run_agent/test_switch_model_fallback_prune.py b/tests/run_agent/test_switch_model_fallback_prune.py index 99af3579f..f0600c7ee 100644 --- a/tests/run_agent/test_switch_model_fallback_prune.py +++ b/tests/run_agent/test_switch_model_fallback_prune.py @@ -78,6 +78,17 @@ def test_switch_with_empty_chain_stays_empty(): assert agent._fallback_model is None +def test_switch_initializes_missing_fallback_attrs(): + agent = _make_agent([]) + del agent._fallback_chain + del agent._fallback_model + + _switch_to_anthropic(agent) + + assert agent._fallback_chain == [] + assert agent._fallback_model is None + + def test_switch_within_same_provider_preserves_chain(): chain = 
[{"provider": "openrouter", "model": "x-ai/grok-4"}] agent = _make_agent(chain) diff --git a/tests/run_agent/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py index cf1876d4e..bc84b2bf6 100644 --- a/tests/run_agent/test_tool_arg_coercion.py +++ b/tests/run_agent/test_tool_arg_coercion.py @@ -134,6 +134,31 @@ class TestCoerceValue: """A non-numeric string in [number, string] should stay a string.""" assert _coerce_value("hello", ["number", "string"]) == "hello" + def test_array_type_parsed_from_json_string(self): + """Stringified JSON arrays are parsed into native lists.""" + assert _coerce_value('["a", "b"]', "array") == ["a", "b"] + assert _coerce_value("[1, 2, 3]", "array") == [1, 2, 3] + + def test_object_type_parsed_from_json_string(self): + """Stringified JSON objects are parsed into native dicts.""" + assert _coerce_value('{"k": "v"}', "object") == {"k": "v"} + assert _coerce_value('{"n": 1}', "object") == {"n": 1} + + def test_array_invalid_json_preserved(self): + """Unparseable strings are returned unchanged.""" + assert _coerce_value("not-json", "array") == "not-json" + + def test_object_invalid_json_preserved(self): + assert _coerce_value("not-json", "object") == "not-json" + + def test_array_type_wrong_shape_preserved(self): + """A JSON object passed for an 'array' slot is preserved as a string.""" + assert _coerce_value('{"k": "v"}', "array") == '{"k": "v"}' + + def test_object_type_wrong_shape_preserved(self): + """A JSON array passed for an 'object' slot is preserved as a string.""" + assert _coerce_value('["a"]', "object") == '["a"]' + # ── Full coerce_tool_args with registry ─────────────────────────────────── @@ -212,6 +237,32 @@ class TestCoerceToolArgs: assert result["items"] == [1, 2, 3] assert result["config"] == {"key": "val"} + def test_coerces_stringified_array_arg(self): + """Regression for #3947 — MCP servers using z.array() expect lists, not strings.""" + schema = self._mock_schema({ + "messageIds": {"type": "array", 
"items": {"type": "string"}}, + }) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"messageIds": '["abc", "def"]'} + result = coerce_tool_args("test_tool", args) + assert result["messageIds"] == ["abc", "def"] + + def test_coerces_stringified_object_arg(self): + """Stringified JSON objects get parsed into dicts.""" + schema = self._mock_schema({"config": {"type": "object"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"config": '{"max": 50}'} + result = coerce_tool_args("test_tool", args) + assert result["config"] == {"max": 50} + + def test_invalid_json_array_preserved_as_string(self): + """If the string isn't valid JSON, pass it through — let the tool decide.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": "not-json"} + result = coerce_tool_args("test_tool", args) + assert result["items"] == "not-json" + def test_extra_args_without_schema_left_alone(self): """Args not in the schema properties are not touched.""" schema = self._mock_schema({"limit": {"type": "integer"}}) diff --git a/tests/run_agent/test_tool_call_args_sanitizer.py b/tests/run_agent/test_tool_call_args_sanitizer.py new file mode 100644 index 000000000..79f4d82c5 --- /dev/null +++ b/tests/run_agent/test_tool_call_args_sanitizer.py @@ -0,0 +1,157 @@ +"""Tests for AIAgent._sanitize_tool_call_arguments.""" + +import copy +import logging + +from run_agent import AIAgent + + +_MISSING = object() + + +def _tool_call(call_id="call_1", name="read_file", arguments='{"path":"/tmp/foo"}'): + function = {"name": name} + if arguments is not _MISSING: + function["arguments"] = arguments + return { + "id": call_id, + "type": "function", + "function": function, + } + + +def _assistant_message(*tool_calls): + return { + "role": "assistant", + "content": "tooling", + "tool_calls": list(tool_calls), + } + + +def _tool_message(call_id="call_1", 
content="ok"): + return { + "role": "tool", + "tool_call_id": call_id, + "content": content, + } + + +def test_valid_arguments_unchanged(): + messages = [ + {"role": "user", "content": "hello"}, + _assistant_message(_tool_call(arguments='{"path":"/tmp/foo"}')), + _tool_message(content="done"), + ] + original = copy.deepcopy(messages) + + repaired = AIAgent._sanitize_tool_call_arguments(messages) + + assert repaired == 0 + assert messages == original + + +def test_truncated_arguments_replaced_with_empty_object(caplog): + messages = [ + _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')), + ] + + with caplog.at_level(logging.WARNING, logger="run_agent"): + repaired = AIAgent._sanitize_tool_call_arguments( + messages, + logger=logging.getLogger("run_agent"), + session_id="session-123", + ) + + assert repaired == 1 + assert messages[0]["tool_calls"][0]["function"]["arguments"] == "{}" + assert any( + "session=session-123" in record.message + and "tool_call_id=call_1" in record.message + for record in caplog.records + ) + + +def test_marker_appended_to_existing_tool_message(): + marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER + messages = [ + _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')), + _tool_message(content="existing tool output"), + ] + + repaired = AIAgent._sanitize_tool_call_arguments(messages) + + assert repaired == 1 + assert messages[1]["content"] == f"{marker}\nexisting tool output" + + +def test_marker_message_inserted_when_missing(): + marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER + messages = [ + _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')), + {"role": "user", "content": "next turn"}, + ] + + repaired = AIAgent._sanitize_tool_call_arguments(messages) + + assert repaired == 1 + assert messages[1] == { + "role": "tool", + "tool_call_id": "call_1", + "content": marker, + } + assert messages[2] == {"role": "user", "content": "next turn"} + + +def 
test_multiple_corrupted_tool_calls_in_one_message(): + marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER + messages = [ + _assistant_message( + _tool_call(call_id="call_1", arguments='{"path": "/tmp/foo'), + _tool_call(call_id="call_2", arguments='{"path":"/tmp/bar"}'), + _tool_call(call_id="call_3", arguments='{"mode":"tail"'), + ), + ] + + repaired = AIAgent._sanitize_tool_call_arguments(messages) + + assert repaired == 2 + assert messages[0]["tool_calls"][0]["function"]["arguments"] == "{}" + assert messages[0]["tool_calls"][1]["function"]["arguments"] == '{"path":"/tmp/bar"}' + assert messages[0]["tool_calls"][2]["function"]["arguments"] == "{}" + assert messages[1]["tool_call_id"] == "call_1" + assert messages[1]["content"] == marker + assert messages[2]["tool_call_id"] == "call_3" + assert messages[2]["content"] == marker + + +def test_empty_string_arguments_treated_as_empty_object(caplog): + messages = [ + _assistant_message(_tool_call(arguments="")), + ] + + with caplog.at_level(logging.WARNING, logger="run_agent"): + repaired = AIAgent._sanitize_tool_call_arguments( + messages, + logger=logging.getLogger("run_agent"), + session_id="session-123", + ) + + assert repaired == 0 + assert messages[0]["tool_calls"][0]["function"]["arguments"] == "{}" + assert caplog.records == [] + + +def test_non_assistant_messages_ignored(): + messages = [ + {"role": "user", "content": "hello", "tool_calls": [_tool_call(arguments='{"bad":')]}, + {"role": "tool", "tool_call_id": "call_1", "content": "ok"}, + {"role": "system", "content": "sys", "tool_calls": [_tool_call(arguments='{"bad":')]}, + None, + "not a dict", + ] + original = copy.deepcopy(messages) + + repaired = AIAgent._sanitize_tool_call_arguments(messages) + + assert repaired == 0 + assert messages == original diff --git a/tests/skills/test_google_oauth_setup.py b/tests/skills/test_google_oauth_setup.py index 445ed82de..0e1fe6d7f 100644 --- a/tests/skills/test_google_oauth_setup.py +++ 
b/tests/skills/test_google_oauth_setup.py @@ -240,3 +240,69 @@ class TestExchangeAuthCode: assert setup_module.TOKEN_PATH.exists() # Pending auth is cleaned up assert not setup_module.PENDING_AUTH_PATH.exists() + + +class TestHermesConstantsFallback: + """Tests for _hermes_home.py fallback when hermes_constants is unavailable.""" + + HELPER_PATH = ( + Path(__file__).resolve().parents[2] + / "skills/productivity/google-workspace/scripts/_hermes_home.py" + ) + + def _load_helper(self, monkeypatch): + """Load _hermes_home.py with hermes_constants blocked.""" + monkeypatch.setitem(sys.modules, "hermes_constants", None) + spec = importlib.util.spec_from_file_location("_hermes_home_test", self.HELPER_PATH) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module + + def test_fallback_uses_hermes_home_env_var(self, monkeypatch, tmp_path): + """When hermes_constants is missing, HERMES_HOME comes from env var.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "custom-hermes")) + module = self._load_helper(monkeypatch) + assert module.get_hermes_home() == tmp_path / "custom-hermes" + + def test_fallback_defaults_to_dot_hermes(self, monkeypatch): + """When hermes_constants is missing and HERMES_HOME unset, default to ~/.hermes.""" + monkeypatch.delenv("HERMES_HOME", raising=False) + module = self._load_helper(monkeypatch) + assert module.get_hermes_home() == Path.home() / ".hermes" + + def test_fallback_ignores_empty_hermes_home(self, monkeypatch): + """Empty/whitespace HERMES_HOME is treated as unset.""" + monkeypatch.setenv("HERMES_HOME", " ") + module = self._load_helper(monkeypatch) + assert module.get_hermes_home() == Path.home() / ".hermes" + + def test_fallback_display_hermes_home_shortens_path(self, monkeypatch): + """Fallback display_hermes_home() uses ~/ shorthand like the real one.""" + monkeypatch.delenv("HERMES_HOME", raising=False) + module = self._load_helper(monkeypatch) + assert 
module.display_hermes_home() == "~/.hermes" + + def test_fallback_display_hermes_home_profile_path(self, monkeypatch): + """Fallback display_hermes_home() handles profile paths under ~/.""" + monkeypatch.setenv("HERMES_HOME", str(Path.home() / ".hermes/profiles/coder")) + module = self._load_helper(monkeypatch) + assert module.display_hermes_home() == "~/.hermes/profiles/coder" + + def test_fallback_display_hermes_home_custom_path(self, monkeypatch): + """Fallback display_hermes_home() returns full path for non-home locations.""" + monkeypatch.setenv("HERMES_HOME", "/opt/hermes-custom") + module = self._load_helper(monkeypatch) + assert module.display_hermes_home() == "/opt/hermes-custom" + + def test_delegates_to_hermes_constants_when_available(self): + """When hermes_constants IS importable, _hermes_home delegates to it.""" + spec = importlib.util.spec_from_file_location( + "_hermes_home_happy", self.HELPER_PATH + ) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + import hermes_constants + assert module.get_hermes_home is hermes_constants.get_hermes_home + assert module.display_hermes_home is hermes_constants.display_hermes_home diff --git a/tests/test_batch_runner_checkpoint.py b/tests/test_batch_runner_checkpoint.py index 440e421cc..526c09556 100644 --- a/tests/test_batch_runner_checkpoint.py +++ b/tests/test_batch_runner_checkpoint.py @@ -186,3 +186,67 @@ class TestBatchWorkerResumeBehavior: assert result["discarded_no_reasoning"] == 1 assert result["completed_prompts"] == [0] assert not batch_file.exists() or batch_file.read_text() == "" + + +class TestFinalCheckpointNoDuplicates: + """Regression: the final checkpoint must not contain duplicate prompt + indices. 
+ + Before PR #15161, `run()` populated `completed_prompts_set` incrementally + as each batch completed, then at the end built `all_completed_prompts = + list(completed_prompts_set)` AND extended it again with every batch's + `completed_prompts` — double-counting every index. + """ + + def _simulate_final_aggregation_fixed(self, batch_results): + """Mirror the fixed code path in batch_runner.run().""" + completed_prompts_set = set() + for result in batch_results: + completed_prompts_set.update(result.get("completed_prompts", [])) + # This is what the fixed code now writes to the checkpoint: + return sorted(completed_prompts_set) + + def test_no_duplicates_in_final_list(self): + batch_results = [ + {"completed_prompts": [0, 1, 2]}, + {"completed_prompts": [3, 4]}, + {"completed_prompts": [5]}, + ] + final = self._simulate_final_aggregation_fixed(batch_results) + assert final == [0, 1, 2, 3, 4, 5] + assert len(final) == len(set(final)) # no duplicates + + def test_persisted_checkpoint_has_unique_prompts(self, runner): + """Write what run()'s fixed aggregation produces to disk; the file + must load back with no duplicate indices.""" + batch_results = [ + {"completed_prompts": [0, 1]}, + {"completed_prompts": [2, 3]}, + ] + final = self._simulate_final_aggregation_fixed(batch_results) + runner._save_checkpoint({ + "run_name": runner.run_name, + "completed_prompts": final, + "batch_stats": {}, + }) + loaded = json.loads(runner.checkpoint_file.read_text()) + cp = loaded["completed_prompts"] + assert cp == sorted(set(cp)) + assert len(cp) == 4 + + def test_old_buggy_pattern_would_have_duplicates(self): + """Document the bug this PR fixes: the old code shape produced + duplicates. 
Kept as a sanity anchor so a future refactor that + re-introduces the pattern is immediately visible.""" + completed_prompts_set = set() + results = [] + for batch in ({"completed_prompts": [0, 1, 2]}, + {"completed_prompts": [3, 4]}): + completed_prompts_set.update(batch["completed_prompts"]) + results.append(batch) + # Buggy aggregation (pre-fix): + buggy = list(completed_prompts_set) + for br in results: + buggy.extend(br.get("completed_prompts", [])) + # Every index appears twice + assert len(buggy) == 2 * len(set(buggy)) diff --git a/tests/test_model_tools.py b/tests/test_model_tools.py index 12654e350..9c2764daf 100644 --- a/tests/test_model_tools.py +++ b/tests/test_model_tools.py @@ -231,3 +231,46 @@ class TestBackwardCompat: def test_tool_to_toolset_map(self): assert isinstance(TOOL_TO_TOOLSET_MAP, dict) assert len(TOOL_TO_TOOLSET_MAP) > 0 + + +# ========================================================================= +# _coerce_number — inf / nan must fall through to the original string +# (regression: fix: eliminate duplicate checkpoint entries and JSON-unsafe coercion) +# ========================================================================= + +class TestCoerceNumberInfNan: + """_coerce_number must honor its documented contract ("Returns original + string on failure") for inf/nan inputs, because float('inf') and + float('nan') are not JSON-compliant under strict serialization.""" + + def test_inf_returns_original_string(self): + from model_tools import _coerce_number + assert _coerce_number("inf") == "inf" + + def test_negative_inf_returns_original_string(self): + from model_tools import _coerce_number + assert _coerce_number("-inf") == "-inf" + + def test_nan_returns_original_string(self): + from model_tools import _coerce_number + assert _coerce_number("nan") == "nan" + + def test_infinity_spelling_returns_original_string(self): + from model_tools import _coerce_number + # Python's float() parses "Infinity" too — still not JSON-safe. 
+ assert _coerce_number("Infinity") == "Infinity" + + def test_coerced_result_is_strict_json_safe(self): + """Whatever _coerce_number returns for inf/nan must round-trip + through strict (allow_nan=False) json.dumps without raising.""" + from model_tools import _coerce_number + for s in ("inf", "-inf", "nan", "Infinity"): + result = _coerce_number(s) + json.dumps({"x": result}, allow_nan=False) # must not raise + + def test_normal_numbers_still_coerce(self): + """Guard against over-correction — real numbers still coerce.""" + from model_tools import _coerce_number + assert _coerce_number("42") == 42 + assert _coerce_number("3.14") == 3.14 + assert _coerce_number("1e3") == 1000 diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 2c50065b2..107d23897 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -160,6 +160,71 @@ def test_config_set_statusbar_survives_non_dict_display(tmp_path, monkeypatch): assert saved["display"]["tui_statusbar"] == "bottom" +def test_config_set_section_writes_per_section_override(tmp_path, monkeypatch): + import yaml + + cfg_path = tmp_path / "config.yaml" + monkeypatch.setattr(server, "_hermes_home", tmp_path) + + resp = server.handle_request( + { + "id": "1", + "method": "config.set", + "params": {"key": "details_mode.activity", "value": "hidden"}, + } + ) + + assert resp["result"] == {"key": "details_mode.activity", "value": "hidden"} + saved = yaml.safe_load(cfg_path.read_text()) + assert saved["display"]["sections"] == {"activity": "hidden"} + + +def test_config_set_section_clears_override_on_empty_value(tmp_path, monkeypatch): + import yaml + + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text( + yaml.safe_dump( + {"display": {"sections": {"activity": "hidden", "tools": "expanded"}}} + ) + ) + monkeypatch.setattr(server, "_hermes_home", tmp_path) + + resp = server.handle_request( + { + "id": "1", + "method": "config.set", + "params": {"key": 
"details_mode.activity", "value": ""}, + } + ) + + assert resp["result"] == {"key": "details_mode.activity", "value": ""} + saved = yaml.safe_load(cfg_path.read_text()) + assert saved["display"]["sections"] == {"tools": "expanded"} + + +def test_config_set_section_rejects_unknown_section_or_mode(tmp_path, monkeypatch): + monkeypatch.setattr(server, "_hermes_home", tmp_path) + + bad_section = server.handle_request( + { + "id": "1", + "method": "config.set", + "params": {"key": "details_mode.bogus", "value": "hidden"}, + } + ) + assert bad_section["error"]["code"] == 4002 + + bad_mode = server.handle_request( + { + "id": "2", + "method": "config.set", + "params": {"key": "details_mode.tools", "value": "maximised"}, + } + ) + assert bad_mode["error"]["code"] == 4002 + + def test_enable_gateway_prompts_sets_gateway_env(monkeypatch): monkeypatch.delenv("HERMES_EXEC_ASK", raising=False) monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False) diff --git a/tests/tools/test_base_environment.py b/tests/tools/test_base_environment.py index 913ad0387..28ce08e84 100644 --- a/tests/tools/test_base_environment.py +++ b/tests/tools/test_base_environment.py @@ -60,6 +60,22 @@ class TestWrapCommand: assert "cd ~" in wrapped assert "cd '~'" not in wrapped + def test_tilde_subpath_with_spaces_uses_home_and_quotes_suffix(self): + env = _TestableEnv() + env._snapshot_ready = True + wrapped = env._wrap_command("ls", "~/my repo") + + assert "cd $HOME/'my repo'" in wrapped + assert "cd ~/my repo" not in wrapped + + def test_tilde_slash_maps_to_home(self): + env = _TestableEnv() + env._snapshot_ready = True + wrapped = env._wrap_command("ls", "~/") + + assert "cd $HOME" in wrapped + assert "cd ~/" not in wrapped + def test_cd_failure_exit_126(self): env = _TestableEnv() env._snapshot_ready = True diff --git a/tests/tools/test_browser_camofox.py b/tests/tools/test_browser_camofox.py index 8cf24bdaf..cf1c32592 100644 --- a/tests/tools/test_browser_camofox.py +++ 
b/tests/tools/test_browser_camofox.py @@ -283,7 +283,7 @@ class TestCamofoxVisionConfig: with ( patch("tools.browser_camofox.open", create=True) as mock_open, patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm, - patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": {"temperature": 1, "timeout": 45}}}), + patch("tools.browser_camofox.load_config", return_value={"auxiliary": {"vision": {"temperature": 1, "timeout": 45}}}), ): mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng" result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t11")) @@ -315,7 +315,7 @@ class TestCamofoxVisionConfig: with ( patch("tools.browser_camofox.open", create=True) as mock_open, patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm, - patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": {}}}), + patch("tools.browser_camofox.load_config", return_value={"auxiliary": {"vision": {}}}), ): mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng" result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t12")) diff --git a/tests/tools/test_browser_cdp_tool.py b/tests/tools/test_browser_cdp_tool.py index e7e187ceb..a9749685b 100644 --- a/tests/tools/test_browser_cdp_tool.py +++ b/tests/tools/test_browser_cdp_tool.py @@ -351,7 +351,10 @@ def test_registered_in_browser_toolset(): entry = registry.get_entry("browser_cdp") assert entry is not None - assert entry.toolset == "browser" + # browser_cdp lives in its own toolset so its stricter check_fn + # (requires reachable CDP endpoint) doesn't gate the whole browser + # toolset — see commit 96b0f3700. 
+ assert entry.toolset == "browser-cdp" assert entry.schema["name"] == "browser_cdp" assert entry.schema["parameters"]["required"] == ["method"] assert "Chrome DevTools Protocol" in entry.schema["description"] diff --git a/tests/tools/test_browser_supervisor.py b/tests/tools/test_browser_supervisor.py new file mode 100644 index 000000000..e332aec43 --- /dev/null +++ b/tests/tools/test_browser_supervisor.py @@ -0,0 +1,563 @@ +"""Integration tests for tools.browser_supervisor. + +Exercises the supervisor end-to-end against a real local Chrome +(``--remote-debugging-port``). Skipped when Chrome is not installed +— these are the tests that actually verify the CDP wire protocol +works, since mock-CDP unit tests can only prove the happy paths we +thought to model. + +Run manually: + scripts/run_tests.sh tests/tools/test_browser_supervisor.py + +Automated: skipped in CI unless ``HERMES_E2E_BROWSER=1`` is set. +""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import os +import shutil +import subprocess +import tempfile +import time + +import pytest + + +pytestmark = pytest.mark.skipif( + not shutil.which("google-chrome") and not shutil.which("chromium"), + reason="Chrome/Chromium not installed", +) + + +def _find_chrome() -> str: + for candidate in ("google-chrome", "chromium", "chromium-browser"): + path = shutil.which(candidate) + if path: + return path + pytest.skip("no Chrome binary found") + + +@pytest.fixture +def chrome_cdp(worker_id): + """Start a headless Chrome with --remote-debugging-port, yield its WS URL. + + Uses a unique port per xdist worker to avoid cross-worker collisions. + Always launches with ``--site-per-process`` so cross-origin iframes + become real OOPIFs (needed by the iframe interaction tests). + """ + import socket + + # xdist worker_id is "master" in single-process mode or "gw0".."gwN" otherwise. 
+ if worker_id == "master": + port_offset = 0 + else: + port_offset = int(worker_id.lstrip("gw")) + port = 9225 + port_offset + profile = tempfile.mkdtemp(prefix="hermes-supervisor-test-") + proc = subprocess.Popen( + [ + _find_chrome(), + f"--remote-debugging-port={port}", + f"--user-data-dir={profile}", + "--no-first-run", + "--no-default-browser-check", + "--headless=new", + "--disable-gpu", + "--site-per-process", # force OOPIFs for cross-origin iframes + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + ws_url = None + deadline = time.monotonic() + 15 + while time.monotonic() < deadline: + try: + import urllib.request + with urllib.request.urlopen( + f"http://127.0.0.1:{port}/json/version", timeout=1 + ) as r: + info = json.loads(r.read().decode()) + ws_url = info["webSocketDebuggerUrl"] + break + except Exception: + time.sleep(0.25) + if ws_url is None: + proc.terminate() + proc.wait(timeout=5) + shutil.rmtree(profile, ignore_errors=True) + pytest.skip("Chrome didn't expose CDP in time") + + yield ws_url, port + + proc.terminate() + try: + proc.wait(timeout=3) + except Exception: + proc.kill() + shutil.rmtree(profile, ignore_errors=True) + + +def _test_page_url() -> str: + html = """ +Supervisor pytest +

Supervisor pytest

+ +""" + return "data:text/html;base64," + base64.b64encode(html.encode()).decode() + + +def _fire_on_page(cdp_url: str, expression: str) -> None: + """Navigate the first page target to a data URL and fire `expression`.""" + import asyncio + import websockets as _ws_mod + + async def run(): + async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws: + next_id = [1] + + async def call(method, params=None, session_id=None): + cid = next_id[0] + next_id[0] += 1 + p = {"id": cid, "method": method} + if params: + p["params"] = params + if session_id: + p["sessionId"] = session_id + await ws.send(json.dumps(p)) + async for raw in ws: + m = json.loads(raw) + if m.get("id") == cid: + return m + + targets = (await call("Target.getTargets"))["result"]["targetInfos"] + page = next(t for t in targets if t.get("type") == "page") + attach = await call( + "Target.attachToTarget", {"targetId": page["targetId"], "flatten": True} + ) + sid = attach["result"]["sessionId"] + await call("Page.navigate", {"url": _test_page_url()}, session_id=sid) + await asyncio.sleep(1.5) # let the page load + await call( + "Runtime.evaluate", + {"expression": expression, "returnByValue": True}, + session_id=sid, + ) + + asyncio.run(run()) + + +@pytest.fixture +def supervisor_registry(): + """Yield the global registry and tear down any supervisors after the test.""" + from tools.browser_supervisor import SUPERVISOR_REGISTRY + + yield SUPERVISOR_REGISTRY + SUPERVISOR_REGISTRY.stop_all() + + +def _wait_for_dialog(supervisor, timeout: float = 5.0): + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + snap = supervisor.snapshot() + if snap.pending_dialogs: + return snap.pending_dialogs + time.sleep(0.1) + return () + + +def test_supervisor_start_and_snapshot(chrome_cdp, supervisor_registry): + """Supervisor attaches, exposes an active snapshot with a top frame.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-1", 
cdp_url=cdp_url) + + # Navigate so the frame tree populates. + _fire_on_page(cdp_url, "/* no dialog */ void 0") + + # Give a moment for frame events to propagate + time.sleep(1.0) + snap = supervisor.snapshot() + assert snap.active is True + assert snap.task_id == "pytest-1" + assert snap.pending_dialogs == () + # At minimum a top frame should exist after the navigate. + assert snap.frame_tree.get("top") is not None + + +def test_main_frame_alert_detection_and_dismiss(chrome_cdp, supervisor_registry): + """alert() in the main frame surfaces and can be dismissed via the sync API.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-2", cdp_url=cdp_url) + + _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-MAIN-ALERT'), 50)") + dialogs = _wait_for_dialog(supervisor) + assert dialogs, "no dialog detected" + d = dialogs[0] + assert d.type == "alert" + assert "PYTEST-MAIN-ALERT" in d.message + + result = supervisor.respond_to_dialog("dismiss") + assert result["ok"] is True + # State cleared after dismiss + time.sleep(0.3) + assert supervisor.snapshot().pending_dialogs == () + + +def test_iframe_contentwindow_alert(chrome_cdp, supervisor_registry): + """alert() fired from inside a same-origin iframe surfaces too.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-3", cdp_url=cdp_url) + + _fire_on_page( + cdp_url, + "setTimeout(() => document.querySelector('#inner').contentWindow.alert('PYTEST-IFRAME'), 50)", + ) + dialogs = _wait_for_dialog(supervisor) + assert dialogs, "no iframe dialog detected" + assert any("PYTEST-IFRAME" in d.message for d in dialogs) + + result = supervisor.respond_to_dialog("accept") + assert result["ok"] is True + + +def test_prompt_dialog_with_response_text(chrome_cdp, supervisor_registry): + """prompt() gets our prompt_text back inside the page.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-4", 
cdp_url=cdp_url) + + # Fire a prompt and stash the answer on window + _fire_on_page( + cdp_url, + "setTimeout(() => { window.__promptResult = prompt('give me a token', 'default-x'); }, 50)", + ) + dialogs = _wait_for_dialog(supervisor) + assert dialogs + d = dialogs[0] + assert d.type == "prompt" + assert d.default_prompt == "default-x" + + result = supervisor.respond_to_dialog("accept", prompt_text="PYTEST-PROMPT-REPLY") + assert result["ok"] is True + + +def test_respond_with_no_pending_dialog_errors_cleanly(chrome_cdp, supervisor_registry): + """Calling respond_to_dialog when nothing is pending returns a clean error, not an exception.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-5", cdp_url=cdp_url) + + result = supervisor.respond_to_dialog("accept") + assert result["ok"] is False + assert "no dialog" in result["error"].lower() + + +def test_auto_dismiss_policy(chrome_cdp, supervisor_registry): + """auto_dismiss policy clears dialogs without the agent responding.""" + from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS + + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start( + task_id="pytest-6", + cdp_url=cdp_url, + dialog_policy=DIALOG_POLICY_AUTO_DISMISS, + ) + + _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-AUTO-DISMISS'), 50)") + # Give the supervisor a moment to see + auto-dismiss + time.sleep(2.0) + snap = supervisor.snapshot() + # Nothing pending because auto-dismiss cleared it immediately + assert snap.pending_dialogs == () + + +def test_registry_idempotent_get_or_start(chrome_cdp, supervisor_registry): + """Calling get_or_start twice with the same (task, url) returns the same instance.""" + cdp_url, _port = chrome_cdp + a = supervisor_registry.get_or_start(task_id="pytest-idem", cdp_url=cdp_url) + b = supervisor_registry.get_or_start(task_id="pytest-idem", cdp_url=cdp_url) + assert a is b + + +def test_registry_stop(chrome_cdp, supervisor_registry): + 
"""stop() tears down the supervisor and snapshot reports inactive.""" + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-stop", cdp_url=cdp_url) + assert supervisor.snapshot().active is True + supervisor_registry.stop("pytest-stop") + # Post-stop snapshot reports inactive; supervisor obj may still exist + assert supervisor.snapshot().active is False + + +def test_browser_dialog_tool_no_supervisor(): + """browser_dialog returns a clear error when no supervisor is attached.""" + from tools.browser_dialog_tool import browser_dialog + + r = json.loads(browser_dialog(action="accept", task_id="nonexistent-task")) + assert r["success"] is False + assert "No CDP supervisor" in r["error"] + + +def test_browser_dialog_invalid_action(chrome_cdp, supervisor_registry): + """browser_dialog rejects actions that aren't accept/dismiss.""" + from tools.browser_dialog_tool import browser_dialog + + cdp_url, _port = chrome_cdp + supervisor_registry.get_or_start(task_id="pytest-bad-action", cdp_url=cdp_url) + + r = json.loads(browser_dialog(action="eat", task_id="pytest-bad-action")) + assert r["success"] is False + assert "accept" in r["error"] and "dismiss" in r["error"] + + +def test_recent_dialogs_ring_buffer(chrome_cdp, supervisor_registry): + """Closed dialogs show up in recent_dialogs with a closed_by tag.""" + from tools.browser_supervisor import DIALOG_POLICY_AUTO_DISMISS + + cdp_url, _port = chrome_cdp + sv = supervisor_registry.get_or_start( + task_id="pytest-recent", + cdp_url=cdp_url, + dialog_policy=DIALOG_POLICY_AUTO_DISMISS, + ) + + _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-RECENT'), 50)") + # Wait for auto-dismiss to cycle the dialog through + deadline = time.time() + 5 + while time.time() < deadline: + recent = sv.snapshot().recent_dialogs + if recent and any("PYTEST-RECENT" in r.message for r in recent): + break + time.sleep(0.1) + + recent = sv.snapshot().recent_dialogs + assert recent, "recent_dialogs should 
contain the auto-dismissed dialog" + match = next((r for r in recent if "PYTEST-RECENT" in r.message), None) + assert match is not None + assert match.type == "alert" + assert match.closed_by == "auto_policy" + assert match.closed_at >= match.opened_at + + +def test_browser_dialog_tool_end_to_end(chrome_cdp, supervisor_registry): + """Full agent-path check: fire an alert, call the tool handler directly.""" + from tools.browser_dialog_tool import browser_dialog + + cdp_url, _port = chrome_cdp + supervisor = supervisor_registry.get_or_start(task_id="pytest-tool", cdp_url=cdp_url) + + _fire_on_page(cdp_url, "setTimeout(() => alert('PYTEST-TOOL-END2END'), 50)") + assert _wait_for_dialog(supervisor), "no dialog detected via wait_for_dialog" + + r = json.loads(browser_dialog(action="dismiss", task_id="pytest-tool")) + assert r["success"] is True + assert r["action"] == "dismiss" + assert "PYTEST-TOOL-END2END" in r["dialog"]["message"] + + +def test_browser_cdp_frame_id_routes_via_supervisor(chrome_cdp, supervisor_registry, monkeypatch): + """browser_cdp(frame_id=...) routes Runtime.evaluate through supervisor. + + Mocks the supervisor with a known frame and verifies browser_cdp sends + the call via the supervisor's loop rather than opening a stateless + WebSocket. This is the path that makes cross-origin iframe eval work + on Browserbase. + """ + cdp_url, _port = chrome_cdp + sv = supervisor_registry.get_or_start(task_id="frame-id-test", cdp_url=cdp_url) + assert sv.snapshot().active + + # Inject a fake OOPIF frame pointing at the SUPERVISOR's own page session + # so we can verify routing. We fake is_oopif=True so the code path + # treats it as an OOPIF child. 
+ import tools.browser_supervisor as _bs + with sv._state_lock: + fake_frame_id = "FAKE-FRAME-001" + sv._frames[fake_frame_id] = _bs.FrameInfo( + frame_id=fake_frame_id, + url="fake://", + origin="", + parent_frame_id=None, + is_oopif=True, + cdp_session_id=sv._page_session_id, # route at page scope + ) + + # Route the tool through the supervisor. Should succeed and return + # something that clearly came from CDP. + from tools.browser_cdp_tool import browser_cdp + result = browser_cdp( + method="Runtime.evaluate", + params={"expression": "1 + 1", "returnByValue": True}, + frame_id=fake_frame_id, + task_id="frame-id-test", + ) + r = json.loads(result) + assert r.get("success") is True, f"expected success, got: {r}" + assert r.get("frame_id") == fake_frame_id + assert r.get("session_id") == sv._page_session_id + value = r.get("result", {}).get("result", {}).get("value") + assert value == 2, f"expected 2, got {value!r}" + + +def test_browser_cdp_frame_id_real_oopif_smoke_documented(): + """Document that real-OOPIF E2E was manually verified — see PR #14540. + + A pytest version of this hits an asyncio version-quirk in the venv + (3.11) that doesn't show up in standalone scripts (3.13 + system + websockets). The mechanism IS verified end-to-end by two separate + smoke scripts in /tmp/dialog-iframe-test/: + + * smoke_local_oopif.py — local Chrome + 2 http servers on + different hostnames + --site-per-process. Outer page on + localhost:18905, iframe src=http://127.0.0.1:18906. Calls + browser_cdp(method='Runtime.evaluate', frame_id=) and + verifies inner page's title comes back from the OOPIF session. + PASSED on 2026-04-23: iframe document.title = 'INNER-FRAME-XYZ' + + * smoke_bb_iframe_agent_path.py — Browserbase + real cross-origin + iframe (src=https://example.com/). Same browser_cdp(frame_id=) + path. 
PASSED on 2026-04-23: iframe document.title = + 'Example Domain' + + The test_browser_cdp_frame_id_routes_via_supervisor pytest covers + the supervisor-routing plumbing with a fake injected OOPIF. + """ + pytest.skip( + "Real-OOPIF E2E verified manually with smoke_local_oopif.py and " + "smoke_bb_iframe_agent_path.py — pytest version hits an asyncio " + "version quirk between venv (3.11) and standalone (3.13). " + "Smoke logs preserved in /tmp/dialog-iframe-test/." + ) + + +def test_browser_cdp_frame_id_missing_supervisor(): + """browser_cdp(frame_id=...) errors cleanly when no supervisor is attached.""" + from tools.browser_cdp_tool import browser_cdp + result = browser_cdp( + method="Runtime.evaluate", + params={"expression": "1"}, + frame_id="any-frame-id", + task_id="no-such-task", + ) + r = json.loads(result) + assert r.get("success") is not True + assert "supervisor" in (r.get("error") or "").lower() + + +def test_browser_cdp_frame_id_not_in_frame_tree(chrome_cdp, supervisor_registry): + """browser_cdp(frame_id=...) errors when the frame_id isn't known.""" + cdp_url, _port = chrome_cdp + sv = supervisor_registry.get_or_start(task_id="bad-frame-test", cdp_url=cdp_url) + assert sv.snapshot().active + + from tools.browser_cdp_tool import browser_cdp + result = browser_cdp( + method="Runtime.evaluate", + params={"expression": "1"}, + frame_id="nonexistent-frame", + task_id="bad-frame-test", + ) + r = json.loads(result) + assert r.get("success") is not True + assert "not found" in (r.get("error") or "").lower() + + +def test_bridge_captures_prompt_and_returns_reply_text(chrome_cdp, supervisor_registry): + """End-to-end: agent's prompt_text round-trips INTO the page's JS. + + Proves the bridge isn't just catching dialogs — it's properly round- + tripping our reply back into the page via Fetch.fulfillRequest, so + ``prompt()`` actually returns the agent-supplied string to the page. 
+ """ + import base64 as _b64 + + cdp_url, _port = chrome_cdp + sv = supervisor_registry.get_or_start(task_id="pytest-bridge-prompt", cdp_url=cdp_url) + + # Page fires prompt and stashes the return value on window. + html = """""" + url = "data:text/html;base64," + _b64.b64encode(html.encode()).decode() + + import asyncio as _asyncio + import websockets as _ws_mod + + async def nav_and_read(): + async with _ws_mod.connect(cdp_url, max_size=50 * 1024 * 1024) as ws: + nid = [1] + pending: dict = {} + + async def reader_fn(): + try: + async for raw in ws: + m = json.loads(raw) + if "id" in m: + fut = pending.pop(m["id"], None) + if fut and not fut.done(): + fut.set_result(m) + except Exception: + pass + + rd = _asyncio.create_task(reader_fn()) + + async def call(method, params=None, sid=None): + c = nid[0]; nid[0] += 1 + p = {"id": c, "method": method} + if params: p["params"] = params + if sid: p["sessionId"] = sid + fut = _asyncio.get_event_loop().create_future() + pending[c] = fut + await ws.send(json.dumps(p)) + return await _asyncio.wait_for(fut, timeout=20) + + try: + t = (await call("Target.getTargets"))["result"]["targetInfos"] + pg = next(x for x in t if x.get("type") == "page") + a = await call("Target.attachToTarget", {"targetId": pg["targetId"], "flatten": True}) + sid = a["result"]["sessionId"] + + # Fire navigate but don't await — prompt() blocks the page + nav_id = nid[0]; nid[0] += 1 + nav_fut = _asyncio.get_event_loop().create_future() + pending[nav_id] = nav_fut + await ws.send(json.dumps({"id": nav_id, "method": "Page.navigate", "params": {"url": url}, "sessionId": sid})) + + # Wait for supervisor to see the prompt + deadline = time.monotonic() + 10 + dialog = None + while time.monotonic() < deadline: + snap = sv.snapshot() + if snap.pending_dialogs: + dialog = snap.pending_dialogs[0] + break + await _asyncio.sleep(0.05) + assert dialog is not None, "no dialog captured" + assert dialog.bridge_request_id is not None, "expected bridge path" + assert 
dialog.type == "prompt" + + # Agent responds + resp = sv.respond_to_dialog("accept", prompt_text="AGENT-SUPPLIED-REPLY") + assert resp["ok"] is True + + # Wait for nav to complete + read back + try: + await _asyncio.wait_for(nav_fut, timeout=10) + except Exception: + pass + await _asyncio.sleep(0.5) + r = await call( + "Runtime.evaluate", + {"expression": "window.__ret", "returnByValue": True}, + sid=sid, + ) + return r.get("result", {}).get("result", {}).get("value") + finally: + rd.cancel() + try: await rd + except BaseException: pass + + value = asyncio.run(nav_and_read()) + assert value == "AGENT-SUPPLIED-REPLY", f"expected AGENT-SUPPLIED-REPLY, got {value!r}" diff --git a/tests/tools/test_checkpoint_manager.py b/tests/tools/test_checkpoint_manager.py index a464afc06..66fa10754 100644 --- a/tests/tools/test_checkpoint_manager.py +++ b/tests/tools/test_checkpoint_manager.py @@ -357,12 +357,33 @@ class TestWorkingDirResolution: result = mgr.get_working_dir_for_path(str(subdir / "file.py")) assert result == str(project) - def test_falls_back_to_parent(self, tmp_path): + def test_falls_back_to_parent(self, tmp_path, monkeypatch): mgr = CheckpointManager(enabled=True) filepath = tmp_path / "random" / "file.py" filepath.parent.mkdir(parents=True) filepath.write_text("x\\n") + # The walk-up scan for project markers (.git, pyproject.toml, etc.) + # stops at tmp_path — otherwise stray markers in ``/tmp`` (e.g. + # ``/tmp/pyproject.toml`` left by other tools on the host) get + # picked up as the project root and this test flakes on shared CI. 
+ import pathlib as _pl + _real_exists = _pl.Path.exists + + def _guarded_exists(self): + s = str(self) + stop = str(tmp_path) + if not s.startswith(stop) and any( + s.endswith("/" + m) or s == "/" + m + for m in (".git", "pyproject.toml", "package.json", + "Cargo.toml", "go.mod", "Makefile", "pom.xml", + ".hg", "Gemfile") + ): + return False + return _real_exists(self) + + monkeypatch.setattr(_pl.Path, "exists", _guarded_exists) + result = mgr.get_working_dir_for_path(str(filepath)) assert result == str(filepath.parent) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 9c93f05c7..f3a1a2632 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -1319,6 +1319,112 @@ class TestDelegateHeartbeat(unittest.TestCase): any("API call #5 completed" in desc for desc in touch_calls), f"Heartbeat should include last_activity_desc: {touch_calls}") + def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self): + """A long-running tool (no iteration advance, but current_tool set) + must not be flagged stale at the idle threshold. + + Bug #13041: when a child is legitimately busy inside a slow tool + (terminal command, browser fetch), api_call_count does not advance. + The previous stale check treated this as idle and stopped the + heartbeat after 5 cycles (~150s), letting the gateway kill the + session. The fix uses a much higher in-tool threshold and only + applies the tight idle threshold when current_tool is None. + """ + from tools.delegate_tool import _run_single_child + + parent = _make_mock_parent() + touch_calls = [] + parent._touch_activity = lambda desc: touch_calls.append(desc) + + child = MagicMock() + # Child is stuck inside a single terminal call for the whole run. + # api_call_count never advances, current_tool is always set. 
+ child.get_activity_summary.return_value = { + "current_tool": "terminal", + "api_call_count": 1, + "max_iterations": 50, + "last_activity_desc": "executing tool: terminal", + } + + def slow_run(**kwargs): + # Long enough to exceed the OLD idle threshold (5 cycles) at + # the patched interval, but shorter than the new in-tool + # threshold. + time.sleep(0.4) + return {"final_response": "done", "completed": True, "api_calls": 1} + + child.run_conversation.side_effect = slow_run + + # Patch both the interval AND the idle ceiling so the test proves + # the in-tool branch takes effect: with a 0.05s interval and the + # default _HEARTBEAT_STALE_CYCLES_IDLE=5, the old behavior would + # trip after 0.25s and stop firing. We should see heartbeats + # continuing through the full 0.4s run. + with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05): + _run_single_child( + task_index=0, + goal="Test long-running tool", + child=child, + parent_agent=parent, + ) + + # With the old idle threshold (5 cycles = 0.25s), touch_calls + # would cap at ~5. With the in-tool threshold (20 cycles = 1.0s), + # we should see substantially more heartbeats over 0.4s. + self.assertGreater( + len(touch_calls), 6, + f"Heartbeat stopped too early while child was inside a tool; " + f"got {len(touch_calls)} touches over 0.4s at 0.05s interval", + ) + + def test_heartbeat_still_trips_idle_stale_when_no_tool(self): + """A wedged child with no current_tool still trips the idle threshold. + + Regression guard: the fix for #13041 must not disable stale + detection entirely. A child that's hung between turns (no tool + running, no iteration progress) must still stop touching the + parent so the gateway timeout can fire. + """ + from tools.delegate_tool import _run_single_child + + parent = _make_mock_parent() + touch_calls = [] + parent._touch_activity = lambda desc: touch_calls.append(desc) + + child = MagicMock() + # Wedged child: no tool running, iteration frozen. 
+ child.get_activity_summary.return_value = { + "current_tool": None, + "api_call_count": 3, + "max_iterations": 50, + "last_activity_desc": "waiting for API response", + } + + def slow_run(**kwargs): + time.sleep(0.6) + return {"final_response": "done", "completed": True, "api_calls": 3} + + child.run_conversation.side_effect = slow_run + + # At interval 0.05s, idle threshold (5 cycles) trips at ~0.25s. + # We should see the heartbeat stop firing well before 0.6s. + with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05): + _run_single_child( + task_index=0, + goal="Test wedged child", + child=child, + parent_agent=parent, + ) + + # With idle threshold=5 + interval=0.05s, touches should cap + # around 5. Bound loosely to avoid timing flakes. + self.assertLess( + len(touch_calls), 9, + f"Idle stale detection did not fire: got {len(touch_calls)} " + f"touches over 0.6s — expected heartbeat to stop after " + f"~5 stale cycles", + ) + class TestDelegationReasoningEffort(unittest.TestCase): """Tests for delegation.reasoning_effort config override.""" diff --git a/tests/tools/test_delegate_subagent_timeout_diagnostic.py b/tests/tools/test_delegate_subagent_timeout_diagnostic.py new file mode 100644 index 000000000..9bb49125a --- /dev/null +++ b/tests/tools/test_delegate_subagent_timeout_diagnostic.py @@ -0,0 +1,286 @@ +"""Regression tests for subagent timeout diagnostic dump (issue #14726). + +When delegate_task's child subagent times out without having made any API +call, a structured diagnostic file is written under +``~/.hermes/logs/subagent-timeout--.log``. This gives users a +concrete artifact to inspect (worker thread stack, system prompt size, +tool schema bytes, credential pool state, etc.) instead of the previous +opaque "subagent timed out" error. 
+ +These tests pin: +- the diagnostic writer's output format and content +- the timeout branch in _run_single_child only dumps when api_calls == 0 +- the error message surfaces the diagnostic path +- api_calls > 0 timeouts do NOT write a dump (the old "stuck on slow API + call" explanation still applies) +""" +from __future__ import annotations + +import os +import threading +import time +from pathlib import Path +from typing import Optional +from unittest.mock import MagicMock, patch + +import pytest + + +@pytest.fixture +def hermes_home(tmp_path, monkeypatch): + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + return home + + +class _StubChild: + """Minimal stand-in for an AIAgent subagent.""" + def __init__( + self, + *, + api_call_count: int = 0, + hang_seconds: float = 5.0, + subagent_id: str = "sa-0-stubabc", + tool_schema=None, + ): + self._subagent_id = subagent_id + self._delegate_depth = 1 + self._delegate_role = "leaf" + self.model = "test/model" + self.provider = "testprov" + self.api_mode = "chat_completions" + self.base_url = "https://example.test/v1" + self.max_iterations = 30 + self.quiet_mode = True + self.skip_memory = True + self.skip_context_files = True + self.platform = "cli" + self.ephemeral_system_prompt = "sys prompt" + self.enabled_toolsets = ["web", "terminal"] + self.valid_tool_names = {"web_search", "terminal"} + self.tools = tool_schema if tool_schema is not None else [ + {"name": "web_search", "description": "search"}, + {"name": "terminal", "description": "shell"}, + ] + self._api_call_count = api_call_count + self._hang = threading.Event() + self._hang_seconds = hang_seconds + + def get_activity_summary(self): + return { + "api_call_count": self._api_call_count, + "max_iterations": self.max_iterations, + "current_tool": None, + "seconds_since_activity": 60, + } + + def run_conversation(self, user_message, task_id=None): + self._hang.wait(self._hang_seconds) + return {"final_response": "", 
"completed": False, "api_calls": self._api_call_count} + + def interrupt(self): + self._hang.set() + + +# ── _dump_subagent_timeout_diagnostic ────────────────────────────────── + +class TestDumpSubagentTimeoutDiagnostic: + + def test_writes_log_with_expected_sections(self, hermes_home): + from tools.delegate_tool import _dump_subagent_timeout_diagnostic + child = _StubChild(subagent_id="sa-7-abc123") + + worker = threading.Thread( + target=lambda: child.run_conversation("test"), + daemon=True, + ) + worker.start() + time.sleep(0.1) + try: + path = _dump_subagent_timeout_diagnostic( + child=child, + task_index=7, + timeout_seconds=300.0, + duration_seconds=300.01, + worker_thread=worker, + goal="Research something long", + ) + finally: + child.interrupt() + worker.join(timeout=2.0) + + assert path is not None + p = Path(path) + assert p.is_file() + # File lives under HERMES_HOME/logs/ + assert p.parent == hermes_home / "logs" + assert p.name.startswith("subagent-timeout-sa-7-abc123-") + assert p.suffix == ".log" + + content = p.read_text() + # Header references the issue for future grep-ability + assert "issue #14726" in content + # Timeout facts + assert "task_index: 7" in content + assert "subagent_id: sa-7-abc123" in content + assert "configured_timeout: 300.0s" in content + assert "actual_duration: 300.01s" in content + # Goal + assert "Research something long" in content + # Child config + assert "model: 'test/model'" in content + assert "provider: 'testprov'" in content + assert "base_url: 'https://example.test/v1'" in content + assert "max_iterations: 30" in content + # Toolsets + assert "enabled_toolsets: ['web', 'terminal']" in content + assert "loaded tool count: 2" in content + # Prompt / schema sizes + assert "system_prompt_bytes:" in content + assert "tool_schema_count: 2" in content + assert "tool_schema_bytes:" in content + # Activity summary + assert "api_call_count: 0" in content + # Worker stack + assert "Worker thread stack at timeout" in content 
+ # The thread is parked inside _hang.wait → cond.wait → waiter.acquire + assert "acquire" in content or "wait" in content + + def test_truncates_very_long_goal(self, hermes_home): + from tools.delegate_tool import _dump_subagent_timeout_diagnostic + child = _StubChild() + huge_goal = "x" * 5000 + + path = _dump_subagent_timeout_diagnostic( + child=child, + task_index=0, + timeout_seconds=300.0, + duration_seconds=300.0, + worker_thread=None, + goal=huge_goal, + ) + child.interrupt() + + content = Path(path).read_text() + assert "[truncated]" in content + # Goal section trimmed to 1000 chars + suffix + goal_block = content.split("## Goal", 1)[1].split("## Child config", 1)[0] + assert len(goal_block) < 1200 + + def test_missing_worker_thread_is_handled(self, hermes_home): + from tools.delegate_tool import _dump_subagent_timeout_diagnostic + child = _StubChild() + path = _dump_subagent_timeout_diagnostic( + child=child, + task_index=0, + timeout_seconds=300.0, + duration_seconds=300.0, + worker_thread=None, + goal="x", + ) + child.interrupt() + content = Path(path).read_text() + assert "" in content + + def test_exited_worker_thread_is_handled(self, hermes_home): + from tools.delegate_tool import _dump_subagent_timeout_diagnostic + child = _StubChild() + # A thread that has already finished + t = threading.Thread(target=lambda: None) + t.start() + t.join() + assert not t.is_alive() + path = _dump_subagent_timeout_diagnostic( + child=child, + task_index=0, + timeout_seconds=300.0, + duration_seconds=300.0, + worker_thread=t, + goal="x", + ) + child.interrupt() + content = Path(path).read_text() + assert "" in content + + def test_returns_none_on_unwritable_logs_dir(self, tmp_path, monkeypatch): + # Point HERMES_HOME at an unwritable path so logs/ can't be created + # (simulates permission-denied). Helper must not raise. 
+ from tools.delegate_tool import _dump_subagent_timeout_diagnostic + bogus = tmp_path / "does-not-exist" / ".hermes" + monkeypatch.setenv("HERMES_HOME", str(bogus)) + child = _StubChild() + + # Make the logs dir itself unwritable by creating it as a FILE + # so mkdir(exist_ok=True) → NotADirectoryError and we fall through. + bogus.parent.mkdir(parents=True, exist_ok=True) + bogus.mkdir() + (bogus / "logs").write_text("not a dir") + result = _dump_subagent_timeout_diagnostic( + child=child, + task_index=0, + timeout_seconds=300.0, + duration_seconds=300.0, + worker_thread=None, + goal="x", + ) + child.interrupt() + # Either None (mkdir failed) or a real path; must never raise. + # We assert no exception propagates — the return value is advisory. + assert result is None or Path(result).exists() + + +# ── _run_single_child timeout branch wiring ─────────────────────────── + +class TestRunSingleChildTimeoutDump: + """The timeout branch in _run_single_child must emit the diagnostic + dump when api_calls == 0, and must NOT emit it when api_calls > 0.""" + + def _invoke_with_short_timeout(self, child, monkeypatch): + """Run _run_single_child with a tiny timeout to force the timeout branch.""" + from tools import delegate_tool + # Force a 0.3s timeout so the test is fast + monkeypatch.setattr(delegate_tool, "_get_child_timeout", lambda: 0.3) + + parent = MagicMock() + parent._touch_activity = MagicMock() + parent._current_task_id = None + return delegate_tool._run_single_child( + task_index=0, + goal="test goal", + child=child, + parent_agent=parent, + ) + + def test_zero_api_calls_writes_dump_and_surfaces_path(self, hermes_home, monkeypatch): + child = _StubChild(api_call_count=0, hang_seconds=10.0) + result = self._invoke_with_short_timeout(child, monkeypatch) + + assert result["status"] == "timeout" + assert result["api_calls"] == 0 + assert result["diagnostic_path"] is not None + dump_path = Path(result["diagnostic_path"]) + assert dump_path.is_file() + assert 
dump_path.parent == hermes_home / "logs" + + # Error message surfaces the path and the "no API call" phrasing + assert "without making any API call" in result["error"] + assert "Diagnostic:" in result["error"] + assert str(dump_path) in result["error"] + + def test_nonzero_api_calls_skips_dump_and_uses_old_message(self, hermes_home, monkeypatch): + child = _StubChild(api_call_count=5, hang_seconds=10.0) + result = self._invoke_with_short_timeout(child, monkeypatch) + + assert result["status"] == "timeout" + assert result["api_calls"] == 5 + # No diagnostic file should be written for timeouts that made + # actual API calls — the old generic "stuck on slow call" message + # still applies. + assert result.get("diagnostic_path") is None + assert "stuck on a slow API call" in result["error"] + # And no subagent-timeout-* file should exist under logs/ + logs_dir = hermes_home / "logs" + if logs_dir.is_dir(): + dumps = list(logs_dir.glob("subagent-timeout-*.log")) + assert dumps == [] diff --git a/tests/tools/test_dockerfile_pid1_reaping.py b/tests/tools/test_dockerfile_pid1_reaping.py new file mode 100644 index 000000000..55bd5e069 --- /dev/null +++ b/tests/tools/test_dockerfile_pid1_reaping.py @@ -0,0 +1,78 @@ +"""Contract tests for the container Dockerfile. + +These tests assert invariants about how the Dockerfile composes its runtime — +they deliberately avoid snapshotting specific package versions, line numbers, +or exact flag choices. What they DO assert is that the Dockerfile maintains +the properties required for correct production behaviour: + +- A PID-1 init (tini) is installed and wraps the entrypoint, so that orphaned + subprocesses (MCP stdio servers, git, bun, browser daemons) get reaped + instead of accumulating as zombies (#15012). +- Signal forwarding runs through the init so ``docker stop`` triggers + hermes's own graceful-shutdown path. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[2] +DOCKERFILE = REPO_ROOT / "Dockerfile" + + +@pytest.fixture(scope="module") +def dockerfile_text() -> str: + if not DOCKERFILE.exists(): + pytest.skip("Dockerfile not present in this checkout") + return DOCKERFILE.read_text() + + +def test_dockerfile_installs_an_init_for_zombie_reaping(dockerfile_text): + """Some init (tini, dumb-init, catatonit) must be installed. + + Without a PID-1 init that handles SIGCHLD, hermes accumulates zombie + processes from MCP stdio subprocesses, git operations, browser + daemons, etc. In long-running Docker deployments this eventually + exhausts the PID table. + """ + # Accept any of the common reapers. The contract is behavioural: + # something must be installed that reaps orphans. + known_inits = ("tini", "dumb-init", "catatonit") + installed = any(name in dockerfile_text for name in known_inits) + assert installed, ( + "No PID-1 init detected in Dockerfile (looked for: " + f"{', '.join(known_inits)}). Without an init process to reap " + "orphaned subprocesses, hermes accumulates zombies in Docker " + "deployments. See issue #15012." + ) + + +def test_dockerfile_entrypoint_routes_through_the_init(dockerfile_text): + """The ENTRYPOINT must invoke the init, not the entrypoint script directly. + + Installing tini is only half the fix — the container must actually run + with tini as PID 1. If the ENTRYPOINT executes the shell script + directly, the shell becomes PID 1 and will ``exec`` into hermes, + which then runs as PID 1 without any zombie reaping. + """ + # Find the last uncommented ENTRYPOINT line — Docker honours the final one. 
+ entrypoint_line = None + for raw_line in dockerfile_text.splitlines(): + line = raw_line.strip() + if line.startswith("#"): + continue + if line.startswith("ENTRYPOINT"): + entrypoint_line = line + + assert entrypoint_line is not None, "Dockerfile is missing an ENTRYPOINT directive" + + known_inits = ("tini", "dumb-init", "catatonit") + routes_through_init = any(name in entrypoint_line for name in known_inits) + assert routes_through_init, ( + f"ENTRYPOINT does not route through an init: {entrypoint_line!r}. " + "If tini is only installed but not wired into ENTRYPOINT, hermes " + "still runs as PID 1 and zombies will accumulate (#15012)." + ) diff --git a/tests/tools/test_file_tools.py b/tests/tools/test_file_tools.py index c2d75bf5d..5a215df14 100644 --- a/tests/tools/test_file_tools.py +++ b/tests/tools/test_file_tools.py @@ -247,7 +247,9 @@ class TestPatchHints: from tools.file_tools import patch_tool raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y") - assert "[Hint:" in raw + # patch_tool surfaces the hint as a structured "_hint" field on the + # JSON error payload (not an inline "[Hint: ..." tail). 
+ assert "_hint" in raw assert "read_file" in raw @patch("tools.file_tools._get_file_ops") @@ -260,7 +262,7 @@ class TestPatchHints: from tools.file_tools import patch_tool raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y") - assert "[Hint:" not in raw + assert "_hint" not in raw class TestSearchHints: diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index da46348ea..1604d4adb 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -120,6 +120,177 @@ class TestSchemaConversion: assert schema["parameters"] == {"type": "object", "properties": {}} + def test_definitions_refs_are_rewritten_to_defs(self): + from tools.mcp_tool import _convert_mcp_schema + + mcp_tool = _make_mcp_tool( + name="submit", + description="Submit a payload", + input_schema={ + "type": "object", + "properties": { + "input": {"$ref": "#/definitions/Payload"}, + }, + "required": ["input"], + "definitions": { + "Payload": { + "type": "object", + "properties": { + "query": {"type": "string"}, + }, + "required": ["query"], + } + }, + }, + ) + + schema = _convert_mcp_schema("forms", mcp_tool) + + assert schema["parameters"]["properties"]["input"]["$ref"] == "#/$defs/Payload" + assert "$defs" in schema["parameters"] + assert "definitions" not in schema["parameters"] + + def test_nested_definition_refs_are_rewritten_recursively(self): + from tools.mcp_tool import _convert_mcp_schema + + mcp_tool = _make_mcp_tool( + name="nested", + description="Nested schema", + input_schema={ + "type": "object", + "properties": { + "items": { + "type": "array", + "items": {"$ref": "#/definitions/Entry"}, + }, + }, + "definitions": { + "Entry": { + "type": "object", + "properties": { + "child": {"$ref": "#/definitions/Child"}, + }, + }, + "Child": { + "type": "object", + "properties": { + "value": {"type": "string"}, + }, + }, + }, + }, + ) + + schema = _convert_mcp_schema("forms", mcp_tool) + + assert 
schema["parameters"]["properties"]["items"]["items"]["$ref"] == "#/$defs/Entry" + assert schema["parameters"]["$defs"]["Entry"]["properties"]["child"]["$ref"] == "#/$defs/Child" + + def test_missing_type_on_object_is_coerced(self): + """Schemas that describe an object but omit ``type`` get type='object'.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "properties": {"q": {"type": "string"}}, + "required": ["q"], + }) + + assert schema["type"] == "object" + assert schema["properties"]["q"]["type"] == "string" + assert schema["required"] == ["q"] + + def test_null_type_on_object_is_coerced(self): + """type: None should be treated like missing type (common MCP server bug).""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": None, + "properties": {"x": {"type": "integer"}}, + }) + + assert schema["type"] == "object" + + def test_required_pruned_when_property_missing(self): + """Gemini 400s on required names that don't exist in properties.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": {"a": {"type": "string"}}, + "required": ["a", "ghost", "phantom"], + }) + + assert schema["required"] == ["a"] + + def test_required_removed_when_all_names_dangle(self): + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": {}, + "required": ["ghost"], + }) + + assert "required" not in schema + + def test_required_pruning_applies_recursively_inside_nested_objects(self): + """Nested object schemas also get required pruning.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": { + "filter": { + "type": "object", + "properties": {"field": {"type": "string"}}, + "required": ["field", "missing"], + }, + }, + }) + + assert 
schema["properties"]["filter"]["required"] == ["field"] + + def test_object_in_array_items_gets_properties_filled(self): + """Array-item object schemas without properties get an empty dict.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": { + "items": { + "type": "array", + "items": {"type": "object"}, + }, + }, + }) + + assert schema["properties"]["items"]["items"]["properties"] == {} + + def test_convert_mcp_schema_survives_missing_inputschema_attribute(self): + """A Tool object without .inputSchema must not crash registration.""" + import types + + from tools.mcp_tool import _convert_mcp_schema + + bare_tool = types.SimpleNamespace(name="probe", description="Probe") + schema = _convert_mcp_schema("srv", bare_tool) + + assert schema["name"] == "mcp_srv_probe" + assert schema["parameters"] == {"type": "object", "properties": {}} + + def test_convert_mcp_schema_with_none_inputschema(self): + """Tool with inputSchema=None produces a valid empty object schema.""" + import types + + from tools.mcp_tool import _convert_mcp_schema + + # Note: _make_mcp_tool(input_schema=None) falls back to a default — + # build the namespace directly so .inputSchema really is None. 
+ mcp_tool = types.SimpleNamespace(name="probe", description="Probe", inputSchema=None) + schema = _convert_mcp_schema("srv", mcp_tool) + + assert schema["parameters"] == {"type": "object", "properties": {}} + def test_tool_name_prefix_format(self): from tools.mcp_tool import _convert_mcp_schema @@ -1029,6 +1200,92 @@ class TestHTTPConfig: asyncio.run(_test()) + def test_http_seeds_initial_protocol_header(self): + from tools.mcp_tool import LATEST_PROTOCOL_VERSION, MCPServerTask + + server = MCPServerTask("remote") + captured = {} + + class DummyAsyncClient: + def __init__(self, **kwargs): + captured.update(kwargs) + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + class DummyTransportCtx: + async def __aenter__(self): + return MagicMock(), MagicMock(), (lambda: None) + + async def __aexit__(self, exc_type, exc, tb): + return False + + class DummySession: + def __init__(self, *args, **kwargs): + pass + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def initialize(self): + return None + + class DummyLegacyTransportCtx: + def __init__(self, **kwargs): + captured["legacy_headers"] = kwargs.get("headers") + + async def __aenter__(self): + return MagicMock(), MagicMock(), (lambda: None) + + async def __aexit__(self, exc_type, exc, tb): + return False + + async def _discover_tools(self): + self._shutdown_event.set() + + async def _run(config, *, new_http): + captured.clear() + with patch("tools.mcp_tool._MCP_HTTP_AVAILABLE", True), \ + patch("tools.mcp_tool._MCP_NEW_HTTP", new_http), \ + patch("httpx.AsyncClient", DummyAsyncClient), \ + patch("tools.mcp_tool.streamable_http_client", return_value=DummyTransportCtx()), \ + patch("tools.mcp_tool.streamablehttp_client", side_effect=lambda url, **kwargs: DummyLegacyTransportCtx(**kwargs)), \ + patch("tools.mcp_tool.ClientSession", DummySession), \ + patch.object(MCPServerTask, 
"_discover_tools", _discover_tools): + await server._run_http(config) + + asyncio.run(_run({"url": "https://example.com/mcp"}, new_http=True)) + assert captured["headers"]["mcp-protocol-version"] == LATEST_PROTOCOL_VERSION + + asyncio.run(_run({ + "url": "https://example.com/mcp", + "headers": {"mcp-protocol-version": "custom-version"}, + }, new_http=True)) + assert captured["headers"]["mcp-protocol-version"] == "custom-version" + + asyncio.run(_run({ + "url": "https://example.com/mcp", + "headers": {"MCP-Protocol-Version": "custom-version"}, + }, new_http=True)) + assert captured["headers"]["MCP-Protocol-Version"] == "custom-version" + assert "mcp-protocol-version" not in captured["headers"] + + asyncio.run(_run({"url": "https://example.com/mcp"}, new_http=False)) + assert captured["legacy_headers"]["mcp-protocol-version"] == LATEST_PROTOCOL_VERSION + + asyncio.run(_run({ + "url": "https://example.com/mcp", + "headers": {"MCP-Protocol-Version": "custom-version"}, + }, new_http=False)) + assert captured["legacy_headers"]["MCP-Protocol-Version"] == "custom-version" + assert "mcp-protocol-version" not in captured["legacy_headers"] + # --------------------------------------------------------------------------- # Reconnection logic diff --git a/tests/tools/test_mcp_tool_session_expired.py b/tests/tools/test_mcp_tool_session_expired.py new file mode 100644 index 000000000..67e6e5874 --- /dev/null +++ b/tests/tools/test_mcp_tool_session_expired.py @@ -0,0 +1,359 @@ +"""Tests for MCP tool-handler transport-session auto-reconnect. + +When a Streamable HTTP MCP server garbage-collects its server-side +session (idle TTL, server restart, pod rotation, …) it rejects +subsequent requests with a JSON-RPC error containing phrases like +``"Invalid or expired session"``. The OAuth token remains valid — +only the transport session state needs rebuilding. 
+ +Before the #13383 fix, this class of failure fell through as a plain +tool error with no recovery path, so every subsequent call on the +affected MCP server failed until the gateway was manually restarted. +""" +import json +import threading +import time +from unittest.mock import AsyncMock, MagicMock + +import pytest + + +# --------------------------------------------------------------------------- +# _is_session_expired_error — unit coverage +# --------------------------------------------------------------------------- + + +def test_is_session_expired_detects_invalid_or_expired_session(): + """Reporter's exact wpcom-mcp error message (#13383).""" + from tools.mcp_tool import _is_session_expired_error + exc = RuntimeError("Invalid params: Invalid or expired session") + assert _is_session_expired_error(exc) is True + + +def test_is_session_expired_detects_expired_session_variant(): + """Generic ``session expired`` / ``expired session`` phrasings used + by other SDK servers.""" + from tools.mcp_tool import _is_session_expired_error + assert _is_session_expired_error(RuntimeError("Session expired")) is True + assert _is_session_expired_error(RuntimeError("expired session: abc")) is True + + +def test_is_session_expired_detects_session_not_found(): + """Server-side GC produces ``session not found`` / ``unknown session`` + on some implementations.""" + from tools.mcp_tool import _is_session_expired_error + assert _is_session_expired_error(RuntimeError("session not found")) is True + assert _is_session_expired_error(RuntimeError("Unknown session: abc123")) is True + + +def test_is_session_expired_is_case_insensitive(): + """Match uses lower-cased comparison so servers that emit the + message in different cases (SDK formatter quirks) still trigger.""" + from tools.mcp_tool import _is_session_expired_error + assert _is_session_expired_error(RuntimeError("INVALID OR EXPIRED SESSION")) is True + assert _is_session_expired_error(RuntimeError("Session Expired")) is True + 
+ +def test_is_session_expired_rejects_unrelated_errors(): + """Narrow scope: only the specific session-expired markers trigger. + A regular RuntimeError / ValueError does not.""" + from tools.mcp_tool import _is_session_expired_error + assert _is_session_expired_error(RuntimeError("Tool failed to execute")) is False + assert _is_session_expired_error(ValueError("Missing parameter")) is False + assert _is_session_expired_error(Exception("Connection refused")) is False + # 401 is handled by the sibling _is_auth_error path, not here. + assert _is_session_expired_error(RuntimeError("401 Unauthorized")) is False + + +def test_is_session_expired_rejects_interrupted_error(): + """InterruptedError is the user-cancel signal — must never route + through the session-reconnect path.""" + from tools.mcp_tool import _is_session_expired_error + assert _is_session_expired_error(InterruptedError()) is False + assert _is_session_expired_error(InterruptedError("Invalid or expired session")) is False + + +def test_is_session_expired_rejects_empty_message(): + """Bare exceptions with no message shouldn't match.""" + from tools.mcp_tool import _is_session_expired_error + assert _is_session_expired_error(RuntimeError("")) is False + assert _is_session_expired_error(Exception()) is False + + +# --------------------------------------------------------------------------- +# Handler integration — verify the recovery plumbing wires end-to-end +# --------------------------------------------------------------------------- + + +def _install_stub_server(name: str = "wpcom"): + """Register a minimal server stub that _handle_session_expired_and_retry + can signal via _reconnect_event, and that reports ready+session after + the event fires.""" + from tools import mcp_tool + + mcp_tool._ensure_mcp_loop() + + server = MagicMock() + server.name = name + # _reconnect_event is called via loop.call_soon_threadsafe(…set); use + # a threading-safe substitute. 
+ reconnect_flag = threading.Event() + + class _EventAdapter: + def set(self): + reconnect_flag.set() + + server._reconnect_event = _EventAdapter() + + # Immediately "ready" — simulates a fast reconnect (_ready.is_set() + # is polled by _handle_session_expired_and_retry until the timeout). + ready_flag = threading.Event() + ready_flag.set() + server._ready = MagicMock() + server._ready.is_set = ready_flag.is_set + + # session attr must be truthy for the handler's initial check + # (``if not server or not server.session``) and for the post- + # reconnect readiness probe (``srv.session is not None``). + server.session = MagicMock() + return server, reconnect_flag + + +def test_call_tool_handler_reconnects_on_session_expired(monkeypatch, tmp_path): + """Reporter's exact repro: call_tool raises "Invalid or expired + session", handler triggers reconnect, retries once, and returns + the retry's successful JSON (not the generic error).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _make_tool_handler + + server, reconnect_flag = _install_stub_server("wpcom") + mcp_tool._servers["wpcom"] = server + mcp_tool._server_error_counts.pop("wpcom", None) + + # First call raises session-expired; second call (post-reconnect) + # returns a proper MCP tool result. + call_count = {"n": 0} + + async def _call_sequence(*a, **kw): + call_count["n"] += 1 + if call_count["n"] == 1: + raise RuntimeError("Invalid params: Invalid or expired session") + # Second call: mimic the MCP SDK's structured success response. + result = MagicMock() + result.isError = False + result.content = [MagicMock(type="text", text="tool completed")] + result.structuredContent = None + return result + + server.session.call_tool = _call_sequence + + try: + handler = _make_tool_handler("wpcom", "wpcom-mcp-content-authoring", 10.0) + out = handler({"slug": "hello"}) + parsed = json.loads(out) + # Retry succeeded — no error surfaced to caller. 
+ assert "error" not in parsed, ( + f"Expected retry to succeed after reconnect; got: {parsed}" + ) + # _reconnect_event was signalled exactly once. + assert reconnect_flag.is_set(), ( + "Handler did not trigger transport reconnect on session-expired " + "error — the reconnect flow is the whole point of this fix." + ) + # Exactly 2 call attempts (original + one retry). + assert call_count["n"] == 2, ( + f"Expected 1 original + 1 retry = 2 calls; got {call_count['n']}" + ) + finally: + mcp_tool._servers.pop("wpcom", None) + mcp_tool._server_error_counts.pop("wpcom", None) + + +def test_call_tool_handler_non_session_expired_error_falls_through( + monkeypatch, tmp_path +): + """Preserved-behaviour canary: a non-session-expired exception must + NOT trigger reconnect — it must fall through to the generic error + path so the caller sees the real failure.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _make_tool_handler + + server, reconnect_flag = _install_stub_server("srv") + mcp_tool._servers["srv"] = server + mcp_tool._server_error_counts.pop("srv", None) + + async def _raises(*a, **kw): + raise RuntimeError("Tool execution failed — unrelated error") + + server.session.call_tool = _raises + + try: + handler = _make_tool_handler("srv", "mytool", 10.0) + out = handler({"arg": "v"}) + parsed = json.loads(out) + # Generic error path surfaced the failure. + assert "MCP call failed" in parsed.get("error", "") + # Reconnect was NOT triggered for this unrelated failure. + assert not reconnect_flag.is_set(), ( + "Reconnect must not fire for non-session-expired errors — " + "this would cause spurious transport churn on every tool " + "failure." 
+ ) + finally: + mcp_tool._servers.pop("srv", None) + mcp_tool._server_error_counts.pop("srv", None) + + +def test_session_expired_handler_returns_none_without_loop(monkeypatch): + """Defensive: if the MCP loop isn't running (cold start / shutdown + race), the handler must fall through cleanly instead of hanging + or raising.""" + from tools import mcp_tool + from tools.mcp_tool import _handle_session_expired_and_retry + + # Install a server stub but make the event loop unavailable. + server = MagicMock() + server._reconnect_event = MagicMock() + server._ready = MagicMock() + server._ready.is_set = MagicMock(return_value=True) + server.session = MagicMock() + mcp_tool._servers["srv-noloop"] = server + + monkeypatch.setattr(mcp_tool, "_mcp_loop", None) + + try: + out = _handle_session_expired_and_retry( + "srv-noloop", + RuntimeError("Invalid or expired session"), + lambda: '{"ok": true}', + "tools/call", + ) + assert out is None, ( + "Without an event loop, session-expired handler must fall " + "through to caller's generic error path — not hang or raise." 
+ ) + finally: + mcp_tool._servers.pop("srv-noloop", None) + + +def test_session_expired_handler_returns_none_without_server_record(): + """If the server has been torn down / isn't in _servers, fall + through cleanly — nothing to reconnect to.""" + from tools.mcp_tool import _handle_session_expired_and_retry + out = _handle_session_expired_and_retry( + "does-not-exist", + RuntimeError("Invalid or expired session"), + lambda: '{"ok": true}', + "tools/call", + ) + assert out is None + + +def test_session_expired_handler_returns_none_when_retry_also_fails( + monkeypatch, tmp_path +): + """If the retry after reconnect also raises, fall through to the + generic error path (don't loop forever, don't mask the second + failure).""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _handle_session_expired_and_retry + + server, _ = _install_stub_server("srv-retry-fail") + mcp_tool._servers["srv-retry-fail"] = server + + def _retry_raises(): + raise RuntimeError("retry blew up too") + + try: + out = _handle_session_expired_and_retry( + "srv-retry-fail", + RuntimeError("Invalid or expired session"), + _retry_raises, + "tools/call", + ) + assert out is None, ( + "When the retry itself fails, the handler must return None " + "so the caller's generic error path runs — no retry loop." + ) + finally: + mcp_tool._servers.pop("srv-retry-fail", None) + + +# --------------------------------------------------------------------------- +# Parallel coverage for resources/list, resources/read, prompts/list, +# prompts/get — all four handlers share the same exception path. 
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "handler_factory, handler_kwargs, session_method, op_label",
+    [
+        ("_make_list_resources_handler", {"tool_timeout": 10.0}, "list_resources", "list_resources"),
+        ("_make_read_resource_handler", {"tool_timeout": 10.0}, "read_resource", "read_resource"),
+        ("_make_list_prompts_handler", {"tool_timeout": 10.0}, "list_prompts", "list_prompts"),
+        ("_make_get_prompt_handler", {"tool_timeout": 10.0}, "get_prompt", "get_prompt"),
+    ],
+)
+def test_non_tool_handlers_also_reconnect_on_session_expired(
+    monkeypatch, tmp_path, handler_factory, handler_kwargs, session_method, op_label
+):
+    """All four non-``tools/call`` MCP handlers share the recovery
+    pattern and must reconnect the same way on session-expired."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+    from tools import mcp_tool
+
+    server, reconnect_flag = _install_stub_server(f"srv-{op_label}")
+    mcp_tool._servers[f"srv-{op_label}"] = server
+    mcp_tool._server_error_counts.pop(f"srv-{op_label}", None)
+
+    call_count = {"n": 0}
+
+    async def _sequence(*a, **kw):
+        call_count["n"] += 1
+        if call_count["n"] == 1:
+            raise RuntimeError("Invalid or expired session")
+        # Return something with the shapes each handler expects.
+        # Explicitly set primitive attrs — MagicMock's default auto-attr
+        # behaviour surfaces ``MagicMock`` values for optional fields
+        # like ``description``, which break ``json.dumps`` downstream.
+        result = MagicMock()
+        result.resources = []
+        result.prompts = []
+        result.contents = []
+        result.messages = []  # get_prompt
+        result.description = None  # get_prompt optional field
+        return result
+
+    setattr(server.session, session_method, _sequence)
+
+    factory = getattr(mcp_tool, handler_factory)
+    # All four factories take (server_name, tool_timeout) — tool_timeout is
+    # passed via handler_kwargs above.
+ try: + handler = factory(f"srv-{op_label}", **handler_kwargs) + if op_label == "read_resource": + out = handler({"uri": "file://foo"}) + elif op_label == "get_prompt": + out = handler({"name": "p1"}) + else: + out = handler({}) + parsed = json.loads(out) + assert "error" not in parsed, ( + f"{op_label}: expected retry success, got {parsed}" + ) + assert reconnect_flag.is_set(), ( + f"{op_label}: reconnect should fire for session-expired" + ) + assert call_count["n"] == 2, ( + f"{op_label}: expected 1 original + 1 retry" + ) + finally: + mcp_tool._servers.pop(f"srv-{op_label}", None) + mcp_tool._server_error_counts.pop(f"srv-{op_label}", None) diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py index d015b4838..f5e65582a 100644 --- a/tests/tools/test_registry.py +++ b/tests/tools/test_registry.py @@ -292,6 +292,7 @@ class TestBuiltinDiscovery: def test_matches_previous_manual_builtin_tool_set(self): expected = { "tools.browser_cdp_tool", + "tools.browser_dialog_tool", "tools.browser_tool", "tools.clarify_tool", "tools.code_execution_tool", diff --git a/tests/tools/test_schema_sanitizer.py b/tests/tools/test_schema_sanitizer.py new file mode 100644 index 000000000..171651ca7 --- /dev/null +++ b/tests/tools/test_schema_sanitizer.py @@ -0,0 +1,205 @@ +"""Tests for tools/schema_sanitizer.py. + +Targets the known llama.cpp ``json-schema-to-grammar`` failure modes that +cause ``HTTP 400: Unable to generate parser for this template. ... +Unrecognized schema: "object"`` errors on local inference backends. 
+""" + +from __future__ import annotations + +import copy + +from tools.schema_sanitizer import sanitize_tool_schemas + + +def _tool(name: str, parameters: dict) -> dict: + return {"type": "function", "function": {"name": name, "parameters": parameters}} + + +def test_object_without_properties_gets_empty_properties(): + tools = [_tool("t", {"type": "object"})] + out = sanitize_tool_schemas(tools) + assert out[0]["function"]["parameters"] == {"type": "object", "properties": {}} + + +def test_nested_object_without_properties_gets_empty_properties(): + tools = [_tool("t", { + "type": "object", + "properties": { + "name": {"type": "string"}, + "arguments": {"type": "object", "description": "free-form"}, + }, + "required": ["name"], + })] + out = sanitize_tool_schemas(tools) + args = out[0]["function"]["parameters"]["properties"]["arguments"] + assert args["type"] == "object" + assert args["properties"] == {} + assert args["description"] == "free-form" + + +def test_bare_string_object_value_replaced_with_schema_dict(): + # Malformed: a property's schema value is the bare string "object". + # This is the exact shape llama.cpp reports as `Unrecognized schema: "object"`. 
+ tools = [_tool("t", { + "type": "object", + "properties": { + "payload": "object", # <-- invalid, should be {"type": "object"} + }, + })] + out = sanitize_tool_schemas(tools) + payload = out[0]["function"]["parameters"]["properties"]["payload"] + assert isinstance(payload, dict) + assert payload["type"] == "object" + assert payload["properties"] == {} + + +def test_bare_string_primitive_value_replaced_with_schema_dict(): + tools = [_tool("t", { + "type": "object", + "properties": {"name": "string"}, + })] + out = sanitize_tool_schemas(tools) + assert out[0]["function"]["parameters"]["properties"]["name"] == {"type": "string"} + + +def test_nullable_type_array_collapsed_to_single_string(): + tools = [_tool("t", { + "type": "object", + "properties": { + "maybe_name": {"type": ["string", "null"]}, + }, + })] + out = sanitize_tool_schemas(tools) + prop = out[0]["function"]["parameters"]["properties"]["maybe_name"] + assert prop["type"] == "string" + assert prop.get("nullable") is True + + +def test_anyof_nested_objects_sanitized(): + tools = [_tool("t", { + "type": "object", + "properties": { + "opt": { + "anyOf": [ + {"type": "object"}, # bare object + {"type": "string"}, + ], + }, + }, + })] + out = sanitize_tool_schemas(tools) + variants = out[0]["function"]["parameters"]["properties"]["opt"]["anyOf"] + assert variants[0] == {"type": "object", "properties": {}} + assert variants[1] == {"type": "string"} + + +def test_missing_parameters_gets_default_object_schema(): + tools = [{"type": "function", "function": {"name": "t"}}] + out = sanitize_tool_schemas(tools) + assert out[0]["function"]["parameters"] == {"type": "object", "properties": {}} + + +def test_non_dict_parameters_gets_default_object_schema(): + tools = [_tool("t", "object")] # pathological + out = sanitize_tool_schemas(tools) + assert out[0]["function"]["parameters"] == {"type": "object", "properties": {}} + + +def test_required_pruned_to_existing_properties(): + tools = [_tool("t", { + "type": 
"object", + "properties": {"name": {"type": "string"}}, + "required": ["name", "missing_field"], + })] + out = sanitize_tool_schemas(tools) + assert out[0]["function"]["parameters"]["required"] == ["name"] + + +def test_required_all_missing_is_dropped(): + tools = [_tool("t", { + "type": "object", + "properties": {}, + "required": ["x", "y"], + })] + out = sanitize_tool_schemas(tools) + assert "required" not in out[0]["function"]["parameters"] + + +def test_well_formed_schema_unchanged(): + schema = { + "type": "object", + "properties": { + "path": {"type": "string", "description": "File path"}, + "offset": {"type": "integer", "minimum": 1}, + }, + "required": ["path"], + } + tools = [_tool("read_file", copy.deepcopy(schema))] + out = sanitize_tool_schemas(tools) + assert out[0]["function"]["parameters"] == schema + + +def test_additional_properties_bool_preserved(): + tools = [_tool("t", { + "type": "object", + "properties": { + "payload": { + "type": "object", + "properties": {}, + "additionalProperties": True, + }, + }, + })] + out = sanitize_tool_schemas(tools) + payload = out[0]["function"]["parameters"]["properties"]["payload"] + assert payload["additionalProperties"] is True + + +def test_additional_properties_schema_sanitized(): + tools = [_tool("t", { + "type": "object", + "properties": { + "dict_field": { + "type": "object", + "additionalProperties": {"type": "object"}, # bare object schema + }, + }, + })] + out = sanitize_tool_schemas(tools) + field = out[0]["function"]["parameters"]["properties"]["dict_field"] + assert field["additionalProperties"] == {"type": "object", "properties": {}} + + +def test_deepcopy_does_not_mutate_input(): + original = { + "type": "object", + "properties": {"x": {"type": "object"}}, + } + tools = [_tool("t", original)] + _ = sanitize_tool_schemas(tools) + # Original should still lack properties on the nested object + assert "properties" not in original["properties"]["x"] + + +def test_items_sanitized_in_array_schema(): + 
tools = [_tool("t", { + "type": "object", + "properties": { + "bag": { + "type": "array", + "items": {"type": "object"}, # bare object items + }, + }, + })] + out = sanitize_tool_schemas(tools) + items = out[0]["function"]["parameters"]["properties"]["bag"]["items"] + assert items == {"type": "object", "properties": {}} + + +def test_empty_tools_list_returns_empty(): + assert sanitize_tool_schemas([]) == [] + + +def test_none_tools_returns_none(): + assert sanitize_tool_schemas(None) is None diff --git a/tests/tools/test_skills_tool.py b/tests/tools/test_skills_tool.py index 3cdfa98a9..79470710b 100644 --- a/tests/tools/test_skills_tool.py +++ b/tests/tools/test_skills_tool.py @@ -347,6 +347,70 @@ class TestSkillView: assert result["name"] == "my-skill" assert "Step 1" in result["content"] + def test_skill_view_applies_template_vars(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_preprocessing.load_skills_config", + return_value={"template_vars": True, "inline_shell": False}, + ), + ): + skill_dir = _make_skill( + tmp_path, + "templated", + body="Run ${HERMES_SKILL_DIR}/scripts/do.sh in ${HERMES_SESSION_ID}", + ) + raw = skill_view("templated", task_id="session-123") + + result = json.loads(raw) + assert result["success"] is True + assert f"Run {skill_dir}/scripts/do.sh in session-123" in result["content"] + assert "${HERMES_SKILL_DIR}" not in result["content"] + + def test_skill_view_applies_inline_shell_when_enabled(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_preprocessing.load_skills_config", + return_value={ + "template_vars": True, + "inline_shell": True, + "inline_shell_timeout": 5, + }, + ), + ): + _make_skill( + tmp_path, + "dynamic", + body="Current date: !`printf 2026-04-24`", + ) + raw = skill_view("dynamic") + + result = json.loads(raw) + assert result["success"] is True + assert "Current date: 2026-04-24" in result["content"] + assert "!`printf 
2026-04-24`" not in result["content"] + + def test_skill_view_leaves_inline_shell_literal_when_disabled(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_preprocessing.load_skills_config", + return_value={"template_vars": True, "inline_shell": False}, + ), + ): + _make_skill( + tmp_path, + "static", + body="Current date: !`printf SHOULD_NOT_RUN`", + ) + raw = skill_view("static") + + result = json.loads(raw) + assert result["success"] is True + assert "Current date: !`printf SHOULD_NOT_RUN`" in result["content"] + assert "Current date: SHOULD_NOT_RUN" not in result["content"] + def test_view_nonexistent_skill(self, tmp_path): with patch("tools.skills_tool.SKILLS_DIR", tmp_path): _make_skill(tmp_path, "other-skill") diff --git a/tests/tools/test_spotify_client.py b/tests/tools/test_spotify_client.py new file mode 100644 index 000000000..d22bc4480 --- /dev/null +++ b/tests/tools/test_spotify_client.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +import json + +import pytest + +from plugins.spotify import client as spotify_mod +from plugins.spotify import tools as spotify_tool + + +class _FakeResponse: + def __init__(self, status_code: int, payload: dict | None = None, *, text: str = "", headers: dict | None = None): + self.status_code = status_code + self._payload = payload + self.text = text or (json.dumps(payload) if payload is not None else "") + self.headers = headers or {"content-type": "application/json"} + self.content = self.text.encode("utf-8") if self.text else b"" + + def json(self): + if self._payload is None: + raise ValueError("no json") + return self._payload + + +class _StubSpotifyClient: + def __init__(self, payload): + self.payload = payload + + def get_currently_playing(self, *, market=None): + return self.payload + + +def test_spotify_client_retries_once_after_401(monkeypatch: pytest.MonkeyPatch) -> None: + calls: list[str] = [] + tokens = iter([ + { + "access_token": "token-1", + 
"base_url": "https://api.spotify.com/v1", + }, + { + "access_token": "token-2", + "base_url": "https://api.spotify.com/v1", + }, + ]) + + monkeypatch.setattr( + spotify_mod, + "resolve_spotify_runtime_credentials", + lambda **kwargs: next(tokens), + ) + + def fake_request(method, url, headers=None, params=None, json=None, timeout=None): + calls.append(headers["Authorization"]) + if len(calls) == 1: + return _FakeResponse(401, {"error": {"message": "expired token"}}) + return _FakeResponse(200, {"devices": [{"id": "dev-1"}]}) + + monkeypatch.setattr(spotify_mod.httpx, "request", fake_request) + + client = spotify_mod.SpotifyClient() + payload = client.get_devices() + + assert payload["devices"][0]["id"] == "dev-1" + assert calls == ["Bearer token-1", "Bearer token-2"] + + +def test_normalize_spotify_uri_accepts_urls() -> None: + uri = spotify_mod.normalize_spotify_uri( + "https://open.spotify.com/track/7ouMYWpwJ422jRcDASZB7P", + "track", + ) + assert uri == "spotify:track:7ouMYWpwJ422jRcDASZB7P" + + +@pytest.mark.parametrize( + ("status_code", "path", "payload", "expected"), + [ + ( + 403, + "/me/player/play", + {"error": {"message": "Premium required"}}, + "Spotify rejected this playback request. Playback control usually requires a Spotify Premium account and an active Spotify Connect device.", + ), + ( + 404, + "/me/player", + {"error": {"message": "Device not found"}}, + "Spotify could not find an active playback device or player session for this request.", + ), + ( + 429, + "/search", + {"error": {"message": "rate limit"}}, + "Spotify rate limit exceeded. 
Retry after 7 seconds.", + ), + ], +) +def test_spotify_client_formats_friendly_api_errors( + monkeypatch: pytest.MonkeyPatch, + status_code: int, + path: str, + payload: dict, + expected: str, +) -> None: + monkeypatch.setattr( + spotify_mod, + "resolve_spotify_runtime_credentials", + lambda **kwargs: { + "access_token": "token-1", + "base_url": "https://api.spotify.com/v1", + }, + ) + + def fake_request(method, url, headers=None, params=None, json=None, timeout=None): + return _FakeResponse(status_code, payload, headers={"content-type": "application/json", "Retry-After": "7"}) + + monkeypatch.setattr(spotify_mod.httpx, "request", fake_request) + + client = spotify_mod.SpotifyClient() + with pytest.raises(spotify_mod.SpotifyAPIError) as exc: + client.request("GET", path) + + assert str(exc.value) == expected + + +def test_get_currently_playing_returns_explanatory_empty_payload(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + spotify_mod, + "resolve_spotify_runtime_credentials", + lambda **kwargs: { + "access_token": "token-1", + "base_url": "https://api.spotify.com/v1", + }, + ) + + def fake_request(method, url, headers=None, params=None, json=None, timeout=None): + return _FakeResponse(204, None, text="", headers={"content-type": "application/json"}) + + monkeypatch.setattr(spotify_mod.httpx, "request", fake_request) + + client = spotify_mod.SpotifyClient() + payload = client.get_currently_playing() + + assert payload == { + "status_code": 204, + "empty": True, + "message": "Spotify is not currently playing anything. Start playback in Spotify and try again.", + } + + +def test_spotify_playback_get_currently_playing_returns_explanatory_empty_result(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + spotify_tool, + "_spotify_client", + lambda: _StubSpotifyClient({ + "status_code": 204, + "empty": True, + "message": "Spotify is not currently playing anything. 
Start playback in Spotify and try again.", + }), + ) + + payload = json.loads(spotify_tool._handle_spotify_playback({"action": "get_currently_playing"})) + + assert payload == { + "success": True, + "action": "get_currently_playing", + "is_playing": False, + "status_code": 204, + "message": "Spotify is not currently playing anything. Start playback in Spotify and try again.", + } + + +def test_library_contains_uses_generic_library_endpoint(monkeypatch: pytest.MonkeyPatch) -> None: + seen: list[tuple[str, str, dict | None]] = [] + + monkeypatch.setattr( + spotify_mod, + "resolve_spotify_runtime_credentials", + lambda **kwargs: { + "access_token": "token-1", + "base_url": "https://api.spotify.com/v1", + }, + ) + + def fake_request(method, url, headers=None, params=None, json=None, timeout=None): + seen.append((method, url, params)) + return _FakeResponse(200, [True]) + + monkeypatch.setattr(spotify_mod.httpx, "request", fake_request) + + client = spotify_mod.SpotifyClient() + payload = client.library_contains(uris=["spotify:album:abc", "spotify:track:def"]) + + assert payload == [True] + assert seen == [ + ( + "GET", + "https://api.spotify.com/v1/me/library/contains", + {"uris": "spotify:album:abc,spotify:track:def"}, + ) + ] + + +@pytest.mark.parametrize( + ("method_name", "item_key", "item_value", "expected_uris"), + [ + ("remove_saved_tracks", "track_ids", ["track-a", "track-b"], ["spotify:track:track-a", "spotify:track:track-b"]), + ("remove_saved_albums", "album_ids", ["album-a"], ["spotify:album:album-a"]), + ], +) +def test_library_remove_uses_generic_library_endpoint( + monkeypatch: pytest.MonkeyPatch, + method_name: str, + item_key: str, + item_value: list[str], + expected_uris: list[str], +) -> None: + seen: list[tuple[str, str, dict | None]] = [] + + monkeypatch.setattr( + spotify_mod, + "resolve_spotify_runtime_credentials", + lambda **kwargs: { + "access_token": "token-1", + "base_url": "https://api.spotify.com/v1", + }, + ) + + def fake_request(method, 
url, headers=None, params=None, json=None, timeout=None): + seen.append((method, url, params)) + return _FakeResponse(200, {}) + + monkeypatch.setattr(spotify_mod.httpx, "request", fake_request) + + client = spotify_mod.SpotifyClient() + getattr(client, method_name)(**{item_key: item_value}) + + assert seen == [ + ( + "DELETE", + "https://api.spotify.com/v1/me/library", + {"uris": ",".join(expected_uris)}, + ) + ] + + + +def test_spotify_library_tracks_list_routes_to_saved_tracks(monkeypatch: pytest.MonkeyPatch) -> None: + seen: list[str] = [] + + class _LibStub: + def get_saved_tracks(self, **kw): + seen.append("tracks") + return {"items": [], "total": 0} + + def get_saved_albums(self, **kw): + seen.append("albums") + return {"items": [], "total": 0} + + monkeypatch.setattr(spotify_tool, "_spotify_client", lambda: _LibStub()) + json.loads(spotify_tool._handle_spotify_library({"kind": "tracks", "action": "list"})) + assert seen == ["tracks"] + + +def test_spotify_library_albums_list_routes_to_saved_albums(monkeypatch: pytest.MonkeyPatch) -> None: + seen: list[str] = [] + + class _LibStub: + def get_saved_tracks(self, **kw): + seen.append("tracks") + return {"items": [], "total": 0} + + def get_saved_albums(self, **kw): + seen.append("albums") + return {"items": [], "total": 0} + + monkeypatch.setattr(spotify_tool, "_spotify_client", lambda: _LibStub()) + json.loads(spotify_tool._handle_spotify_library({"kind": "albums", "action": "list"})) + assert seen == ["albums"] + + +def test_spotify_library_rejects_missing_kind() -> None: + payload = json.loads(spotify_tool._handle_spotify_library({"action": "list"})) + assert "kind" in (payload.get("error") or "").lower() + + +def test_spotify_playback_recently_played_action(monkeypatch: pytest.MonkeyPatch) -> None: + """recently_played is now an action on spotify_playback (folded from spotify_activity).""" + seen: list[dict] = [] + + class _RecentStub: + def get_recently_played(self, **kw): + seen.append(kw) + return 
{"items": [{"track": {"name": "x"}}]} + + monkeypatch.setattr(spotify_tool, "_spotify_client", lambda: _RecentStub()) + payload = json.loads(spotify_tool._handle_spotify_playback({"action": "recently_played", "limit": 5})) + assert seen and seen[0]["limit"] == 5 + assert isinstance(payload, dict) diff --git a/tests/tools/test_tool_output_limits.py b/tests/tools/test_tool_output_limits.py new file mode 100644 index 000000000..19fa3fc05 --- /dev/null +++ b/tests/tools/test_tool_output_limits.py @@ -0,0 +1,152 @@ +"""Tests for tools.tool_output_limits. + +Covers: +1. Default values when no config is provided. +2. Config override picks up user-supplied max_bytes / max_lines / + max_line_length. +3. Malformed values (None, negative, wrong type) fall back to defaults + rather than raising. +4. Integration: the helpers return what the terminal_tool and + file_operations call paths will actually consume. + +Port-tracking: anomalyco/opencode PR #23770 +(feat(truncate): allow configuring tool output truncation limits). +""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from tools import tool_output_limits as tol + + +class TestDefaults: + def test_defaults_match_previous_hardcoded_values(self): + assert tol.DEFAULT_MAX_BYTES == 50_000 + assert tol.DEFAULT_MAX_LINES == 2000 + assert tol.DEFAULT_MAX_LINE_LENGTH == 2000 + + def test_get_limits_returns_defaults_when_config_missing(self): + with patch("hermes_cli.config.load_config", return_value={}): + limits = tol.get_tool_output_limits() + assert limits == { + "max_bytes": tol.DEFAULT_MAX_BYTES, + "max_lines": tol.DEFAULT_MAX_LINES, + "max_line_length": tol.DEFAULT_MAX_LINE_LENGTH, + } + + def test_get_limits_returns_defaults_when_config_not_a_dict(self): + # load_config should always return a dict but be defensive anyway. 
+ with patch("hermes_cli.config.load_config", return_value="not a dict"): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES + + def test_get_limits_returns_defaults_when_load_config_raises(self): + def _boom(): + raise RuntimeError("boom") + + with patch("hermes_cli.config.load_config", side_effect=_boom): + limits = tol.get_tool_output_limits() + assert limits["max_lines"] == tol.DEFAULT_MAX_LINES + + +class TestOverrides: + def test_user_config_overrides_all_three(self): + cfg = { + "tool_output": { + "max_bytes": 100_000, + "max_lines": 5000, + "max_line_length": 4096, + } + } + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits == { + "max_bytes": 100_000, + "max_lines": 5000, + "max_line_length": 4096, + } + + def test_partial_override_preserves_other_defaults(self): + cfg = {"tool_output": {"max_bytes": 200_000}} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == 200_000 + assert limits["max_lines"] == tol.DEFAULT_MAX_LINES + assert limits["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH + + def test_section_not_a_dict_falls_back(self): + cfg = {"tool_output": "nonsense"} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES + + +class TestCoercion: + @pytest.mark.parametrize("bad", [None, "not a number", -1, 0, [], {}]) + def test_invalid_values_fall_back_to_defaults(self, bad): + cfg = {"tool_output": {"max_bytes": bad, "max_lines": bad, "max_line_length": bad}} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES + assert limits["max_lines"] == tol.DEFAULT_MAX_LINES + assert limits["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH + + def 
test_string_integer_is_coerced(self): + cfg = {"tool_output": {"max_bytes": "75000"}} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == 75_000 + + +class TestShortcuts: + def test_individual_accessors_delegate_to_get_tool_output_limits(self): + cfg = { + "tool_output": { + "max_bytes": 111, + "max_lines": 222, + "max_line_length": 333, + } + } + with patch("hermes_cli.config.load_config", return_value=cfg): + assert tol.get_max_bytes() == 111 + assert tol.get_max_lines() == 222 + assert tol.get_max_line_length() == 333 + + +class TestDefaultConfigHasSection: + """The DEFAULT_CONFIG in hermes_cli.config must expose tool_output so + that ``hermes setup`` and default installs stay in sync with the + helpers here.""" + + def test_default_config_contains_tool_output_section(self): + from hermes_cli.config import DEFAULT_CONFIG + assert "tool_output" in DEFAULT_CONFIG + section = DEFAULT_CONFIG["tool_output"] + assert isinstance(section, dict) + assert section["max_bytes"] == tol.DEFAULT_MAX_BYTES + assert section["max_lines"] == tol.DEFAULT_MAX_LINES + assert section["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH + + +class TestIntegrationReadPagination: + """normalize_read_pagination uses get_max_lines() — verify the plumbing.""" + + def test_pagination_limit_clamped_by_config_value(self): + from tools.file_operations import normalize_read_pagination + cfg = {"tool_output": {"max_lines": 50}} + with patch("hermes_cli.config.load_config", return_value=cfg): + offset, limit = normalize_read_pagination(offset=1, limit=1000) + # limit should have been clamped to 50 (the configured max_lines) + assert limit == 50 + assert offset == 1 + + def test_pagination_default_when_config_missing(self): + from tools.file_operations import normalize_read_pagination + with patch("hermes_cli.config.load_config", return_value={}): + offset, limit = normalize_read_pagination(offset=10, limit=100000) + 
# Clamped to default MAX_LINES (2000). + assert limit == tol.DEFAULT_MAX_LINES + assert offset == 10 diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index 9e753af53..50cbe22a6 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -505,6 +505,101 @@ class TestTranscribeLocalExtended: assert result["success"] is True assert result["transcript"] == "Hello world" + def test_load_time_cuda_lib_failure_falls_back_to_cpu(self, tmp_path): + """Missing libcublas at load time → reload on CPU, succeed.""" + audio = tmp_path / "test.ogg" + audio.write_bytes(b"fake") + + seg = MagicMock() + seg.text = "hi" + info = MagicMock() + info.language = "en" + info.duration = 1.0 + + cpu_model = MagicMock() + cpu_model.transcribe.return_value = ([seg], info) + + call_args = [] + + def fake_whisper(model_name, device, compute_type): + call_args.append((device, compute_type)) + if device == "auto": + raise RuntimeError("Library libcublas.so.12 is not found or cannot be loaded") + return cpu_model + + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ + patch("faster_whisper.WhisperModel", side_effect=fake_whisper), \ + patch("tools.transcription_tools._local_model", None), \ + patch("tools.transcription_tools._local_model_name", None): + from tools.transcription_tools import _transcribe_local + result = _transcribe_local(str(audio), "base") + + assert result["success"] is True + assert result["transcript"] == "hi" + assert call_args == [("auto", "auto"), ("cpu", "int8")] + + def test_runtime_cuda_lib_failure_evicts_cache_and_retries_on_cpu(self, tmp_path): + """libcublas dlopen fails at transcribe() → evict cache, reload CPU, retry.""" + audio = tmp_path / "test.ogg" + audio.write_bytes(b"fake") + + seg = MagicMock() + seg.text = "recovered" + info = MagicMock() + info.language = "en" + info.duration = 1.0 + + # First model loads fine (auto), but transcribe() blows up on dlopen 
+ gpu_model = MagicMock() + gpu_model.transcribe.side_effect = RuntimeError( + "Library libcublas.so.12 is not found or cannot be loaded" + ) + # Second model (forced CPU) works + cpu_model = MagicMock() + cpu_model.transcribe.return_value = ([seg], info) + + models = [gpu_model, cpu_model] + call_args = [] + + def fake_whisper(model_name, device, compute_type): + call_args.append((device, compute_type)) + return models.pop(0) + + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ + patch("faster_whisper.WhisperModel", side_effect=fake_whisper), \ + patch("tools.transcription_tools._local_model", None), \ + patch("tools.transcription_tools._local_model_name", None): + from tools.transcription_tools import _transcribe_local + result = _transcribe_local(str(audio), "base") + + assert result["success"] is True + assert result["transcript"] == "recovered" + # First load is auto, retry forces CPU. + assert call_args == [("auto", "auto"), ("cpu", "int8")] + # Cached-bad-model eviction: the broken GPU model was called once, + # then discarded; the CPU model served the retry. + assert gpu_model.transcribe.call_count == 1 + assert cpu_model.transcribe.call_count == 1 + + def test_cuda_out_of_memory_does_not_trigger_cpu_fallback(self, tmp_path): + """'CUDA out of memory' is a real error, not a missing lib — surface it.""" + audio = tmp_path / "test.ogg" + audio.write_bytes(b"fake") + + mock_whisper_cls = MagicMock(side_effect=RuntimeError("CUDA out of memory")) + + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", True), \ + patch("faster_whisper.WhisperModel", mock_whisper_cls), \ + patch("tools.transcription_tools._local_model", None), \ + patch("tools.transcription_tools._local_model_name", None): + from tools.transcription_tools import _transcribe_local + result = _transcribe_local(str(audio), "base") + + # Single call — no CPU retry, because OOM isn't a missing-lib symptom. 
+ assert mock_whisper_cls.call_count == 1 + assert result["success"] is False + assert "CUDA out of memory" in result["error"] + # ============================================================================ # Model auto-correction diff --git a/tests/tools/test_write_deny.py b/tests/tools/test_write_deny.py index a525c3527..7d2645253 100644 --- a/tests/tools/test_write_deny.py +++ b/tests/tools/test_write_deny.py @@ -33,7 +33,12 @@ class TestWriteDenyExactPaths: assert _is_write_denied(path) is True def test_hermes_env(self): - path = os.path.join(str(Path.home()), ".hermes", ".env") + # ``.env`` under the active HERMES_HOME (profile-aware, not just + # ``~/.hermes``) must be write-denied. The hermetic test conftest + # points HERMES_HOME at a tempdir — resolve via get_hermes_home() + # to match the denylist. + from hermes_constants import get_hermes_home + path = str(get_hermes_home() / ".env") assert _is_write_denied(path) is True def test_shell_profiles(self): diff --git a/tests/tools/test_zombie_process_cleanup.py b/tests/tools/test_zombie_process_cleanup.py index 999bc3fe7..646b186fe 100644 --- a/tests/tools/test_zombie_process_cleanup.py +++ b/tests/tools/test_zombie_process_cleanup.py @@ -110,8 +110,8 @@ class TestAgentCloseMethod: agent.client = None with patch("tools.process_registry.process_registry") as mock_registry, \ - patch("tools.terminal_tool.cleanup_vm") as mock_cleanup_vm, \ - patch("tools.browser_tool.cleanup_browser") as mock_cleanup_browser: + patch("run_agent.cleanup_vm") as mock_cleanup_vm, \ + patch("run_agent.cleanup_browser") as mock_cleanup_browser: agent.close() mock_registry.kill_all.assert_called_once_with( @@ -172,9 +172,9 @@ class TestAgentCloseMethod: with patch( "tools.process_registry.process_registry" ) as mock_reg, patch( - "tools.terminal_tool.cleanup_vm" + "run_agent.cleanup_vm" ) as mock_vm, patch( - "tools.browser_tool.cleanup_browser" + "run_agent.cleanup_browser" ) as mock_browser: mock_reg.kill_all.side_effect = 
RuntimeError("boom") diff --git a/tests/tui_gateway/test_make_agent_provider.py b/tests/tui_gateway/test_make_agent_provider.py index bdc7fecf4..483b533df 100644 --- a/tests/tui_gateway/test_make_agent_provider.py +++ b/tests/tui_gateway/test_make_agent_provider.py @@ -27,16 +27,22 @@ def test_make_agent_passes_resolved_provider(): "agent": {"system_prompt": "test"}, } - with patch("tui_gateway.server._load_cfg", return_value=fake_cfg), \ - patch("tui_gateway.server._get_db", return_value=MagicMock()), \ - patch("tui_gateway.server._load_tool_progress_mode", return_value="compact"), \ - patch("tui_gateway.server._load_reasoning_config", return_value=None), \ - patch("tui_gateway.server._load_service_tier", return_value=None), \ - patch("tui_gateway.server._load_enabled_toolsets", return_value=None), \ - patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=fake_runtime) as mock_resolve, \ - patch("run_agent.AIAgent") as mock_agent: + with ( + patch("tui_gateway.server._load_cfg", return_value=fake_cfg), + patch("tui_gateway.server._get_db", return_value=MagicMock()), + patch("tui_gateway.server._load_tool_progress_mode", return_value="compact"), + patch("tui_gateway.server._load_reasoning_config", return_value=None), + patch("tui_gateway.server._load_service_tier", return_value=None), + patch("tui_gateway.server._load_enabled_toolsets", return_value=None), + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value=fake_runtime, + ) as mock_resolve, + patch("run_agent.AIAgent") as mock_agent, + ): from tui_gateway.server import _make_agent + _make_agent("sid-1", "key-1") mock_resolve.assert_called_once_with(requested=None) @@ -46,3 +52,136 @@ def test_make_agent_passes_resolved_provider(): assert call_kwargs.kwargs["base_url"] == "https://api.anthropic.com" assert call_kwargs.kwargs["api_key"] == "sk-test-key" assert call_kwargs.kwargs["api_mode"] == "anthropic_messages" + + +def 
test_make_agent_ignores_display_personality_without_system_prompt(): + """The TUI matches the classic CLI: personality only becomes active once + it has been saved to agent.system_prompt.""" + + fake_runtime = { + "provider": "openrouter", + "base_url": "https://api.synthetic.new/v1", + "api_key": "sk-test", + "api_mode": "chat_completions", + "command": None, + "args": None, + "credential_pool": None, + } + fake_cfg = { + "agent": { + "system_prompt": "", + "personalities": {"kawaii": "sparkle system prompt"}, + }, + "display": {"personality": "kawaii"}, + "model": {"default": "glm-5"}, + } + + with ( + patch("tui_gateway.server._load_cfg", return_value=fake_cfg), + patch("tui_gateway.server._get_db", return_value=MagicMock()), + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value=fake_runtime, + ), + patch("run_agent.AIAgent") as mock_agent, + ): + from tui_gateway.server import _make_agent + + _make_agent("sid-default-personality", "key-default-personality") + + assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None + + +def test_probe_config_health_flags_null_sections(): + """Bare YAML keys (`agent:` with no value) parse as None and silently + drop nested settings; probe must surface them so users can fix.""" + from tui_gateway.server import _probe_config_health + + assert _probe_config_health({"agent": {"x": 1}}) == "" + assert _probe_config_health({}) == "" + + msg = _probe_config_health({"agent": None, "display": None, "model": {}}) + assert "agent" in msg and "display" in msg + assert "model" not in msg + + +def test_probe_config_health_flags_null_personalities_with_active_personality(): + from tui_gateway.server import _probe_config_health + + msg = _probe_config_health( + { + "agent": {"personalities": None}, + "display": {"personality": "kawaii"}, + "model": {}, + } + ) + assert "display.personality" in msg + assert "agent.personalities" in msg + + +def test_make_agent_tolerates_null_config_sections(): + """Bare 
`agent:` / `display:` keys in ~/.hermes/config.yaml parse as + None. cfg.get("agent", {}) returns None (default only fires on missing + key), so downstream .get() chains must be guarded. Reported via Twitter + against the new TUI.""" + + fake_runtime = { + "provider": "openrouter", + "base_url": "https://api.synthetic.new/v1", + "api_key": "sk-test", + "api_mode": "chat_completions", + "command": None, + "args": None, + "credential_pool": None, + } + null_cfg = {"agent": None, "display": None, "model": {"default": "glm-5"}} + + with ( + patch("tui_gateway.server._load_cfg", return_value=null_cfg), + patch("tui_gateway.server._get_db", return_value=MagicMock()), + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value=fake_runtime, + ), + patch("run_agent.AIAgent") as mock_agent, + ): + + from tui_gateway.server import _make_agent + + _make_agent("sid-null", "key-null") + + assert mock_agent.called + + +def test_make_agent_tolerates_null_personalities_with_active_personality(): + fake_runtime = { + "provider": "openrouter", + "base_url": "https://api.synthetic.new/v1", + "api_key": "sk-test", + "api_mode": "chat_completions", + "command": None, + "args": None, + "credential_pool": None, + } + cfg = { + "agent": {"personalities": None}, + "display": {"personality": "kawaii"}, + "model": {"default": "glm-5"}, + } + + with ( + patch("tui_gateway.server._load_cfg", return_value=cfg), + patch("tui_gateway.server._get_db", return_value=MagicMock()), + patch("cli.load_cli_config", return_value={"agent": {"personalities": None}}), + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value=fake_runtime, + ), + patch("run_agent.AIAgent") as mock_agent, + ): + from tui_gateway.server import _make_agent + + _make_agent("sid-null-personality", "key-null-personality") + + assert mock_agent.called + assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py 
index 9b13b2bb6..f9099cbc8 100644 --- a/tools/browser_cdp_tool.py +++ b/tools/browser_cdp_tool.py @@ -188,10 +188,116 @@ async def _cdp_call( # --------------------------------------------------------------------------- +def _browser_cdp_via_supervisor( + task_id: str, + frame_id: str, + method: str, + params: Optional[Dict[str, Any]], + timeout: float, +) -> str: + """Route a CDP call through the live supervisor session for an OOPIF frame. + + Looks up the frame in the supervisor's snapshot, extracts its child + ``cdp_session_id``, and dispatches ``method`` with that sessionId via + the supervisor's already-connected WebSocket (using + ``asyncio.run_coroutine_threadsafe`` onto the supervisor loop). + """ + try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found] + except Exception as exc: # pragma: no cover — defensive + return tool_error( + f"CDP supervisor is not available: {exc}. frame_id routing requires " + f"a running supervisor attached via /browser connect or an active " + f"Browserbase session." + ) + + supervisor = SUPERVISOR_REGISTRY.get(task_id) + if supervisor is None: + return tool_error( + f"No CDP supervisor is attached for task={task_id!r}. Call " + f"browser_navigate or /browser connect first so the supervisor " + f"can attach. Once attached, browser_snapshot will populate " + f"frame_tree with frame_ids you can pass here." + ) + + snap = supervisor.snapshot() + # Search both the top frame and the children for the requested id. 
+ top = snap.frame_tree.get("top") + frame_info: Optional[Dict[str, Any]] = None + if top and top.get("frame_id") == frame_id: + frame_info = top + else: + for child in snap.frame_tree.get("children", []) or []: + if child.get("frame_id") == frame_id: + frame_info = child + break + if frame_info is None: + # Check the raw frames dict too (frame_tree is capped at 30 entries) + with supervisor._state_lock: # type: ignore[attr-defined] + raw = supervisor._frames.get(frame_id) # type: ignore[attr-defined] + if raw is not None: + frame_info = raw.to_dict() + + if frame_info is None: + return tool_error( + f"frame_id {frame_id!r} not found in supervisor state. " + f"Call browser_snapshot to see current frame_tree." + ) + + child_sid = frame_info.get("session_id") + if not child_sid: + # Not an OOPIF — fall back to top-level session (evaluating at page + # scope). Same-origin iframes don't get their own sessionId; the + # agent can still use contentWindow/contentDocument from the parent. + return tool_error( + f"frame_id {frame_id!r} is not an out-of-process iframe (no " + f"dedicated CDP session). For same-origin iframes, use " + f"`browser_cdp(method='Runtime.evaluate', params={{'expression': " + f"\"document.querySelector('iframe').contentDocument.title\"}})` " + f"at the top-level page instead." + ) + + # Dispatch onto the supervisor's loop. + import asyncio as _asyncio + loop = supervisor._loop # type: ignore[attr-defined] + if loop is None or not loop.is_running(): + return tool_error( + "CDP supervisor loop is not running. Try reconnecting with " + "/browser connect." 
+ ) + + async def _do_cdp(): + return await supervisor._cdp( # type: ignore[attr-defined] + method, + params or {}, + session_id=child_sid, + timeout=timeout, + ) + + try: + fut = _asyncio.run_coroutine_threadsafe(_do_cdp(), loop) + result_msg = fut.result(timeout=timeout + 2) + except Exception as exc: + return tool_error( + f"CDP call via supervisor failed: {type(exc).__name__}: {exc}", + cdp_docs=CDP_DOCS_URL, + ) + + payload: Dict[str, Any] = { + "success": True, + "method": method, + "frame_id": frame_id, + "session_id": child_sid, + "result": result_msg.get("result", {}), + } + return json.dumps(payload, ensure_ascii=False) + + def browser_cdp( method: str, params: Optional[Dict[str, Any]] = None, target_id: Optional[str] = None, + frame_id: Optional[str] = None, timeout: float = 30.0, task_id: Optional[str] = None, ) -> str: @@ -202,16 +308,34 @@ def browser_cdp( params: Method-specific parameters; defaults to ``{}``. target_id: Optional target/tab ID for page-level methods. When set, we first attach to the target (``flatten=True``) and send - ``method`` with the resulting ``sessionId``. + ``method`` with the resulting ``sessionId``. Uses a fresh + stateless CDP connection. + frame_id: Optional cross-origin (OOPIF) iframe ``frame_id`` from + ``browser_snapshot.frame_tree.children[]``. When set (and the + frame is an OOPIF with a live session tracked by the CDP + supervisor), routes the call through the supervisor's existing + WebSocket — which is how you Runtime.evaluate *inside* an + iframe on backends where per-call fresh CDP connections would + hit signed-URL expiry (Browserbase) or expensive reattach. timeout: Seconds to wait for the call to complete. - task_id: Unused (tool is stateless) — accepted for uniformity with - other browser tools. + task_id: Task identifier for supervisor lookup. When ``frame_id`` + is set, this identifies which task's supervisor to use; the + handler will default to ``"default"`` otherwise. 
Returns: JSON string ``{"success": True, "method": ..., "result": {...}}`` on success, or ``{"error": "..."}`` on failure. """ - del task_id # unused — stateless + # --- Route iframe-scoped calls through the supervisor --------------- + if frame_id: + return _browser_cdp_via_supervisor( + task_id=task_id or "default", + frame_id=frame_id, + method=method, + params=params, + timeout=timeout, + ) + del task_id # stateless path below if not method or not isinstance(method, str): return tool_error( @@ -324,12 +448,18 @@ BROWSER_CDP_SCHEMA: Dict[str, Any] = { "'mobile': false}, target_id=\n\n" "**Usage rules:**\n" "- Browser-level methods (Target.*, Browser.*, Storage.*): omit " - "target_id.\n" + "target_id and frame_id.\n" "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, " "Network.* scoped to a tab): pass target_id from Target.getTargets.\n" - "- Each call is independent — sessions and event subscriptions do " - "not persist between calls. For stateful workflows, prefer the " - "dedicated browser tools." + "- **Cross-origin iframe scope** (Runtime.evaluate inside an OOPIF, " + "Page.* targeting a frame target, etc.): pass frame_id from the " + "browser_snapshot frame_tree output. This routes through the CDP " + "supervisor's live connection — the only reliable way on " + "Browserbase where stateless CDP calls hit signed-URL expiry.\n" + "- Each stateless call (without frame_id) is independent — sessions " + "and event subscriptions do not persist between calls. For stateful " + "workflows, prefer the dedicated browser tools or use frame_id " + "routing." ), "parameters": { "type": "object", @@ -347,14 +477,31 @@ BROWSER_CDP_SCHEMA: Dict[str, Any] = { "Method-specific parameters as a JSON object. Omit or " "pass {} for methods that take no parameters." ), + "properties": {}, "additionalProperties": True, }, "target_id": { "type": "string", "description": ( "Optional. Target/tab ID from Target.getTargets result " - "(each entry's 'targetId'). 
Required for page-level " - "methods; must be omitted for browser-level methods." + "(each entry's 'targetId'). Use for page-level methods " + "at the top-level tab scope. Mutually exclusive with " + "frame_id." + ), + }, + "frame_id": { + "type": "string", + "description": ( + "Optional. Out-of-process iframe (OOPIF) frame_id from " + "browser_snapshot.frame_tree.children[] where " + "is_oopif=true. When set, routes the call through the " + "CDP supervisor's live session for that iframe. " + "Essential for Runtime.evaluate inside cross-origin " + "iframes, especially on Browserbase where fresh " + "per-call CDP connections can't keep up with signed " + "URL rotation. For same-origin iframes, use parent " + "contentWindow/contentDocument from Runtime.evaluate " + "at the top-level page instead." ), }, "timeout": { @@ -408,6 +555,7 @@ registry.register( method=args.get("method", ""), params=args.get("params"), target_id=args.get("target_id"), + frame_id=args.get("frame_id"), timeout=args.get("timeout", 30.0), task_id=kw.get("task_id"), ), diff --git a/tools/browser_dialog_tool.py b/tools/browser_dialog_tool.py new file mode 100644 index 000000000..51ab0c424 --- /dev/null +++ b/tools/browser_dialog_tool.py @@ -0,0 +1,148 @@ +"""Agent-facing tool: respond to a native JS dialog captured by the CDP supervisor. + +This tool is response-only — the agent first reads ``pending_dialogs`` from +``browser_snapshot`` output, then calls ``browser_dialog(action=...)`` to +accept or dismiss. + +Gated on the same ``_browser_cdp_check`` as ``browser_cdp`` so it only +appears when a CDP endpoint is reachable (Browserbase with a +``connectUrl``, local Chrome via ``/browser connect``, or +``browser.cdp_url`` set in config). + +See ``website/docs/developer-guide/browser-supervisor.md`` for the full +design. 
+""" + +from __future__ import annotations + +import json +import logging +from typing import Any, Dict, Optional + +from tools.browser_supervisor import SUPERVISOR_REGISTRY +from tools.registry import registry + +logger = logging.getLogger(__name__) + + +BROWSER_DIALOG_SCHEMA: Dict[str, Any] = { + "name": "browser_dialog", + "description": ( + "Respond to a native JavaScript dialog (alert / confirm / prompt / " + "beforeunload) that is currently blocking the page.\n\n" + "**Workflow:** call ``browser_snapshot`` first — if a dialog is open, " + "it appears in the ``pending_dialogs`` field with ``id``, ``type``, " + "and ``message``. Then call this tool with ``action='accept'`` or " + "``action='dismiss'``.\n\n" + "**Prompt dialogs:** pass ``prompt_text`` to supply the response " + "string. Ignored for alert/confirm/beforeunload.\n\n" + "**Multiple dialogs:** if more than one dialog is queued (rare — " + "happens when a second dialog fires while the first is still open), " + "pass ``dialog_id`` from the snapshot to disambiguate.\n\n" + "**Availability:** only present when a CDP-capable backend is " + "attached — Browserbase sessions, local Chrome via " + "``/browser connect``, or ``browser.cdp_url`` in config.yaml. " + "Not available on Camofox (REST-only) or the default Playwright " + "local browser (CDP port is hidden)." + ), + "parameters": { + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": ["accept", "dismiss"], + "description": ( + "'accept' clicks OK / returns the prompt text. " + "'dismiss' clicks Cancel / returns null from prompt(). " + "For ``beforeunload`` dialogs: 'accept' allows the " + "navigation, 'dismiss' keeps the page." + ), + }, + "prompt_text": { + "type": "string", + "description": ( + "Response string for a ``prompt()`` dialog. Ignored for " + "other dialog types. Defaults to empty string." 
+ ), + }, + "dialog_id": { + "type": "string", + "description": ( + "Specific dialog to respond to, from " + "``browser_snapshot.pending_dialogs[].id``. Required " + "only when multiple dialogs are queued." + ), + }, + }, + "required": ["action"], + }, +} + + +def browser_dialog( + action: str, + prompt_text: Optional[str] = None, + dialog_id: Optional[str] = None, + task_id: Optional[str] = None, +) -> str: + """Respond to a pending dialog on the active task's CDP supervisor.""" + effective_task_id = task_id or "default" + supervisor = SUPERVISOR_REGISTRY.get(effective_task_id) + if supervisor is None: + return json.dumps( + { + "success": False, + "error": ( + "No CDP supervisor is attached to this task. Either the " + "browser backend doesn't expose CDP (Camofox, default " + "Playwright) or no browser session has been started yet. " + "Call browser_navigate or /browser connect first." + ), + } + ) + + result = supervisor.respond_to_dialog( + action=action, + prompt_text=prompt_text, + dialog_id=dialog_id, + ) + if result.get("ok"): + return json.dumps( + { + "success": True, + "action": action, + "dialog": result.get("dialog", {}), + } + ) + return json.dumps({"success": False, "error": result.get("error", "unknown error")}) + + +def _browser_dialog_check() -> bool: + """Gate: same as ``browser_cdp`` — only offered when CDP is reachable. + + Kept identical so the two tools appear and disappear together. The + supervisor itself is started lazily by ``browser_navigate`` / + ``/browser connect`` / Browserbase session creation, so a reachable + CDP URL is enough to commit to showing the tool. 
+ """ + try: + from tools.browser_cdp_tool import _browser_cdp_check # type: ignore[import-not-found] + except Exception as exc: # pragma: no cover — defensive + logger.debug("browser_dialog check: browser_cdp_tool import failed: %s", exc) + return False + return _browser_cdp_check() + + +registry.register( + name="browser_dialog", + toolset="browser-cdp", + schema=BROWSER_DIALOG_SCHEMA, + handler=lambda args, **kw: browser_dialog( + action=args.get("action", ""), + prompt_text=args.get("prompt_text"), + dialog_id=args.get("dialog_id"), + task_id=kw.get("task_id"), + ), + check_fn=_browser_dialog_check, + emoji="💬", +) diff --git a/tools/browser_supervisor.py b/tools/browser_supervisor.py new file mode 100644 index 000000000..e230d92ed --- /dev/null +++ b/tools/browser_supervisor.py @@ -0,0 +1,1362 @@ +"""Persistent CDP supervisor for browser dialog + frame detection. + +One ``CDPSupervisor`` runs per Hermes ``task_id`` that has a reachable CDP +endpoint. It holds a single persistent WebSocket to the backend, subscribes +to ``Page`` / ``Runtime`` / ``Target`` events on every attached session +(top-level page and every OOPIF / worker target that auto-attaches), and +surfaces observable state — pending dialogs and frame tree — through a +thread-safe snapshot object that tool handlers consume synchronously. + +The supervisor is NOT in the agent's tool schema. Its output reaches the +agent via two channels: + +1. ``browser_snapshot`` merges supervisor state into its return payload + (see ``tools/browser_tool.py``). +2. ``browser_dialog`` tool responds to a pending dialog by calling + ``respond_to_dialog()`` on the active supervisor. + +Design spec: ``website/docs/developer-guide/browser-supervisor.md``. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import threading +import time +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Tuple + +import websockets +from websockets.asyncio.client import ClientConnection + +logger = logging.getLogger(__name__) + + +# ── Config defaults ─────────────────────────────────────────────────────────── + +DIALOG_POLICY_MUST_RESPOND = "must_respond" +DIALOG_POLICY_AUTO_DISMISS = "auto_dismiss" +DIALOG_POLICY_AUTO_ACCEPT = "auto_accept" + +_VALID_POLICIES = frozenset( + {DIALOG_POLICY_MUST_RESPOND, DIALOG_POLICY_AUTO_DISMISS, DIALOG_POLICY_AUTO_ACCEPT} +) + +DEFAULT_DIALOG_POLICY = DIALOG_POLICY_MUST_RESPOND +DEFAULT_DIALOG_TIMEOUT_S = 300.0 + +# Snapshot caps for frame_tree — keep payloads bounded on ad-heavy pages. +FRAME_TREE_MAX_ENTRIES = 30 +FRAME_TREE_MAX_OOPIF_DEPTH = 2 + +# Ring buffer of recent console-level events (used later by PR 2 diagnostics). +CONSOLE_HISTORY_MAX = 50 + +# Keep the last N closed dialogs in ``recent_dialogs`` so agents on backends +# that auto-dismiss server-side (e.g. Browserbase) can still observe that a +# dialog fired, even if they couldn't respond to it in time. +RECENT_DIALOGS_MAX = 20 + +# Magic host the injected dialog bridge XHRs to. Intercepted via the CDP +# Fetch domain before any network resolution happens, so the hostname never +# has to exist. Keep this ASCII + URL-safe; we also gate Fetch patterns on it. +DIALOG_BRIDGE_HOST = "hermes-dialog-bridge.invalid" +DIALOG_BRIDGE_URL_PATTERN = f"http://{DIALOG_BRIDGE_HOST}/*" + +# Script injected into every frame via Page.addScriptToEvaluateOnNewDocument. +# Overrides alert/confirm/prompt to round-trip through a sync XHR that we +# intercept via Fetch.requestPaused. Works on Browserbase (whose CDP proxy +# auto-dismisses REAL native dialogs) because the native dialogs never fire +# in the first place — the overrides take precedence. 
+_DIALOG_BRIDGE_SCRIPT = r""" +(() => { + if (window.__hermesDialogBridgeInstalled) return; + window.__hermesDialogBridgeInstalled = true; + const ENDPOINT = "http://hermes-dialog-bridge.invalid/"; + function ask(kind, message, defaultPrompt) { + try { + const xhr = new XMLHttpRequest(); + // Use GET with query params so we don't need to worry about request + // body encoding in the Fetch interceptor. + const params = new URLSearchParams({ + kind: String(kind || ""), + message: String(message == null ? "" : message), + default_prompt: String(defaultPrompt == null ? "" : defaultPrompt), + }); + xhr.open("GET", ENDPOINT + "?" + params.toString(), false); // sync + xhr.send(null); + if (xhr.status !== 200) return null; + const body = xhr.responseText || ""; + let parsed; + try { parsed = JSON.parse(body); } catch (e) { return null; } + if (kind === "alert") return undefined; + if (kind === "confirm") return Boolean(parsed && parsed.accept); + if (kind === "prompt") { + if (!parsed || !parsed.accept) return null; + return parsed.prompt_text == null ? "" : String(parsed.prompt_text); + } + return null; + } catch (e) { + // If the bridge is unreachable, fall back to the native call so the + // page still sees *some* behavior (the backend will auto-dismiss). + return null; + } + } + const realAlert = window.alert; + const realConfirm = window.confirm; + const realPrompt = window.prompt; + window.alert = function(message) { ask("alert", message, ""); }; + window.confirm = function(message) { + const r = ask("confirm", message, ""); + return r === null ? false : Boolean(r); + }; + window.prompt = function(message, def) { + const r = ask("prompt", message, def == null ? "" : def); + return r === null ? null : String(r); + }; + // onbeforeunload — we can't really synchronously prompt the user from this + // event without racing navigation. Leave native behavior for now; the + // supervisor's native-dialog fallback path still surfaces them in + // recent_dialogs. 
+})(); +""" + + +# ── Data model ──────────────────────────────────────────────────────────────── + + +@dataclass +class PendingDialog: + """A JS dialog currently open on some frame's session.""" + + id: str + type: str # "alert" | "confirm" | "prompt" | "beforeunload" + message: str + default_prompt: str + opened_at: float + cdp_session_id: str # which attached CDP session the dialog fired in + frame_id: Optional[str] = None + # When set, the dialog was captured via the bridge XHR path (Fetch domain). + # Response must be delivered via Fetch.fulfillRequest, NOT + # Page.handleJavaScriptDialog — the native dialog never fired. + bridge_request_id: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "type": self.type, + "message": self.message, + "default_prompt": self.default_prompt, + "opened_at": self.opened_at, + "frame_id": self.frame_id, + } + + +@dataclass +class DialogRecord: + """A historical record of a dialog that was opened and then handled. + + Retained in ``recent_dialogs`` for a short window so agents on backends + that auto-dismiss dialogs server-side (Browserbase) can still observe + that a dialog fired, even though they couldn't respond to it. + """ + + id: str + type: str + message: str + opened_at: float + closed_at: float + closed_by: str # "agent" | "auto_policy" | "remote" | "watchdog" + frame_id: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "type": self.type, + "message": self.message, + "opened_at": self.opened_at, + "closed_at": self.closed_at, + "closed_by": self.closed_by, + "frame_id": self.frame_id, + } + + +@dataclass +class FrameInfo: + """One frame in the page's frame tree. + + ``is_oopif`` means the frame has its own CDP target (separate process, + reachable via ``cdp_session_id``). Same-origin / srcdoc iframes share + the parent process and have ``is_oopif=False`` + ``cdp_session_id=None``. 
+ """ + + frame_id: str + url: str + origin: str + parent_frame_id: Optional[str] + is_oopif: bool + cdp_session_id: Optional[str] = None + name: str = "" + + def to_dict(self) -> Dict[str, Any]: + d = { + "frame_id": self.frame_id, + "url": self.url, + "origin": self.origin, + "is_oopif": self.is_oopif, + } + if self.cdp_session_id: + d["session_id"] = self.cdp_session_id + if self.parent_frame_id: + d["parent_frame_id"] = self.parent_frame_id + if self.name: + d["name"] = self.name + return d + + +@dataclass +class ConsoleEvent: + """Ring buffer entry for console + exception traffic.""" + + ts: float + level: str # "log" | "error" | "warning" | "exception" + text: str + url: Optional[str] = None + + +@dataclass(frozen=True) +class SupervisorSnapshot: + """Read-only snapshot of supervisor state. + + Frozen dataclass so tool handlers can freely dereference without + worrying about mutation under their feet. + """ + + pending_dialogs: Tuple[PendingDialog, ...] + recent_dialogs: Tuple[DialogRecord, ...] + frame_tree: Dict[str, Any] + console_errors: Tuple[ConsoleEvent, ...] + active: bool # False if supervisor is detached/stopped + cdp_url: str + task_id: str + + def to_dict(self) -> Dict[str, Any]: + """Serialize for inclusion in ``browser_snapshot`` output.""" + out: Dict[str, Any] = { + "pending_dialogs": [d.to_dict() for d in self.pending_dialogs], + "frame_tree": self.frame_tree, + } + if self.recent_dialogs: + out["recent_dialogs"] = [d.to_dict() for d in self.recent_dialogs] + return out + + +# ── Supervisor core ─────────────────────────────────────────────────────────── + + +class CDPSupervisor: + """One supervisor per (task_id, cdp_url) pair. + + Lifecycle: + * ``start()`` — kicked off by ``SupervisorRegistry.get_or_start``; spawns + a daemon thread running its own asyncio loop, connects the WebSocket, + attaches to the first page target, enables domains, starts + auto-attaching to child targets. 
+ * ``snapshot()`` — sync, thread-safe, called from tool handlers. + * ``respond_to_dialog(action, ...)`` — sync bridge; schedules a coroutine + on the supervisor's loop and waits (with timeout) for the CDP ack. + * ``stop()`` — cancels task, closes WebSocket, joins thread. + + All CDP I/O lives on the supervisor's own loop. External callers never + touch the loop directly; they go through the sync API above. + """ + + def __init__( + self, + task_id: str, + cdp_url: str, + *, + dialog_policy: str = DEFAULT_DIALOG_POLICY, + dialog_timeout_s: float = DEFAULT_DIALOG_TIMEOUT_S, + ) -> None: + if dialog_policy not in _VALID_POLICIES: + raise ValueError( + f"Invalid dialog_policy {dialog_policy!r}; " + f"must be one of {sorted(_VALID_POLICIES)}" + ) + self.task_id = task_id + self.cdp_url = cdp_url + self.dialog_policy = dialog_policy + self.dialog_timeout_s = float(dialog_timeout_s) + + # State protected by ``_state_lock`` for cross-thread reads. + self._state_lock = threading.Lock() + self._pending_dialogs: Dict[str, PendingDialog] = {} + self._recent_dialogs: List[DialogRecord] = [] + self._frames: Dict[str, FrameInfo] = {} + self._console_events: List[ConsoleEvent] = [] + self._active = False + + # Supervisor loop machinery — populated in start(). + self._loop: Optional[asyncio.AbstractEventLoop] = None + self._thread: Optional[threading.Thread] = None + self._ready_event = threading.Event() + self._start_error: Optional[BaseException] = None + self._stop_requested = False + + # CDP call tracking (runs on supervisor loop only). + self._next_call_id = 1 + self._pending_calls: Dict[int, asyncio.Future] = {} + self._ws: Optional[ClientConnection] = None + self._page_session_id: Optional[str] = None + self._child_sessions: Dict[str, Dict[str, Any]] = {} # session_id -> info + + # Dialog auto-dismiss watchdog handles (per dialog id). + self._dialog_watchdogs: Dict[str, asyncio.TimerHandle] = {} + # Monotonic id generator for dialogs (human-readable in snapshots). 
+ self._dialog_seq = 0 + + # ── Public sync API ────────────────────────────────────────────────────── + + def start(self, timeout: float = 15.0) -> None: + """Launch the background loop and wait until attachment is complete. + + Raises whatever exception attach failed with (connect error, bad + WebSocket URL, CDP domain enable failure, etc.). On success, the + supervisor is fully wired up — pending-dialog events will be captured + as of the moment ``start()`` returns. + """ + if self._thread and self._thread.is_alive(): + return + self._ready_event.clear() + self._start_error = None + self._stop_requested = False + self._thread = threading.Thread( + target=self._thread_main, + name=f"cdp-supervisor-{self.task_id}", + daemon=True, + ) + self._thread.start() + if not self._ready_event.wait(timeout=timeout): + self.stop() + raise TimeoutError( + f"CDP supervisor did not attach within {timeout}s " + f"(cdp_url={self.cdp_url[:80]}...)" + ) + if self._start_error is not None: + err = self._start_error + self.stop() + raise err + + def stop(self, timeout: float = 5.0) -> None: + """Cancel the supervisor task and join the thread.""" + self._stop_requested = True + loop = self._loop + if loop is not None and loop.is_running(): + # Close the WebSocket from inside the loop — this makes ``async for + # raw in self._ws`` return cleanly, ``_run`` hits its ``finally``, + # pending tasks get cancelled in order, THEN the thread exits. 
+ async def _close_ws(): + ws = self._ws + self._ws = None + if ws is not None: + try: + await ws.close() + except Exception: + pass + + try: + fut = asyncio.run_coroutine_threadsafe(_close_ws(), loop) + try: + fut.result(timeout=2.0) + except Exception: + pass + except RuntimeError: + pass # loop already shutting down + if self._thread is not None: + self._thread.join(timeout=timeout) + with self._state_lock: + self._active = False + + def snapshot(self) -> SupervisorSnapshot: + """Return an immutable snapshot of current state.""" + with self._state_lock: + dialogs = tuple(self._pending_dialogs.values()) + recent = tuple(self._recent_dialogs[-RECENT_DIALOGS_MAX:]) + frames_tree = self._build_frame_tree_locked() + console = tuple(self._console_events[-CONSOLE_HISTORY_MAX:]) + active = self._active + return SupervisorSnapshot( + pending_dialogs=dialogs, + recent_dialogs=recent, + frame_tree=frames_tree, + console_errors=console, + active=active, + cdp_url=self.cdp_url, + task_id=self.task_id, + ) + + def respond_to_dialog( + self, + action: str, + *, + prompt_text: Optional[str] = None, + dialog_id: Optional[str] = None, + timeout: float = 10.0, + ) -> Dict[str, Any]: + """Accept/dismiss a pending dialog. Sync bridge onto the supervisor loop. + + Returns ``{"ok": True, "dialog": {...}}`` on success, + ``{"ok": False, "error": "..."}`` on a recoverable error (no dialog, + ambiguous dialog_id, supervisor inactive). 
+ """ + if action not in ("accept", "dismiss"): + return {"ok": False, "error": f"action must be 'accept' or 'dismiss', got {action!r}"} + + with self._state_lock: + if not self._active: + return {"ok": False, "error": "supervisor is not active"} + pending = list(self._pending_dialogs.values()) + if not pending: + return {"ok": False, "error": "no dialog is currently open"} + if dialog_id: + dialog = self._pending_dialogs.get(dialog_id) + if dialog is None: + return { + "ok": False, + "error": f"dialog_id {dialog_id!r} not found " + f"(known: {sorted(self._pending_dialogs)})", + } + elif len(pending) > 1: + return { + "ok": False, + "error": ( + f"{len(pending)} pending dialogs; specify dialog_id. " + f"Candidates: {[d.id for d in pending]}" + ), + } + else: + dialog = pending[0] + snapshot_copy = dialog + + loop = self._loop + if loop is None: + return {"ok": False, "error": "supervisor loop is not running"} + + async def _do_respond(): + return await self._handle_dialog_cdp( + snapshot_copy, accept=(action == "accept"), prompt_text=prompt_text or "" + ) + + try: + fut = asyncio.run_coroutine_threadsafe(_do_respond(), loop) + fut.result(timeout=timeout) + except Exception as e: + return {"ok": False, "error": f"{type(e).__name__}: {e}"} + return {"ok": True, "dialog": snapshot_copy.to_dict()} + + # ── Supervisor loop internals ──────────────────────────────────────────── + + def _thread_main(self) -> None: + """Entry point for the supervisor's dedicated thread.""" + loop = asyncio.new_event_loop() + self._loop = loop + try: + asyncio.set_event_loop(loop) + loop.run_until_complete(self._run()) + except BaseException as e: # noqa: BLE001 — propagate via _start_error + if not self._ready_event.is_set(): + self._start_error = e + self._ready_event.set() + else: + logger.warning("CDP supervisor %s crashed: %s", self.task_id, e) + finally: + # Flush any remaining tasks before closing the loop so we don't + # emit "Task was destroyed but it is pending" warnings. 
+ try: + pending = [t for t in asyncio.all_tasks(loop) if not t.done()] + for t in pending: + t.cancel() + if pending: + loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) + except Exception: + pass + try: + loop.close() + except Exception: + pass + with self._state_lock: + self._active = False + + async def _run(self) -> None: + """Top-level supervisor coroutine. + + Holds a reconnecting loop so we survive the remote closing the + WebSocket — Browserbase in particular tears down the CDP socket + every time a short-lived client (e.g. agent-browser's per-command + CDP client) disconnects. We drop our state snapshot keys that + depend on specific CDP session ids, re-attach, and keep going. + """ + attempt = 0 + last_success_at = 0.0 + backoff = 0.5 + while not self._stop_requested: + try: + self._ws = await asyncio.wait_for( + websockets.connect(self.cdp_url, max_size=50 * 1024 * 1024), + timeout=10.0, + ) + except Exception as e: + attempt += 1 + if not self._ready_event.is_set(): + # Never connected once — fatal for start(). + self._start_error = e + self._ready_event.set() + return + logger.warning( + "CDP supervisor %s: connect failed (attempt %s): %s", + self.task_id, attempt, e, + ) + await asyncio.sleep(min(backoff, 10.0)) + backoff = min(backoff * 2, 10.0) + continue + + reader_task = asyncio.create_task(self._read_loop(), name="cdp-reader") + try: + # Reset per-connection session state so stale ids don't hang + # around after a reconnect. + self._page_session_id = None + self._child_sessions.clear() + # We deliberately keep `_pending_dialogs` and `_frames` — + # they're reconciled as the supervisor resubscribes and + # receives fresh events. Worst case: an agent sees a stale + # dialog entry that the new session's handleJavaScriptDialog + # call rejects with "no dialog is showing" (logged, not + # surfaced). 
+ await self._attach_initial_page() + with self._state_lock: + self._active = True + last_success_at = time.time() + backoff = 0.5 # reset after a successful attach + if not self._ready_event.is_set(): + self._ready_event.set() + # Run until the reader returns. + await reader_task + except BaseException as e: + if not self._ready_event.is_set(): + # Never got to ready — propagate to start(). + self._start_error = e + self._ready_event.set() + raise + logger.warning( + "CDP supervisor %s: session dropped after %.1fs: %s", + self.task_id, + time.time() - last_success_at, + e, + ) + finally: + with self._state_lock: + self._active = False + if not reader_task.done(): + reader_task.cancel() + try: + await reader_task + except (asyncio.CancelledError, Exception): + pass + for handle in list(self._dialog_watchdogs.values()): + handle.cancel() + self._dialog_watchdogs.clear() + ws = self._ws + self._ws = None + if ws is not None: + try: + await ws.close() + except Exception: + pass + + if self._stop_requested: + return + + # Reconnect: brief backoff, then reattach. 
+ logger.debug( + "CDP supervisor %s: reconnecting in %.1fs...", self.task_id, backoff, + ) + await asyncio.sleep(backoff) + backoff = min(backoff * 2, 10.0) + + async def _attach_initial_page(self) -> None: + """Find a page target, attach flattened session, enable domains, install dialog bridge.""" + resp = await self._cdp("Target.getTargets") + targets = resp.get("result", {}).get("targetInfos", []) + page_target = next((t for t in targets if t.get("type") == "page"), None) + if page_target is None: + created = await self._cdp("Target.createTarget", {"url": "about:blank"}) + target_id = created["result"]["targetId"] + else: + target_id = page_target["targetId"] + + attach = await self._cdp( + "Target.attachToTarget", + {"targetId": target_id, "flatten": True}, + ) + self._page_session_id = attach["result"]["sessionId"] + await self._cdp("Page.enable", session_id=self._page_session_id) + await self._cdp("Runtime.enable", session_id=self._page_session_id) + await self._cdp( + "Target.setAutoAttach", + {"autoAttach": True, "waitForDebuggerOnStart": False, "flatten": True}, + session_id=self._page_session_id, + ) + # Install the dialog bridge — overrides native alert/confirm/prompt with + # a synchronous XHR we intercept via Fetch domain. This is how we make + # dialog response work on Browserbase (whose CDP proxy auto-dismisses + # real native dialogs before we can call handleJavaScriptDialog). + await self._install_dialog_bridge(self._page_session_id) + + async def _install_dialog_bridge(self, session_id: str) -> None: + """Install the dialog-bridge init script + Fetch interceptor on a session. + + Two CDP calls: + 1. ``Page.addScriptToEvaluateOnNewDocument`` — the JS override runs + in every frame before any page script. Replaces alert/confirm/ + prompt with a sync XHR to our bridge URL. + 2. ``Fetch.enable`` scoped to the bridge URL — we catch those XHRs, + surface them as pending dialogs, then fulfill once the agent + responds. 
+ + Idempotent at the CDP level: Chromium de-duplicates identical + add-script calls by source, and Fetch.enable replaces prior patterns. + """ + try: + await self._cdp( + "Page.addScriptToEvaluateOnNewDocument", + {"source": _DIALOG_BRIDGE_SCRIPT, "runImmediately": True}, + session_id=session_id, + timeout=5.0, + ) + except Exception as e: + logger.debug( + "dialog bridge: addScriptToEvaluateOnNewDocument failed on sid=%s: %s", + (session_id or "")[:16], e, + ) + try: + await self._cdp( + "Fetch.enable", + { + "patterns": [ + { + "urlPattern": DIALOG_BRIDGE_URL_PATTERN, + "requestStage": "Request", + } + ], + "handleAuthRequests": False, + }, + session_id=session_id, + timeout=5.0, + ) + except Exception as e: + logger.debug( + "dialog bridge: Fetch.enable failed on sid=%s: %s", + (session_id or "")[:16], e, + ) + # Also try to inject into the already-loaded document so existing + # pages pick up the override on reconnect. Best-effort. + try: + await self._cdp( + "Runtime.evaluate", + {"expression": _DIALOG_BRIDGE_SCRIPT, "returnByValue": True}, + session_id=session_id, + timeout=3.0, + ) + except Exception: + pass + + async def _cdp( + self, + method: str, + params: Optional[Dict[str, Any]] = None, + *, + session_id: Optional[str] = None, + timeout: float = 10.0, + ) -> Dict[str, Any]: + """Send a CDP command and await its response.""" + if self._ws is None: + raise RuntimeError("supervisor WebSocket is not connected") + call_id = self._next_call_id + self._next_call_id += 1 + payload: Dict[str, Any] = {"id": call_id, "method": method} + if params: + payload["params"] = params + if session_id: + payload["sessionId"] = session_id + fut: asyncio.Future = asyncio.get_running_loop().create_future() + self._pending_calls[call_id] = fut + await self._ws.send(json.dumps(payload)) + try: + return await asyncio.wait_for(fut, timeout=timeout) + finally: + self._pending_calls.pop(call_id, None) + + async def _read_loop(self) -> None: + """Continuously dispatch incoming CDP 
frames.""" + assert self._ws is not None + try: + async for raw in self._ws: + if self._stop_requested: + break + try: + msg = json.loads(raw) + except Exception: + logger.debug("CDP supervisor: non-JSON frame dropped") + continue + if "id" in msg: + fut = self._pending_calls.pop(msg["id"], None) + if fut is not None and not fut.done(): + if "error" in msg: + fut.set_exception( + RuntimeError(f"CDP error on id={msg['id']}: {msg['error']}") + ) + else: + fut.set_result(msg) + elif "method" in msg: + await self._on_event(msg["method"], msg.get("params", {}), msg.get("sessionId")) + except Exception as e: + logger.debug("CDP read loop exited: %s", e) + + # ── Event dispatch ────────────────────────────────────────────────────── + + async def _on_event( + self, method: str, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + if method == "Page.javascriptDialogOpening": + await self._on_dialog_opening(params, session_id) + elif method == "Page.javascriptDialogClosed": + await self._on_dialog_closed(params, session_id) + elif method == "Fetch.requestPaused": + await self._on_fetch_paused(params, session_id) + elif method == "Page.frameAttached": + self._on_frame_attached(params, session_id) + elif method == "Page.frameNavigated": + self._on_frame_navigated(params, session_id) + elif method == "Page.frameDetached": + self._on_frame_detached(params, session_id) + elif method == "Target.attachedToTarget": + await self._on_target_attached(params) + elif method == "Target.detachedFromTarget": + self._on_target_detached(params) + elif method == "Runtime.consoleAPICalled": + self._on_console(params, level_from="api") + elif method == "Runtime.exceptionThrown": + self._on_console(params, level_from="exception") + + async def _on_dialog_opening( + self, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + self._dialog_seq += 1 + dialog = PendingDialog( + id=f"d-{self._dialog_seq}", + type=str(params.get("type") or ""), + message=str(params.get("message") 
or ""), + default_prompt=str(params.get("defaultPrompt") or ""), + opened_at=time.time(), + cdp_session_id=session_id or self._page_session_id or "", + frame_id=params.get("frameId"), + ) + + if self.dialog_policy == DIALOG_POLICY_AUTO_DISMISS: + # Archive immediately with the policy tag so the ``closed`` event + # arriving right after our handleJavaScriptDialog call doesn't + # re-archive it as "remote". + with self._state_lock: + self._archive_dialog_locked(dialog, "auto_policy") + asyncio.create_task( + self._auto_handle_dialog(dialog, accept=False, prompt_text="") + ) + elif self.dialog_policy == DIALOG_POLICY_AUTO_ACCEPT: + with self._state_lock: + self._archive_dialog_locked(dialog, "auto_policy") + asyncio.create_task( + self._auto_handle_dialog( + dialog, accept=True, prompt_text=dialog.default_prompt + ) + ) + else: + # must_respond → add to pending and arm watchdog. + with self._state_lock: + self._pending_dialogs[dialog.id] = dialog + loop = asyncio.get_running_loop() + handle = loop.call_later( + self.dialog_timeout_s, + lambda: asyncio.create_task(self._dialog_timeout_expired(dialog.id)), + ) + self._dialog_watchdogs[dialog.id] = handle + + async def _auto_handle_dialog( + self, dialog: PendingDialog, *, accept: bool, prompt_text: str + ) -> None: + """Send handleJavaScriptDialog for auto_dismiss/auto_accept. + + Dialog has already been archived by the caller (``_on_dialog_opening``); + this just fires the CDP call so the page unblocks. 
+ """ + params: Dict[str, Any] = {"accept": accept} + if dialog.type == "prompt": + params["promptText"] = prompt_text + try: + await self._cdp( + "Page.handleJavaScriptDialog", + params, + session_id=dialog.cdp_session_id or None, + timeout=5.0, + ) + except Exception as e: + logger.debug("auto-handle CDP call failed for %s: %s", dialog.id, e) + + async def _dialog_timeout_expired(self, dialog_id: str) -> None: + with self._state_lock: + dialog = self._pending_dialogs.get(dialog_id) + if dialog is None: + return + logger.warning( + "CDP supervisor %s: dialog %s (%s) auto-dismissed after %ss timeout", + self.task_id, + dialog_id, + dialog.type, + self.dialog_timeout_s, + ) + try: + # Archive with watchdog tag BEFORE fulfilling / dismissing. + with self._state_lock: + if dialog_id in self._pending_dialogs: + self._pending_dialogs.pop(dialog_id, None) + self._archive_dialog_locked(dialog, "watchdog") + # Unblock the page — via bridge Fetch fulfill for bridge dialogs, + # else native Page.handleJavaScriptDialog for real dialogs. + if dialog.bridge_request_id: + await self._fulfill_bridge_request(dialog, accept=False, prompt_text="") + else: + await self._cdp( + "Page.handleJavaScriptDialog", + {"accept": False}, + session_id=dialog.cdp_session_id or None, + timeout=5.0, + ) + except Exception as e: + logger.debug("auto-dismiss failed for %s: %s", dialog_id, e) + + def _archive_dialog_locked(self, dialog: PendingDialog, closed_by: str) -> None: + """Move a pending dialog to the recent_dialogs ring buffer. 
Must hold state_lock.""" + record = DialogRecord( + id=dialog.id, + type=dialog.type, + message=dialog.message, + opened_at=dialog.opened_at, + closed_at=time.time(), + closed_by=closed_by, + frame_id=dialog.frame_id, + ) + self._recent_dialogs.append(record) + if len(self._recent_dialogs) > RECENT_DIALOGS_MAX * 2: + self._recent_dialogs = self._recent_dialogs[-RECENT_DIALOGS_MAX:] + + async def _handle_dialog_cdp( + self, dialog: PendingDialog, *, accept: bool, prompt_text: str + ) -> None: + """Send the Page.handleJavaScriptDialog CDP command (agent path only). + + Routes to the bridge-fulfill path when the dialog was captured via + the injected XHR override (see ``_on_fetch_paused``). + """ + if dialog.bridge_request_id: + try: + await self._fulfill_bridge_request( + dialog, accept=accept, prompt_text=prompt_text + ) + finally: + with self._state_lock: + if dialog.id in self._pending_dialogs: + self._pending_dialogs.pop(dialog.id, None) + self._archive_dialog_locked(dialog, "agent") + handle = self._dialog_watchdogs.pop(dialog.id, None) + if handle is not None: + handle.cancel() + return + + params: Dict[str, Any] = {"accept": accept} + if dialog.type == "prompt": + params["promptText"] = prompt_text + try: + await self._cdp( + "Page.handleJavaScriptDialog", + params, + session_id=dialog.cdp_session_id or None, + timeout=5.0, + ) + finally: + # Clear regardless — the CDP error path usually means the dialog + # already closed (browser auto-dismissed after navigation, etc.). + with self._state_lock: + if dialog.id in self._pending_dialogs: + self._pending_dialogs.pop(dialog.id, None) + self._archive_dialog_locked(dialog, "agent") + handle = self._dialog_watchdogs.pop(dialog.id, None) + if handle is not None: + handle.cancel() + + async def _on_dialog_closed( + self, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + # ``Page.javascriptDialogClosed`` spec has only ``result`` (bool) and + # ``userInput`` (string), not the original ``message``. 
Match by + # session id and clear the oldest dialog on that session — if Chrome + # closed one on us (e.g. our disconnect auto-dismissed it, or the + # browser navigated, or Browserbase's CDP proxy auto-dismissed), there + # shouldn't be more than one in flight per session anyway because the + # JS thread is blocked while a dialog is up. + with self._state_lock: + candidate_ids = [ + d.id + for d in self._pending_dialogs.values() + if d.cdp_session_id == session_id + # Bridge-captured dialogs aren't cleared by native close events; + # they're resolved via Fetch.fulfillRequest instead. Only the + # real-native-dialog path uses Page.javascriptDialogClosed. + and d.bridge_request_id is None + ] + if candidate_ids: + did = candidate_ids[0] + dialog = self._pending_dialogs.pop(did, None) + if dialog is not None: + self._archive_dialog_locked(dialog, "remote") + handle = self._dialog_watchdogs.pop(did, None) + if handle is not None: + handle.cancel() + + async def _on_fetch_paused( + self, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + """Bridge XHR captured mid-flight — materialize as a pending dialog. + + The injected script (``_DIALOG_BRIDGE_SCRIPT``) fires a synchronous + XHR to ``DIALOG_BRIDGE_HOST`` whenever page code calls alert/confirm/ + prompt. We catch it via Fetch.enable pattern; the page's JS thread + is blocked on the XHR's response until we call Fetch.fulfillRequest + (which happens from ``respond_to_dialog``) or until the watchdog + fires (at which point we fulfill with a cancel response). + """ + url = str(params.get("request", {}).get("url") or "") + request_id = params.get("requestId") + if not request_id: + return + # Only care about our bridge URLs. Fetch can still deliver other + # intercepted requests if patterns were ever broadened. + if DIALOG_BRIDGE_HOST not in url: + # Not ours — forward unchanged so the page sees its own request. 
+ try: + await self._cdp( + "Fetch.continueRequest", {"requestId": request_id}, + session_id=session_id, timeout=3.0, + ) + except Exception: + pass + return + + # Parse query string for dialog metadata. Use urllib to be robust. + from urllib.parse import urlparse, parse_qs + q = parse_qs(urlparse(url).query) + + def _q(name: str) -> str: + v = q.get(name, [""]) + return v[0] if v else "" + + kind = _q("kind") or "alert" + message = _q("message") + default_prompt = _q("default_prompt") + + self._dialog_seq += 1 + dialog = PendingDialog( + id=f"d-{self._dialog_seq}", + type=kind, + message=message, + default_prompt=default_prompt, + opened_at=time.time(), + cdp_session_id=session_id or self._page_session_id or "", + frame_id=params.get("frameId"), + bridge_request_id=str(request_id), + ) + + # Apply policy exactly as for native dialogs. + if self.dialog_policy == DIALOG_POLICY_AUTO_DISMISS: + with self._state_lock: + self._archive_dialog_locked(dialog, "auto_policy") + asyncio.create_task( + self._fulfill_bridge_request(dialog, accept=False, prompt_text="") + ) + elif self.dialog_policy == DIALOG_POLICY_AUTO_ACCEPT: + with self._state_lock: + self._archive_dialog_locked(dialog, "auto_policy") + asyncio.create_task( + self._fulfill_bridge_request( + dialog, accept=True, prompt_text=default_prompt + ) + ) + else: + # must_respond — add to pending + arm watchdog. 
+ with self._state_lock: + self._pending_dialogs[dialog.id] = dialog + loop = asyncio.get_running_loop() + handle = loop.call_later( + self.dialog_timeout_s, + lambda: asyncio.create_task(self._dialog_timeout_expired(dialog.id)), + ) + self._dialog_watchdogs[dialog.id] = handle + + async def _fulfill_bridge_request( + self, dialog: PendingDialog, *, accept: bool, prompt_text: str + ) -> None: + """Resolve a bridge XHR via Fetch.fulfillRequest so the page unblocks.""" + if not dialog.bridge_request_id: + return + payload = { + "accept": bool(accept), + "prompt_text": prompt_text if dialog.type == "prompt" else "", + "dialog_id": dialog.id, + } + body = json.dumps(payload).encode() + try: + import base64 as _b64 + await self._cdp( + "Fetch.fulfillRequest", + { + "requestId": dialog.bridge_request_id, + "responseCode": 200, + "responseHeaders": [ + {"name": "Content-Type", "value": "application/json"}, + {"name": "Access-Control-Allow-Origin", "value": "*"}, + ], + "body": _b64.b64encode(body).decode(), + }, + session_id=dialog.cdp_session_id or None, + timeout=5.0, + ) + except Exception as e: + logger.debug("bridge fulfill failed for %s: %s", dialog.id, e) + + # ── Frame / target tracking ───────────────────────────────────────────── + + def _on_frame_attached( + self, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + frame_id = params.get("frameId") + if not frame_id: + return + with self._state_lock: + self._frames[frame_id] = FrameInfo( + frame_id=frame_id, + url="", + origin="", + parent_frame_id=params.get("parentFrameId"), + is_oopif=False, + cdp_session_id=session_id, + ) + + def _on_frame_navigated( + self, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + frame = params.get("frame") or {} + frame_id = frame.get("id") + if not frame_id: + return + with self._state_lock: + existing = self._frames.get(frame_id) + info = FrameInfo( + frame_id=frame_id, + url=str(frame.get("url") or ""), + origin=str(frame.get("securityOrigin") or 
frame.get("origin") or ""), + parent_frame_id=frame.get("parentId") or (existing.parent_frame_id if existing else None), + is_oopif=bool(existing.is_oopif if existing else False), + cdp_session_id=existing.cdp_session_id if existing else session_id, + name=str(frame.get("name") or (existing.name if existing else "")), + ) + self._frames[frame_id] = info + + def _on_frame_detached( + self, params: Dict[str, Any], session_id: Optional[str] + ) -> None: + """Remove a frame from our state only when it's truly gone. + + CDP emits ``Page.frameDetached`` with a ``reason`` of either + ``"remove"`` (the frame is actually gone from the DOM) or ``"swap"`` + (the frame is migrating to a new process — typical when a + same-process iframe becomes an OOPIF, or when history navigates). + Dropping on ``swap`` would hide OOPIFs from the agent the moment + Chromium promotes them to their own process, so treat swap as a + no-op. + + Even with ``reason=remove``, the parent page's perspective is + "the child frame left MY process tree" — which is what happens + when a same-origin iframe gets promoted to an OOPIF. If we + already have a live child CDP session attached for that frame_id, + the frame is still very much alive; only drop it when we have + no session record. + """ + frame_id = params.get("frameId") + if not frame_id: + return + reason = str(params.get("reason") or "remove").lower() + if reason == "swap": + return + with self._state_lock: + existing = self._frames.get(frame_id) + # Keep OOPIF records even when the parent says the frame was + # "removed" — the iframe is still visible, just in a different + # process. If the frame truly goes away later, Target.detached + # + the next Page.frameDetached without a live session will + # clear it. 
+ if existing and existing.is_oopif and existing.cdp_session_id: + return + self._frames.pop(frame_id, None) + + async def _on_target_attached(self, params: Dict[str, Any]) -> None: + info = params.get("targetInfo") or {} + sid = params.get("sessionId") + target_type = info.get("type") + if not sid or target_type not in ("iframe", "worker"): + return + self._child_sessions[sid] = {"info": info, "type": target_type} + + # Record the frame with its OOPIF session id for interaction routing. + if target_type == "iframe": + target_id = info.get("targetId") + with self._state_lock: + existing = self._frames.get(target_id) + self._frames[target_id] = FrameInfo( + frame_id=target_id, + url=str(info.get("url") or ""), + origin="", # filled by frameNavigated on the child session + parent_frame_id=(existing.parent_frame_id if existing else None), + is_oopif=True, + cdp_session_id=sid, + name=str(info.get("title") or (existing.name if existing else "")), + ) + + # Enable domains on the child off-loop so the reader keeps pumping. + # Awaiting the CDP replies here would deadlock because only the + # reader can resolve those replies' Futures. + asyncio.create_task(self._enable_child_domains(sid)) + + async def _enable_child_domains(self, sid: str) -> None: + """Enable Page+Runtime (+nested setAutoAttach) on a child CDP session. + + Also installs the dialog bridge so iframe-scoped alert/confirm/prompt + calls round-trip through Fetch too. + """ + try: + await self._cdp("Page.enable", session_id=sid, timeout=3.0) + await self._cdp("Runtime.enable", session_id=sid, timeout=3.0) + await self._cdp( + "Target.setAutoAttach", + {"autoAttach": True, "waitForDebuggerOnStart": False, "flatten": True}, + session_id=sid, + timeout=3.0, + ) + except Exception as e: + logger.debug("child session %s setup failed: %s", sid[:16], e) + # Install the dialog bridge on the child so iframe dialogs are captured. 
+ await self._install_dialog_bridge(sid) + + def _on_target_detached(self, params: Dict[str, Any]) -> None: + """Handle a child CDP session detaching. + + We deliberately DO NOT drop frames from ``_frames`` here — Browserbase + fires transient detach events during page transitions even while the + iframe is still visible to the user, and dropping the record hides + OOPIFs from the agent between the detach and the next + ``Target.attachedToTarget``. Instead, we just clear the session + binding so stale ``cdp_session_id`` values aren't used for routing. + If the iframe truly goes away, ``Page.frameDetached`` will clean up. + """ + sid = params.get("sessionId") + if not sid: + return + self._child_sessions.pop(sid, None) + with self._state_lock: + for fid, frame in list(self._frames.items()): + if frame.cdp_session_id == sid: + # Replace with a copy that has cdp_session_id cleared so + # routing falls back to top-level page session if retried. + self._frames[fid] = FrameInfo( + frame_id=frame.frame_id, + url=frame.url, + origin=frame.origin, + parent_frame_id=frame.parent_frame_id, + is_oopif=frame.is_oopif, + cdp_session_id=None, + name=frame.name, + ) + + # ── Console / exception ring buffer ───────────────────────────────────── + + def _on_console(self, params: Dict[str, Any], *, level_from: str) -> None: + if level_from == "exception": + details = params.get("exceptionDetails") or {} + text = str(details.get("text") or "") + url = details.get("url") + event = ConsoleEvent(ts=time.time(), level="exception", text=text, url=url) + else: + raw_level = str(params.get("type") or "log") + level = "error" if raw_level in ("error", "assert") else ( + "warning" if raw_level == "warning" else "log" + ) + args = params.get("args") or [] + parts: List[str] = [] + for a in args[:4]: + if isinstance(a, dict): + parts.append(str(a.get("value") or a.get("description") or "")) + event = ConsoleEvent(ts=time.time(), level=level, text=" ".join(parts)) + with self._state_lock: + 
self._console_events.append(event) + if len(self._console_events) > CONSOLE_HISTORY_MAX * 2: + # Keep last CONSOLE_HISTORY_MAX; allow 2x slack to reduce churn. + self._console_events = self._console_events[-CONSOLE_HISTORY_MAX:] + + # ── Frame tree building (bounded) ─────────────────────────────────────── + + def _build_frame_tree_locked(self) -> Dict[str, Any]: + """Build the capped frame_tree payload. Must be called under state lock.""" + frames = self._frames + if not frames: + return {"top": None, "children": [], "truncated": False} + + # Identify a top frame — one with no parent, preferring oopif=False. + tops = [f for f in frames.values() if not f.parent_frame_id] + top = next((f for f in tops if not f.is_oopif), tops[0] if tops else None) + + # BFS from top, capped by FRAME_TREE_MAX_ENTRIES and + # FRAME_TREE_MAX_OOPIF_DEPTH for OOPIF branches. + children: List[Dict[str, Any]] = [] + truncated = False + if top is None: + return {"top": None, "children": [], "truncated": False} + + queue: List[Tuple[FrameInfo, int]] = [ + (f, 1) for f in frames.values() if f.parent_frame_id == top.frame_id + ] + visited: set[str] = {top.frame_id} + while queue and len(children) < FRAME_TREE_MAX_ENTRIES: + frame, depth = queue.pop(0) + if frame.frame_id in visited: + continue + visited.add(frame.frame_id) + if frame.is_oopif and depth > FRAME_TREE_MAX_OOPIF_DEPTH: + truncated = True + continue + children.append(frame.to_dict()) + for f in frames.values(): + if f.parent_frame_id == frame.frame_id and f.frame_id not in visited: + queue.append((f, depth + 1)) + if queue: + truncated = True + + return { + "top": top.to_dict(), + "children": children, + "truncated": truncated, + } + + +# ── Registry ───────────────────────────────────────────────────────────────── + + +class _SupervisorRegistry: + """Process-global (task_id → supervisor) map with idempotent start/stop. + + One instance, exposed as ``SUPERVISOR_REGISTRY``. 
Safe to call from any + thread — mutations go through ``_lock``. + """ + + def __init__(self) -> None: + self._lock = threading.Lock() + self._by_task: Dict[str, CDPSupervisor] = {} + + def get(self, task_id: str) -> Optional[CDPSupervisor]: + """Return the supervisor for ``task_id`` if running, else ``None``.""" + with self._lock: + return self._by_task.get(task_id) + + def get_or_start( + self, + task_id: str, + cdp_url: str, + *, + dialog_policy: str = DEFAULT_DIALOG_POLICY, + dialog_timeout_s: float = DEFAULT_DIALOG_TIMEOUT_S, + start_timeout: float = 15.0, + ) -> CDPSupervisor: + """Idempotently ensure a supervisor is running for ``(task_id, cdp_url)``. + + If a supervisor exists for this task but was bound to a different + ``cdp_url``, the old one is stopped and a fresh one is started. + """ + with self._lock: + existing = self._by_task.get(task_id) + if existing is not None: + if existing.cdp_url == cdp_url: + return existing + # URL changed — tear down old, fall through to re-create. + self._by_task.pop(task_id, None) + if existing is not None: + existing.stop() + + supervisor = CDPSupervisor( + task_id=task_id, + cdp_url=cdp_url, + dialog_policy=dialog_policy, + dialog_timeout_s=dialog_timeout_s, + ) + supervisor.start(timeout=start_timeout) + with self._lock: + # Guard against a concurrent get_or_start from another thread. + already = self._by_task.get(task_id) + if already is not None and already.cdp_url == cdp_url: + supervisor.stop() + return already + self._by_task[task_id] = supervisor + return supervisor + + def stop(self, task_id: str) -> None: + """Stop and discard the supervisor for ``task_id`` if it exists.""" + with self._lock: + supervisor = self._by_task.pop(task_id, None) + if supervisor is not None: + supervisor.stop() + + def stop_all(self) -> None: + """Stop every running supervisor. 
For shutdown / test teardown.""" + with self._lock: + items = list(self._by_task.items()) + self._by_task.clear() + for _, supervisor in items: + supervisor.stop() + + +SUPERVISOR_REGISTRY = _SupervisorRegistry() + + +__all__ = [ + "CDPSupervisor", + "ConsoleEvent", + "DEFAULT_DIALOG_POLICY", + "DEFAULT_DIALOG_TIMEOUT_S", + "DIALOG_POLICY_AUTO_ACCEPT", + "DIALOG_POLICY_AUTO_DISMISS", + "DIALOG_POLICY_MUST_RESPOND", + "DialogRecord", + "FrameInfo", + "PendingDialog", + "SUPERVISOR_REGISTRY", + "SupervisorSnapshot", + "_SupervisorRegistry", +] diff --git a/tools/browser_tool.py b/tools/browser_tool.py index e46636ad9..469e9be28 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -63,7 +63,7 @@ import tempfile import threading import time import requests -from typing import Dict, Any, Optional, List +from typing import Dict, Any, Optional, List, Tuple from pathlib import Path from agent.auxiliary_client import call_llm from hermes_constants import get_hermes_home @@ -287,6 +287,100 @@ def _get_cdp_override() -> str: return "" +def _get_dialog_policy_config() -> Tuple[str, float]: + """Read ``browser.dialog_policy`` + ``browser.dialog_timeout_s`` from config. + + Returns a ``(policy, timeout_s)`` tuple, falling back to the supervisor's + defaults when keys are absent or invalid. + """ + # Defer imports so browser_tool can be imported in minimal environments. 
+ from tools.browser_supervisor import ( + DEFAULT_DIALOG_POLICY, + DEFAULT_DIALOG_TIMEOUT_S, + _VALID_POLICIES, + ) + + try: + from hermes_cli.config import read_raw_config + + cfg = read_raw_config() + browser_cfg = cfg.get("browser", {}) if isinstance(cfg, dict) else {} + if not isinstance(browser_cfg, dict): + return DEFAULT_DIALOG_POLICY, DEFAULT_DIALOG_TIMEOUT_S + policy = str(browser_cfg.get("dialog_policy") or DEFAULT_DIALOG_POLICY) + if policy not in _VALID_POLICIES: + logger.debug("Invalid browser.dialog_policy=%r; using default", policy) + policy = DEFAULT_DIALOG_POLICY + timeout_raw = browser_cfg.get("dialog_timeout_s") + try: + timeout_s = float(timeout_raw) if timeout_raw is not None else DEFAULT_DIALOG_TIMEOUT_S + if timeout_s <= 0: + timeout_s = DEFAULT_DIALOG_TIMEOUT_S + except (TypeError, ValueError): + timeout_s = DEFAULT_DIALOG_TIMEOUT_S + return policy, timeout_s + except Exception: + return DEFAULT_DIALOG_POLICY, DEFAULT_DIALOG_TIMEOUT_S + + +def _ensure_cdp_supervisor(task_id: str) -> None: + """Start a CDP supervisor for ``task_id`` if an endpoint is reachable. + + Idempotent — delegates to ``SupervisorRegistry.get_or_start`` which skips + when a supervisor for this ``(task_id, cdp_url)`` already exists and + tears down + restarts on URL change. Safe to call on every + ``browser_navigate`` / ``/browser connect`` without worrying about + double-attach. + + Resolves the CDP URL in this order: + 1. ``BROWSER_CDP_URL`` / ``browser.cdp_url`` — covers ``/browser connect`` + and config-set overrides. + 2. ``_active_sessions[task_id]["cdp_url"]`` — covers Browserbase + any + other cloud provider whose ``create_session`` returns a raw CDP URL. + + Swallows all errors — failing to attach the supervisor must not break + the browser session itself. The agent simply won't see + ``pending_dialogs`` / ``frame_tree`` fields in snapshots. 
+ """ + cdp_url = _get_cdp_override() + if not cdp_url: + # Fallback: active session may carry a per-session CDP URL from a + # cloud provider (Browserbase sets this). + with _cleanup_lock: + session_info = _active_sessions.get(task_id, {}) + maybe = str(session_info.get("cdp_url") or "") + if maybe: + cdp_url = _resolve_cdp_override(maybe) + if not cdp_url: + return + try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found] + + policy, timeout_s = _get_dialog_policy_config() + SUPERVISOR_REGISTRY.get_or_start( + task_id=task_id, + cdp_url=cdp_url, + dialog_policy=policy, + dialog_timeout_s=timeout_s, + ) + except Exception as exc: + logger.debug( + "CDP supervisor attach for task=%s failed (non-fatal): %s", + task_id, + exc, + ) + + +def _stop_cdp_supervisor(task_id: str) -> None: + """Stop the CDP supervisor for ``task_id`` if one exists. No-op otherwise.""" + try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found] + + SUPERVISOR_REGISTRY.stop(task_id) + except Exception as exc: + logger.debug("CDP supervisor stop for task=%s failed (non-fatal): %s", task_id, exc) + + # ============================================================================ # Cloud Provider Registry # ============================================================================ @@ -995,7 +1089,12 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: if task_id in _active_sessions: return _active_sessions[task_id] _active_sessions[task_id] = session_info - + + # Lazy-start the CDP supervisor now that the session exists (if the + # backend surfaces a CDP URL via override or session_info["cdp_url"]). + # Idempotent; swallows errors. See _ensure_cdp_supervisor for details. 
+ _ensure_cdp_supervisor(task_id) + return session_info @@ -1455,7 +1554,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: if is_first_nav: session_info["_first_nav"] = False _maybe_start_recording(effective_task_id) - + result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60)) if result.get("success"): @@ -1578,7 +1677,20 @@ def browser_snapshot( "snapshot": snapshot_text, "element_count": len(refs) if refs else 0 } - + + # Merge supervisor state (pending dialogs + frame tree) when a CDP + # supervisor is attached to this task. No-op otherwise. See + # website/docs/developer-guide/browser-supervisor.md. + try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found] + _supervisor = SUPERVISOR_REGISTRY.get(effective_task_id) + if _supervisor is not None: + _sv_snap = _supervisor.snapshot() + if _sv_snap.active: + response.update(_sv_snap.to_dict()) + except Exception as _sv_exc: + logger.debug("supervisor snapshot merge failed: %s", _sv_exc) + return json.dumps(response, ensure_ascii=False) else: return json.dumps({ @@ -2248,7 +2360,11 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: """ if task_id is None: task_id = "default" - + + # Stop the CDP supervisor for this task FIRST so we close our WebSocket + # before the backend tears down the underlying CDP endpoint. + _stop_cdp_supervisor(task_id) + # Also clean up Camofox session if running in Camofox mode. # Skip full close when managed persistence is enabled — the browser # profile (and its session cookies) must survive across agent tasks. @@ -2329,6 +2445,13 @@ def cleanup_all_browsers() -> None: for task_id in task_ids: cleanup_browser(task_id) + # Tear down CDP supervisors for all tasks so background threads exit. 
+ try: + from tools.browser_supervisor import SUPERVISOR_REGISTRY # type: ignore[import-not-found] + SUPERVISOR_REGISTRY.stop_all() + except Exception: + pass + # Reset cached lookups so they are re-evaluated on next use. global _cached_agent_browser, _agent_browser_resolved global _cached_command_timeout, _command_timeout_resolved diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index b6aacf54e..88a28611e 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -217,6 +217,8 @@ def _format_job(job: Dict[str, Any]) -> Dict[str, Any]: result["script"] = job["script"] if job.get("enabled_toolsets"): result["enabled_toolsets"] = job["enabled_toolsets"] + if job.get("workdir"): + result["workdir"] = job["workdir"] return result @@ -237,6 +239,7 @@ def cronjob( reason: Optional[str] = None, script: Optional[str] = None, enabled_toolsets: Optional[List[str]] = None, + workdir: Optional[str] = None, task_id: str = None, ) -> str: """Unified cron job management tool.""" @@ -275,6 +278,7 @@ def cronjob( base_url=_normalize_optional_job_value(base_url, strip_trailing_slash=True), script=_normalize_optional_job_value(script), enabled_toolsets=enabled_toolsets or None, + workdir=_normalize_optional_job_value(workdir), ) return json.dumps( { @@ -366,6 +370,10 @@ def cronjob( updates["script"] = _normalize_optional_job_value(script) if script else None if enabled_toolsets is not None: updates["enabled_toolsets"] = enabled_toolsets or None + if workdir is not None: + # Empty string clears the field (restores old behaviour); + # otherwise pass raw — update_job() validates / normalizes. 
+ updates["workdir"] = _normalize_optional_job_value(workdir) or None if repeat is not None: # Normalize: treat 0 or negative as None (infinite) normalized_repeat = None if repeat <= 0 else repeat @@ -470,6 +478,10 @@ Important safety rule: cron-run sessions should not recursively schedule more cr "items": {"type": "string"}, "description": "Optional list of toolset names to restrict the job's agent to (e.g. [\"web\", \"terminal\", \"file\", \"delegation\"]). When set, only tools from these toolsets are loaded, significantly reducing input token overhead. When omitted, all default tools are loaded. Infer from the job's prompt — e.g. use \"web\" if it calls web_search, \"terminal\" if it runs scripts, \"file\" if it reads files, \"delegation\" if it calls delegate_task. On update, pass an empty array to clear." }, + "workdir": { + "type": "string", + "description": "Optional absolute path to run the job from. When set, AGENTS.md / CLAUDE.md / .cursorrules from that directory are injected into the system prompt, and the terminal/file/code_exec tools use it as their working directory — useful for running a job inside a specific project repo. Must be an absolute path that exists. When unset (default), preserves the original behaviour: no project context files, tools use the scheduler's cwd. On update, pass an empty string to clear. Jobs with workdir run sequentially (not parallel) to keep per-job directories isolated." + }, }, "required": ["action"] } @@ -515,6 +527,7 @@ registry.register( reason=args.get("reason"), script=args.get("script"), enabled_toolsets=args.get("enabled_toolsets"), + workdir=args.get("workdir"), task_id=kw.get("task_id"), ))(), check_fn=check_cronjob_requirements, diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 2c35c7c7e..2bbf354cf 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -298,7 +298,7 @@ def _get_child_timeout() -> float: """Read delegation.child_timeout_seconds from config. 
Returns the number of seconds a single child agent is allowed to run - before being considered stuck. Default: 300 s (5 minutes). + before being considered stuck. Default: 600 s (10 minutes). """ cfg = _load_config() val = cfg.get("child_timeout_seconds") @@ -409,11 +409,17 @@ def _preserve_parent_mcp_toolsets( DEFAULT_MAX_ITERATIONS = 50 -DEFAULT_CHILD_TIMEOUT = 300 # seconds before a child agent is considered stuck +DEFAULT_CHILD_TIMEOUT = 600 # seconds before a child agent is considered stuck _HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during delegation -_HEARTBEAT_STALE_CYCLES = ( - 5 # mark child stale after this many heartbeats with no iteration progress -) +# Stale-heartbeat thresholds. A child with no API-call progress is either: +# - idle between turns (no current_tool) — probably stuck on a slow API call +# - inside a tool (current_tool set) — probably running a legitimately long +# operation (terminal command, web fetch, large file read) +# The idle ceiling stays tight so genuinely stuck children don't mask the gateway +# timeout. The in-tool ceiling is much higher so legit long-running tools get +# time to finish; child_timeout_seconds (default 600s) is still the hard cap. +_HEARTBEAT_STALE_CYCLES_IDLE = 5 # 5 * 30s = 150s idle between turns → stale +_HEARTBEAT_STALE_CYCLES_IN_TOOL = 20 # 20 * 30s = 600s stuck on same tool → stale DEFAULT_TOOLSETS = ["terminal", "file", "web"] @@ -1016,6 +1022,150 @@ def _build_child_agent( return child +def _dump_subagent_timeout_diagnostic( + *, + child: Any, + task_index: int, + timeout_seconds: float, + duration_seconds: float, + worker_thread: Optional[threading.Thread], + goal: str, +) -> Optional[str]: + """Write a structured diagnostic dump for a subagent that timed out + before making any API call. + + See issue #14726: users hit "subagent timed out after 300s with no response" + with zero API calls and no way to inspect what happened. 
This helper + writes a dedicated log under ``~/.hermes/logs/subagent--.log`` + capturing the child's config, system-prompt / tool-schema sizes, activity + tracker snapshot, and the worker thread's Python stack at timeout. + + Returns the absolute path to the diagnostic file, or None on failure. + """ + try: + from hermes_constants import get_hermes_home + import datetime as _dt + import sys as _sys + import traceback as _traceback + + hermes_home = get_hermes_home() + logs_dir = hermes_home / "logs" + try: + logs_dir.mkdir(parents=True, exist_ok=True) + except Exception: + return None + + subagent_id = getattr(child, "_subagent_id", None) or f"idx{task_index}" + ts = _dt.datetime.now().strftime("%Y%m%d_%H%M%S") + dump_path = logs_dir / f"subagent-timeout-{subagent_id}-{ts}.log" + + lines: List[str] = [] + def _w(line: str = "") -> None: + lines.append(line) + + _w(f"# Subagent timeout diagnostic — issue #14726") + _w(f"# Generated: {_dt.datetime.now().isoformat()}") + _w("") + _w("## Timeout") + _w(f" task_index: {task_index}") + _w(f" subagent_id: {subagent_id}") + _w(f" configured_timeout: {timeout_seconds}s") + _w(f" actual_duration: {duration_seconds:.2f}s") + _w("") + + _w("## Goal") + _goal_preview = (goal or "").strip() + if len(_goal_preview) > 1000: + _goal_preview = _goal_preview[:1000] + " ...[truncated]" + _w(_goal_preview or "(empty)") + _w("") + + _w("## Child config") + for attr in ( + "model", "provider", "api_mode", "base_url", "max_iterations", + "quiet_mode", "skip_memory", "skip_context_files", "platform", + "_delegate_role", "_delegate_depth", + ): + try: + val = getattr(child, attr, None) + # Redact api_key-shaped values defensively + if isinstance(val, str) and attr == "base_url": + pass + _w(f" {attr}: {val!r}") + except Exception: + _w(f" {attr}: ") + _w("") + + _w("## Toolsets") + enabled = getattr(child, "enabled_toolsets", None) + _w(f" enabled_toolsets: {enabled!r}") + tool_names = getattr(child, "valid_tool_names", None) + if 
tool_names: + _w(f" loaded tool count: {len(tool_names)}") + try: + _w(f" loaded tools: {sorted(list(tool_names))}") + except Exception: + pass + _w("") + + _w("## Prompt / schema sizes") + try: + sys_prompt = getattr(child, "ephemeral_system_prompt", None) \ + or getattr(child, "system_prompt", None) \ + or "" + _w(f" system_prompt_bytes: {len(sys_prompt.encode('utf-8')) if isinstance(sys_prompt, str) else 'n/a'}") + _w(f" system_prompt_chars: {len(sys_prompt) if isinstance(sys_prompt, str) else 'n/a'}") + except Exception as exc: + _w(f" system_prompt: ") + try: + tools_schema = getattr(child, "tools", None) + if tools_schema is not None: + _schema_json = json.dumps(tools_schema, default=str) + _w(f" tool_schema_count: {len(tools_schema)}") + _w(f" tool_schema_bytes: {len(_schema_json.encode('utf-8'))}") + except Exception as exc: + _w(f" tool_schema: ") + _w("") + + _w("## Activity summary") + try: + summary = child.get_activity_summary() + for k, v in summary.items(): + _w(f" {k}: {v!r}") + except Exception as exc: + _w(f" ") + _w("") + + _w("## Worker thread stack at timeout") + if worker_thread is not None and worker_thread.is_alive(): + frames = _sys._current_frames() + worker_frame = frames.get(worker_thread.ident) + if worker_frame is not None: + stack = _traceback.format_stack(worker_frame) + for frame_line in stack: + for sub in frame_line.rstrip().split("\n"): + _w(f" {sub}") + else: + _w(" ") + elif worker_thread is None: + _w(" ") + else: + _w(" ") + _w("") + + _w("## Notes") + _w(" This file is written ONLY when a subagent times out with 0 API calls.") + _w(" 0-API-call timeouts mean the child never reached its first LLM request.") + _w(" Common causes: oversized prompt rejected by provider, transport hang,") + _w(" credential resolution stuck. 
See issue #14726 for context.") + + dump_path.write_text("\n".join(lines), encoding="utf-8") + return str(dump_path) + except Exception as exc: + logger.warning("Subagent timeout diagnostic dump failed: %s", exc) + return None + + def _run_single_child( task_index: int, goal: str, @@ -1057,7 +1207,11 @@ def _run_single_child( # Without this, the parent's _last_activity_ts freezes when delegate_task # starts and the gateway eventually kills the agent for "no activity". _heartbeat_stop = threading.Event() - _last_seen_iter = [0] # mutable container for heartbeat stale detection + # Stale detection: track the child's (tool, iteration) pair across + # heartbeat cycles. If neither advances, count the cycle as stale. + # Different thresholds for idle vs in-tool (see _HEARTBEAT_STALE_CYCLES_*). + _last_seen_iter = [0] + _last_seen_tool = [None] # type: list _stale_count = [0] def _heartbeat_loop(): @@ -1075,22 +1229,38 @@ def _run_single_child( child_iter = child_summary.get("api_call_count", 0) child_max = child_summary.get("max_iterations", 0) - # Stale detection: if iteration count hasn't advanced, - # increment stale counter. After N cycles with no - # progress, stop masking the hang so the gateway - # inactivity timeout can fire as a last resort. - if child_iter <= _last_seen_iter[0]: - _stale_count[0] += 1 - else: + # Stale detection: count cycles where neither the iteration + # count nor the current_tool advances. A child running a + # legitimately long-running tool (terminal command, web + # fetch) keeps current_tool set but doesn't advance + # api_call_count — we don't want that to look stale at the + # idle threshold. 
+ iter_advanced = child_iter > _last_seen_iter[0] + tool_changed = child_tool != _last_seen_tool[0] + if iter_advanced or tool_changed: _last_seen_iter[0] = child_iter + _last_seen_tool[0] = child_tool _stale_count[0] = 0 + else: + _stale_count[0] += 1 - if _stale_count[0] >= _HEARTBEAT_STALE_CYCLES: + # Pick threshold based on whether the child is currently + # inside a tool call. In-tool threshold is high enough to + # cover legitimately slow tools; idle threshold stays + # tight so the gateway timeout can fire on a truly wedged + # child. + stale_limit = ( + _HEARTBEAT_STALE_CYCLES_IN_TOOL + if child_tool + else _HEARTBEAT_STALE_CYCLES_IDLE + ) + if _stale_count[0] >= stale_limit: logger.warning( - "Subagent %d appears stale (no iteration progress " - "for %d heartbeat cycles) — stopping heartbeat", + "Subagent %d appears stale (no progress for %d " + "heartbeat cycles, tool=%s) — stopping heartbeat", task_index, _stale_count[0], + child_tool or "", ) break # stop touching parent, let gateway timeout fire @@ -1168,11 +1338,18 @@ def _run_single_child( # when the child's API call or tool-level HTTP request hangs. child_timeout = _get_child_timeout() _timeout_executor = ThreadPoolExecutor(max_workers=1) - _child_future = _timeout_executor.submit( - child.run_conversation, - user_message=goal, - task_id=child_task_id, - ) + # Capture the worker thread so the timeout diagnostic can dump its + # Python stack (see #14726 — 0-API-call hangs are opaque without it). 
+ _worker_thread_holder: Dict[str, Optional[threading.Thread]] = {"t": None} + + def _run_with_thread_capture(): + _worker_thread_holder["t"] = threading.current_thread() + return child.run_conversation( + user_message=goal, + task_id=child_task_id, + ) + + _child_future = _timeout_executor.submit(_run_with_thread_capture) try: result = _child_future.result(timeout=child_timeout) except Exception as _timeout_exc: @@ -1194,6 +1371,32 @@ def _run_single_child( duration, ) + # When a subagent times out BEFORE making any API call, dump a + # diagnostic to help users (and us) see what the child was doing. + # See #14726 — without this, 0-API-call hangs are black boxes. + diagnostic_path: Optional[str] = None + child_api_calls = 0 + try: + _summary = child.get_activity_summary() + child_api_calls = int(_summary.get("api_call_count", 0) or 0) + except Exception: + pass + if is_timeout and child_api_calls == 0: + diagnostic_path = _dump_subagent_timeout_diagnostic( + child=child, + task_index=task_index, + timeout_seconds=float(child_timeout), + duration_seconds=float(duration), + worker_thread=_worker_thread_holder.get("t"), + goal=goal, + ) + if diagnostic_path: + logger.warning( + "Subagent %d 0-API-call timeout — diagnostic written to %s", + task_index, + diagnostic_path, + ) + if child_progress_cb: try: child_progress_cb( @@ -1210,22 +1413,35 @@ def _run_single_child( except Exception: pass + if is_timeout: + if child_api_calls == 0: + _err = ( + f"Subagent timed out after {child_timeout}s without " + f"making any API call — the child never reached its " + f"first LLM request (prompt construction, credential " + f"resolution, or transport may be stuck)." + ) + if diagnostic_path: + _err += f" Diagnostic: {diagnostic_path}" + else: + _err = ( + f"Subagent timed out after {child_timeout}s with " + f"{child_api_calls} API call(s) completed — likely " + f"stuck on a slow API call or unresponsive network request." 
+ ) + else: + _err = str(_timeout_exc) + return { "task_index": task_index, "status": "timeout" if is_timeout else "error", "summary": None, - "error": ( - ( - f"Subagent timed out after {child_timeout}s with no response. " - "The child may be stuck on a slow API call or unresponsive network request." - ) - if is_timeout - else str(_timeout_exc) - ), + "error": _err, "exit_reason": "timeout" if is_timeout else "error", - "api_calls": 0, + "api_calls": child_api_calls, "duration_seconds": duration, "_child_role": getattr(child, "_delegate_role", None), + "diagnostic_path": diagnostic_path, } finally: # Shut down executor without waiting — if the child thread diff --git a/tools/environments/base.py b/tools/environments/base.py index d89b66f19..4510b1749 100644 --- a/tools/environments/base.py +++ b/tools/environments/base.py @@ -368,6 +368,17 @@ class BaseEnvironment(ABC): # Command wrapping # ------------------------------------------------------------------ + @staticmethod + def _quote_cwd_for_cd(cwd: str) -> str: + """Quote a ``cd`` target while preserving ``~`` expansion.""" + if cwd == "~": + return cwd + if cwd == "~/": + return "$HOME" + if cwd.startswith("~/"): + return f"$HOME/{shlex.quote(cwd[2:])}" + return shlex.quote(cwd) + def _wrap_command(self, command: str, cwd: str) -> str: """Build the full bash script that sources snapshot, cd's, runs command, re-dumps env vars, and emits CWD markers.""" @@ -379,10 +390,9 @@ class BaseEnvironment(ABC): if self._snapshot_ready: parts.append(f"source {self._snapshot_path} 2>/dev/null || true") - # cd to working directory — let bash expand ~ natively - quoted_cwd = ( - shlex.quote(cwd) if cwd != "~" and not cwd.startswith("~/") else cwd - ) + # Preserve bare ``~`` expansion, but rewrite ``~/...`` through + # ``$HOME`` so suffixes with spaces remain a single shell word. 
+ quoted_cwd = self._quote_cwd_for_cd(cwd) parts.append(f"builtin cd {quoted_cwd} || exit 126") # Run the actual command diff --git a/tools/file_operations.py b/tools/file_operations.py index 7e75578b2..9e0b44c14 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -292,10 +292,15 @@ def normalize_read_pagination(offset: Any = DEFAULT_READ_OFFSET, Tool schemas declare minimum/maximum values, but not every caller or provider enforces schemas before dispatch. Clamp here so invalid values cannot leak into sed ranges like ``0,-1p``. + + The upper bound on ``limit`` comes from ``tool_output.max_lines`` in + config.yaml (defaults to the module-level ``MAX_LINES`` constant). """ + from tools.tool_output_limits import get_max_lines + max_lines = get_max_lines() normalized_offset = max(1, _coerce_int(offset, DEFAULT_READ_OFFSET)) normalized_limit = _coerce_int(limit, DEFAULT_READ_LIMIT) - normalized_limit = max(1, min(normalized_limit, MAX_LINES)) + normalized_limit = max(1, min(normalized_limit, max_lines)) return normalized_offset, normalized_limit @@ -414,12 +419,14 @@ class ShellFileOperations(FileOperations): def _add_line_numbers(self, content: str, start_line: int = 1) -> str: """Add line numbers to content in LINE_NUM|CONTENT format.""" + from tools.tool_output_limits import get_max_line_length + max_line_length = get_max_line_length() lines = content.split('\n') numbered = [] for i, line in enumerate(lines, start=start_line): # Truncate long lines - if len(line) > MAX_LINE_LENGTH: - line = line[:MAX_LINE_LENGTH] + "... [truncated]" + if len(line) > max_line_length: + line = line[:max_line_length] + "... 
[truncated]" numbered.append(f"{i:6d}|{line}") return '\n'.join(numbered) diff --git a/tools/mcp_oauth.py b/tools/mcp_oauth.py index 7910c3cdc..fd655bf3d 100644 --- a/tools/mcp_oauth.py +++ b/tools/mcp_oauth.py @@ -233,7 +233,7 @@ class HermesTokenStorage: return None async def set_tokens(self, tokens: "OAuthToken") -> None: - payload = tokens.model_dump(exclude_none=True) + payload = tokens.model_dump(mode="json", exclude_none=True) # Persist an absolute ``expires_at`` so a process restart can # reconstruct the correct remaining TTL. Without this the MCP SDK's # ``_initialize`` reloads a relative ``expires_in`` which has no @@ -265,7 +265,7 @@ class HermesTokenStorage: return None async def set_client_info(self, client_info: "OAuthClientInformationFull") -> None: - _write_json(self._client_info_path(), client_info.model_dump(exclude_none=True)) + _write_json(self._client_info_path(), client_info.model_dump(mode="json", exclude_none=True)) logger.debug("OAuth client info saved for %s", self._server_name) # -- cleanup ----------------------------------------------------------- @@ -365,8 +365,15 @@ async def _wait_for_callback() -> tuple[str, str | None]: Raises: OAuthNonInteractiveError: If the callback times out (no user present to complete the browser auth). + RuntimeError: If ``_oauth_port`` has not been set, which would indicate + that ``build_oauth_auth`` was skipped — the asserting form below + was a silent bug when running Python with ``-O``/``-OO``. """ - assert _oauth_port is not None, "OAuth callback port not set" + if _oauth_port is None: + raise RuntimeError( + "OAuth callback port not set — build_oauth_auth must be called " + "before _wait_for_oauth_callback" + ) # The callback server is already running (started in build_oauth_auth). # We just need to poll for the result. 
@@ -508,7 +515,7 @@ def _maybe_preregister_client( info_dict["scope"] = cfg["scope"] client_info = OAuthClientInformationFull.model_validate(info_dict) - _write_json(storage._client_info_path(), client_info.model_dump(exclude_none=True)) + _write_json(storage._client_info_path(), client_info.model_dump(mode="json", exclude_none=True)) logger.debug("Pre-registered client_id=%s for '%s'", client_id, storage._server_name) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index efef5ea91..565dbfca0 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -78,12 +78,86 @@ import math import os import re import shutil +import sys import threading import time +from datetime import datetime from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Stdio subprocess stderr redirection +# --------------------------------------------------------------------------- +# +# The MCP SDK's ``stdio_client(server, errlog=sys.stderr)`` defaults the +# subprocess stderr stream to the parent process's real stderr, i.e. the +# user's TTY. That means any MCP server we spawn at startup (FastMCP +# banners, slack-mcp-server JSON startup logs, etc.) writes directly onto +# the terminal while prompt_toolkit / Rich is rendering the TUI — which +# corrupts the display and can hang the session. +# +# Instead we redirect every stdio MCP subprocess's stderr into a shared +# per-profile log file (~/.hermes/logs/mcp-stderr.log), tagged with the +# server name so individual servers remain debuggable. +# +# Fallback is os.devnull if opening the log file fails for any reason. + +_mcp_stderr_log_fh: Optional[Any] = None +_mcp_stderr_log_lock = threading.Lock() + + +def _get_mcp_stderr_log() -> Any: + """Return a shared append-mode file handle for MCP subprocess stderr. + + Opened once per process and reused for every stdio server. 
Must have a + real OS-level file descriptor (``fileno()``) because asyncio's subprocess + machinery wires the child's stderr directly to that fd. Falls back to + ``/dev/null`` if opening the log file fails. + """ + global _mcp_stderr_log_fh + with _mcp_stderr_log_lock: + if _mcp_stderr_log_fh is not None: + return _mcp_stderr_log_fh + try: + from hermes_constants import get_hermes_home + log_dir = get_hermes_home() / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / "mcp-stderr.log" + # Line-buffered so server output lands on disk promptly; errors= + # "replace" tolerates garbled binary output from misbehaving + # servers. + fh = open(log_path, "a", encoding="utf-8", errors="replace", buffering=1) + # Sanity-check: confirm a real fd is available before we commit. + fh.fileno() + _mcp_stderr_log_fh = fh + except Exception as exc: # pragma: no cover — best-effort fallback + logger.debug("Failed to open MCP stderr log, using devnull: %s", exc) + try: + _mcp_stderr_log_fh = open(os.devnull, "w", encoding="utf-8") + except Exception: + # Last resort: the real stderr. Not ideal for TUI users but + # it matches pre-fix behavior. + _mcp_stderr_log_fh = sys.stderr + return _mcp_stderr_log_fh + + +def _write_stderr_log_header(server_name: str) -> None: + """Write a human-readable session marker before launching a server. + + Gives operators a way to find each server's output in the shared + ``mcp-stderr.log`` file without needing per-line prefixes (which would + require a pipe + reader thread and complicate shutdown). 
+ """ + fh = _get_mcp_stderr_log() + try: + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + fh.write(f"\n===== [{ts}] starting MCP server '{server_name}' =====\n") + fh.flush() + except Exception: + pass + # --------------------------------------------------------------------------- # Graceful import -- MCP SDK is an optional dependency # --------------------------------------------------------------------------- @@ -93,6 +167,10 @@ _MCP_HTTP_AVAILABLE = False _MCP_SAMPLING_TYPES = False _MCP_NOTIFICATION_TYPES = False _MCP_MESSAGE_HANDLER_SUPPORTED = False +# Conservative fallback for SDK builds that don't export LATEST_PROTOCOL_VERSION. +# Streamable HTTP was introduced by 2025-03-26, so this remains valid for the +# HTTP transport path even on older-but-supported SDK versions. +LATEST_PROTOCOL_VERSION = "2025-03-26" try: from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client @@ -109,6 +187,10 @@ try: _MCP_NEW_HTTP = True except ImportError: _MCP_NEW_HTTP = False + try: + from mcp.types import LATEST_PROTOCOL_VERSION + except ImportError: + logger.debug("mcp.types.LATEST_PROTOCOL_VERSION not available -- using fallback protocol version") # Sampling types -- separated so older SDK versions don't break MCP support try: from mcp.types import ( @@ -962,7 +1044,13 @@ class MCPServerTask: # Snapshot child PIDs before spawning so we can track the new one. pids_before = _snapshot_child_pids() - async with stdio_client(server_params) as (read_stream, write_stream): + # Redirect subprocess stderr into a shared log file so MCP servers + # (FastMCP banners, slack-mcp startup JSON, etc.) don't dump onto + # the user's TTY and corrupt the TUI. Preserves debuggability via + # ~/.hermes/logs/mcp-stderr.log. + _write_stderr_log_header(self.name) + _errlog = _get_mcp_stderr_log() + async with stdio_client(server_params, errlog=_errlog) as (read_stream, write_stream): # Capture the newly spawned subprocess PID for force-kill cleanup. 
new_pids = _snapshot_child_pids() - pids_before if new_pids: @@ -995,6 +1083,12 @@ class MCPServerTask: url = config["url"] headers = dict(config.get("headers") or {}) + # Some MCP servers require MCP-Protocol-Version on the initial + # initialize request and reject session-less POSTs otherwise. + # Seed it as a client-level default, but treat user overrides as + # case-insensitive so conventional casing is preserved. + if not any(key.lower() == "mcp-protocol-version" for key in headers): + headers["mcp-protocol-version"] = LATEST_PROTOCOL_VERSION connect_timeout = config.get("connect_timeout", _DEFAULT_CONNECT_TIMEOUT) ssl_verify = config.get("ssl_verify", True) @@ -1024,10 +1118,23 @@ class MCPServerTask: # matching the SDK's own create_mcp_http_client defaults. import httpx + _original_url = httpx.URL(url) + + async def _strip_auth_on_cross_origin_redirect(response): + """Strip Authorization headers when redirected to a different origin.""" + if response.is_redirect and response.next_request: + target = response.next_request.url + if (target.scheme, target.host, target.port) != ( + _original_url.scheme, _original_url.host, _original_url.port, + ): + response.next_request.headers.pop("authorization", None) + response.next_request.headers.pop("Authorization", None) + client_kwargs: dict = { "follow_redirects": True, "timeout": httpx.Timeout(float(connect_timeout), read=300.0), "verify": ssl_verify, + "event_hooks": {"response": [_strip_auth_on_cross_origin_redirect]}, } if headers: client_kwargs["headers"] = headers @@ -1475,6 +1582,129 @@ def _handle_auth_error_and_retry( "server": server_name, }, ensure_ascii=False) + +# Substrings (lower-cased match) that indicate the MCP server rejected +# the request because its server-side transport session expired / +# was garbage-collected. The caller's OAuth token is still valid — +# only the transport-layer session state needs rebuilding. See #13383. 
+_SESSION_EXPIRED_MARKERS: tuple = ( + "invalid or expired session", + "expired session", + "session expired", + "session not found", + "unknown session", +) + + +def _is_session_expired_error(exc: BaseException) -> bool: + """Return True if ``exc`` looks like an MCP transport session expiry. + + Streamable HTTP MCP servers may garbage-collect server-side session + state while the OAuth token remains valid — idle TTL, server + restart, horizontal-scaling pod rotation, etc. The SDK surfaces + this as a JSON-RPC error whose message contains phrases like + ``"Invalid or expired session"``. This class of failure is + distinct from :func:`_is_auth_error`: re-running the OAuth refresh + flow would be pointless because the access token is fine. What's + needed is a transport reconnect — tear down and rebuild the + ``streamablehttp_client`` + ``ClientSession`` pair, which is + exactly what ``MCPServerTask._reconnect_event`` triggers. + """ + if isinstance(exc, InterruptedError): + return False + # Exception messages vary across SDK versions + server + # implementations, so match on a small allow-list of stable + # substrings rather than exception type. Kept narrow to avoid + # false positives on unrelated server errors. + msg = str(exc).lower() + if not msg: + return False + return any(marker in msg for marker in _SESSION_EXPIRED_MARKERS) + + +def _handle_session_expired_and_retry( + server_name: str, + exc: BaseException, + retry_call, + op_description: str, +): + """Trigger a transport reconnect and retry once on session expiry. + + Unlike :func:`_handle_auth_error_and_retry`, this does **not** call + the OAuth manager's ``handle_401`` — the access token is still + valid, only the server-side session state is stale. Setting + ``_reconnect_event`` causes the server task's lifecycle loop to + tear down the current ``streamablehttp_client`` + ``ClientSession`` + and rebuild them, reusing the existing OAuth provider instance. + See #13383. 
+ + Args: + server_name: Name of the MCP server that raised. + exc: The exception from the failed call. + retry_call: Zero-arg callable that re-runs the operation, + returning the same JSON string format as the handler. + op_description: Human-readable name of the operation (logs). + + Returns: + A JSON string if reconnect + retry was attempted and produced + a response, or ``None`` to fall through to the caller's + generic error path (not a session-expired error, no server + record, reconnect didn't ready in time, or retry also failed). + """ + if not _is_session_expired_error(exc): + return None + + with _lock: + srv = _servers.get(server_name) + if srv is None or not hasattr(srv, "_reconnect_event"): + return None + + loop = _mcp_loop + if loop is None or not loop.is_running(): + return None + + logger.info( + "MCP server '%s': %s failed with session-expired error (%s); " + "signalling transport reconnect and retrying once.", + server_name, op_description, exc, + ) + + # Trigger the same reconnect mechanism the OAuth recovery path + # uses, then wait briefly for the new session to come back ready. 
+ loop.call_soon_threadsafe(srv._reconnect_event.set) + deadline = time.monotonic() + 15 + ready = False + while time.monotonic() < deadline: + if srv.session is not None and srv._ready.is_set(): + ready = True + break + time.sleep(0.25) + if not ready: + logger.warning( + "MCP server '%s': reconnect did not ready within 15s after " + "session-expired error; falling through to error response.", + server_name, + ) + return None + + try: + result = retry_call() + try: + parsed = json.loads(result) + if "error" not in parsed: + _server_error_counts[server_name] = 0 + return result + except (json.JSONDecodeError, TypeError): + _server_error_counts[server_name] = 0 + return result + except Exception as retry_exc: + logger.warning( + "MCP %s/%s retry after session reconnect failed: %s", + server_name, op_description, retry_exc, + ) + return None + + # Dedicated event loop running in a background daemon thread. _mcp_loop: Optional[asyncio.AbstractEventLoop] = None _mcp_thread: Optional[threading.Thread] = None @@ -1761,6 +1991,16 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): if recovered is not None: return recovered + # Transport session expiry (#13383): same reconnect flow + # but skips OAuth recovery because the access token is + # still valid — only the server-side session is stale. 
+ recovered = _handle_session_expired_and_retry( + server_name, exc, _call_once, + f"tools/call {tool_name}", + ) + if recovered is not None: + return recovered + _bump_server_error(server_name) logger.error( "MCP tool %s/%s call failed: %s", @@ -1813,6 +2053,11 @@ def _make_list_resources_handler(server_name: str, tool_timeout: float): recovered = _handle_auth_error_and_retry( server_name, exc, _call_once, "resources/list", ) + if recovered is not None: + return recovered + recovered = _handle_session_expired_and_retry( + server_name, exc, _call_once, "resources/list", + ) if recovered is not None: return recovered logger.error( @@ -1867,6 +2112,11 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float): recovered = _handle_auth_error_and_retry( server_name, exc, _call_once, "resources/read", ) + if recovered is not None: + return recovered + recovered = _handle_session_expired_and_retry( + server_name, exc, _call_once, "resources/read", + ) if recovered is not None: return recovered logger.error( @@ -1924,6 +2174,11 @@ def _make_list_prompts_handler(server_name: str, tool_timeout: float): recovered = _handle_auth_error_and_retry( server_name, exc, _call_once, "prompts/list", ) + if recovered is not None: + return recovered + recovered = _handle_session_expired_and_retry( + server_name, exc, _call_once, "prompts/list", + ) if recovered is not None: return recovered logger.error( @@ -1989,6 +2244,11 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float): recovered = _handle_auth_error_and_retry( server_name, exc, _call_once, "prompts/get", ) + if recovered is not None: + return recovered + recovered = _handle_session_expired_and_retry( + server_name, exc, _call_once, "prompts/get", + ) if recovered is not None: return recovered logger.error( @@ -2019,14 +2279,92 @@ def _make_check_fn(server_name: str): # --------------------------------------------------------------------------- def _normalize_mcp_input_schema(schema: dict | None) 
-> dict: - """Normalize MCP input schemas for LLM tool-calling compatibility.""" + """Normalize MCP input schemas for LLM tool-calling compatibility. + + MCP servers can emit plain JSON Schema with ``definitions`` / + ``#/definitions/...`` references. Kimi / Moonshot rejects that form and + requires local refs to point into ``#/$defs/...`` instead. Normalize the + common draft-07 shape here so MCP tool schemas remain portable across + OpenAI-compatible providers. + + Additional MCP-server robustness repairs applied recursively: + + * Missing or ``null`` ``type`` on an object-shaped node is coerced to + ``"object"`` (some servers omit it). See PR #4897. + * When an ``object`` node lacks ``properties``, an empty ``properties`` + dict is added so ``required`` entries don't dangle. + * ``required`` arrays are pruned to only names that exist in + ``properties``; otherwise Google AI Studio / Gemini 400s with + ``property is not defined``. See PR #4651. + + All repairs are provider-agnostic and ideally produce a schema valid on + OpenAI, Anthropic, Gemini, and Moonshot in one pass. 
+ """ if not schema: return {"type": "object", "properties": {}} - if schema.get("type") == "object" and "properties" not in schema: - return {**schema, "properties": {}} + def _rewrite_local_refs(node): + if isinstance(node, dict): + normalized = {} + for key, value in node.items(): + out_key = "$defs" if key == "definitions" else key + normalized[out_key] = _rewrite_local_refs(value) + ref = normalized.get("$ref") + if isinstance(ref, str) and ref.startswith("#/definitions/"): + normalized["$ref"] = "#/$defs/" + ref[len("#/definitions/"):] + return normalized + if isinstance(node, list): + return [_rewrite_local_refs(item) for item in node] + return node - return schema + def _repair_object_shape(node): + """Recursively repair object-shaped nodes: fill type, prune required.""" + if isinstance(node, list): + return [_repair_object_shape(item) for item in node] + if not isinstance(node, dict): + return node + + repaired = {k: _repair_object_shape(v) for k, v in node.items()} + + # Coerce missing / null type when the shape is clearly an object + # (has properties or required but no type). 
+ if not repaired.get("type") and ( + "properties" in repaired or "required" in repaired + ): + repaired["type"] = "object" + + if repaired.get("type") == "object": + # Ensure properties exists so required can reference it safely + if "properties" not in repaired or not isinstance( + repaired.get("properties"), dict + ): + repaired["properties"] = {} if "properties" not in repaired else repaired["properties"] + if not isinstance(repaired.get("properties"), dict): + repaired["properties"] = {} + + # Prune required to only include names that exist in properties + required = repaired.get("required") + if isinstance(required, list): + props = repaired.get("properties") or {} + valid = [r for r in required if isinstance(r, str) and r in props] + if len(valid) != len(required): + if valid: + repaired["required"] = valid + else: + repaired.pop("required", None) + + return repaired + + normalized = _rewrite_local_refs(schema) + normalized = _repair_object_shape(normalized) + + # Ensure top-level is a well-formed object schema + if not isinstance(normalized, dict): + return {"type": "object", "properties": {}} + if normalized.get("type") == "object" and "properties" not in normalized: + normalized = {**normalized, "properties": {}} + + return normalized def sanitize_mcp_name_component(value: str) -> str: @@ -2057,7 +2395,7 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict: return { "name": prefixed_name, "description": mcp_tool.description or f"MCP tool {mcp_tool.name} from {server_name}", - "parameters": _normalize_mcp_input_schema(mcp_tool.inputSchema), + "parameters": _normalize_mcp_input_schema(getattr(mcp_tool, "inputSchema", None)), } @@ -2122,6 +2460,8 @@ def _build_utility_schemas(server_name: str) -> List[dict]: "arguments": { "type": "object", "description": "Optional arguments to pass to the prompt", + "properties": {}, + "additionalProperties": True, }, }, "required": ["name"], @@ -2635,6 +2975,11 @@ def _kill_orphaned_mcp_children() -> None: pids = 
dict(_stdio_pids) _stdio_pids.clear() + # Fast path: no tracked stdio PIDs to reap. Skip the SIGTERM/sleep/SIGKILL + # dance entirely — otherwise every MCP-free shutdown pays a 2s sleep tax. + if not pids: + return + # Phase 1: SIGTERM (graceful) for pid, server_name in pids.items(): try: diff --git a/tools/schema_sanitizer.py b/tools/schema_sanitizer.py new file mode 100644 index 000000000..67648c204 --- /dev/null +++ b/tools/schema_sanitizer.py @@ -0,0 +1,186 @@ +"""Sanitize tool JSON schemas for broad LLM-backend compatibility. + +Some local inference backends (notably llama.cpp's ``json-schema-to-grammar`` +converter used to build GBNF tool-call parsers) are strict about what JSON +Schema shapes they accept. Schemas that OpenAI / Anthropic / most cloud +providers silently accept can make llama.cpp fail the entire request with: + + HTTP 400: Unable to generate parser for this template. + Automatic parser generation failed: JSON schema conversion failed: + Unrecognized schema: "object" + +The failure modes we've seen in the wild: + +* ``{"type": "object"}`` with no ``properties`` — rejected as a node the + grammar generator can't constrain. +* A schema value that is the bare string ``"object"`` instead of a dict + (malformed MCP server output, e.g. ``additionalProperties: "object"``). +* ``"type": ["string", "null"]`` array types — many converters only accept + single-string ``type``. +* Unconstrained ``additionalProperties`` on objects with empty properties. + +This module walks the final tool schema tree (after MCP-level normalization +and any per-tool dynamic rebuilds) and fixes the known-hostile constructs +in-place on a deep copy. It is intentionally conservative: it only modifies +shapes the LLM backend couldn't use anyway. 
+""" + +from __future__ import annotations + +import copy +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +def sanitize_tool_schemas(tools: list[dict]) -> list[dict]: + """Return a copy of ``tools`` with each tool's parameter schema sanitized. + + Input is an OpenAI-format tool list: + ``[{"type": "function", "function": {"name": ..., "parameters": {...}}}]`` + + The returned list is a deep copy — callers can safely mutate it without + affecting the original registry entries. + """ + if not tools: + return tools + + sanitized: list[dict] = [] + for tool in tools: + sanitized.append(_sanitize_single_tool(tool)) + return sanitized + + +def _sanitize_single_tool(tool: dict) -> dict: + """Deep-copy and sanitize a single OpenAI-format tool entry.""" + out = copy.deepcopy(tool) + fn = out.get("function") if isinstance(out, dict) else None + if not isinstance(fn, dict): + return out + + params = fn.get("parameters") + # Missing / non-dict parameters → substitute the minimal valid shape. + if not isinstance(params, dict): + fn["parameters"] = {"type": "object", "properties": {}} + return out + + fn["parameters"] = _sanitize_node(params, path=fn.get("name", "")) + # After recursion, guarantee the top-level is an object with properties. + top = fn["parameters"] + if not isinstance(top, dict): + fn["parameters"] = {"type": "object", "properties": {}} + else: + if top.get("type") != "object": + top["type"] = "object" + if "properties" not in top or not isinstance(top.get("properties"), dict): + top["properties"] = {} + return out + + +def _sanitize_node(node: Any, path: str) -> Any: + """Recursively sanitize a JSON-Schema fragment. + + - Replaces bare-string schema values ("object", "string", ...) with + ``{"type": }`` so downstream consumers see a dict. + - Injects ``properties: {}`` into object-typed nodes missing it. + - Normalizes ``type: [X, "null"]`` arrays to single ``type: X`` (keeping + ``nullable: true`` as a hint). 
+ - Recurses into ``properties``, ``items``, ``additionalProperties``, + ``anyOf``, ``oneOf``, ``allOf``, and ``$defs`` / ``definitions``. + """ + # Malformed: the schema position holds a bare string like "object". + if isinstance(node, str): + if node in {"object", "string", "number", "integer", "boolean", "array", "null"}: + logger.debug( + "schema_sanitizer[%s]: replacing bare-string schema %r " + "with {'type': %r}", + path, node, node, + ) + return {"type": node} if node != "object" else { + "type": "object", + "properties": {}, + } + # Any other stray string is not a schema — drop it by replacing with + # a permissive object schema rather than propagate something the + # backend will reject. + logger.debug( + "schema_sanitizer[%s]: replacing non-schema string %r " + "with empty object schema", path, node, + ) + return {"type": "object", "properties": {}} + + if isinstance(node, list): + return [_sanitize_node(item, f"{path}[{i}]") for i, item in enumerate(node)] + + if not isinstance(node, dict): + return node + + out: dict = {} + for key, value in node.items(): + # type: [X, "null"] → type: X (the backend's tool-call parser only + # accepts singular string types; nullable is lost but the call still + # succeeds, and the model can still pass null on its own.) + if key == "type" and isinstance(value, list): + non_null = [t for t in value if t != "null"] + if len(non_null) == 1 and isinstance(non_null[0], str): + out["type"] = non_null[0] + if "null" in value: + out.setdefault("nullable", True) + continue + # Fallback: pick the first string type, drop the rest. + first_str = next((t for t in value if isinstance(t, str) and t != "null"), None) + if first_str: + out["type"] = first_str + continue + # All-null or empty list → treat as object. 
+ out["type"] = "object" + continue + + if key in {"properties", "$defs", "definitions"} and isinstance(value, dict): + out[key] = { + sub_k: _sanitize_node(sub_v, f"{path}.{key}.{sub_k}") + for sub_k, sub_v in value.items() + } + elif key in {"items", "additionalProperties"}: + if isinstance(value, bool): + # Keep bool ``additionalProperties`` as-is — it's a valid form + # and widely accepted. ``items: true/false`` is non-standard + # but we preserve rather than drop. + out[key] = value + else: + out[key] = _sanitize_node(value, f"{path}.{key}") + elif key in {"anyOf", "oneOf", "allOf"} and isinstance(value, list): + out[key] = [ + _sanitize_node(item, f"{path}.{key}[{i}]") + for i, item in enumerate(value) + ] + elif key in {"required", "enum", "examples"}: + # Schema "sibling" keywords whose values are NOT schemas: + # - ``required``: list of property-name strings + # - ``enum``: list of literal values (any JSON type) + # - ``examples``: list of example values (any JSON type) + # Recursing into these with _sanitize_node() would mis-interpret + # literal strings like "path" as bare-string schemas and replace + # them with {"type": "object"} dicts. Pass through unchanged. + out[key] = copy.deepcopy(value) if isinstance(value, (list, dict)) else value + else: + out[key] = _sanitize_node(value, f"{path}.{key}") if isinstance(value, (dict, list)) else value + + # Object nodes without properties: inject empty properties dict. + # llama.cpp's grammar generator can't constrain a free-form object. + if out.get("type") == "object" and not isinstance(out.get("properties"), dict): + out["properties"] = {} + + # Prune ``required`` entries that don't exist in properties (defense + # against malformed MCP schemas; also caught upstream for MCP tools, but + # built-in tools or plugin tools may not have been through that path). 
+ if out.get("type") == "object" and isinstance(out.get("required"), list): + props = out.get("properties") or {} + valid = [r for r in out["required"] if isinstance(r, str) and r in props] + if not valid: + out.pop("required", None) + elif len(valid) != len(out["required"]): + out["required"] = valid + + return out diff --git a/tools/skills_tool.py b/tools/skills_tool.py index 8bf92ef08..89fe698a7 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -743,6 +743,9 @@ def _serve_plugin_skill( skill_md: Path, namespace: str, bare: str, + *, + preprocess: bool = True, + session_id: str | None = None, ) -> str: """Read a plugin-provided skill, apply guards, return JSON.""" from hermes_cli.plugins import _get_disabled_plugins, get_plugin_manager @@ -812,11 +815,26 @@ def _serve_plugin_skill( except Exception: banner = "" + rendered_content = content + if preprocess: + try: + from agent.skill_preprocessing import preprocess_skill_content + + rendered_content = preprocess_skill_content( + content, + skill_md.parent, + session_id=session_id, + ) + except Exception: + logger.debug( + "Could not preprocess plugin skill %s:%s", namespace, bare, exc_info=True + ) + return json.dumps( { "success": True, "name": f"{namespace}:{bare}", - "content": f"{banner}{content}" if banner else content, + "content": f"{banner}{rendered_content}" if banner else rendered_content, "description": description, "linked_files": None, "readiness_status": SkillReadinessStatus.AVAILABLE.value, @@ -825,7 +843,12 @@ def _serve_plugin_skill( ) -def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: +def skill_view( + name: str, + file_path: str = None, + task_id: str = None, + preprocess: bool = True, +) -> str: """ View the content of a skill or a specific file within a skill directory. @@ -834,6 +857,9 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: Qualified names like "plugin:skill" resolve to plugin-provided skills. 
file_path: Optional path to a specific file within the skill (e.g., "references/api.md") task_id: Optional task identifier used to probe the active backend + preprocess: Apply configured SKILL.md template and inline shell rendering + to main skill content. Internal slash/preload callers disable this + because they render the skill message themselves. Returns: JSON string with skill content or error message @@ -879,7 +905,13 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: }, ensure_ascii=False, ) - return _serve_plugin_skill(plugin_skill_md, namespace, bare) + return _serve_plugin_skill( + plugin_skill_md, + namespace, + bare, + preprocess=preprocess, + session_id=task_id, + ) # Plugin exists but this specific skill is missing? available = pm.list_plugin_skills(namespace) @@ -1280,13 +1312,28 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: exc_info=True, ) + rendered_content = content + if preprocess: + try: + from agent.skill_preprocessing import preprocess_skill_content + + rendered_content = preprocess_skill_content( + content, + skill_dir, + session_id=task_id, + ) + except Exception: + logger.debug( + "Could not preprocess skill content for %s", skill_name, exc_info=True + ) + result = { "success": True, "name": skill_name, "description": frontmatter.get("description", ""), "tags": tags, "related_skills": related_skills, - "content": content, + "content": rendered_content, "path": rel_path, "skill_dir": str(skill_dir) if skill_dir else None, "linked_files": linked_files if linked_files else None, diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 22c8dcbc6..b288d4ad9 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1805,7 +1805,8 @@ def terminal_tool( pass # Truncate output if too long, keeping both head and tail - MAX_OUTPUT_CHARS = 50000 + from tools.tool_output_limits import get_max_bytes + MAX_OUTPUT_CHARS = get_max_bytes() if len(output) > 
MAX_OUTPUT_CHARS: head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early) tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output) diff --git a/tools/tool_output_limits.py b/tools/tool_output_limits.py new file mode 100644 index 000000000..fd24a2da3 --- /dev/null +++ b/tools/tool_output_limits.py @@ -0,0 +1,92 @@ +"""Configurable tool-output truncation limits. + +Ported from anomalyco/opencode PR #23770 (``feat(truncate): allow +configuring tool output truncation limits``). + +OpenCode hardcoded ``MAX_LINES = 2000`` and ``MAX_BYTES = 50 * 1024`` +as tool-output truncation thresholds. Hermes-agent had the same +hardcoded constants in two places: + +* ``tools/terminal_tool.py`` — ``MAX_OUTPUT_CHARS = 50000`` (terminal + stdout/stderr cap) +* ``tools/file_operations.py`` — ``MAX_LINES = 2000`` / + ``MAX_LINE_LENGTH = 2000`` (read_file pagination cap + per-line cap) + +This module centralises those values behind a single config section +(``tool_output`` in ``config.yaml``) so power users can tune them +without patching the source. The existing hardcoded numbers remain as +defaults, so behaviour is unchanged when the config key is absent. + +Example ``config.yaml``:: + + tool_output: + max_bytes: 100000 # terminal output cap (chars) + max_lines: 5000 # read_file pagination + truncation cap + max_line_length: 2000 # per-line length cap before '... [truncated]' + +The limits reader is defensive: any error (missing config file, invalid +value type, etc.) falls back to the built-in defaults so tools never +fail because of a malformed config. +""" + +from __future__ import annotations + +from typing import Any, Dict + +# Hardcoded defaults — these match the pre-existing values, so adding +# this module is behaviour-preserving for users who don't set +# ``tool_output`` in config.yaml. 
+DEFAULT_MAX_BYTES = 50_000 # terminal_tool.MAX_OUTPUT_CHARS +DEFAULT_MAX_LINES = 2000 # file_operations.MAX_LINES +DEFAULT_MAX_LINE_LENGTH = 2000 # file_operations.MAX_LINE_LENGTH + + +def _coerce_positive_int(value: Any, default: int) -> int: + """Return ``value`` as a positive int, or ``default`` on any issue.""" + try: + iv = int(value) + except (TypeError, ValueError): + return default + if iv <= 0: + return default + return iv + + +def get_tool_output_limits() -> Dict[str, int]: + """Return resolved tool-output limits, reading ``tool_output`` from config. + + Keys: ``max_bytes``, ``max_lines``, ``max_line_length``. Missing or + invalid entries fall through to the ``DEFAULT_*`` constants. This + function NEVER raises. + """ + try: + from hermes_cli.config import load_config + cfg = load_config() or {} + section = cfg.get("tool_output") if isinstance(cfg, dict) else None + if not isinstance(section, dict): + section = {} + except Exception: + section = {} + + return { + "max_bytes": _coerce_positive_int(section.get("max_bytes"), DEFAULT_MAX_BYTES), + "max_lines": _coerce_positive_int(section.get("max_lines"), DEFAULT_MAX_LINES), + "max_line_length": _coerce_positive_int( + section.get("max_line_length"), DEFAULT_MAX_LINE_LENGTH + ), + } + + +def get_max_bytes() -> int: + """Shortcut for terminal-tool callers that only need the byte cap.""" + return get_tool_output_limits()["max_bytes"] + + +def get_max_lines() -> int: + """Shortcut for file-ops callers that only need the line cap.""" + return get_tool_output_limits()["max_lines"] + + +def get_max_line_length() -> int: + """Shortcut for file-ops callers that only need the per-line cap.""" + return get_tool_output_limits()["max_line_length"] diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index f57e191e3..9e8ad6927 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -313,6 +313,66 @@ def _validate_audio_file(file_path: str) -> Optional[Dict[str, Any]]: # 
--------------------------------------------------------------------------- +# Substrings that identify a missing/unloadable CUDA runtime library. When +# ctranslate2 (the backend for faster-whisper) cannot dlopen one of these, the +# "auto" device picker has already committed to CUDA and the model can no +# longer be used — we fall back to CPU and reload. +# +# Deliberately narrow: we match on library-name tokens and dlopen phrasing so +# we DO NOT accidentally catch legitimate runtime failures like "CUDA out of +# memory" — those should surface to the user, not silently fall back to CPU +# (a 32GB audio clip on CPU at int8 isn't useful either). +_CUDA_LIB_ERROR_MARKERS = ( + "libcublas", + "libcudnn", + "libcudart", + "cannot be loaded", + "cannot open shared object", + "no kernel image is available", + "no CUDA-capable device", + "CUDA driver version is insufficient", +) + + +def _looks_like_cuda_lib_error(exc: BaseException) -> bool: + """Heuristic: is this exception a missing/broken CUDA runtime library? + + ctranslate2 raises plain RuntimeError with messages like + ``Library libcublas.so.12 is not found or cannot be loaded``. We want to + catch missing/unloadable shared libs and driver-mismatch errors, NOT + legitimate runtime failures ("CUDA out of memory", model bugs, etc.). + """ + msg = str(exc) + return any(marker in msg for marker in _CUDA_LIB_ERROR_MARKERS) + + +def _load_local_whisper_model(model_name: str): + """Load faster-whisper with graceful CUDA → CPU fallback. + + faster-whisper's ``device="auto"`` picks CUDA when the ctranslate2 wheel + ships CUDA shared libs, even on hosts where the NVIDIA runtime + (``libcublas.so.12`` / ``libcudnn*``) isn't installed — common on WSL2 + without CUDA-on-WSL, headless servers, and CPU-only developer machines. + On those hosts the load itself sometimes succeeds and the dlopen failure + only surfaces at first ``transcribe()`` call. 
+ + We try ``auto`` first (fast CUDA path when it works), and on any CUDA + library load failure fall back to CPU + int8. + """ + from faster_whisper import WhisperModel + try: + return WhisperModel(model_name, device="auto", compute_type="auto") + except Exception as exc: + if not _looks_like_cuda_lib_error(exc): + raise + logger.warning( + "faster-whisper CUDA load failed (%s) — falling back to CPU (int8). " + "Install the NVIDIA CUDA runtime (libcublas/libcudnn) to use GPU.", + exc, + ) + return WhisperModel(model_name, device="cpu", compute_type="int8") + + def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: """Transcribe using faster-whisper (local, free).""" global _local_model, _local_model_name @@ -321,11 +381,10 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: return {"success": False, "transcript": "", "error": "faster-whisper not installed"} try: - from faster_whisper import WhisperModel # Lazy-load the model (downloads on first use, ~150 MB for 'base') if _local_model is None or _local_model_name != model_name: logger.info("Loading faster-whisper model '%s' (first load downloads the model)...", model_name) - _local_model = WhisperModel(model_name, device="auto", compute_type="auto") + _local_model = _load_local_whisper_model(model_name) _local_model_name = model_name # Language: config.yaml (stt.local.language) > env var > auto-detect. 
@@ -338,8 +397,29 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: if _forced_lang: transcribe_kwargs["language"] = _forced_lang - segments, info = _local_model.transcribe(file_path, **transcribe_kwargs) - transcript = " ".join(segment.text.strip() for segment in segments) + try: + segments, info = _local_model.transcribe(file_path, **transcribe_kwargs) + transcript = " ".join(segment.text.strip() for segment in segments) + except Exception as exc: + # CUDA runtime libs sometimes only fail at dlopen-on-first-use, + # AFTER the model loaded successfully. Evict the broken cached + # model, reload on CPU, retry once. Without this the module- + # global `_local_model` is poisoned and every subsequent voice + # message on this process fails identically until restart. + if not _looks_like_cuda_lib_error(exc): + raise + logger.warning( + "faster-whisper CUDA runtime failed mid-transcribe (%s) — " + "evicting cached model and retrying on CPU (int8).", + exc, + ) + _local_model = None + _local_model_name = None + from faster_whisper import WhisperModel + _local_model = WhisperModel(model_name, device="cpu", compute_type="int8") + _local_model_name = model_name + segments, info = _local_model.transcribe(file_path, **transcribe_kwargs) + transcript = " ".join(segment.text.strip() for segment in segments) logger.info( "Transcribed %s via local whisper (%s, lang=%s, %.1fs audio)", diff --git a/toolsets.py b/toolsets.py index f1dc7fca1..65f560bfe 100644 --- a/toolsets.py +++ b/toolsets.py @@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [ "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", # Text-to-speech "text_to_speech", # Planning & memory @@ -115,7 +115,8 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", 
"browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", "web_search" + "browser_vision", "browser_console", "browser_cdp", + "browser_dialog", "web_search" ], "includes": [] }, @@ -216,6 +217,15 @@ TOOLSETS = { "includes": [] }, + "spotify": { + "description": "Native Spotify playback, search, playlist, album, and library tools", + "tools": [ + "spotify_playback", "spotify_devices", "spotify_queue", "spotify_search", + "spotify_playlists", "spotify_albums", "spotify_library", + ], + "includes": [] + }, + # Scenario-specific toolsets @@ -249,7 +259,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", "todo", "memory", "session_search", "execute_code", "delegate_task", @@ -274,7 +284,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "browser_cdp", + "browser_vision", "browser_console", "browser_cdp", "browser_dialog", # Planning & memory "todo", "memory", # Session history search @@ -295,7 +305,18 @@ TOOLSETS = { "tools": _HERMES_CORE_TOOLS, "includes": [] }, - + + "hermes-cron": { + # Mirrors hermes-cli so cron's "default" toolset is the same set of + # core tools users see interactively — then `hermes tools` filters + # them down per the platform config. _DEFAULT_OFF_TOOLSETS (moa, + # homeassistant, rl) are excluded by _get_platform_tools() unless + # the user explicitly enables them. 
+ "description": "Default cron toolset - same core tools as hermes-cli; gated by `hermes tools`", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + "hermes-telegram": { "description": "Telegram bot toolset - full access for personal use (terminal has safety checks)", "tools": _HERMES_CORE_TOOLS, diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index d2b82b9da..4e03224ee 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -1,19 +1,116 @@ import json +import os import signal import sys +import time +import traceback -from tui_gateway.server import dispatch, resolve_skin, write_json +from tui_gateway import server +from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json +from tui_gateway.transport import TeeTransport -signal.signal(signal.SIGPIPE, signal.SIG_DFL) + +def _install_sidecar_publisher() -> None: + """Mirror every dispatcher emit to the dashboard sidebar via WS. + + Activated by `HERMES_TUI_SIDECAR_URL`, set by the dashboard's + ``/api/pty`` endpoint when a chat tab passes a ``channel`` query param. + Best-effort: connect failure or runtime drop falls back to stdio-only. + """ + url = os.environ.get("HERMES_TUI_SIDECAR_URL") + + if not url: + return + + from tui_gateway.event_publisher import WsPublisherTransport + + server._stdio_transport = TeeTransport( + server._stdio_transport, WsPublisherTransport(url) + ) + + +def _log_signal(signum: int, frame) -> None: + """Capture WHICH thread and WHERE a termination signal hit us. + + SIG_DFL for SIGPIPE kills the process silently the instant any + background thread (TTS playback, beep, voice status emitter, etc.) + writes to a stdout the TUI has stopped reading. Without this + handler the gateway-exited banner in the TUI has no trace — the + crash log never sees a Python exception because the kernel reaps + the process before the interpreter runs anything. 
+ """ + name = { + signal.SIGPIPE: "SIGPIPE", + signal.SIGTERM: "SIGTERM", + signal.SIGHUP: "SIGHUP", + }.get(signum, f"signal {signum}") + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== {name} received · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n" + ) + if frame is not None: + f.write("main-thread stack at signal delivery:\n") + traceback.print_stack(frame, file=f) + # All live threads — signal may have been triggered by a + # background thread (write to broken stdout from TTS, etc.). + import threading as _threading + for tid, th in _threading._active.items(): + f.write(f"\n--- thread {th.name} (id={tid}) ---\n") + f.write("".join(traceback.format_stack(sys._current_frames().get(tid)))) + except Exception: + pass + print(f"[gateway-signal] {name}", file=sys.stderr, flush=True) + sys.exit(0) + + +# SIGPIPE: ignore, don't exit. The old SIG_DFL killed the process +# silently whenever a *background* thread (TTS playback chain, voice +# debug stderr emitter, beep thread) wrote to a pipe the TUI had gone +# quiet on — even though the main thread was perfectly fine waiting on +# stdin. Ignoring the signal lets Python raise BrokenPipeError on the +# offending write (write_json already handles that with a clean +# sys.exit(0) + _log_exit), which keeps the gateway alive as long as +# the main command pipe is still readable. Terminal signals still +# route through _log_signal so kills and hangups are diagnosable. +signal.signal(signal.SIGPIPE, signal.SIG_IGN) +signal.signal(signal.SIGTERM, _log_signal) +signal.signal(signal.SIGHUP, _log_signal) signal.signal(signal.SIGINT, signal.SIG_IGN) +def _log_exit(reason: str) -> None: + """Record why the gateway subprocess is shutting down. + + Three exit paths (startup write fail, parse-error-response write fail, + dispatch-response write fail, stdin EOF) all collapse into a silent + sys.exit(0) here. 
Without this trail the TUI shows "gateway exited" + with no actionable clue about WHICH broken pipe or WHICH message + triggered it — the main reason voice-mode turns look like phantom + crashes when the real story is "TUI read pipe closed on this event". + """ + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== gateway exit · {time.strftime('%Y-%m-%d %H:%M:%S')} " + f"· reason={reason} ===\n" + ) + except Exception: + pass + print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True) + + def main(): + _install_sidecar_publisher() + if not write_json({ "jsonrpc": "2.0", "method": "event", "params": {"type": "gateway.ready", "payload": {"skin": resolve_skin()}}, }): + _log_exit("startup write failed (broken stdout pipe before first event)") sys.exit(0) for raw in sys.stdin: @@ -25,14 +122,19 @@ def main(): req = json.loads(line) except json.JSONDecodeError: if not write_json({"jsonrpc": "2.0", "error": {"code": -32700, "message": "parse error"}, "id": None}): + _log_exit("parse-error-response write failed (broken stdout pipe)") sys.exit(0) continue + method = req.get("method") if isinstance(req, dict) else None resp = dispatch(req) if resp is not None: if not write_json(resp): + _log_exit(f"response write failed for method={method!r} (broken stdout pipe)") sys.exit(0) + _log_exit("stdin EOF (TUI closed the command pipe)") + if __name__ == "__main__": main() diff --git a/tui_gateway/event_publisher.py b/tui_gateway/event_publisher.py new file mode 100644 index 000000000..8510b8eac --- /dev/null +++ b/tui_gateway/event_publisher.py @@ -0,0 +1,126 @@ +"""Best-effort WebSocket publisher transport for the PTY-side gateway. + +The dashboard's `/api/pty` spawns `hermes --tui` as a child process, which +spawns its own ``tui_gateway.entry``. Tool/reasoning/status events fire on +*that* gateway's transport — three processes removed from the dashboard +server itself. 
To surface them in the dashboard sidebar (`/api/events`), +the PTY-side gateway opens a back-WS to the dashboard at startup and +mirrors every emit through this transport. + +Wire protocol: newline-framed JSON dicts (the same shape the dispatcher +already passes to ``write``). No JSON-RPC envelope here — the dashboard's +``/api/pub`` endpoint just rebroadcasts the bytes verbatim to subscribers. + +Failure mode: silent. The agent loop must never block waiting for the +sidecar to drain. A dead WS short-circuits all subsequent writes. +Actual ``send`` calls run on a daemon thread so the TeeTransport's +``write`` returns after enqueueing (best-effort; drop when the queue is full). +""" + +from __future__ import annotations + +import json +import logging +import queue +import threading +from typing import Optional + +try: + from websockets.sync.client import connect as ws_connect +except ImportError: # pragma: no cover - websockets is a required install path + ws_connect = None # type: ignore[assignment] + +_log = logging.getLogger(__name__) + +_DRAIN_STOP = object() + +_QUEUE_MAX = 256 + + +class WsPublisherTransport: + __slots__ = ("_url", "_lock", "_ws", "_dead", "_q", "_worker") + + def __init__(self, url: str, *, connect_timeout: float = 2.0) -> None: + self._url = url + self._lock = threading.Lock() + self._ws: Optional[object] = None + self._dead = False + self._q: queue.Queue[object] = queue.Queue(maxsize=_QUEUE_MAX) + self._worker: Optional[threading.Thread] = None + + if ws_connect is None: + self._dead = True + + return + + try: + self._ws = ws_connect(url, open_timeout=connect_timeout, max_size=None) + except Exception as exc: + _log.debug("event publisher connect failed: %s", exc) + self._dead = True + self._ws = None + + return + + self._worker = threading.Thread( + target=self._drain, + name="hermes-ws-pub", + daemon=True, + ) + self._worker.start() + + def _drain(self) -> None: + while True: + item = self._q.get() + if item is _DRAIN_STOP: + return + if 
not isinstance(item, str): + continue + if self._ws is None: + continue + try: + with self._lock: + if self._ws is not None: + self._ws.send(item) # type: ignore[union-attr] + except Exception as exc: + _log.debug("event publisher write failed: %s", exc) + self._dead = True + self._ws = None + + def write(self, obj: dict) -> bool: + if self._dead or self._ws is None or self._worker is None: + return False + + line = json.dumps(obj, ensure_ascii=False) + + try: + self._q.put_nowait(line) + + return True + except queue.Full: + return False + + def close(self) -> None: + self._dead = True + w = self._worker + if w is not None and w.is_alive(): + try: + self._q.put_nowait(_DRAIN_STOP) + except queue.Full: + # Best-effort: if the queue is wedged, the daemon thread + # will be torn down with the process. + pass + w.join(timeout=3.0) + self._worker = None + + if self._ws is None: + return + + try: + with self._lock: + if self._ws is not None: + self._ws.close() # type: ignore[union-attr] + except Exception: + pass + + self._ws = None diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 165b47bf9..7bc0fb2e0 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1,5 +1,6 @@ import atexit import concurrent.futures +import contextvars import copy import json import logging @@ -12,9 +13,17 @@ import time import uuid from datetime import datetime from pathlib import Path +from typing import Optional from hermes_constants import get_hermes_home from hermes_cli.env_loader import load_hermes_dotenv +from tui_gateway.transport import ( + StdioTransport, + Transport, + bind_transport, + current_transport, + reset_transport, +) logger = logging.getLogger(__name__) @@ -23,6 +32,79 @@ load_hermes_dotenv( hermes_home=_hermes_home, project_env=Path(__file__).parent.parent / ".env" ) + +# ── Panic logger ───────────────────────────────────────────────────── +# Gateway crashes in a TUI session leave no forensics: stdout is the +# JSON-RPC pipe (TUI side parses it, 
doesn't log raw), the root logger +# only catches handled warnings, and the subprocess exits before stderr +# flushes through the stderr->gateway.stderr event pump. This hook +# appends every unhandled exception to ~/.hermes/logs/tui_gateway_crash.log +# AND re-emits a one-line summary to stderr so the TUI can surface it in +# Activity — exactly what was missing when the voice-mode turns started +# exiting the gateway mid-TTS. +_CRASH_LOG = os.path.join(_hermes_home, "logs", "tui_gateway_crash.log") + + +def _panic_hook(exc_type, exc_value, exc_tb): + import traceback + + trace = "".join(traceback.format_exception(exc_type, exc_value, exc_tb)) + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== unhandled exception · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n" + ) + f.write(trace) + except Exception: + pass + # Stderr goes through to the TUI as a gateway.stderr Activity line — + # the first line here is what the user will see without opening any + # log files. Rest of the stack is still in the log for full context. + first = ( + str(exc_value).strip().splitlines()[0] + if str(exc_value).strip() + else exc_type.__name__ + ) + print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True) + # Chain to the default hook so the process still terminates normally. 
+ sys.__excepthook__(exc_type, exc_value, exc_tb) + + +sys.excepthook = _panic_hook + + +def _thread_panic_hook(args): + # threading.excepthook signature: SimpleNamespace(exc_type, exc_value, exc_traceback, thread) + import traceback + + trace = "".join( + traceback.format_exception(args.exc_type, args.exc_value, args.exc_traceback) + ) + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== thread exception · {time.strftime('%Y-%m-%d %H:%M:%S')} " + f"· thread={args.thread.name} ===\n" + ) + f.write(trace) + except Exception: + pass + first_line = ( + str(args.exc_value).strip().splitlines()[0] + if str(args.exc_value).strip() + else args.exc_type.__name__ + ) + print( + f"[gateway-crash] thread {args.thread.name} raised {args.exc_type.__name__}: {first_line}", + file=sys.stderr, + flush=True, + ) + + +threading.excepthook = _thread_panic_hook + try: from hermes_cli.banner import prefetch_update_check @@ -78,6 +160,11 @@ atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True)) _real_stdout = sys.stdout sys.stdout = sys.stderr +# Module-level stdio transport — fallback sink when no transport is bound via +# contextvar or session. Stream resolved through a lambda so runtime monkey- +# patches of `_real_stdout` (used extensively in tests) still land correctly. +_stdio_transport = StdioTransport(lambda: _real_stdout, _stdout_lock) + class _SlashWorker: """Persistent HermesCLI subprocess for slash commands.""" @@ -197,14 +284,24 @@ def _db_unavailable_error(rid, *, code: int): def write_json(obj: dict) -> bool: - line = json.dumps(obj, ensure_ascii=False) + "\n" - try: - with _stdout_lock: - _real_stdout.write(line) - _real_stdout.flush() - return True - except BrokenPipeError: - return False + """Emit one JSON frame. Routes via the most-specific transport available. + + Precedence: + + 1. 
Event frames with a session id → the transport stored on that session, + so async events land with the client that owns the session even if + the emitting thread has no contextvar binding. + 2. Otherwise the transport bound on the current context (set by + :func:`dispatch` for the lifetime of a request). + 3. Otherwise the module-level stdio transport, matching the historical + behaviour and keeping tests that monkey-patch ``_real_stdout`` green. + """ + if obj.get("method") == "event": + sid = ((obj.get("params") or {}).get("session_id")) or "" + if sid and (t := (_sessions.get(sid) or {}).get("transport")) is not None: + return t.write(obj) + + return (current_transport() or _stdio_transport).write(obj) def _emit(event: str, sid: str, payload: dict | None = None): @@ -274,27 +371,40 @@ def handle_request(req: dict) -> dict | None: return fn(req.get("id"), req.get("params", {})) -def dispatch(req: dict) -> dict | None: +def dispatch(req: dict, transport: Optional[Transport] = None) -> dict | None: """Route inbound RPCs — long handlers to the pool, everything else inline. Returns a response dict when handled inline. Returns None when the - handler was scheduled on the pool; the worker writes its own - response via write_json when done. + handler was scheduled on the pool; the worker writes its own response + via the bound transport when done. + + *transport* (optional): pins every write produced by this request — + including any events emitted by the handler — to the given transport. + Omitting it falls back to the module-level stdio transport, preserving + the original behaviour for ``tui_gateway.entry``. 
""" - if req.get("method") not in _LONG_HANDLERS: - return handle_request(req) + t = transport or _stdio_transport + token = bind_transport(t) + try: + if req.get("method") not in _LONG_HANDLERS: + return handle_request(req) - def run(): - try: - resp = handle_request(req) - except Exception as exc: - resp = _err(req.get("id"), -32000, f"handler error: {exc}") - if resp is not None: - write_json(resp) + # Snapshot the context so the pool worker sees the bound transport. + ctx = contextvars.copy_context() - _pool.submit(run) + def run(): + try: + resp = handle_request(req) + except Exception as exc: + resp = _err(req.get("id"), -32000, f"handler error: {exc}") + if resp is not None: + t.write(resp) - return None + _pool.submit(lambda: ctx.run(run)) + + return None + finally: + reset_transport(token) def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None: @@ -487,13 +597,17 @@ def _coerce_statusbar(raw) -> str: def _load_reasoning_config() -> dict | None: from hermes_constants import parse_reasoning_effort - effort = str(_load_cfg().get("agent", {}).get("reasoning_effort", "") or "").strip() + effort = str( + (_load_cfg().get("agent") or {}).get("reasoning_effort", "") or "" + ).strip() return parse_reasoning_effort(effort) def _load_service_tier() -> str | None: raw = ( - str(_load_cfg().get("agent", {}).get("service_tier", "") or "").strip().lower() + str((_load_cfg().get("agent") or {}).get("service_tier", "") or "") + .strip() + .lower() ) if not raw or raw in {"normal", "default", "standard", "off", "none"}: return None @@ -503,11 +617,11 @@ def _load_service_tier() -> str | None: def _load_show_reasoning() -> bool: - return bool(_load_cfg().get("display", {}).get("show_reasoning", False)) + return bool((_load_cfg().get("display") or {}).get("show_reasoning", False)) def _load_tool_progress_mode() -> str: - raw = _load_cfg().get("display", {}).get("tool_progress", "all") + raw = (_load_cfg().get("display") or {}).get("tool_progress", 
"all") if raw is False: return "off" if raw is True: @@ -521,8 +635,14 @@ def _load_enabled_toolsets() -> list[str] | None: from hermes_cli.config import load_config from hermes_cli.tools_config import _get_platform_tools + # Runtime toolset resolution must include default MCP servers so the + # agent can actually call them. Passing ``False`` here is the + # config-editing variant — used when we need to persist a toolset + # list without baking in implicit MCP defaults. Using the wrong + # variant at agent creation time makes MCP tools silently missing + # from the TUI. See PR #3252 for the original design split. enabled = sorted( - _get_platform_tools(load_config(), "cli", include_default_mcp_servers=False) + _get_platform_tools(load_config(), "cli", include_default_mcp_servers=True) ) return enabled or None except Exception: @@ -704,6 +824,39 @@ def _probe_credentials(agent) -> str: return "" +def _probe_config_health(cfg: dict) -> str: + """Flag bare YAML keys (`agent:` with no value → None) that silently + drop nested settings. Returns warning or ''.""" + if not isinstance(cfg, dict): + return "" + warnings: list[str] = [] + null_keys = sorted(k for k, v in cfg.items() if v is None) + if not null_keys: + pass + else: + keys = ", ".join(f"`{k}`" for k in null_keys) + warnings.append( + f"config.yaml has empty section(s): {keys}. " + f"Remove the line(s) or set them to `{{}}` — " + f"empty sections silently drop nested settings." + ) + display_cfg = cfg.get("display") + agent_cfg = cfg.get("agent") + if isinstance(display_cfg, dict): + personality = str(display_cfg.get("personality", "") or "").strip().lower() + if ( + personality + and personality not in {"default", "none", "neutral"} + and isinstance(agent_cfg, dict) + and agent_cfg.get("personalities") is None + ): + warnings.append( + "`display.personality` is set but `agent.personalities` is empty/null; " + "personality overlay will be skipped." 
+ ) + return " ".join(warnings).strip() + + def _session_info(agent) -> dict: info: dict = { "model": getattr(agent, "model", ""), @@ -990,28 +1143,6 @@ def _wire_callbacks(sid: str): set_secret_capture_callback(secret_cb) -def _resolve_personality_prompt(cfg: dict) -> str: - """Resolve the active personality into a system prompt string.""" - name = (cfg.get("display", {}).get("personality", "") or "").strip().lower() - if not name or name in ("default", "none", "neutral"): - return "" - try: - from cli import load_cli_config - - personalities = load_cli_config().get("agent", {}).get("personalities", {}) - except Exception: - try: - from hermes_cli.config import load_config as _load_full_cfg - - personalities = _load_full_cfg().get("agent", {}).get("personalities", {}) - except Exception: - personalities = cfg.get("agent", {}).get("personalities", {}) - pval = personalities.get(name) - if pval is None: - return "" - return _render_personality_prompt(pval) - - def _render_personality_prompt(value) -> str: if isinstance(value, dict): parts = [value.get("system_prompt", "")] @@ -1027,15 +1158,15 @@ def _available_personalities(cfg: dict | None = None) -> dict: try: from cli import load_cli_config - return load_cli_config().get("agent", {}).get("personalities", {}) or {} + return (load_cli_config().get("agent") or {}).get("personalities", {}) or {} except Exception: try: from hermes_cli.config import load_config as _load_full_cfg - return _load_full_cfg().get("agent", {}).get("personalities", {}) or {} + return (_load_full_cfg().get("agent") or {}).get("personalities", {}) or {} except Exception: cfg = cfg or _load_cfg() - return cfg.get("agent", {}).get("personalities", {}) or {} + return (cfg.get("agent") or {}).get("personalities", {}) or {} def _validate_personality(value: str, cfg: dict | None = None) -> tuple[str, str]: @@ -1145,9 +1276,7 @@ def _make_agent(sid: str, key: str, session_id: str | None = None): from hermes_cli.runtime_provider import 
resolve_runtime_provider cfg = _load_cfg() - system_prompt = cfg.get("agent", {}).get("system_prompt", "") or "" - if not system_prompt: - system_prompt = _resolve_personality_prompt(cfg) + system_prompt = ((cfg.get("agent") or {}).get("system_prompt", "") or "").strip() runtime = resolve_runtime_provider(requested=None) return AIAgent( model=_resolve_model(), @@ -1187,6 +1316,9 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80): "tool_progress_mode": _load_tool_progress_mode(), "edit_snapshots": {}, "tool_started_at": {}, + # Pin async event emissions to whichever transport created the + # session (stdio for Ink, JSON-RPC WS for the dashboard sidebar). + "transport": current_transport() or _stdio_transport, } try: _sessions[sid]["slash_worker"] = _SlashWorker( @@ -1329,6 +1461,7 @@ def _(rid, params: dict) -> dict: "slash_worker": None, "tool_progress_mode": _load_tool_progress_mode(), "tool_started_at": {}, + "transport": current_transport() or _stdio_transport, } def _build() -> None: @@ -1387,6 +1520,10 @@ def _(rid, params: dict) -> dict: warn = _probe_credentials(agent) if warn: info["credential_warning"] = warn + cfg_warn = _probe_config_health(_load_cfg()) + if cfg_warn: + info["config_warning"] = cfg_warn + logger.warning(cfg_warn) _emit("session.info", sid, info) except Exception as e: session["agent_error"] = str(e) @@ -1533,9 +1670,7 @@ def _(rid, params: dict) -> dict: return _db_unavailable_error(rid, code=5007) title, key = params.get("title", ""), session["session_key"] if not title: - return _ok( - rid, {"title": db.get_session_title(key) or "", "session_key": key} - ) + return _ok(rid, {"title": db.get_session_title(key) or "", "session_key": key}) try: db.set_session_title(key, title) return _ok(rid, {"title": title}) @@ -2126,7 +2261,45 @@ def _(rid, params: dict) -> dict: if rendered: payload["rendered"] = rendered _emit("message.complete", sid, payload) + + # CLI parity: when voice-mode TTS is on, speak the agent 
reply + # (cli.py:_voice_speak_response). Only the final text — tool + # calls / reasoning already stream separately and would be + # noisy to read aloud. + if ( + status == "complete" + and isinstance(raw, str) + and raw.strip() + and _voice_tts_enabled() + ): + try: + from hermes_cli.voice import speak_text + + spoken = raw + threading.Thread( + target=speak_text, args=(spoken,), daemon=True + ).start() + except ImportError: + logger.warning("voice TTS skipped: hermes_cli.voice unavailable") + except Exception as e: + logger.warning("voice TTS dispatch failed: %s", e) except Exception as e: + import traceback + + trace = traceback.format_exc() + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== turn-dispatcher exception · " + f"{time.strftime('%Y-%m-%d %H:%M:%S')} · sid={sid} ===\n" + ) + f.write(trace) + except Exception: + pass + print( + f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True + ) _emit("error", sid, {"message": str(e)}) finally: try: @@ -2537,6 +2710,39 @@ def _(rid, params: dict) -> dict: _write_config_key("display.details_mode", nv) return _ok(rid, {"key": key, "value": nv}) + if key.startswith("details_mode."): + # Per-section override: `details_mode.
` writes to + # `display.sections.
`. Empty value clears the override + # and lets the section fall back to the global details_mode. + section = key.split(".", 1)[1] + allowed_sections = frozenset({"thinking", "tools", "subagents", "activity"}) + if section not in allowed_sections: + return _err(rid, 4002, f"unknown section: {section}") + + cfg = _load_cfg() + display = cfg.get("display") if isinstance(cfg.get("display"), dict) else {} + sections_cfg = ( + display.get("sections") if isinstance(display.get("sections"), dict) else {} + ) + + nv = str(value or "").strip().lower() + if not nv: + sections_cfg.pop(section, None) + display["sections"] = sections_cfg + cfg["display"] = display + _save_cfg(cfg) + return _ok(rid, {"key": key, "value": ""}) + + allowed_dm = frozenset({"hidden", "collapsed", "expanded"}) + if nv not in allowed_dm: + return _err(rid, 4002, f"unknown details_mode: {value}") + + sections_cfg[section] = nv + display["sections"] = sections_cfg + cfg["display"] = display + _save_cfg(cfg) + return _ok(rid, {"key": key, "value": nv}) + if key == "thinking_mode": nv = str(value or "").strip().lower() allowed_tm = frozenset({"collapsed", "truncated", "full"}) @@ -2651,18 +2857,21 @@ def _(rid, params: dict) -> dict: return _ok(rid, {"prompt": _load_cfg().get("custom_prompt", "")}) if key == "skin": return _ok( - rid, {"value": _load_cfg().get("display", {}).get("skin", "default")} + rid, {"value": (_load_cfg().get("display") or {}).get("skin", "default")} ) if key == "personality": return _ok( - rid, {"value": _load_cfg().get("display", {}).get("personality", "default")} + rid, + {"value": (_load_cfg().get("display") or {}).get("personality", "default")}, ) if key == "reasoning": cfg = _load_cfg() - effort = str(cfg.get("agent", {}).get("reasoning_effort", "medium") or "medium") + effort = str( + (cfg.get("agent") or {}).get("reasoning_effort", "medium") or "medium" + ) display = ( "show" - if bool(cfg.get("display", {}).get("show_reasoning", False)) + if bool((cfg.get("display") or 
{}).get("show_reasoning", False)) else "hide" ) return _ok(rid, {"value": effort, "display": display}) @@ -2670,7 +2879,7 @@ def _(rid, params: dict) -> dict: allowed_dm = frozenset({"hidden", "collapsed", "expanded"}) raw = ( str( - _load_cfg().get("display", {}).get("details_mode", "collapsed") + (_load_cfg().get("display") or {}).get("details_mode", "collapsed") or "collapsed" ) .strip() @@ -2681,13 +2890,17 @@ def _(rid, params: dict) -> dict: if key == "thinking_mode": allowed_tm = frozenset({"collapsed", "truncated", "full"}) cfg = _load_cfg() - raw = str(cfg.get("display", {}).get("thinking_mode", "") or "").strip().lower() + raw = ( + str((cfg.get("display") or {}).get("thinking_mode", "") or "") + .strip() + .lower() + ) if raw in allowed_tm: nv = raw else: dm = ( str( - cfg.get("display", {}).get("details_mode", "collapsed") + (cfg.get("display") or {}).get("details_mode", "collapsed") or "collapsed" ) .strip() @@ -2696,7 +2909,7 @@ def _(rid, params: dict) -> dict: nv = "full" if dm == "expanded" else "collapsed" return _ok(rid, {"value": nv}) if key == "compact": - on = bool(_load_cfg().get("display", {}).get("tui_compact", False)) + on = bool((_load_cfg().get("display") or {}).get("tui_compact", False)) return _ok(rid, {"value": "on" if on else "off"}) if key == "statusbar": display = _load_cfg().get("display") @@ -3091,29 +3304,6 @@ def _(rid, params: dict) -> dict: # Fallback: no active run, treat as next-turn message return _ok(rid, {"type": "send", "message": arg}) - if name == "plan": - try: - from agent.skill_commands import ( - build_skill_invocation_message as _bsim, - build_plan_path, - ) - - user_instruction = arg or "" - plan_path = build_plan_path(user_instruction) - msg = _bsim( - "/plan", - user_instruction, - task_id=session.get("session_key", "") if session else "", - runtime_note=( - "Save the markdown plan with write_file to this exact relative path " - f"inside the active workspace/backend cwd: {plan_path}" - ), - ) - if msg: - 
return _ok(rid, {"type": "send", "message": msg}) - except Exception as e: - return _err(rid, 5030, f"plan skill failed: {e}") - return _err(rid, 4018, f"not a quick/plugin/skill command: {name}") @@ -3151,6 +3341,173 @@ def _(rid, params: dict) -> dict: # ── Methods: complete ───────────────────────────────────────────────── +_FUZZY_CACHE_TTL_S = 5.0 +_FUZZY_CACHE_MAX_FILES = 20000 +_FUZZY_FALLBACK_EXCLUDES = frozenset( + { + ".git", + ".hg", + ".svn", + ".next", + ".cache", + ".venv", + "venv", + "node_modules", + "__pycache__", + "dist", + "build", + "target", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + } +) +_fuzzy_cache_lock = threading.Lock() +_fuzzy_cache: dict[str, tuple[float, list[str]]] = {} + + +def _list_repo_files(root: str) -> list[str]: + """Return file paths relative to ``root``. + + Uses ``git ls-files`` from the repo top (resolved via + ``rev-parse --show-toplevel``) so the listing covers tracked + untracked + files anywhere in the repo, then converts each path back to be relative + to ``root``. Files outside ``root`` (parent directories of cwd, sibling + subtrees) are excluded so the picker stays scoped to what's reachable + from the gateway's cwd. Falls back to a bounded ``os.walk(root)`` when + ``root`` isn't inside a git repo. Result cached per-root for + ``_FUZZY_CACHE_TTL_S`` so rapid keystrokes don't respawn git processes. 
+ """ + now = time.monotonic() + with _fuzzy_cache_lock: + cached = _fuzzy_cache.get(root) + if cached and now - cached[0] < _FUZZY_CACHE_TTL_S: + return cached[1] + + files: list[str] = [] + try: + top_result = subprocess.run( + ["git", "-C", root, "rev-parse", "--show-toplevel"], + capture_output=True, + timeout=2.0, + check=False, + ) + if top_result.returncode == 0: + top = top_result.stdout.decode("utf-8", "replace").strip() + list_result = subprocess.run( + [ + "git", + "-C", + top, + "ls-files", + "-z", + "--cached", + "--others", + "--exclude-standard", + ], + capture_output=True, + timeout=2.0, + check=False, + ) + if list_result.returncode == 0: + for p in list_result.stdout.decode("utf-8", "replace").split("\0"): + if not p: + continue + rel = os.path.relpath(os.path.join(top, p), root).replace( + os.sep, "/" + ) + # Skip parents/siblings of cwd — keep the picker scoped + # to root-and-below, matching Cmd-P workspace semantics. + if rel.startswith("../"): + continue + files.append(rel) + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + except (OSError, subprocess.TimeoutExpired): + pass + + if not files: + # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays + # tractable. Dotfiles themselves survive — the ranker decides based + # on whether the query starts with `.`. + try: + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + dirnames[:] = [ + d + for d in dirnames + if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".") + ] + rel_dir = os.path.relpath(dirpath, root) + for f in filenames: + rel = f if rel_dir == "." 
else f"{rel_dir}/{f}" + files.append(rel.replace(os.sep, "/")) + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + except OSError: + pass + + with _fuzzy_cache_lock: + _fuzzy_cache[root] = (now, files) + + return files + + +def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None: + """Rank ``name`` against ``query``; lower is better. Returns None to reject. + + Tiers (kind): + 0 — exact basename + 1 — basename prefix (e.g. `app` → `appChrome.tsx`) + 2 — word-boundary / camelCase hit (e.g. `chrome` → `appChrome.tsx`) + 3 — substring anywhere in basename + 4 — subsequence match (every query char appears in order) + + Secondary key is `len(name)` so shorter names win ties. + """ + if not query: + return (3, len(name)) + + nl = name.lower() + ql = query.lower() + + if nl == ql: + return (0, len(name)) + + if nl.startswith(ql): + return (1, len(name)) + + # Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"]. + # camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation; + # falls through to substring/subsequence if it misses. + parts: list[str] = [] + buf = "" + for ch in name: + if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()): + if buf: + parts.append(buf) + buf = ch if ch not in "-_." else "" + else: + buf += ch + if buf: + parts.append(buf) + for p in parts: + if p.lower().startswith(ql): + return (2, len(name)) + + if ql in nl: + return (3, len(name)) + + i = 0 + for ch in nl: + if ch == ql[i]: + i += 1 + if i == len(ql): + return (4, len(name)) + + return None + @method("complete.path") def _(rid, params: dict) -> dict: @@ -3186,6 +3543,37 @@ def _(rid, params: dict) -> dict: prefix_tag = "" path_part = query if is_context else query + # Fuzzy basename search across the repo when the user types a bare + # name with no path separator — `@appChrome` surfaces every file + # whose basename matches, regardless of directory depth. 
Matches what + # editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with + # `/`, `./`, `~/`, `/abs`) fall through to the directory-listing + # path so explicit navigation intent is preserved. + if is_context and path_part and "/" not in path_part and prefix_tag != "folder": + root = os.getcwd() + ranked: list[tuple[tuple[int, int], str, str]] = [] + for rel in _list_repo_files(root): + basename = os.path.basename(rel) + if basename.startswith(".") and not path_part.startswith("."): + continue + rank = _fuzzy_basename_rank(basename, path_part) + if rank is None: + continue + ranked.append((rank, rel, basename)) + + ranked.sort(key=lambda r: (r[0], len(r[1]), r[1])) + tag = prefix_tag or "file" + for _, rel, basename in ranked[:30]: + items.append( + { + "text": f"@{tag}:{rel}", + "display": basename, + "meta": os.path.dirname(rel), + } + ) + + return _ok(rid, {"items": items}) + expanded = _normalize_completion_path(path_part) if path_part else "." if expanded == "." or not expanded: search_dir, match = ".", "" @@ -3365,7 +3753,7 @@ def _mirror_slash_side_effects(sid: str, session: dict, command: str) -> str: _apply_personality_to_session(sid, session, new_prompt) elif name == "prompt" and agent: cfg = _load_cfg() - new_prompt = cfg.get("agent", {}).get("system_prompt", "") or "" + new_prompt = (cfg.get("agent") or {}).get("system_prompt", "") or "" agent.ephemeral_system_prompt = new_prompt or None agent._cached_system_prompt = None elif name == "compress" and agent: @@ -3455,43 +3843,153 @@ def _(rid, params: dict) -> dict: # ── Methods: voice ─────────────────────────────────────────────────── +_voice_sid_lock = threading.Lock() +_voice_event_sid: str = "" + + +def _voice_emit(event: str, payload: dict | None = None) -> None: + """Emit a voice event toward the session that most recently turned the + mode on. Voice is process-global (one microphone), so there's only ever + one sid to target; the TUI handler treats an empty sid as "active + session". 
Kept separate from _emit to make the lack of per-call sid + argument explicit.""" + with _voice_sid_lock: + sid = _voice_event_sid + _emit(event, sid, payload) + + +def _voice_mode_enabled() -> bool: + """Current voice-mode flag (runtime-only, CLI parity). + + cli.py initialises ``_voice_mode = False`` at startup and only flips + it via ``/voice on``; it never reads a persisted enable bit from + config.yaml. We match that: no config lookup, env var only. This + avoids the TUI auto-starting in REC the next time the user opens it + just because they happened to enable voice in a prior session. + """ + return os.environ.get("HERMES_VOICE", "").strip() == "1" + + +def _voice_tts_enabled() -> bool: + """Whether agent replies should be spoken back via TTS (runtime only).""" + return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1" + + @method("voice.toggle") def _(rid, params: dict) -> dict: + """CLI parity for the ``/voice`` slash command. + + Subcommands: + + * ``status`` — report mode + TTS flags (default when action is unknown). + * ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it + off also tears down any active continuous recording loop. Does NOT + start recording on its own; recording is driven by ``voice.record`` + (Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split. + * ``tts`` — toggle speech-output of agent replies. Requires mode on + (mirrors CLI's _toggle_voice_tts guard). + """ action = params.get("action", "status") + if action == "status": - env = os.environ.get("HERMES_VOICE", "").strip() - if env in {"0", "1"}: - return _ok(rid, {"enabled": env == "1"}) - return _ok( - rid, - { - "enabled": bool( - _load_cfg().get("display", {}).get("voice_enabled", False) - ) - }, - ) + # Mirror CLI's _show_voice_status: include STT/TTS provider + # availability so the user can tell at a glance *why* voice mode + # isn't working ("STT provider: MISSING ..." is the common case). 
+ payload: dict = { + "enabled": _voice_mode_enabled(), + "tts": _voice_tts_enabled(), + } + try: + from tools.voice_mode import check_voice_requirements + + reqs = check_voice_requirements() + payload["available"] = bool(reqs.get("available")) + payload["audio_available"] = bool(reqs.get("audio_available")) + payload["stt_available"] = bool(reqs.get("stt_available")) + payload["details"] = reqs.get("details") or "" + except Exception as e: + # check_voice_requirements pulls optional transcription deps — + # swallow so /voice status always returns something useful. + logger.warning("voice.toggle status: requirements probe failed: %s", e) + + return _ok(rid, payload) + if action in ("on", "off"): enabled = action == "on" + # Runtime-only flag (CLI parity) — no _write_config_key, so the + # next TUI launch starts with voice OFF instead of auto-REC from a + # persisted stale toggle. os.environ["HERMES_VOICE"] = "1" if enabled else "0" - _write_config_key("display.voice_enabled", enabled) - return _ok(rid, {"enabled": action == "on"}) + + if not enabled: + # Disabling the mode must tear the continuous loop down; the + # loop holds the microphone and would otherwise keep running. + try: + from hermes_cli.voice import stop_continuous + + stop_continuous() + except ImportError: + pass + except Exception as e: + logger.warning("voice: stop_continuous failed during toggle off: %s", e) + + return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()}) + + if action == "tts": + if not _voice_mode_enabled(): + return _err(rid, 4014, "enable voice mode first: /voice on") + new_value = not _voice_tts_enabled() + # Runtime-only flag (CLI parity) — see voice.toggle on/off above. + os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0" + return _ok(rid, {"enabled": True, "tts": new_value}) + return _err(rid, 4013, f"unknown voice action: {action}") @method("voice.record") def _(rid, params: dict) -> dict: + """VAD-driven continuous record loop, CLI-parity. 
+ + ``start`` turns on a VAD loop that emits ``voice.transcript`` events + for each detected utterance and auto-restarts for the next turn. + ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while- + recording branch clearing ``_voice_continuous``). Three consecutive + silent cycles stop the loop automatically and emit a + ``voice.transcript`` with ``no_speech_limit=True``. + """ action = params.get("action", "start") + + if action not in {"start", "stop"}: + return _err(rid, 4019, f"unknown voice action: {action}") + try: if action == "start": - from hermes_cli.voice import start_recording + if not _voice_mode_enabled(): + return _err(rid, 4015, "voice mode is off — enable with /voice on") - start_recording() + with _voice_sid_lock: + global _voice_event_sid + _voice_event_sid = params.get("session_id") or _voice_event_sid + + from hermes_cli.voice import start_continuous + + voice_cfg = _load_cfg().get("voice", {}) + start_continuous( + on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}), + on_status=lambda s: _voice_emit("voice.status", {"state": s}), + on_silent_limit=lambda: _voice_emit( + "voice.transcript", {"no_speech_limit": True} + ), + silence_threshold=voice_cfg.get("silence_threshold", 200), + silence_duration=voice_cfg.get("silence_duration", 3.0), + ) return _ok(rid, {"status": "recording"}) - if action == "stop": - from hermes_cli.voice import stop_and_transcribe - return _ok(rid, {"text": stop_and_transcribe() or ""}) - return _err(rid, 4019, f"unknown voice action: {action}") + # action == "stop" + from hermes_cli.voice import stop_continuous + + stop_continuous() + return _ok(rid, {"status": "stopped"}) except ImportError: return _err( rid, 5025, "voice module not available — install audio dependencies" diff --git a/tui_gateway/transport.py b/tui_gateway/transport.py new file mode 100644 index 000000000..a1b4b283d --- /dev/null +++ b/tui_gateway/transport.py @@ -0,0 +1,127 @@ +"""Transport abstraction for the 
tui_gateway JSON-RPC server. + +Historically the gateway wrote every JSON frame directly to real stdout. This +module decouples the I/O sink from the handler logic so the same dispatcher +can be driven over stdio (``tui_gateway.entry``) or WebSocket +(``tui_gateway.ws``) without duplicating code. + +A :class:`Transport` is anything that can accept a JSON-serialisable dict and +forward it to its peer. The active transport for the current request is +tracked in a :class:`contextvars.ContextVar` so handlers — including those +dispatched onto the worker pool — route their writes to the right peer. + +Backward compatibility +---------------------- +``tui_gateway.server.write_json`` still works without any transport bound. +When nothing is on the contextvar and no session-level transport is found, +it falls back to the module-level :class:`StdioTransport`, which wraps the +original ``_real_stdout`` + ``_stdout_lock`` pair. Tests that monkey-patch +``server._real_stdout`` continue to work because the stdio transport resolves +the stream lazily through a callback. +""" + +from __future__ import annotations + +import contextvars +import json +import threading +from typing import Any, Callable, Optional, Protocol, runtime_checkable + + +@runtime_checkable +class Transport(Protocol): + """Minimal interface every transport implements.""" + + def write(self, obj: dict) -> bool: + """Emit one JSON frame. Return ``False`` when the peer is gone.""" + + def close(self) -> None: + """Release any resources owned by this transport.""" + + +_current_transport: contextvars.ContextVar[Optional[Transport]] = ( + contextvars.ContextVar( + "hermes_gateway_transport", + default=None, + ) +) + + +def current_transport() -> Optional[Transport]: + """Return the transport bound for the current request, if any.""" + return _current_transport.get() + + +def bind_transport(transport: Optional[Transport]): + """Bind *transport* for the current context. 
Returns a token for :func:`reset_transport`.""" + return _current_transport.set(transport) + + +def reset_transport(token) -> None: + """Restore the transport binding captured by :func:`bind_transport`.""" + _current_transport.reset(token) + + +class StdioTransport: + """Writes JSON frames to a stream (usually ``sys.stdout``). + + The stream is resolved via a callable so runtime monkey-patches of the + underlying stream continue to work — this preserves the behaviour the + existing test suite relies on (``monkeypatch.setattr(server, "_real_stdout", ...)``). + """ + + __slots__ = ("_stream_getter", "_lock") + + def __init__(self, stream_getter: Callable[[], Any], lock: threading.Lock) -> None: + self._stream_getter = stream_getter + self._lock = lock + + def write(self, obj: dict) -> bool: + line = json.dumps(obj, ensure_ascii=False) + "\n" + try: + with self._lock: + stream = self._stream_getter() + stream.write(line) + stream.flush() + return True + except BrokenPipeError: + return False + + def close(self) -> None: + return None + + +class TeeTransport: + """Mirrors writes to one primary plus N best-effort secondaries. + + The primary's return value (and exceptions) determine the result — + secondaries swallow failures so a wedged sidecar never stalls the + main IO path. Used by the PTY child so every dispatcher emit lands + on stdio (Ink) AND on a back-WS feeding the dashboard sidebar. + """ + + __slots__ = ("_primary", "_secondaries") + + def __init__(self, primary: "Transport", *secondaries: "Transport") -> None: + self._primary = primary + self._secondaries = secondaries + + def write(self, obj: dict) -> bool: + # Primary first so a slow sidecar (WS publisher) never delays Ink/stdio. 
+ ok = self._primary.write(obj) + for sec in self._secondaries: + try: + sec.write(obj) + except Exception: + pass + return ok + + def close(self) -> None: + try: + self._primary.close() + finally: + for sec in self._secondaries: + try: + sec.close() + except Exception: + pass diff --git a/tui_gateway/ws.py b/tui_gateway/ws.py new file mode 100644 index 000000000..1661811db --- /dev/null +++ b/tui_gateway/ws.py @@ -0,0 +1,174 @@ +"""WebSocket transport for the tui_gateway JSON-RPC server. + +Reuses :func:`tui_gateway.server.dispatch` verbatim so every RPC method, every +slash command, every approval/clarify/sudo flow, and every agent event flows +through the same handlers whether the client is Ink over stdio or an iOS / +web client over WebSocket. + +Wire protocol +------------- +Identical to stdio: newline-delimited JSON-RPC in both directions. The server +emits a ``gateway.ready`` event immediately after connection accept, then +echoes responses/events for inbound requests. No framing differences. + +Mounting +-------- + from fastapi import WebSocket + from tui_gateway.ws import handle_ws + + @app.websocket("/api/ws") + async def ws(ws: WebSocket): + await handle_ws(ws) +""" + +from __future__ import annotations + +import asyncio +import json +import logging +from typing import Any + +from tui_gateway import server + +_log = logging.getLogger(__name__) + +# Max seconds a pool-dispatched handler will block waiting for the event loop +# to flush a WS frame before we mark the transport dead. Protects handler +# threads from a wedged socket. +_WS_WRITE_TIMEOUT_S = 10.0 + +# Keep starlette optional at import time; handle_ws uses the real class when +# it's available and falls back to a generic Exception sentinel otherwise. 
+try: + from starlette.websockets import WebSocketDisconnect as _WebSocketDisconnect +except ImportError: # pragma: no cover - starlette is a required install path + _WebSocketDisconnect = Exception # type: ignore[assignment] + + +class WSTransport: + """Per-connection WS transport. + + ``write`` is safe to call from any thread *other than* the event loop + thread that owns the socket. Pool workers (the only real caller) run in + their own threads, so marshalling onto the loop via + :func:`asyncio.run_coroutine_threadsafe` + ``future.result()`` is correct + and deadlock-free there. + + When called from the loop thread itself (e.g. by ``handle_ws`` for an + inline response) the same call would deadlock: we'd schedule work onto + the loop we're currently blocking. We detect that case and fire-and- + forget instead. Callers that need to know when the bytes are on the wire + should use :meth:`write_async` from the loop thread. + """ + + def __init__(self, ws: Any, loop: asyncio.AbstractEventLoop) -> None: + self._ws = ws + self._loop = loop + self._closed = False + + def write(self, obj: dict) -> bool: + if self._closed: + return False + + line = json.dumps(obj, ensure_ascii=False) + + try: + on_loop = asyncio.get_running_loop() is self._loop + except RuntimeError: + on_loop = False + + if on_loop: + # Fire-and-forget — don't block the loop waiting on itself. + self._loop.create_task(self._safe_send(line)) + return True + + try: + fut = asyncio.run_coroutine_threadsafe(self._safe_send(line), self._loop) + fut.result(timeout=_WS_WRITE_TIMEOUT_S) + return not self._closed + except Exception as exc: + self._closed = True + _log.debug("ws write failed: %s", exc) + return False + + async def write_async(self, obj: dict) -> bool: + """Send from the owning event loop. 
Awaits until the frame is on the wire.""" + if self._closed: + return False + await self._safe_send(json.dumps(obj, ensure_ascii=False)) + return not self._closed + + async def _safe_send(self, line: str) -> None: + try: + await self._ws.send_text(line) + except Exception as exc: + self._closed = True + _log.debug("ws send failed: %s", exc) + + def close(self) -> None: + self._closed = True + + +async def handle_ws(ws: Any) -> None: + """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``.""" + await ws.accept() + + transport = WSTransport(ws, asyncio.get_running_loop()) + + await transport.write_async( + { + "jsonrpc": "2.0", + "method": "event", + "params": { + "type": "gateway.ready", + "payload": {"skin": server.resolve_skin()}, + }, + } + ) + + try: + while True: + try: + raw = await ws.receive_text() + except _WebSocketDisconnect: + break + + line = raw.strip() + if not line: + continue + + try: + req = json.loads(line) + except json.JSONDecodeError: + ok = await transport.write_async( + { + "jsonrpc": "2.0", + "error": {"code": -32700, "message": "parse error"}, + "id": None, + } + ) + if not ok: + break + continue + + # dispatch() may schedule long handlers on the pool; it returns + # None in that case and the worker writes the response itself via + # the transport we pass in (a separate thread, so transport.write + # is the safe path there). For inline handlers it returns the + # response dict, which we write here from the loop. + resp = await asyncio.to_thread(server.dispatch, req, transport) + if resp is not None and not await transport.write_async(resp): + break + finally: + transport.close() + + # Detach the transport from any sessions it owned so later emits + # fall back to stdio instead of crashing into a closed socket. 
+ for _, sess in list(server._sessions.items()): + if sess.get("transport") is transport: + sess["transport"] = server._stdio_transport + + try: + await ws.close() + except Exception: + pass diff --git a/ui-tui/packages/hermes-ink/src/ink/components/Text.test.ts b/ui-tui/packages/hermes-ink/src/ink/components/Text.test.ts new file mode 100644 index 000000000..9869189ed --- /dev/null +++ b/ui-tui/packages/hermes-ink/src/ink/components/Text.test.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from 'vitest' + +import { shouldUseAnsiDim } from './Text.js' + +describe('shouldUseAnsiDim', () => { + it('disables ANSI dim on VTE terminals by default', () => { + expect(shouldUseAnsiDim({ VTE_VERSION: '7603' } as NodeJS.ProcessEnv)).toBe(false) + }) + + it('keeps ANSI dim enabled elsewhere by default', () => { + expect(shouldUseAnsiDim({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true) + }) + + it('honors explicit env override', () => { + expect(shouldUseAnsiDim({ HERMES_TUI_DIM: '1', VTE_VERSION: '7603' } as NodeJS.ProcessEnv)).toBe(true) + expect(shouldUseAnsiDim({ HERMES_TUI_DIM: '0' } as NodeJS.ProcessEnv)).toBe(false) + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/components/Text.tsx b/ui-tui/packages/hermes-ink/src/ink/components/Text.tsx index 9459b78a2..d6b7fdccd 100644 --- a/ui-tui/packages/hermes-ink/src/ink/components/Text.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/components/Text.tsx @@ -3,6 +3,9 @@ import React from 'react' import { c as _c } from 'react/compiler-runtime' import type { Color, Styles } from '../styles.js' + +const ENV_ON_RE = /^(?:1|true|yes|on)$/i +const ENV_OFF_RE = /^(?:0|false|no|off)$/i type BaseProps = { /** * Change text color. Accepts a raw color value (rgb, hex, ansi). @@ -62,6 +65,20 @@ type WeightProps = } export type Props = BaseProps & WeightProps +export function shouldUseAnsiDim(env: NodeJS.ProcessEnv = process.env): boolean { + const override = (env.HERMES_TUI_DIM ?? 
'').trim() + + if (ENV_ON_RE.test(override)) { + return true + } + + if (ENV_OFF_RE.test(override)) { + return false + } + + return !env.VTE_VERSION +} + const memoizedStylesForWrap: Record, Styles> = { wrap: { flexGrow: 0, @@ -143,6 +160,7 @@ export default function Text(t0: Props) { const strikethrough = t3 === undefined ? false : t3 const inverse = t4 === undefined ? false : t4 const wrap = t5 === undefined ? 'wrap' : t5 + const effectiveDim = dim && shouldUseAnsiDim() if (children === undefined || children === null) { return null @@ -174,11 +192,11 @@ export default function Text(t0: Props) { let t8 - if ($[4] !== dim) { - t8 = dim && { - dim + if ($[4] !== effectiveDim) { + t8 = effectiveDim && { + dim: effectiveDim } - $[4] = dim + $[4] = effectiveDim $[5] = t8 } else { t8 = $[5] diff --git a/ui-tui/packages/hermes-ink/src/ink/dom.ts b/ui-tui/packages/hermes-ink/src/ink/dom.ts index 735ab0b0c..9ff1be411 100644 --- a/ui-tui/packages/hermes-ink/src/ink/dom.ts +++ b/ui-tui/packages/hermes-ink/src/ink/dom.ts @@ -83,6 +83,10 @@ export type DOMElement = { // Only set on ink-root. The document owns focus — any node can // reach it by walking parentNode, like browser getRootNode(). focusManager?: FocusManager + // Measurement cache for ink-text nodes: avoids re-squashing and re-wrapping + // text when yoga calls measureFunc multiple times per frame with different + // widths during flex re-pass. Keyed by `${width}|${widthMode}`. + _textMeasureCache?: { gen: number; entries: Map } } & InkNode export type TextNode = { @@ -311,10 +315,42 @@ export const createTextNode = (text: string): TextNode => { return node } +const MEASURE_CACHE_CAP = 16 + const measureTextNode = function ( node: DOMNode, width: number, widthMode: LayoutMeasureMode +): { width: number; height: number } { + const elem = node.nodeName !== '#text' ? 
(node as DOMElement) : node.parentNode + if (elem && elem.nodeName === 'ink-text') { + let cache = elem._textMeasureCache + if (!cache) { + cache = { gen: 0, entries: new Map() } + elem._textMeasureCache = cache + } + const key = `${width}|${widthMode}` + const hit = cache.entries.get(key) + if (hit && hit._gen === cache.gen) { + return hit.result + } + const result = computeTextMeasure(node, width, widthMode) + // Enforce cap with FIFO eviction to avoid unbounded growth during + // pathological frames where yoga probes many widths. + if (cache.entries.size >= MEASURE_CACHE_CAP) { + const firstKey = cache.entries.keys().next().value + cache.entries.delete(firstKey) + } + cache.entries.set(key, { _gen: cache.gen, result }) + return result + } + return computeTextMeasure(node, width, widthMode) +} + +const computeTextMeasure = function ( + node: DOMNode, + width: number, + widthMode: LayoutMeasureMode ): { width: number; height: number } { const rawText = node.nodeName === '#text' ? node.nodeValue : squashTextNodes(node) @@ -378,13 +414,19 @@ export const markDirty = (node?: DOMNode): void => { while (current) { if (current.nodeName !== '#text') { - ;(current as DOMElement).dirty = true + const elem = current as DOMElement + elem.dirty = true // Only mark yoga dirty on leaf nodes that have measure functions - if (!markedYoga && (current.nodeName === 'ink-text' || current.nodeName === 'ink-raw-ansi') && current.yogaNode) { - current.yogaNode.markDirty() + if (!markedYoga && (elem.nodeName === 'ink-text' || elem.nodeName === 'ink-raw-ansi') && elem.yogaNode) { + elem.yogaNode.markDirty() markedYoga = true } + + // Invalidate text measurement cache — child text or style changed. 
+ if (elem._textMeasureCache) { + elem._textMeasureCache.gen++ + } } current = current.parentNode @@ -433,6 +475,7 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => { for (const child of node.childNodes) { clearYogaNodeReferences(child) } + node._textMeasureCache = undefined } node.yogaNode = undefined diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts b/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts new file mode 100644 index 000000000..02ea9ebd2 --- /dev/null +++ b/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, it } from 'vitest' + +import { shouldEmitClipboardSequence } from './osc.js' + +describe('shouldEmitClipboardSequence', () => { + it('suppresses local multiplexer clipboard OSC by default', () => { + expect(shouldEmitClipboardSequence({ TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe(false) + expect(shouldEmitClipboardSequence({ STY: '1234.pts-0.host' } as NodeJS.ProcessEnv)).toBe(false) + }) + + it('keeps OSC enabled for remote or plain local terminals', () => { + expect(shouldEmitClipboardSequence({ SSH_CONNECTION: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe( + true + ) + expect(shouldEmitClipboardSequence({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true) + }) + + it('honors explicit env override', () => { + expect(shouldEmitClipboardSequence({ HERMES_TUI_CLIPBOARD_OSC52: '1', TMUX: '/tmp/tmux-1/default,1,0' } as NodeJS.ProcessEnv)).toBe( + true + ) + expect(shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe( + false + ) + }) +}) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts b/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts index 49f222395..3230767e7 100644 --- a/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts +++ b/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts @@ -11,6 +11,8 @@ import { BEL, ESC, ESC_TYPE, SEP } from 
'./ansi.js' import type { Action, Color, TabStatusAction } from './types.js' export const OSC_PREFIX = ESC + String.fromCharCode(ESC_TYPE.OSC) +const ENV_ON_RE = /^(?:1|true|yes|on)$/i +const ENV_OFF_RE = /^(?:0|false|no|off)$/i /** String Terminator (ESC \) - alternative to BEL for terminating OSC */ export const ST = ESC + '\\' @@ -81,6 +83,20 @@ export function getClipboardPath(): ClipboardPath { return 'osc52' } +export function shouldEmitClipboardSequence(env: NodeJS.ProcessEnv = process.env): boolean { + const override = (env.HERMES_TUI_CLIPBOARD_OSC52 ?? env.HERMES_TUI_COPY_OSC52 ?? '').trim() + + if (ENV_ON_RE.test(override)) { + return true + } + + if (ENV_OFF_RE.test(override)) { + return false + } + + return !!env['SSH_CONNECTION'] || (!env['TMUX'] && !env['STY']) +} + /** * Wrap a payload in tmux's DCS passthrough: ESC P tmux ; ESC \ * tmux forwards the payload to the outer terminal, bypassing its own parser. @@ -152,6 +168,7 @@ export async function tmuxLoadBuffer(text: string): Promise { export async function setClipboard(text: string): Promise { const b64 = Buffer.from(text, 'utf8').toString('base64') const raw = osc(OSC.CLIPBOARD, 'c', b64) + const emitSequence = shouldEmitClipboardSequence(process.env) // Native safety net — fire FIRST, before the tmux await, so a quick // focus-switch after selecting doesn't race pbcopy. Previously this ran @@ -170,10 +187,10 @@ export async function setClipboard(text: string): Promise { // Inner OSC uses BEL directly (not osc()) — ST's ESC would need doubling // too, and BEL works everywhere for OSC 52. if (tmuxBufferLoaded) { - return tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) + return emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : '' } - return raw + return emitSequence ? raw : '' } // Linux clipboard tool: undefined = not yet probed, null = none available. 
diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index 23f7c4646..f8d88a50f 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -15,7 +15,8 @@ const buildCtx = (appended: Msg[]) => composer: { dequeue: () => undefined, queueEditRef: ref(null), - sendQueued: vi.fn() + sendQueued: vi.fn(), + setInput: vi.fn() }, gateway: { gw: { request: vi.fn() }, @@ -29,6 +30,9 @@ const buildCtx = (appended: Msg[]) => resumeById: vi.fn(), setCatalog: vi.fn() }, + submission: { + submitRef: { current: vi.fn() } + }, system: { bellOnComplete: false, sys: vi.fn() @@ -38,6 +42,11 @@ const buildCtx = (appended: Msg[]) => panel: (title: string, sections: any[]) => appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }), setHistoryItems: vi.fn() + }, + voice: { + setProcessing: vi.fn(), + setRecording: vi.fn(), + setVoiceEnabled: vi.fn() } }) as any @@ -143,91 +152,82 @@ describe('createGatewayEventHandler', () => { expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer)) }) - it('attaches inline_diff to the assistant completion body', () => { + it('anchors inline_diff as its own segment where the edit happened', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new' const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' + const block = `\`\`\`diff\n${cleaned}\n\`\`\`` - onEvent({ - payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, - type: 'tool.start' - } as any) - onEvent({ - payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) + // Narration → tool → tool-complete → more narration → message-complete. 
+ // The diff MUST land between the two narration segments, not tacked + // onto the final one. + onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any) + onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' } as any) + onEvent({ payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) - // Diff is buffered for message.complete and sanitized (ANSI stripped). + // Diff is already committed to segmentMessages as its own segment. expect(appended).toHaveLength(0) - expect(turnController.pendingInlineDiffs).toEqual([cleaned]) + expect(turnController.segmentMessages).toEqual([ + { role: 'assistant', text: 'Editing the file' }, + { kind: 'diff', role: 'assistant', text: block } + ]) - onEvent({ - payload: { text: 'patch applied' }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any) - // Diff is rendered in the same assistant message body as the completion. - expect(appended).toHaveLength(1) - expect(appended[0]).toMatchObject({ role: 'assistant' }) - expect(appended[0]?.text).toContain('patch applied') - expect(appended[0]?.text).toContain('```diff') - expect(appended[0]?.text).toContain(cleaned) + // Four transcript messages: pre-tool narration → tool trail → diff + // (kind='diff', so MessageLine gives it blank-line breathing room) → + // post-tool narration. The final message does NOT contain a diff. 
+ expect(appended).toHaveLength(4) + expect(appended[0]?.text).toBe('Editing the file') + expect(appended[1]).toMatchObject({ kind: 'trail' }) + expect(appended[1]?.tools?.[0]).toContain('Patch') + expect(appended[2]).toMatchObject({ kind: 'diff', text: block }) + expect(appended[3]?.text).toBe('patch applied') + expect(appended[3]?.text).not.toContain('```diff') }) - it('does not append inline_diff twice when assistant text already contains it', () => { + it('drops the diff segment when the final assistant text narrates the same diff', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' const assistantText = `Done. Here's the inline diff:\n\n\`\`\`diff\n${cleaned}\n\`\`\`` - onEvent({ - payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) - onEvent({ - payload: { text: assistantText }, - type: 'message.complete' - } as any) + onEvent({ payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) + onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any) + // Only the final message — diff-only segment dropped so we don't + // render two stacked copies of the same patch. expect(appended).toHaveLength(1) expect(appended[0]?.text).toBe(assistantText) expect((appended[0]?.text.match(/```diff/g) ?? 
[]).length).toBe(1) }) - it('strips the CLI "┊ review diff" header from queued inline diffs', () => { + it('strips the CLI "┊ review diff" header from inline diff segments', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const raw = ' \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' - onEvent({ - payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) - onEvent({ - payload: { text: 'done' }, - type: 'message.complete' - } as any) + onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) + onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any) - expect(appended).toHaveLength(1) - expect(appended[0]?.text).not.toContain('┊ review diff') - expect(appended[0]?.text).toContain('--- a/foo.ts') + // Tool trail first, then diff segment (kind='diff'), then final narration. + expect(appended).toHaveLength(3) + expect(appended[0]?.kind).toBe('trail') + expect(appended[1]?.kind).toBe('diff') + expect(appended[1]?.text).not.toContain('┊ review diff') + expect(appended[1]?.text).toContain('--- a/foo.ts') + expect(appended[2]?.text).toBe('done') }) - it('suppresses inline_diff when assistant already wrote a diff fence', () => { + it('drops the diff segment when assistant writes its own ```diff fence', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' const assistantText = 'Done. 
Clean swap:\n\n```diff\n-old\n+new\n```' - onEvent({ - payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) - onEvent({ - payload: { text: assistantText }, - type: 'message.complete' - } as any) + onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) + onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any) expect(appended).toHaveLength(1) expect(appended[0]?.text).toBe(assistantText) @@ -243,15 +243,19 @@ describe('createGatewayEventHandler', () => { payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' }, type: 'tool.complete' } as any) - onEvent({ - payload: { text: 'done' }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any) - expect(appended).toHaveLength(1) + // Tool row is now placed before the diff, so telemetry does not render + // below the patch that came from that tool. + expect(appended).toHaveLength(3) + expect(appended[0]?.kind).toBe('trail') expect(appended[0]?.tools?.[0]).toContain('Review Diff') expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts') - expect(appended[0]?.text).toContain('```diff') + expect(appended[1]?.kind).toBe('diff') + expect(appended[1]?.text).toContain('```diff') + expect(appended[1]?.tools ?? []).toEqual([]) + expect(appended[2]?.text).toBe('done') + expect(appended[2]?.tools ?? 
[]).toEqual([]) }) it('shows setup panel for missing provider startup error', () => { diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts index 901564f73..eba1d56d8 100644 --- a/ui-tui/src/__tests__/createSlashHandler.test.ts +++ b/ui-tui/src/__tests__/createSlashHandler.test.ts @@ -88,6 +88,41 @@ describe('createSlashHandler', () => { expect(ctx.transcript.sys).toHaveBeenCalledWith('details: expanded') }) + it('sets a per-section override and persists it under details_mode.
', () => { + const ctx = buildCtx() + + expect(createSlashHandler(ctx)('/details activity hidden')).toBe(true) + expect(getUiState().sections.activity).toBe('hidden') + expect(ctx.gateway.rpc).toHaveBeenCalledWith('config.set', { + key: 'details_mode.activity', + value: 'hidden' + }) + expect(ctx.transcript.sys).toHaveBeenCalledWith('details activity: hidden') + }) + + it('clears a per-section override on /details
reset', () => { + const ctx = buildCtx() + createSlashHandler(ctx)('/details tools expanded') + expect(getUiState().sections.tools).toBe('expanded') + + createSlashHandler(ctx)('/details tools reset') + expect(getUiState().sections.tools).toBeUndefined() + expect(ctx.gateway.rpc).toHaveBeenLastCalledWith('config.set', { + key: 'details_mode.tools', + value: '' + }) + expect(ctx.transcript.sys).toHaveBeenCalledWith('details tools: reset') + }) + + it('rejects unknown section modes with a usage hint', () => { + const ctx = buildCtx() + createSlashHandler(ctx)('/details tools blink') + expect(getUiState().sections.tools).toBeUndefined() + expect(ctx.transcript.sys).toHaveBeenCalledWith( + 'usage: /details
[hidden|collapsed|expanded|reset]' + ) + }) + it('shows tool enable usage when names are missing', () => { const ctx = buildCtx() @@ -246,36 +281,6 @@ describe('createSlashHandler', () => { expect(ctx.transcript.page).not.toHaveBeenCalled() expect(ctx.transcript.sys).toHaveBeenCalledWith('no conversation yet') }) - - it('handles send-type dispatch for /plan command', async () => { - const planMessage = 'Plan skill content loaded' - - const ctx = buildCtx({ - gateway: { - gw: { - getLogTail: vi.fn(() => ''), - request: vi.fn((method: string) => { - if (method === 'slash.exec') { - return Promise.reject(new Error('pending-input command')) - } - - if (method === 'command.dispatch') { - return Promise.resolve({ type: 'send', message: planMessage }) - } - - return Promise.resolve({}) - }) - }, - rpc: vi.fn(() => Promise.resolve({})) - } - }) - - const h = createSlashHandler(ctx) - expect(h('/plan create a REST API')).toBe(true) - await vi.waitFor(() => { - expect(ctx.transcript.send).toHaveBeenCalledWith(planMessage) - }) - }) }) const buildCtx = (overrides: Partial = {}): Ctx => ({ diff --git a/ui-tui/src/__tests__/details.test.ts b/ui-tui/src/__tests__/details.test.ts new file mode 100644 index 000000000..15ef681dc --- /dev/null +++ b/ui-tui/src/__tests__/details.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it } from 'vitest' + +import { isSectionName, parseDetailsMode, resolveSections, sectionMode, SECTION_NAMES } from '../domain/details.js' + +describe('parseDetailsMode', () => { + it('accepts the canonical modes case-insensitively', () => { + expect(parseDetailsMode('hidden')).toBe('hidden') + expect(parseDetailsMode(' COLLAPSED ')).toBe('collapsed') + expect(parseDetailsMode('Expanded')).toBe('expanded') + }) + + it('rejects junk', () => { + expect(parseDetailsMode('truncated')).toBeNull() + expect(parseDetailsMode('')).toBeNull() + expect(parseDetailsMode(undefined)).toBeNull() + expect(parseDetailsMode(42)).toBeNull() + }) +}) + 
+describe('isSectionName', () => { + it('only lets the four canonical sections through', () => { + expect(isSectionName('thinking')).toBe(true) + expect(isSectionName('tools')).toBe(true) + expect(isSectionName('subagents')).toBe(true) + expect(isSectionName('activity')).toBe(true) + + expect(isSectionName('Thinking')).toBe(false) // case-sensitive on purpose + expect(isSectionName('bogus')).toBe(false) + expect(isSectionName('')).toBe(false) + expect(isSectionName(7)).toBe(false) + }) + + it('SECTION_NAMES exposes them all', () => { + expect([...SECTION_NAMES].sort()).toEqual(['activity', 'subagents', 'thinking', 'tools']) + }) +}) + +describe('resolveSections', () => { + it('parses a well-formed sections object', () => { + expect( + resolveSections({ + thinking: 'expanded', + tools: 'expanded', + subagents: 'collapsed', + activity: 'hidden' + }) + ).toEqual({ + thinking: 'expanded', + tools: 'expanded', + subagents: 'collapsed', + activity: 'hidden' + }) + }) + + it('drops unknown section names and unknown modes', () => { + expect( + resolveSections({ + thinking: 'expanded', + tools: 'maximised', + bogus: 'hidden', + activity: 'hidden' + }) + ).toEqual({ thinking: 'expanded', activity: 'hidden' }) + }) + + it('treats nullish/non-objects as empty overrides', () => { + expect(resolveSections(undefined)).toEqual({}) + expect(resolveSections(null)).toEqual({}) + expect(resolveSections('hidden')).toEqual({}) + expect(resolveSections([])).toEqual({}) + }) +}) + +describe('sectionMode', () => { + it('falls back to the global mode for sections without a built-in default', () => { + expect(sectionMode('subagents', 'collapsed', {})).toBe('collapsed') + expect(sectionMode('subagents', 'expanded', undefined)).toBe('expanded') + expect(sectionMode('subagents', 'hidden', {})).toBe('hidden') + }) + + it('streams thinking + tools expanded by default regardless of global mode', () => { + expect(sectionMode('thinking', 'collapsed', {})).toBe('expanded') + 
expect(sectionMode('thinking', 'hidden', undefined)).toBe('expanded') + expect(sectionMode('tools', 'collapsed', {})).toBe('expanded') + expect(sectionMode('tools', 'hidden', undefined)).toBe('expanded') + }) + + it('hides the activity panel by default regardless of global mode', () => { + expect(sectionMode('activity', 'collapsed', {})).toBe('hidden') + expect(sectionMode('activity', 'expanded', undefined)).toBe('hidden') + expect(sectionMode('activity', 'hidden', {})).toBe('hidden') + }) + + it('honours per-section overrides over both the section default and global mode', () => { + expect(sectionMode('thinking', 'collapsed', { thinking: 'collapsed' })).toBe('collapsed') + expect(sectionMode('tools', 'collapsed', { tools: 'hidden' })).toBe('hidden') + expect(sectionMode('activity', 'collapsed', { activity: 'expanded' })).toBe('expanded') + expect(sectionMode('activity', 'expanded', { activity: 'collapsed' })).toBe('collapsed') + }) + + it('lets per-section overrides escape the global hidden mode', () => { + // Regression for the case where global details_mode: hidden used to + // short-circuit the entire accordion and prevent overrides from + // surfacing — `sections.tools: expanded` must still resolve to expanded. 
+ expect(sectionMode('subagents', 'hidden', { subagents: 'expanded' })).toBe('expanded') + expect(sectionMode('thinking', 'hidden', { thinking: 'collapsed' })).toBe('collapsed') + expect(sectionMode('activity', 'hidden', { activity: 'expanded' })).toBe('expanded') + }) +}) diff --git a/ui-tui/src/__tests__/platform.test.ts b/ui-tui/src/__tests__/platform.test.ts index dbb6f0fe6..8995b9c6f 100644 --- a/ui-tui/src/__tests__/platform.test.ts +++ b/ui-tui/src/__tests__/platform.test.ts @@ -31,6 +31,36 @@ describe('platform action modifier', () => { }) }) +describe('isVoiceToggleKey', () => { + it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => { + const { isVoiceToggleKey } = await importPlatform('darwin') + + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'b')).toBe(true) + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'B')).toBe(true) + }) + + it('matches Cmd+B on macOS (preserve platform muscle memory)', async () => { + const { isVoiceToggleKey } = await importPlatform('darwin') + + expect(isVoiceToggleKey({ ctrl: false, meta: true, super: false }, 'b')).toBe(true) + expect(isVoiceToggleKey({ ctrl: false, meta: false, super: true }, 'b')).toBe(true) + }) + + it('matches Ctrl+B on non-macOS platforms', async () => { + const { isVoiceToggleKey } = await importPlatform('linux') + + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'b')).toBe(true) + }) + + it('does not match unmodified b or other Ctrl combos', async () => { + const { isVoiceToggleKey } = await importPlatform('darwin') + + expect(isVoiceToggleKey({ ctrl: false, meta: false, super: false }, 'b')).toBe(false) + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'a')).toBe(false) + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'c')).toBe(false) + }) +}) + describe('isMacActionFallback', () => { it('routes raw Ctrl+K and Ctrl+W to readline kill-to-end / delete-word on macOS', async () => 
{ const { isMacActionFallback } = await importPlatform('darwin') diff --git a/ui-tui/src/__tests__/useConfigSync.test.ts b/ui-tui/src/__tests__/useConfigSync.test.ts index c5a0a97dc..568251744 100644 --- a/ui-tui/src/__tests__/useConfigSync.test.ts +++ b/ui-tui/src/__tests__/useConfigSync.test.ts @@ -62,6 +62,53 @@ describe('applyDisplay', () => { expect(s.showReasoning).toBe(false) expect(s.statusBar).toBe('top') expect(s.streaming).toBe(true) + expect(s.sections).toEqual({}) + }) + + it('parses display.sections into per-section overrides', () => { + const setBell = vi.fn() + + applyDisplay( + { + config: { + display: { + details_mode: 'collapsed', + sections: { + activity: 'hidden', + tools: 'expanded', + thinking: 'expanded', + bogus: 'expanded' + } + } + } + }, + setBell + ) + + const s = $uiState.get() + expect(s.detailsMode).toBe('collapsed') + expect(s.sections).toEqual({ + activity: 'hidden', + tools: 'expanded', + thinking: 'expanded' + }) + }) + + it('drops invalid section modes', () => { + const setBell = vi.fn() + + applyDisplay( + { + config: { + display: { + sections: { tools: 'maximised' as unknown as string, activity: 'hidden' } + } + } + }, + setBell + ) + + expect($uiState.get().sections).toEqual({ activity: 'hidden' }) }) it('treats a null config like an empty display block', () => { diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 1ec123f11..15cf00a5a 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -51,6 +51,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session const { bellOnComplete, stdout, sys } = ctx.system const { appendMessage, panel, setHistoryItems } = ctx.transcript + const { setInput } = ctx.composer + const { submitRef } = ctx.submission + const { setProcessing: setVoiceProcessing, setRecording: 
setVoiceRecording, setVoiceEnabled } = ctx.voice let pendingThinkingStatus = '' let thinkingStatusTimer: null | ReturnType = null @@ -261,6 +264,57 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } + case 'voice.status': { + // Continuous VAD loop reports its internal state so the status bar + // can show listening / transcribing / idle without polling. + const state = String(ev.payload?.state ?? '') + + if (state === 'listening') { + setVoiceRecording(true) + setVoiceProcessing(false) + } else if (state === 'transcribing') { + setVoiceRecording(false) + setVoiceProcessing(true) + } else { + setVoiceRecording(false) + setVoiceProcessing(false) + } + + return + } + + case 'voice.transcript': { + // CLI parity: the 3-strikes silence detector flipped off automatically. + // Mirror that on the UI side and tell the user why the mode is off. + if (ev.payload?.no_speech_limit) { + setVoiceEnabled(false) + setVoiceRecording(false) + setVoiceProcessing(false) + sys('voice: no speech detected 3 times, continuous mode stopped') + + return + } + + const text = String(ev.payload?.text ?? '').trim() + + if (!text) { + return + } + + // CLI parity: _pending_input.put(transcript) unconditionally feeds + // the transcript to the agent as its next turn — draft handling + // doesn't apply because voice-mode users are speaking, not typing. + // + // We can't branch on composer input from inside a setInput updater + // (React strict mode double-invokes it, duplicating the submit). + // Just clear + defer submit so the cleared input is committed before + // submit reads it. + setInput('') + setTimeout(() => submitRef.current(text), 0) + + return + } + case 'gateway.start_timeout': { const { cwd, python } = ev.payload ?? {} const trace = python || cwd ? 
` · ${String(python || '')} ${String(cwd || '')}`.trim() : '' @@ -331,10 +385,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } - // Keep inline diffs attached to the assistant completion body so - // they render in the same message flow, not as a standalone system - // artifact that can look out-of-place around tool rows. - turnController.queueInlineDiff(inlineDiffText) + // Anchor the diff to where the edit happened in the turn — between + // the narration that preceded the tool call and whatever the agent + // streams afterwards. The previous end-merge put the diff at the + // bottom of the final message even when the edit fired mid-turn, + // which read as "the agent wrote this after saying that". + turnController.pushInlineDiffSegment(inlineDiffText) return } diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts index c1c427739..92529ca79 100644 --- a/ui-tui/src/app/interfaces.ts +++ b/ui-tui/src/app/interfaces.ts @@ -16,6 +16,7 @@ import type { Msg, PanelSection, SecretReq, + SectionVisibility, SessionInfo, SlashCatalog, SubagentProgress, @@ -87,6 +88,7 @@ export interface UiState { detailsMode: DetailsMode info: null | SessionInfo inlineDiffs: boolean + sections: SectionVisibility showCost: boolean showReasoning: boolean sid: null | string @@ -189,9 +191,11 @@ export interface InputHandlerContext { stdout?: NodeJS.WriteStream } voice: { + enabled: boolean recording: boolean setProcessing: StateSetter setRecording: StateSetter + setVoiceEnabled: StateSetter } wheelStep: number } @@ -201,6 +205,9 @@ export interface InputHandlerResult { } export interface GatewayEventHandlerContext { + composer: { + setInput: StateSetter + } gateway: GatewayServices session: { STARTUP_RESUME_ID: string @@ -210,6 +217,9 @@ export interface GatewayEventHandlerContext { resumeById: (id: string) => void setCatalog: StateSetter } + submission: { + submitRef: MutableRefObject<(value: string) => void> + } system: { 
bellOnComplete: boolean stdout?: NodeJS.WriteStream @@ -220,6 +230,11 @@ export interface GatewayEventHandlerContext { panel: (title: string, sections: PanelSection[]) => void setHistoryItems: StateSetter } + voice: { + setProcessing: StateSetter + setRecording: StateSetter + setVoiceEnabled: StateSetter + } } export interface SlashHandlerContext { diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts index 904882c21..870e2000c 100644 --- a/ui-tui/src/app/slash/commands/core.ts +++ b/ui-tui/src/app/slash/commands/core.ts @@ -1,7 +1,7 @@ import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js' import { dailyFortune, randomFortune } from '../../../content/fortunes.js' import { HOTKEYS } from '../../../content/hotkeys.js' -import { nextDetailsMode, parseDetailsMode } from '../../../domain/details.js' +import { SECTION_NAMES, isSectionName, nextDetailsMode, parseDetailsMode } from '../../../domain/details.js' import type { ConfigGetValueResponse, ConfigSetResponse, @@ -10,7 +10,7 @@ import type { } from '../../../gatewayTypes.js' import { writeOsc52Clipboard } from '../../../lib/osc52.js' import { configureDetectedTerminalKeybindings, configureTerminalKeybindings } from '../../../lib/terminalSetup.js' -import type { DetailsMode, Msg, PanelSection } from '../../../types.js' +import type { Msg, PanelSection } from '../../../types.js' import type { StatusBarMode } from '../../interfaces.js' import { patchOverlayState } from '../../overlayStore.js' import { patchUiState } from '../../uiStore.js' @@ -38,7 +38,11 @@ const flagFromArg = (arg: string, current: boolean): boolean | null => { return null } -const DETAIL_MODES = new Set(['collapsed', 'cycle', 'expanded', 'hidden', 'toggle']) +const RESET_WORDS = new Set(['reset', 'clear', 'default']) +const CYCLE_WORDS = new Set(['cycle', 'toggle']) +const DETAILS_USAGE = + 'usage: /details [hidden|collapsed|expanded|cycle] or /details
[hidden|collapsed|expanded|reset]' +const DETAILS_SECTION_USAGE = 'usage: /details
[hidden|collapsed|expanded|reset]' export const coreCommands: SlashCommand[] = [ { @@ -57,7 +61,11 @@ export const coreCommands: SlashCommand[] = [ sections.push( { rows: [ - ['/details [hidden|collapsed|expanded|cycle]', 'set agent detail visibility mode'], + ['/details [hidden|collapsed|expanded|cycle]', 'set global agent detail visibility mode'], + [ + '/details
[hidden|collapsed|expanded|reset]', + 'override one section (thinking/tools/subagents/activity)' + ], ['/fortune [random|daily]', 'show a random or daily local fortune'] ], title: 'TUI' @@ -140,7 +148,7 @@ export const coreCommands: SlashCommand[] = [ { aliases: ['detail'], - help: 'control agent detail visibility', + help: 'control agent detail visibility (global or per-section)', name: 'details', run: (arg, ctx) => { const { gateway, transcript, ui } = ctx @@ -149,31 +157,48 @@ export const coreCommands: SlashCommand[] = [ gateway .rpc('config.get', { key: 'details_mode' }) .then(r => { - if (ctx.stale()) { - return - } + if (ctx.stale()) return const mode = parseDetailsMode(r?.value) ?? ui.detailsMode - patchUiState({ detailsMode: mode }) - transcript.sys(`details: ${mode}`) - }) - .catch(() => { - if (!ctx.stale()) { - transcript.sys(`details: ${ui.detailsMode}`) - } + + const overrides = SECTION_NAMES.filter(s => ui.sections[s]) + .map(s => `${s}=${ui.sections[s]}`) + .join(' ') + + transcript.sys(`details: ${mode}${overrides ? ` (${overrides})` : ''}`) }) + .catch(() => !ctx.stale() && transcript.sys(`details: ${ui.detailsMode}`)) return } - const mode = arg.trim().toLowerCase() + const [first, second] = arg.trim().toLowerCase().split(/\s+/) - if (!DETAIL_MODES.has(mode)) { - return transcript.sys('usage: /details [hidden|collapsed|expanded|cycle]') + if (second && isSectionName(first)) { + const reset = RESET_WORDS.has(second) + const mode = reset ? null : parseDetailsMode(second) + + if (!reset && !mode) { + return transcript.sys(DETAILS_SECTION_USAGE) + } + + const { [first]: _drop, ...rest } = ui.sections + + patchUiState({ sections: mode ? { ...rest, [first]: mode } : rest }) + gateway + .rpc('config.set', { key: `details_mode.${first}`, value: mode ?? '' }) + .catch(() => {}) + transcript.sys(`details ${first}: ${mode ?? 'reset'}`) + + return } - const next = mode === 'cycle' || mode === 'toggle' ? 
nextDetailsMode(ui.detailsMode) : (mode as DetailsMode) + const next = CYCLE_WORDS.has(first ?? '') ? nextDetailsMode(ui.detailsMode) : parseDetailsMode(first) + + if (!next) { + return transcript.sys(DETAILS_USAGE) + } patchUiState({ detailsMode: next }) gateway.rpc('config.set', { key: 'details_mode', value: next }).catch(() => {}) @@ -221,7 +246,7 @@ export const coreCommands: SlashCommand[] = [ } writeOsc52Clipboard(target.text) - sys('sent OSC52 copy sequence (terminal support required)') + sys(`copied ${target.text.length} chars`) } }, diff --git a/ui-tui/src/app/slash/commands/session.ts b/ui-tui/src/app/slash/commands/session.ts index 5f17667f0..cf36fee6c 100644 --- a/ui-tui/src/app/slash/commands/session.ts +++ b/ui-tui/src/app/slash/commands/session.ts @@ -184,15 +184,64 @@ export const sessionCommands: SlashCommand[] = [ }, { - help: 'toggle voice input', + help: 'voice mode: [on|off|tts|status]', name: 'voice', run: (arg, ctx) => { - const action = arg === 'on' || arg === 'off' ? arg : 'status' + const normalized = (arg ?? '').trim().toLowerCase() + + const action = + normalized === 'on' || normalized === 'off' || normalized === 'tts' || normalized === 'status' + ? normalized + : 'status' ctx.gateway.rpc('voice.toggle', { action }).then( ctx.guarded(r => { ctx.voice.setVoiceEnabled(!!r.enabled) - ctx.transcript.sys(`voice: ${r.enabled ? 'on' : 'off'}`) + + // Match CLI's _show_voice_status / _enable_voice_mode / + // _toggle_voice_tts output shape so users don't have to learn + // two vocabularies. + if (action === 'status') { + const mode = r.enabled ? 'ON' : 'OFF' + const tts = r.tts ? 'ON' : 'OFF' + ctx.transcript.sys('Voice Mode Status') + ctx.transcript.sys(` Mode: ${mode}`) + ctx.transcript.sys(` TTS: ${tts}`) + ctx.transcript.sys(' Record key: Ctrl+B') + + // CLI's "Requirements:" block — surfaces STT/audio setup issues + // so the user sees "STT provider: MISSING ..." instead of + // silently failing on every Ctrl+B press. 
+ if (r.details) { + ctx.transcript.sys('') + ctx.transcript.sys(' Requirements:') + + for (const line of r.details.split('\n')) { + if (line.trim()) { + ctx.transcript.sys(` ${line}`) + } + } + } + + return + } + + if (action === 'tts') { + ctx.transcript.sys(`Voice TTS ${r.tts ? 'enabled' : 'disabled'}.`) + + return + } + + // on/off — mirror cli.py:_enable_voice_mode's 3-line output + if (r.enabled) { + const tts = r.tts ? ' (TTS enabled)' : '' + ctx.transcript.sys(`Voice mode enabled${tts}`) + ctx.transcript.sys(' Ctrl+B to start/stop recording') + ctx.transcript.sys(' /voice tts to toggle speech output') + ctx.transcript.sys(' /voice off to disable voice mode') + } else { + ctx.transcript.sys('Voice mode disabled.') + } }) ) } diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index 804394bb1..f45cab241 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -19,6 +19,26 @@ const INTERRUPT_COOLDOWN_MS = 1500 const ACTIVITY_LIMIT = 8 const TRAIL_LIMIT = 8 +// Extracts the raw patch from a diff-only segment produced by +// pushInlineDiffSegment. Used at message.complete to dedupe against final +// assistant text that narrates the same patch. Returns null for anything +// else so real assistant narration never gets touched. +const diffSegmentBody = (msg: Msg): null | string => { + if (msg.kind !== 'diff') { + return null + } + + const m = msg.text.match(/^```diff\n([\s\S]*?)\n```$/) + + return m ? m[1]! : null +} + +const insertBeforeFirstDiff = (segments: Msg[], msg: Msg): Msg[] => { + const index = segments.findIndex(segment => segment.kind === 'diff') + + return index < 0 ? 
[...segments, msg] : [...segments.slice(0, index), msg, ...segments.slice(index)] +} + export interface InterruptDeps { appendMessage: (msg: Msg) => void gw: { request: (method: string, params?: Record) => Promise } @@ -40,7 +60,6 @@ class TurnController { bufRef = '' interrupted = false lastStatusNote = '' - pendingInlineDiffs: string[] = [] persistedToolLabels = new Set() persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise protocolWarned = false @@ -79,7 +98,6 @@ class TurnController { this.activeTools = [] this.streamTimer = clear(this.streamTimer) this.bufRef = '' - this.pendingInlineDiffs = [] this.pendingSegmentTools = [] this.segmentMessages = [] @@ -186,18 +204,35 @@ class TurnController { }, REASONING_PULSE_MS) } - queueInlineDiff(diffText: string) { + pushInlineDiffSegment(diffText: string) { // Strip CLI chrome the gateway emits before the unified diff (e.g. a // leading "┊ review diff" header written by `_emit_inline_diff` for the // terminal printer). That header only makes sense as stdout dressing, // not inside a markdown ```diff block. - const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim() + const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim() - if (!text || this.pendingInlineDiffs.includes(text)) { + if (!stripped) { return } - this.pendingInlineDiffs = [...this.pendingInlineDiffs, text] + // Flush any in-progress streaming text as its own segment first, so the + // diff lands BETWEEN the assistant narration that preceded the edit and + // whatever the agent streams afterwards — not glued onto the final + // message. This is the whole point of segment-anchored diffs: the diff + // renders where the edit actually happened. + this.flushStreamingSegment() + + const block = `\`\`\`diff\n${stripped}\n\`\`\`` + + // Skip consecutive duplicates (same tool firing tool.complete twice, or + // two edits producing the same patch). 
Keeping this cheap — deeper + // dedupe against the final assistant text happens at message.complete. + if (this.segmentMessages.at(-1)?.text === block) { + return + } + + this.segmentMessages = [...this.segmentMessages, { kind: 'diff', role: 'assistant', text: block }] + patchTurnState({ streamSegments: this.segmentMessages }) } pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) { @@ -234,7 +269,6 @@ class TurnController { this.idle() this.clearReasoning() this.clearStatusTimer() - this.pendingInlineDiffs = [] this.pendingSegmentTools = [] this.segmentMessages = [] this.turnTools = [] @@ -245,35 +279,49 @@ class TurnController { const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart() const split = splitReasoning(rawText) const finalText = split.text - // Skip appending if the assistant already narrated the diff inside a - // markdown fence of its own — otherwise we render two stacked diff - // blocks for the same edit. - const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText) - - const remainingInlineDiffs = assistantAlreadyHasDiff - ? [] - : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff)) - - const inlineDiffBlock = remainingInlineDiffs.length - ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\`` - : '' - - const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n') const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim() const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n') const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0 const savedToolTokens = this.toolTokenAcc const tools = this.pendingSegmentTools - const finalMessages = [...this.segmentMessages] - if (mergedText) { + // Drop diff-only segments the agent is about to narrate in the final + // reply. 
Without this, a closing "here's the diff …" message would + // render two stacked copies of the same patch. Only touches segments + // with `kind: 'diff'` emitted by pushInlineDiffSegment — real + // assistant narration stays put. + const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText) + + const segments = this.segmentMessages.filter(msg => { + const body = diffSegmentBody(msg) + + return body === null || (!finalHasOwnDiffFence && !finalText.includes(body)) + }) + + const hasDiffSegment = segments.some(msg => msg.kind === 'diff') + const detailsBelongBeforeDiff = hasDiffSegment && (tools.length > 0 || Boolean(savedReasoning)) + const finalMessages = detailsBelongBeforeDiff + ? insertBeforeFirstDiff(segments, { + kind: 'trail', + role: 'system', + text: '', + thinking: savedReasoning || undefined, + thinkingTokens: savedReasoning ? savedReasoningTokens : undefined, + toolTokens: savedToolTokens || undefined, + ...(tools.length && { tools }) + }) + : [...segments] + + if (finalText) { finalMessages.push({ role: 'assistant', - text: mergedText, - thinking: savedReasoning || undefined, - thinkingTokens: savedReasoning ? savedReasoningTokens : undefined, - toolTokens: savedToolTokens || undefined, - ...(tools.length && { tools }) + text: finalText, + ...(!detailsBelongBeforeDiff && { + thinking: savedReasoning || undefined, + thinkingTokens: savedReasoning ? 
savedReasoningTokens : undefined, + toolTokens: savedToolTokens || undefined, + ...(tools.length && { tools }) + }) }) } @@ -300,7 +348,7 @@ class TurnController { this.bufRef = '' patchTurnState({ activity: [], outcome: '' }) - return { finalMessages, finalText: mergedText, wasInterrupted } + return { finalMessages, finalText, wasInterrupted } } recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) { @@ -406,7 +454,6 @@ class TurnController { this.bufRef = '' this.interrupted = false this.lastStatusNote = '' - this.pendingInlineDiffs = [] this.pendingSegmentTools = [] this.protocolWarned = false this.segmentMessages = [] @@ -452,7 +499,6 @@ class TurnController { this.endReasoningPhase() this.clearReasoning() this.activeTools = [] - this.pendingInlineDiffs = [] this.turnTools = [] this.toolTokenAcc = 0 this.persistedToolLabels.clear() diff --git a/ui-tui/src/app/uiStore.ts b/ui-tui/src/app/uiStore.ts index fcf2e5d88..0b3fd9740 100644 --- a/ui-tui/src/app/uiStore.ts +++ b/ui-tui/src/app/uiStore.ts @@ -12,6 +12,7 @@ const buildUiState = (): UiState => ({ detailsMode: 'collapsed', info: null, inlineDiffs: true, + sections: {}, showCost: false, showReasoning: false, sid: null, diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts index 9e7c93ce9..cb98eed81 100644 --- a/ui-tui/src/app/useConfigSync.ts +++ b/ui-tui/src/app/useConfigSync.ts @@ -1,6 +1,6 @@ import { useEffect, useRef } from 'react' -import { resolveDetailsMode } from '../domain/details.js' +import { resolveDetailsMode, resolveSections } from '../domain/details.js' import type { GatewayClient } from '../gatewayClient.js' import type { ConfigFullResponse, @@ -46,6 +46,7 @@ export const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolea compact: !!d.tui_compact, detailsMode: resolveDetailsMode(d), inlineDiffs: d.inline_diffs !== false, + sections: resolveSections(d.sections), showCost: !!d.show_cost, showReasoning: !!d.show_reasoning, 
statusBar: normalizeStatusBar(d.tui_statusbar), diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts index 72cd5b9e5..47fe8a216 100644 --- a/ui-tui/src/app/useInputHandlers.ts +++ b/ui-tui/src/app/useInputHandlers.ts @@ -8,7 +8,7 @@ import type { SudoRespondResponse, VoiceRecordResponse } from '../gatewayTypes.js' -import { isAction, isMac } from '../lib/platform.js' +import { isAction, isMac, isVoiceToggleKey } from '../lib/platform.js' import { getInputSelection } from './inputSelectionStore.js' import type { InputHandlerContext, InputHandlerResult } from './interfaces.js' @@ -134,45 +134,43 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { } } - const voiceStop = () => { - voice.setRecording(false) - voice.setProcessing(true) + // CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop + // (NOT the voice-mode umbrella bit). The mode is enabled via /voice on; + // Ctrl+B while the mode is off sys-nudges the user. While the mode is + // on, the first press starts a continuous loop (gateway → start_continuous, + // VAD auto-stop → transcribe → auto-restart), a subsequent press stops it. + // The gateway publishes voice.status + voice.transcript events that + // createGatewayEventHandler turns into UI badges and composer injection. + const voiceRecordToggle = () => { + if (!voice.enabled) { + return actions.sys('voice: mode is off — enable with /voice on') + } + + const starting = !voice.recording + const action = starting ? 'start' : 'stop' + + // Optimistic UI — flip the REC badge immediately so the user gets + // feedback while the RPC round-trips; the voice.status event is the + // authoritative source and may correct us. 
+ if (starting) { + voice.setRecording(true) + } else { + voice.setRecording(false) + voice.setProcessing(false) + } gateway - .rpc('voice.record', { action: 'stop' }) - .then(r => { - if (!r) { - return + .rpc('voice.record', { action }) + .catch((e: Error) => { + // Revert optimistic UI on failure. + if (starting) { + voice.setRecording(false) } - const transcript = String(r.text || '').trim() - - if (!transcript) { - return actions.sys('voice: no speech detected') - } - - cActions.setInput(prev => (prev ? `${prev}${/\s$/.test(prev) ? '' : ' '}${transcript}` : transcript)) - }) - .catch((e: Error) => actions.sys(`voice error: ${e.message}`)) - .finally(() => { - voice.setProcessing(false) - patchUiState({ status: 'ready' }) + actions.sys(`voice error: ${e.message}`) }) } - const voiceStart = () => - gateway - .rpc('voice.record', { action: 'start' }) - .then(r => { - if (!r) { - return - } - - voice.setRecording(true) - patchUiState({ status: 'recording…' }) - }) - .catch((e: Error) => actions.sys(`voice error: ${e.message}`)) - useInput((ch, key) => { const live = getUiState() @@ -370,8 +368,8 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { return actions.newSession() } - if (isAction(key, ch, 'b')) { - return voice.recording ? 
voiceStop() : voiceStart() + if (isVoiceToggleKey(key, ch)) { + return voiceRecordToggle() } if (isAction(key, ch, 'g')) { diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 41edcc828..d2e5494a9 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -4,6 +4,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { STARTUP_RESUME_ID } from '../config/env.js' import { MAX_HISTORY, WHEEL_SCROLL_STEP } from '../config/limits.js' +import { SECTION_NAMES, sectionMode } from '../domain/details.js' import { attachedImageNotice, imageTokenMeta } from '../domain/messages.js' import { fmtCwdBranch, shortCwd } from '../domain/paths.js' import { type GatewayClient } from '../gatewayClient.js' @@ -454,13 +455,20 @@ export function useMainApp(gw: GatewayClient) { composer: { actions: composerActions, refs: composerRefs, state: composerState }, gateway, terminal: { hasSelection, scrollRef, scrollWithSelection, selection, stdout }, - voice: { recording: voiceRecording, setProcessing: setVoiceProcessing, setRecording: setVoiceRecording }, + voice: { + enabled: voiceEnabled, + recording: voiceRecording, + setProcessing: setVoiceProcessing, + setRecording: setVoiceRecording, + setVoiceEnabled + }, wheelStep: WHEEL_SCROLL_STEP }) const onEvent = useMemo( () => createGatewayEventHandler({ + composer: { setInput: composerActions.setInput }, gateway, session: { STARTUP_RESUME_ID, @@ -470,18 +478,29 @@ export function useMainApp(gw: GatewayClient) { resumeById: session.resumeById, setCatalog }, + submission: { submitRef }, system: { bellOnComplete, stdout, sys }, - transcript: { appendMessage, panel, setHistoryItems } + transcript: { appendMessage, panel, setHistoryItems }, + voice: { + setProcessing: setVoiceProcessing, + setRecording: setVoiceRecording, + setVoiceEnabled + } }), [ appendMessage, bellOnComplete, + composerActions.setInput, gateway, panel, session.newSession, session.resetSession, 
session.resumeById, + setVoiceEnabled, + setVoiceProcessing, + setVoiceRecording, stdout, + submitRef, sys ] ) @@ -612,11 +631,15 @@ export function useMainApp(gw: GatewayClient) { const hasReasoning = Boolean(turn.reasoning.trim()) - const showProgressArea = - ui.detailsMode === 'hidden' - ? turn.activity.some(item => item.tone !== 'info') - : Boolean( - ui.busy || + // Per-section overrides win over the global mode — when every section is + // resolved to hidden, the only thing ToolTrail will surface is the + // floating-alert backstop (errors/warnings). Mirror that so we don't + // render an empty wrapper Box above the streaming area in quiet mode. + const anyPanelVisible = SECTION_NAMES.some(s => sectionMode(s, ui.detailsMode, ui.sections) !== 'hidden') + + const showProgressArea = anyPanelVisible + ? Boolean( + ui.busy || turn.outcome || turn.streamPendingTools.length || turn.streamSegments.length || @@ -625,7 +648,8 @@ export function useMainApp(gw: GatewayClient) { turn.turnTrail.length || hasReasoning || turn.activity.length - ) + ) + : turn.activity.some(item => item.tone !== 'info') const appActions = useMemo( () => ({ @@ -698,7 +722,9 @@ export function useMainApp(gw: GatewayClient) { statusColor: statusColorOf(ui.status, ui.theme.color), stickyPrompt, turnStartedAt: ui.sid ? turnStartedAt : null, - voiceLabel: voiceRecording ? 'REC' : voiceProcessing ? 'STT' : `voice ${voiceEnabled ? 'on' : 'off'}` + // CLI parity: the classic prompt_toolkit status bar shows a red dot + // on REC (cli.py:_get_voice_status_fragments line 2344). + voiceLabel: voiceRecording ? '● REC' : voiceProcessing ? '◉ STT' : `voice ${voiceEnabled ? 
'on' : 'off'}` }), [ cwd, diff --git a/ui-tui/src/app/useSessionLifecycle.ts b/ui-tui/src/app/useSessionLifecycle.ts index acd10135e..baaf3fc3c 100644 --- a/ui-tui/src/app/useSessionLifecycle.ts +++ b/ui-tui/src/app/useSessionLifecycle.ts @@ -142,6 +142,10 @@ export function useSessionLifecycle(opts: UseSessionLifecycleOptions) { sys(`warning: ${info.credential_warning}`) } + if (info?.config_warning) { + sys(`warning: ${info.config_warning}`) + } + if (msg) { sys(msg) } diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx index 8de2a6301..7b697eedc 100644 --- a/ui-tui/src/components/appChrome.tsx +++ b/ui-tui/src/components/appChrome.tsx @@ -215,7 +215,20 @@ export function StatusRule({ ) : null} - {voiceLabel ? │ {voiceLabel} : null} + {voiceLabel ? ( + + {' │ '} + {voiceLabel} + + ) : null} {bgCount > 0 ? │ {bgCount} bg : null} {showCost && typeof usage.cost_usd === 'number' ? ( │ ${usage.cost_usd.toFixed(4)} diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx index 164ef5dd4..d85645175 100644 --- a/ui-tui/src/components/appLayout.tsx +++ b/ui-tui/src/components/appLayout.tsx @@ -8,7 +8,7 @@ import { $isBlocked, $overlayState, patchOverlayState } from '../app/overlayStor import { $uiState } from '../app/uiStore.js' import { PLACEHOLDER } from '../content/placeholders.js' import type { Theme } from '../theme.js' -import type { DetailsMode } from '../types.js' +import type { DetailsMode, SectionVisibility } from '../types.js' import { AgentsOverlay } from './agentsOverlay.js' import { GoodVibesHeart, StatusRule, StickyPromptTracker, TranscriptScrollbar } from './appChrome.js' @@ -25,6 +25,7 @@ const StreamingAssistant = memo(function StreamingAssistant({ compact, detailsMode, progress, + sections, t }: StreamingAssistantProps) { if (!progress.showProgressArea && !progress.showStreamingArea) { @@ -34,7 +35,15 @@ const StreamingAssistant = memo(function StreamingAssistant({ return ( <> 
{progress.streamSegments.map((msg, i) => ( - + ))} {progress.showProgressArea && ( @@ -48,6 +57,7 @@ const StreamingAssistant = memo(function StreamingAssistant({ reasoningActive={progress.reasoningActive} reasoningStreaming={progress.reasoningStreaming} reasoningTokens={progress.reasoningTokens} + sections={sections} subagents={progress.subagents} t={t} tools={progress.tools} @@ -68,6 +78,7 @@ const StreamingAssistant = memo(function StreamingAssistant({ text: progress.streaming, ...(progress.streamPendingTools.length && { tools: progress.streamPendingTools }) }} + sections={sections} t={t} /> )} @@ -78,6 +89,7 @@ const StreamingAssistant = memo(function StreamingAssistant({ compact={compact} detailsMode={detailsMode} msg={{ kind: 'trail', role: 'system', text: '', tools: progress.streamPendingTools }} + sections={sections} t={t} /> )} @@ -115,6 +127,7 @@ const TranscriptPane = memo(function TranscriptPane({ compact={ui.compact} detailsMode={ui.detailsMode} msg={row.msg} + sections={ui.sections} t={ui.theme} /> )} @@ -129,6 +142,7 @@ const TranscriptPane = memo(function TranscriptPane({ compact={ui.compact} detailsMode={ui.detailsMode} progress={progress} + sections={ui.sections} t={ui.theme} /> @@ -185,56 +199,58 @@ const ComposerPane = memo(function ComposerPane({ - {!isBlocked && ( - - + + - {composer.inputBuf.map((line, i) => ( - - - {i === 0 ? `${ui.theme.brand.prompt} ` : ' '} + {!isBlocked && ( + <> + {composer.inputBuf.map((line, i) => ( + + + {i === 0 ? `${ui.theme.brand.prompt} ` : ' '} + + + {line || ' '} + + ))} + + + + {sh ? ( + $ + ) : ( + + {composer.inputBuf.length ? ' ' : `${ui.theme.brand.prompt} `} + + )} - {line || ' '} - - ))} + + {/* subtract NoSelect paddingX={1} (2 cols) + pw so wrap-ansi and cursorLayout agree */} + - - - {sh ? ( - $ - ) : ( - - {composer.inputBuf.length ? 
' ' : `${ui.theme.brand.prompt} `} - - )} - - - - {/* subtract NoSelect paddingX={1} (2 cols) + pw so wrap-ansi and cursorLayout agree */} - - - - + + + - - - )} + + )} + {!composer.empty && !ui.sid && ⚕ {ui.status}} @@ -335,5 +351,6 @@ interface StreamingAssistantProps { compact?: boolean detailsMode: DetailsMode progress: AppLayoutProgressProps + sections?: SectionVisibility t: Theme } diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx index 8d77a49e5..3fc40528a 100644 --- a/ui-tui/src/components/messageLine.tsx +++ b/ui-tui/src/components/messageLine.tsx @@ -1,12 +1,13 @@ import { Ansi, Box, NoSelect, Text } from '@hermes/ink' import { memo } from 'react' +import { sectionMode } from '../domain/details.js' import { LONG_MSG } from '../config/limits.js' import { userDisplay } from '../domain/messages.js' import { ROLE } from '../domain/roles.js' import { compactPreview, hasAnsi, isPasteBackedText, stripAnsi } from '../lib/text.js' import type { Theme } from '../theme.js' -import type { DetailsMode, Msg } from '../types.js' +import type { DetailsMode, Msg, SectionVisibility } from '../types.js' import { Md } from './markdown.js' import { ToolTrail } from './thinking.js' @@ -17,14 +18,35 @@ export const MessageLine = memo(function MessageLine({ detailsMode = 'collapsed', isStreaming = false, msg, + sections, t }: MessageLineProps) { - if (msg.kind === 'trail' && msg.tools?.length) { - return detailsMode === 'hidden' ? null : ( + // Per-section overrides win over the global mode, so resolve each section + // we might consume here once and gate visibility on the *content-bearing* + // sections only — never on the global mode. A `trail` message feeds Tool + // calls + Activity; an assistant message with thinking/tools metadata + // feeds Thinking + Tool calls. 
Gating on every section would let + // `thinking` (expanded by default) keep an empty wrapper alive when only + // `tools` is hidden — exactly the empty-Box bug Copilot caught. + const thinkingMode = sectionMode('thinking', detailsMode, sections) + const toolsMode = sectionMode('tools', detailsMode, sections) + const activityMode = sectionMode('activity', detailsMode, sections) + const thinking = msg.thinking?.trim() ?? '' + + if (msg.kind === 'trail' && (msg.tools?.length || thinking)) { + return thinkingMode !== 'hidden' || toolsMode !== 'hidden' || activityMode !== 'hidden' ? ( - + - ) + ) : null } if (msg.role === 'tool') { @@ -48,8 +70,10 @@ export const MessageLine = memo(function MessageLine({ } const { body, glyph, prefix } = ROLE[msg.role](t) - const thinking = msg.thinking?.trim() ?? '' - const showDetails = detailsMode !== 'hidden' && (Boolean(msg.tools?.length) || Boolean(thinking)) + + const showDetails = + (toolsMode !== 'hidden' && Boolean(msg.tools?.length)) || + (thinkingMode !== 'hidden' && Boolean(thinking)) const content = (() => { if (msg.kind === 'slash') { @@ -81,11 +105,16 @@ export const MessageLine = memo(function MessageLine({ return {msg.text} })() + // Diff segments (emitted by pushInlineDiffSegment between narration + // segments) need a blank line on both sides so the patch doesn't butt up + // against the prose around it. 
+ const isDiffSegment = msg.kind === 'diff' + return ( {showDetails && ( @@ -93,6 +122,7 @@ export const MessageLine = memo(function MessageLine({ detailsMode={detailsMode} reasoning={thinking} reasoningTokens={msg.thinkingTokens} + sections={sections} t={t} toolTokens={msg.toolTokens} trail={msg.tools} @@ -119,5 +149,6 @@ interface MessageLineProps { detailsMode?: DetailsMode isStreaming?: boolean msg: Msg + sections?: SectionVisibility t: Theme } diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx index e91143c00..394c3c67a 100644 --- a/ui-tui/src/components/textInput.tsx +++ b/ui-tui/src/components/textInput.tsx @@ -623,7 +623,19 @@ export function TextInput({ return } - if ((k.ctrl && inp === 'c') || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) { + // Ctrl+B is the documented voice-recording toggle (see platform.ts → + // isVoiceToggleKey). Pass it through so the app-level handler in + // useInputHandlers receives it instead of being swallowed here as + // either backward-word nav (line below) or a literal 'b' insertion. 
+ if ( + (k.ctrl && inp === 'c') || + (k.ctrl && inp === 'b') || + k.tab || + (k.shift && k.tab) || + k.pageUp || + k.pageDown || + k.escape + ) { return } diff --git a/ui-tui/src/components/thinking.tsx b/ui-tui/src/components/thinking.tsx index a59cdc41d..e2cfc4766 100644 --- a/ui-tui/src/components/thinking.tsx +++ b/ui-tui/src/components/thinking.tsx @@ -1,8 +1,9 @@ import { Box, NoSelect, Text } from '@hermes/ink' -import { memo, type ReactNode, useEffect, useMemo, useState } from 'react' +import { memo, useEffect, useMemo, useState, type ReactNode } from 'react' import spinners, { type BrailleSpinnerName } from 'unicode-animations' import { THINKING_COT_MAX } from '../config/limits.js' +import { sectionMode } from '../domain/details.js' import { buildSubagentTree, fmtCost, @@ -25,7 +26,15 @@ import { toolTrailLabel } from '../lib/text.js' import type { Theme } from '../theme.js' -import type { ActiveTool, ActivityItem, DetailsMode, SubagentNode, SubagentProgress, ThinkingMode } from '../types.js' +import type { + ActiveTool, + ActivityItem, + DetailsMode, + SectionVisibility, + SubagentNode, + SubagentProgress, + ThinkingMode +} from '../types.js' const THINK: BrailleSpinnerName[] = ['helix', 'breathe', 'orbit', 'dna', 'waverows', 'snake', 'pulse'] const TOOL: BrailleSpinnerName[] = ['cascade', 'scan', 'diagswipe', 'fillsweep', 'rain', 'columns', 'sparkle'] @@ -383,6 +392,9 @@ function SubagentAccordion({ const hasTools = item.tools.length > 0 const noteRows = [...(summary ? [summary] : []), ...item.notes] const hasNotes = noteRows.length > 0 + // `showChildren` only seeds the recursive `expanded` prop for nested + // subagents — it MUST NOT be OR-ed into the local section toggles, or + // expand-all permanently locks the inner chevrons open. const showChildren = expanded || deep const noteColor = statusTone === 'error' ? t.color.error : statusTone === 'warn' ? 
t.color.warn : t.color.dim @@ -405,13 +417,13 @@ function SubagentAccordion({ setOpenThinking(v => !v) } }} - open={showChildren || openThinking} + open={openThinking} t={t} title="Thinking" /> ), key: 'thinking', - open: showChildren || openThinking, + open: openThinking, render: childRails => ( !v) } }} - open={showChildren || openTools} + open={openTools} t={t} title="Tool calls" /> ), key: 'tools', - open: showChildren || openTools, + open: openTools, render: childRails => ( {item.tools.map((line, index) => ( @@ -479,14 +491,14 @@ function SubagentAccordion({ setOpenNotes(v => !v) } }} - open={showChildren || openNotes} + open={openNotes} t={t} title="Progress" tone={statusTone} /> ), key: 'notes', - open: showChildren || openNotes, + open: openNotes, render: childRails => ( {noteRows.map((line, index) => ( @@ -519,14 +531,14 @@ function SubagentAccordion({ setOpenKids(v => !v) } }} - open={showChildren || openKids} + open={openKids} suffix={`d${item.depth + 1} · ${aggregate.descendantCount} total`} t={t} title="Spawned" /> ), key: 'subagents', - open: showChildren || openKids, + open: openKids, render: childRails => ( {children.map((child, i) => ( @@ -675,6 +687,7 @@ export const ToolTrail = memo(function ToolTrail({ reasoning = '', reasoningTokens, reasoningStreaming = false, + sections, subagents = [], t, tools = [], @@ -689,6 +702,7 @@ export const ToolTrail = memo(function ToolTrail({ reasoning?: string reasoningTokens?: number reasoningStreaming?: boolean + sections?: SectionVisibility subagents?: SubagentProgress[] t: Theme tools?: ActiveTool[] @@ -696,38 +710,46 @@ export const ToolTrail = memo(function ToolTrail({ trail?: string[] activity?: ActivityItem[] }) { + const visible = useMemo( + () => ({ + thinking: sectionMode('thinking', detailsMode, sections), + tools: sectionMode('tools', detailsMode, sections), + subagents: sectionMode('subagents', detailsMode, sections), + activity: sectionMode('activity', detailsMode, sections) + }), + [detailsMode, 
sections] + ) + const [now, setNow] = useState(() => Date.now()) - const [openThinking, setOpenThinking] = useState(false) - const [openTools, setOpenTools] = useState(false) - const [openSubagents, setOpenSubagents] = useState(false) - const [deepSubagents, setDeepSubagents] = useState(false) - const [openMeta, setOpenMeta] = useState(false) + // Local toggles own the open state once mounted. Init from the resolved + // section visibility so default-expanded sections (thinking/tools) render + // open on first paint; the useEffect below re-syncs when the user mutates + // visibility at runtime via /details. NEVER OR these against + // `visible.X === 'expanded'` at render time — that locks the panel open + // and silently breaks manual chevron clicks for default-expanded + // sections (regression caught after #14968). + const [openThinking, setOpenThinking] = useState(visible.thinking === 'expanded') + const [openTools, setOpenTools] = useState(visible.tools === 'expanded') + const [openSubagents, setOpenSubagents] = useState(visible.subagents === 'expanded') + const [deepSubagents, setDeepSubagents] = useState(visible.subagents === 'expanded') + const [openMeta, setOpenMeta] = useState(visible.activity === 'expanded') useEffect(() => { - if (!tools.length || (detailsMode === 'collapsed' && !openTools)) { + if (!tools.length || (visible.tools !== 'expanded' && !openTools)) { return } const id = setInterval(() => setNow(Date.now()), 500) return () => clearInterval(id) - }, [detailsMode, openTools, tools.length]) + }, [openTools, tools.length, visible.tools]) useEffect(() => { - if (detailsMode === 'expanded') { - setOpenThinking(true) - setOpenTools(true) - setOpenSubagents(true) - setOpenMeta(true) - } - - if (detailsMode === 'hidden') { - setOpenThinking(false) - setOpenTools(false) - setOpenSubagents(false) - setOpenMeta(false) - } - }, [detailsMode]) + setOpenThinking(visible.thinking === 'expanded') + setOpenTools(visible.tools === 'expanded') + 
setOpenSubagents(visible.subagents === 'expanded') + setOpenMeta(visible.activity === 'expanded') + }, [visible]) const cot = useMemo(() => thinkingPreview(reasoning, 'full', THINKING_COT_MAX), [reasoning]) @@ -862,9 +884,22 @@ export const ToolTrail = memo(function ToolTrail({ const delegateGroups = groups.filter(g => g.label.startsWith('Delegate Task')) const inlineDelegateKey = hasSubagents && delegateGroups.length === 1 ? delegateGroups[0]!.key : null - // ── Hidden: errors/warnings only ────────────────────────────── + // ── Backstop: floating alerts when every panel is hidden ───────── + // + // Per-section overrides win over the global details_mode (they're computed + // by sectionMode), so we only collapse to nothing when EVERY section is + // resolved to hidden — that way `details_mode: hidden` + `sections.tools: + // expanded` still renders the tools panel. When all panels are hidden + // AND ambient errors/warnings exist, surface them as a compact inline + // backstop so quiet-mode users aren't blind to failures. - if (detailsMode === 'hidden') { + const allHidden = + visible.thinking === 'hidden' && + visible.tools === 'hidden' && + visible.subagents === 'hidden' && + visible.activity === 'hidden' + + if (allHidden) { const alerts = activity.filter(i => i.tone !== 'info').slice(-2) return alerts.length ? ( @@ -879,13 +914,18 @@ export const ToolTrail = memo(function ToolTrail({ } // ── Tree render fragments ────────────────────────────────────── + // + // Shift+click on any chevron expands every NON-hidden section at once — + // hidden sections stay hidden so the override is honoured. 
const expandAll = () => { - setOpenThinking(true) - setOpenTools(true) - setOpenSubagents(true) - setDeepSubagents(true) - setOpenMeta(true) + if (visible.thinking !== 'hidden') setOpenThinking(true) + if (visible.tools !== 'hidden') setOpenTools(true) + if (visible.subagents !== 'hidden') { + setOpenSubagents(true) + setDeepSubagents(true) + } + if (visible.activity !== 'hidden') setOpenMeta(true) } const metaTone: 'dim' | 'error' | 'warn' = activity.some(i => i.tone === 'error') @@ -899,7 +939,7 @@ export const ToolTrail = memo(function ToolTrail({ {spawnTree.map((node, index) => ( ) - const sections: { + const panels: { header: ReactNode key: string open: boolean render: (rails: boolean[]) => ReactNode }[] = [] - if (hasThinking) { - sections.push({ + if (hasThinking && visible.thinking !== 'hidden') { + panels.push({ header: ( { @@ -930,7 +970,7 @@ export const ToolTrail = memo(function ToolTrail({ }} > - {detailsMode === 'expanded' || openThinking ? '▾ ' : '▸ '} + {openThinking ? '▾ ' : '▸ '} {thinkingLive ? ( Thinking @@ -950,7 +990,7 @@ export const ToolTrail = memo(function ToolTrail({ ), key: 'thinking', - open: detailsMode === 'expanded' || openThinking, + open: openThinking, render: rails => ( !v) } }} - open={detailsMode === 'expanded' || openTools} + open={openTools} suffix={toolTokensLabel} t={t} title="Tool calls" /> ), key: 'tools', - open: detailsMode === 'expanded' || openTools, + open: openTools, render: rails => ( {groups.map((group, index) => { @@ -1024,12 +1064,12 @@ export const ToolTrail = memo(function ToolTrail({ }) } - if (hasSubagents && !inlineDelegateKey) { + if (hasSubagents && !inlineDelegateKey && visible.subagents !== 'hidden') { // Spark + summary give a one-line read on the branch shape before // opening the subtree. `/agents` opens the full-screen audit overlay. const suffix = spawnSpark ? 
`${spawnSummaryLabel} ${spawnSpark} (/agents)` : `${spawnSummaryLabel} (/agents)` - sections.push({ + panels.push({ header: ( ), key: 'subagents', - open: detailsMode === 'expanded' || openSubagents, + open: openSubagents, render: renderSubagentList }) } - if (hasMeta) { - sections.push({ + if (hasMeta && visible.activity !== 'hidden') { + panels.push({ header: ( !v) } }} - open={detailsMode === 'expanded' || openMeta} + open={openMeta} t={t} title="Activity" tone={metaTone} /> ), key: 'meta', - open: detailsMode === 'expanded' || openMeta, + open: openMeta, render: rails => ( {meta.map((row, index) => ( @@ -1092,19 +1132,19 @@ export const ToolTrail = memo(function ToolTrail({ }) } - const topCount = sections.length + (totalTokensLabel ? 1 : 0) + const topCount = panels.length + (totalTokensLabel ? 1 : 0) return ( - {sections.map((section, index) => ( + {panels.map((panel, index) => ( - {section.render} + {panel.render} ))} {totalTokensLabel ? ( diff --git a/ui-tui/src/domain/details.ts b/ui-tui/src/domain/details.ts index fa01092f5..079b08ea7 100644 --- a/ui-tui/src/domain/details.ts +++ b/ui-tui/src/domain/details.ts @@ -1,26 +1,65 @@ -import type { DetailsMode } from '../types.js' +import type { DetailsMode, SectionName, SectionVisibility } from '../types.js' const MODES = ['hidden', 'collapsed', 'expanded'] as const +export const SECTION_NAMES = ['thinking', 'tools', 'subagents', 'activity'] as const + +// Out-of-the-box per-section defaults — applied when the user hasn't pinned +// an explicit override and layered ABOVE the global details_mode: +// +// - thinking / tools: expanded — stream open so the turn reads like a +// live transcript (reasoning + tool calls side by side) instead of a +// wall of chevrons the user has to click every turn. +// - activity: hidden — ambient meta (gateway hints, terminal-parity +// nudges, background notifications) is noise for typical use. 
Tool +// failures still render inline on the failing tool row, and ambient +// errors/warnings surface via the floating-alert backstop when every +// panel resolves to hidden. +// - subagents: not set — falls through to the global details_mode so +// Spawn trees stay under a chevron until a delegation actually happens. +// +// Opt out of any of these with `display.sections.` in config.yaml +// or at runtime via `/details collapsed|hidden`. +const SECTION_DEFAULTS: SectionVisibility = { + thinking: 'expanded', + tools: 'expanded', + activity: 'hidden' +} + const THINKING_FALLBACK: Record = { collapsed: 'collapsed', full: 'expanded', truncated: 'collapsed' } -export const parseDetailsMode = (v: unknown): DetailsMode | null => { - const s = typeof v === 'string' ? v.trim().toLowerCase() : '' +const norm = (v: unknown) => + String(v ?? '') + .trim() + .toLowerCase() - return MODES.find(m => m === s) ?? null -} +export const parseDetailsMode = (v: unknown): DetailsMode | null => MODES.find(m => m === norm(v)) ?? null + +export const isSectionName = (v: unknown): v is SectionName => + typeof v === 'string' && (SECTION_NAMES as readonly string[]).includes(v) export const resolveDetailsMode = (d?: { details_mode?: unknown; thinking_mode?: unknown } | null): DetailsMode => - parseDetailsMode(d?.details_mode) ?? - THINKING_FALLBACK[ - String(d?.thinking_mode ?? '') - .trim() - .toLowerCase() - ] ?? - 'collapsed' + parseDetailsMode(d?.details_mode) ?? THINKING_FALLBACK[norm(d?.thinking_mode)] ?? 'collapsed' + +// Build SectionVisibility from a free-form blob. Unknown section names and +// invalid modes are dropped silently — partial overrides are intentional, so +// missing keys fall through to SECTION_DEFAULTS / global at lookup time. +export const resolveSections = (raw: unknown): SectionVisibility => + raw && typeof raw === 'object' && !Array.isArray(raw) + ? 
(Object.fromEntries( + Object.entries(raw as Record) + .map(([k, v]) => [k, parseDetailsMode(v)] as const) + .filter(([k, m]) => !!m && isSectionName(k)) + ) as SectionVisibility) + : {} + +// Effective mode for one section: explicit override → SECTION_DEFAULTS → global. +// Single source of truth for "is this section open by default / rendered at all". +export const sectionMode = (name: SectionName, global: DetailsMode, sections?: SectionVisibility): DetailsMode => + sections?.[name] ?? SECTION_DEFAULTS[name] ?? global export const nextDetailsMode = (m: DetailsMode): DetailsMode => MODES[(MODES.indexOf(m) + 1) % MODES.length]! diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index 1dc8ea5be..91fced32a 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -55,6 +55,7 @@ export interface ConfigDisplayConfig { bell_on_complete?: boolean details_mode?: string inline_diffs?: boolean + sections?: Record show_cost?: boolean show_reasoning?: boolean streaming?: boolean @@ -92,7 +93,7 @@ export interface SetupStatusResponse { // ── Session lifecycle ──────────────────────────────────────────────── export interface SessionCreateResponse { - info?: SessionInfo & { credential_warning?: string } + info?: SessionInfo & { config_warning?: string; credential_warning?: string } session_id: string } @@ -236,10 +237,16 @@ export interface ImageAttachResponse { // ── Voice ──────────────────────────────────────────────────────────── export interface VoiceToggleResponse { + audio_available?: boolean + available?: boolean + details?: string enabled?: boolean + stt_available?: boolean + tts?: boolean } export interface VoiceRecordResponse { + status?: string text?: string } @@ -368,6 +375,8 @@ export type GatewayEvent = | { payload?: { text?: string }; session_id?: string; type: 'thinking.delta' } | { payload?: undefined; session_id?: string; type: 'message.start' } | { payload?: { kind?: string; text?: string }; session_id?: string; type: 
'status.update' } + | { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' } + | { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' } | { payload: { line: string }; session_id?: string; type: 'gateway.stderr' } | { payload?: { cwd?: string; python?: string }; session_id?: string; type: 'gateway.start_timeout' } | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' } diff --git a/ui-tui/src/lib/platform.ts b/ui-tui/src/lib/platform.ts index ab694baaf..9e85da16f 100644 --- a/ui-tui/src/lib/platform.ts +++ b/ui-tui/src/lib/platform.ts @@ -33,3 +33,17 @@ export const isMacActionFallback = ( /** Match action-modifier + a single character (case-insensitive). */ export const isAction = (key: { ctrl: boolean; meta: boolean; super?: boolean }, ch: string, target: string): boolean => isActionMod(key) && ch.toLowerCase() === target + +/** + * Voice recording toggle key (Ctrl+B). + * + * Documented as "Ctrl+B" everywhere: tips.py, config.yaml's voice.record_key + * default, and the Python CLI prompt_toolkit handler. We accept raw Ctrl+B on + * every platform so the TUI matches those docs. On macOS we additionally + * accept Cmd+B (the platform action modifier) so existing macOS muscle memory + * keeps working. 
+ */ +export const isVoiceToggleKey = ( + key: { ctrl: boolean; meta: boolean; super?: boolean }, + ch: string +): boolean => (key.ctrl || isActionMod(key)) && ch.toLowerCase() === 'b' diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index 63d6c6d4f..3fdb39b82 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -102,7 +102,7 @@ export interface ClarifyReq { export interface Msg { info?: SessionInfo - kind?: 'intro' | 'panel' | 'slash' | 'trail' + kind?: 'diff' | 'intro' | 'panel' | 'slash' | 'trail' panelData?: PanelData role: Role text: string @@ -116,6 +116,14 @@ export type Role = 'assistant' | 'system' | 'tool' | 'user' export type DetailsMode = 'hidden' | 'collapsed' | 'expanded' export type ThinkingMode = 'collapsed' | 'truncated' | 'full' +// Per-section overrides for the agent details accordion. Resolution order +// at lookup time is: explicit `display.sections.` → built-in +// SECTION_DEFAULTS → global `details_mode`. Today the built-in defaults +// expand `thinking`/`tools` and hide `activity`; `subagents` falls through +// to the global mode. Any explicit value still wins for that one section. +export type SectionName = 'thinking' | 'tools' | 'subagents' | 'activity' +export type SectionVisibility = Partial> + export interface McpServerStatus { connected: boolean name: string diff --git a/uv.lock b/uv.lock index 080aefeb1..dfb2f786b 100644 --- a/uv.lock +++ b/uv.lock @@ -9,7 +9,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-04-16T11:49:00.318115Z" +exclude-newer = "2026-04-17T16:49:45.944715922Z" exclude-newer-span = "P7D" [[package]] @@ -1870,7 +1870,7 @@ wheels = [ [[package]] name = "hermes-agent" -version = "0.10.0" +version = "0.11.0" source = { editable = "." 
} dependencies = [ { name = "anthropic" }, diff --git a/web/package-lock.json b/web/package-lock.json index c522d8ba0..436b17bb7 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -8,10 +8,15 @@ "name": "web", "version": "0.0.0", "dependencies": { - "@nous-research/ui": "^0.3.0", + "@nous-research/ui": "^0.4.0", "@observablehq/plot": "^0.6.17", "@react-three/fiber": "^9.6.0", "@tailwindcss/vite": "^4.2.1", + "@xterm/addon-fit": "^0.11.0", + "@xterm/addon-unicode11": "^0.9.0", + "@xterm/addon-web-links": "^0.12.0", + "@xterm/addon-webgl": "^0.19.0", + "@xterm/xterm": "^6.0.0", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "gsap": "^3.15.0", @@ -70,6 +75,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -212,23 +218,23 @@ } }, "node_modules/@babel/helpers": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.28.6.tgz", - "integrity": "sha512-xOBvwq86HHdB7WUDTfKfT/Vuxh7gElQ+Sfti2Cy6yIWNW05P8iUslOVcZ4/sKbE+/jQaukQAdz/gf3724kYdqw==", + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.29.2.tgz", + "integrity": "sha512-HoGuUs4sCZNezVEKdVcwqmZN8GoHirLUcLaYVNBK2J0DadGtdcqgr3BCbvH8+XUo4NGjNl3VOtSjEKNzqfFgKw==", "dev": true, "license": "MIT", "dependencies": { "@babel/template": "^7.28.6", - "@babel/types": "^7.28.6" + "@babel/types": "^7.29.0" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/parser": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.0.tgz", - "integrity": "sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==", + "version": "7.29.2", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.2.tgz", + "integrity": 
"sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==", "dev": true, "license": "MIT", "dependencies": { @@ -331,9 +337,9 @@ } }, "node_modules/@esbuild/aix-ppc64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.4.tgz", - "integrity": "sha512-cQPwL2mp2nSmHHJlCyoXgHGhbEPMrEEU5xhkcy3Hs/O7nGZqEpZ2sUtLaL9MORLtDfRvVl2/3PAuEkYZH0Ty8Q==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", "cpu": [ "ppc64" ], @@ -347,9 +353,9 @@ } }, "node_modules/@esbuild/android-arm": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.4.tgz", - "integrity": "sha512-X9bUgvxiC8CHAGKYufLIHGXPJWnr0OCdR0anD2e21vdvgCI8lIfqFbnoeOz7lBjdrAGUhqLZLcQo6MLhTO2DKQ==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", "cpu": [ "arm" ], @@ -363,9 +369,9 @@ } }, "node_modules/@esbuild/android-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.4.tgz", - "integrity": "sha512-gdLscB7v75wRfu7QSm/zg6Rx29VLdy9eTr2t44sfTW7CxwAtQghZ4ZnqHk3/ogz7xao0QAgrkradbBzcqFPasw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", "cpu": [ "arm64" ], @@ -379,9 +385,9 @@ } }, "node_modules/@esbuild/android-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.4.tgz", - "integrity": 
"sha512-PzPFnBNVF292sfpfhiyiXCGSn9HZg5BcAz+ivBuSsl6Rk4ga1oEXAamhOXRFyMcjwr2DVtm40G65N3GLeH1Lvw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", "cpu": [ "x64" ], @@ -395,9 +401,9 @@ } }, "node_modules/@esbuild/darwin-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.4.tgz", - "integrity": "sha512-b7xaGIwdJlht8ZFCvMkpDN6uiSmnxxK56N2GDTMYPr2/gzvfdQN8rTfBsvVKmIVY/X7EM+/hJKEIbbHs9oA4tQ==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", "cpu": [ "arm64" ], @@ -411,9 +417,9 @@ } }, "node_modules/@esbuild/darwin-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.4.tgz", - "integrity": "sha512-sR+OiKLwd15nmCdqpXMnuJ9W2kpy0KigzqScqHI3Hqwr7IXxBp3Yva+yJwoqh7rE8V77tdoheRYataNKL4QrPw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", "cpu": [ "x64" ], @@ -427,9 +433,9 @@ } }, "node_modules/@esbuild/freebsd-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.4.tgz", - "integrity": "sha512-jnfpKe+p79tCnm4GVav68A7tUFeKQwQyLgESwEAUzyxk/TJr4QdGog9sqWNcUbr/bZt/O/HXouspuQDd9JxFSw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", "cpu": [ "arm64" ], @@ -443,9 +449,9 @@ } }, 
"node_modules/@esbuild/freebsd-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.4.tgz", - "integrity": "sha512-2kb4ceA/CpfUrIcTUl1wrP/9ad9Atrp5J94Lq69w7UwOMolPIGrfLSvAKJp0RTvkPPyn6CIWrNy13kyLikZRZQ==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", "cpu": [ "x64" ], @@ -459,9 +465,9 @@ } }, "node_modules/@esbuild/linux-arm": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.4.tgz", - "integrity": "sha512-aBYgcIxX/wd5n2ys0yESGeYMGF+pv6g0DhZr3G1ZG4jMfruU9Tl1i2Z+Wnj9/KjGz1lTLCcorqE2viePZqj4Eg==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", "cpu": [ "arm" ], @@ -475,9 +481,9 @@ } }, "node_modules/@esbuild/linux-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.4.tgz", - "integrity": "sha512-7nQOttdzVGth1iz57kxg9uCz57dxQLHWxopL6mYuYthohPKEK0vU0C3O21CcBK6KDlkYVcnDXY099HcCDXd9dA==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", "cpu": [ "arm64" ], @@ -491,9 +497,9 @@ } }, "node_modules/@esbuild/linux-ia32": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.4.tgz", - "integrity": "sha512-oPtixtAIzgvzYcKBQM/qZ3R+9TEUd1aNJQu0HhGyqtx6oS7qTpvjheIWBbes4+qu1bNlo2V4cbkISr8q6gRBFA==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": 
"sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", "cpu": [ "ia32" ], @@ -507,9 +513,9 @@ } }, "node_modules/@esbuild/linux-loong64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.4.tgz", - "integrity": "sha512-8mL/vh8qeCoRcFH2nM8wm5uJP+ZcVYGGayMavi8GmRJjuI3g1v6Z7Ni0JJKAJW+m0EtUuARb6Lmp4hMjzCBWzA==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", "cpu": [ "loong64" ], @@ -523,9 +529,9 @@ } }, "node_modules/@esbuild/linux-mips64el": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.4.tgz", - "integrity": "sha512-1RdrWFFiiLIW7LQq9Q2NES+HiD4NyT8Itj9AUeCl0IVCA459WnPhREKgwrpaIfTOe+/2rdntisegiPWn/r/aAw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", "cpu": [ "mips64el" ], @@ -539,9 +545,9 @@ } }, "node_modules/@esbuild/linux-ppc64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.4.tgz", - "integrity": "sha512-tLCwNG47l3sd9lpfyx9LAGEGItCUeRCWeAx6x2Jmbav65nAwoPXfewtAdtbtit/pJFLUWOhpv0FpS6GQAmPrHA==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", "cpu": [ "ppc64" ], @@ -555,9 +561,9 @@ } }, "node_modules/@esbuild/linux-riscv64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.4.tgz", - "integrity": 
"sha512-BnASypppbUWyqjd1KIpU4AUBiIhVr6YlHx/cnPgqEkNoVOhHg+YiSVxM1RLfiy4t9cAulbRGTNCKOcqHrEQLIw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", "cpu": [ "riscv64" ], @@ -571,9 +577,9 @@ } }, "node_modules/@esbuild/linux-s390x": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.4.tgz", - "integrity": "sha512-+eUqgb/Z7vxVLezG8bVB9SfBie89gMueS+I0xYh2tJdw3vqA/0ImZJ2ROeWwVJN59ihBeZ7Tu92dF/5dy5FttA==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", "cpu": [ "s390x" ], @@ -587,9 +593,9 @@ } }, "node_modules/@esbuild/linux-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.4.tgz", - "integrity": "sha512-S5qOXrKV8BQEzJPVxAwnryi2+Iq5pB40gTEIT69BQONqR7JH1EPIcQ/Uiv9mCnn05jff9umq/5nqzxlqTOg9NA==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", "cpu": [ "x64" ], @@ -603,9 +609,9 @@ } }, "node_modules/@esbuild/netbsd-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.4.tgz", - "integrity": "sha512-xHT8X4sb0GS8qTqiwzHqpY00C95DPAq7nAwX35Ie/s+LO9830hrMd3oX0ZMKLvy7vsonee73x0lmcdOVXFzd6Q==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", "cpu": [ "arm64" ], @@ -619,9 +625,9 @@ } }, 
"node_modules/@esbuild/netbsd-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.4.tgz", - "integrity": "sha512-RugOvOdXfdyi5Tyv40kgQnI0byv66BFgAqjdgtAKqHoZTbTF2QqfQrFwa7cHEORJf6X2ht+l9ABLMP0dnKYsgg==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", "cpu": [ "x64" ], @@ -635,9 +641,9 @@ } }, "node_modules/@esbuild/openbsd-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.4.tgz", - "integrity": "sha512-2MyL3IAaTX+1/qP0O1SwskwcwCoOI4kV2IBX1xYnDDqthmq5ArrW94qSIKCAuRraMgPOmG0RDTA74mzYNQA9ow==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", "cpu": [ "arm64" ], @@ -651,9 +657,9 @@ } }, "node_modules/@esbuild/openbsd-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.4.tgz", - "integrity": "sha512-u8fg/jQ5aQDfsnIV6+KwLOf1CmJnfu1ShpwqdwC0uA7ZPwFws55Ngc12vBdeUdnuWoQYx/SOQLGDcdlfXhYmXQ==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", "cpu": [ "x64" ], @@ -667,9 +673,9 @@ } }, "node_modules/@esbuild/openharmony-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.4.tgz", - "integrity": "sha512-JkTZrl6VbyO8lDQO3yv26nNr2RM2yZzNrNHEsj9bm6dOwwu9OYN28CjzZkH57bh4w0I2F7IodpQvUAEd1mbWXg==", + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", "cpu": [ "arm64" ], @@ -683,9 +689,9 @@ } }, "node_modules/@esbuild/sunos-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.4.tgz", - "integrity": "sha512-/gOzgaewZJfeJTlsWhvUEmUG4tWEY2Spp5M20INYRg2ZKl9QPO3QEEgPeRtLjEWSW8FilRNacPOg8R1uaYkA6g==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", "cpu": [ "x64" ], @@ -699,9 +705,9 @@ } }, "node_modules/@esbuild/win32-arm64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.4.tgz", - "integrity": "sha512-Z9SExBg2y32smoDQdf1HRwHRt6vAHLXcxD2uGgO/v2jK7Y718Ix4ndsbNMU/+1Qiem9OiOdaqitioZwxivhXYg==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", "cpu": [ "arm64" ], @@ -715,9 +721,9 @@ } }, "node_modules/@esbuild/win32-ia32": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.4.tgz", - "integrity": "sha512-DAyGLS0Jz5G5iixEbMHi5KdiApqHBWMGzTtMiJ72ZOLhbu/bzxgAe8Ue8CTS3n3HbIUHQz/L51yMdGMeoxXNJw==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", "cpu": [ "ia32" ], @@ -731,9 +737,9 @@ } }, "node_modules/@esbuild/win32-x64": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.4.tgz", - 
"integrity": "sha512-+knoa0BDoeXgkNvvV1vvbZX4+hizelrkwmGJBdT17t8FNPwG2lKemmuMZlmaNQ3ws3DKKCxpb4zRZEIp3UxFCg==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", "cpu": [ "x64" ], @@ -942,29 +948,43 @@ "license": "MIT" }, "node_modules/@humanfs/core": { - "version": "0.19.1", - "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", - "integrity": "sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==", + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.2.tgz", + "integrity": "sha512-UhXNm+CFMWcbChXywFwkmhqjs3PRCmcSa/hfBgLIb7oQ5HNb1wS0icWsGtSAUNgefHeI+eBrA8I1fxmbHsGdvA==", "dev": true, "license": "Apache-2.0", + "dependencies": { + "@humanfs/types": "^0.15.0" + }, "engines": { "node": ">=18.18.0" } }, "node_modules/@humanfs/node": { - "version": "0.16.7", - "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.7.tgz", - "integrity": "sha512-/zUx+yOsIrG4Y43Eh2peDeKCxlRt/gET6aHfaKpuq267qXdYDFViVHfMaLyygZOnl0kGWxFIgsBy8QFuTLUXEQ==", + "version": "0.16.8", + "resolved": "https://registry.npmjs.org/@humanfs/node/-/node-0.16.8.tgz", + "integrity": "sha512-gE1eQNZ3R++kTzFUpdGlpmy8kDZD/MLyHqDwqjkVQI0JMdI1D51sy1H958PNXYkM2rAac7e5/CnIKZrHtPh3BQ==", "dev": true, "license": "Apache-2.0", "dependencies": { - "@humanfs/core": "^0.19.1", + "@humanfs/core": "^0.19.2", + "@humanfs/types": "^0.15.0", "@humanwhocodes/retry": "^0.4.0" }, "engines": { "node": ">=18.18.0" } }, + "node_modules/@humanfs/types": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@humanfs/types/-/types-0.15.0.tgz", + "integrity": "sha512-ZZ1w0aoQkwuUuC7Yf+7sdeaNfqQiiLcSRbfI08oAxqLtpXQr9AIVX7Ay7HLDuiLYAaFPu8oBYNq/QIi9URHJ3Q==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18.0" + } + 
}, "node_modules/@humanwhocodes/module-importer": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", @@ -1058,9 +1078,9 @@ } }, "node_modules/@nous-research/ui": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.3.0.tgz", - "integrity": "sha512-konGgtV9lkzqYkWuoUGnROqavq1svTnGbERLKItvEXmsRz4xRtbAMHI8rK6sjGpHDpwvOUN3olcOhRLTGuVfcA==", + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.4.0.tgz", + "integrity": "sha512-wA9YImWLFjx3yWsb3TsquwG9VKZunupdovkOjnRboFjNAb3Jcf57o67xWafEPEm3VX6k6RP/+Y9zHWX0PUtZ4w==", "license": "MIT", "dependencies": { "@nanostores/react": "^1.0.0", @@ -1103,6 +1123,7 @@ "resolved": "https://registry.npmjs.org/@observablehq/plot/-/plot-0.6.17.tgz", "integrity": "sha512-/qaXP/7mc4MUS0s4cPPFASDRjtsWp85/TbfsciqDgU1HwYixbSbbytNuInD8AcTYC3xaxACgVX06agdfQy9W+g==", "license": "ISC", + "peer": true, "dependencies": { "d3": "^7.9.0", "interval-tree-1d": "^1.0.0", @@ -1755,6 +1776,7 @@ "resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.0.tgz", "integrity": "sha512-90abYK2q5/qDM+GACs9zRvc5KhEEpEWqWlHSd64zTPNxg+9wCJvTfyD9x2so7hlQhjRYO1Fa6flR3BC/kpTFkA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.17.8", "@types/webxr": "*", @@ -1798,35 +1820,6 @@ } } }, - "node_modules/@react-three/fiber/node_modules/zustand": { - "version": "5.0.12", - "resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.12.tgz", - "integrity": "sha512-i77ae3aZq4dhMlRhJVCYgMLKuSiZAaUPAct2AksxQ+gOtimhGMdXljRT21P5BNpeT4kXlLIckvkPM029OljD7g==", - "license": "MIT", - "engines": { - "node": ">=12.20.0" - }, - "peerDependencies": { - "@types/react": ">=18.0.0", - "immer": ">=9.0.6", - "react": ">=18.0.0", - "use-sync-external-store": ">=1.2.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "immer": { - "optional": true - }, - "react": { - "optional": 
true - }, - "use-sync-external-store": { - "optional": true - } - } - }, "node_modules/@rolldown/pluginutils": { "version": "1.0.0-rc.3", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.3.tgz", @@ -1835,9 +1828,9 @@ "license": "MIT" }, "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.59.0.tgz", - "integrity": "sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz", + "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==", "cpu": [ "arm" ], @@ -1848,9 +1841,9 @@ ] }, "node_modules/@rollup/rollup-android-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.59.0.tgz", - "integrity": "sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz", + "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==", "cpu": [ "arm64" ], @@ -1861,9 +1854,9 @@ ] }, "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.59.0.tgz", - "integrity": "sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz", + "integrity": "sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA==", "cpu": [ "arm64" ], 
@@ -1874,9 +1867,9 @@ ] }, "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.59.0.tgz", - "integrity": "sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz", + "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==", "cpu": [ "x64" ], @@ -1887,9 +1880,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.59.0.tgz", - "integrity": "sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz", + "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==", "cpu": [ "arm64" ], @@ -1900,9 +1893,9 @@ ] }, "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.59.0.tgz", - "integrity": "sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz", + "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==", "cpu": [ "x64" ], @@ -1913,9 +1906,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.59.0.tgz", - "integrity": 
"sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz", + "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==", "cpu": [ "arm" ], @@ -1926,9 +1919,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.59.0.tgz", - "integrity": "sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz", + "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==", "cpu": [ "arm" ], @@ -1939,9 +1932,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.59.0.tgz", - "integrity": "sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz", + "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==", "cpu": [ "arm64" ], @@ -1952,9 +1945,9 @@ ] }, "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.59.0.tgz", - "integrity": "sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==", + "version": "4.60.2", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz", + "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==", "cpu": [ "arm64" ], @@ -1965,9 +1958,9 @@ ] }, "node_modules/@rollup/rollup-linux-loong64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.59.0.tgz", - "integrity": "sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz", + "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==", "cpu": [ "loong64" ], @@ -1978,9 +1971,9 @@ ] }, "node_modules/@rollup/rollup-linux-loong64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.59.0.tgz", - "integrity": "sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz", + "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==", "cpu": [ "loong64" ], @@ -1991,9 +1984,9 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.59.0.tgz", - "integrity": "sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz", + "integrity": 
"sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==", "cpu": [ "ppc64" ], @@ -2004,9 +1997,9 @@ ] }, "node_modules/@rollup/rollup-linux-ppc64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.59.0.tgz", - "integrity": "sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz", + "integrity": "sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==", "cpu": [ "ppc64" ], @@ -2017,9 +2010,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.59.0.tgz", - "integrity": "sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz", + "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==", "cpu": [ "riscv64" ], @@ -2030,9 +2023,9 @@ ] }, "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.59.0.tgz", - "integrity": "sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz", + "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==", "cpu": [ "riscv64" ], @@ -2043,9 +2036,9 @@ ] }, 
"node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.59.0.tgz", - "integrity": "sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz", + "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==", "cpu": [ "s390x" ], @@ -2056,9 +2049,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.59.0.tgz", - "integrity": "sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz", + "integrity": "sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==", "cpu": [ "x64" ], @@ -2069,9 +2062,9 @@ ] }, "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.59.0.tgz", - "integrity": "sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz", + "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==", "cpu": [ "x64" ], @@ -2082,9 +2075,9 @@ ] }, "node_modules/@rollup/rollup-openbsd-x64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.59.0.tgz", - "integrity": 
"sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz", + "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==", "cpu": [ "x64" ], @@ -2095,9 +2088,9 @@ ] }, "node_modules/@rollup/rollup-openharmony-arm64": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.59.0.tgz", - "integrity": "sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz", + "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==", "cpu": [ "arm64" ], @@ -2108,9 +2101,9 @@ ] }, "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.59.0.tgz", - "integrity": "sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz", + "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==", "cpu": [ "arm64" ], @@ -2121,9 +2114,9 @@ ] }, "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.59.0.tgz", - "integrity": "sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==", + "version": "4.60.2", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz", + "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==", "cpu": [ "ia32" ], @@ -2134,9 +2127,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-gnu": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.59.0.tgz", - "integrity": "sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz", + "integrity": "sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==", "cpu": [ "x64" ], @@ -2147,9 +2140,9 @@ ] }, "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.59.0.tgz", - "integrity": "sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz", + "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==", "cpu": [ "x64" ], @@ -2169,47 +2162,47 @@ } }, "node_modules/@tailwindcss/node": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.2.1.tgz", - "integrity": "sha512-jlx6sLk4EOwO6hHe1oCGm1Q4AN/s0rSrTTPBGPM0/RQ6Uylwq17FuU8IeJJKEjtc6K6O07zsvP+gDO6MMWo7pg==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.2.4.tgz", + "integrity": "sha512-Ai7+yQPxz3ddrDQzFfBKdHEVBg0w3Zl83jnjuwxnZOsnH9pGn93QHQtpU0p/8rYWxvbFZHneni6p1BSLK4DkGA==", "license": "MIT", "dependencies": { "@jridgewell/remapping": "^2.3.5", "enhanced-resolve": "^5.19.0", "jiti": 
"^2.6.1", - "lightningcss": "1.31.1", + "lightningcss": "1.32.0", "magic-string": "^0.30.21", "source-map-js": "^1.2.1", - "tailwindcss": "4.2.1" + "tailwindcss": "4.2.4" } }, "node_modules/@tailwindcss/oxide": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.2.1.tgz", - "integrity": "sha512-yv9jeEFWnjKCI6/T3Oq50yQEOqmpmpfzG1hcZsAOaXFQPfzWprWrlHSdGPEF3WQTi8zu8ohC9Mh9J470nT5pUw==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.2.4.tgz", + "integrity": "sha512-9El/iI069DKDSXwTvB9J4BwdO5JhRrOweGaK25taBAvBXyXqJAX+Jqdvs8r8gKpsI/1m0LeJLyQYTf/WLrBT1Q==", "license": "MIT", "engines": { "node": ">= 20" }, "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.2.1", - "@tailwindcss/oxide-darwin-arm64": "4.2.1", - "@tailwindcss/oxide-darwin-x64": "4.2.1", - "@tailwindcss/oxide-freebsd-x64": "4.2.1", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.2.1", - "@tailwindcss/oxide-linux-arm64-gnu": "4.2.1", - "@tailwindcss/oxide-linux-arm64-musl": "4.2.1", - "@tailwindcss/oxide-linux-x64-gnu": "4.2.1", - "@tailwindcss/oxide-linux-x64-musl": "4.2.1", - "@tailwindcss/oxide-wasm32-wasi": "4.2.1", - "@tailwindcss/oxide-win32-arm64-msvc": "4.2.1", - "@tailwindcss/oxide-win32-x64-msvc": "4.2.1" + "@tailwindcss/oxide-android-arm64": "4.2.4", + "@tailwindcss/oxide-darwin-arm64": "4.2.4", + "@tailwindcss/oxide-darwin-x64": "4.2.4", + "@tailwindcss/oxide-freebsd-x64": "4.2.4", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.2.4", + "@tailwindcss/oxide-linux-arm64-gnu": "4.2.4", + "@tailwindcss/oxide-linux-arm64-musl": "4.2.4", + "@tailwindcss/oxide-linux-x64-gnu": "4.2.4", + "@tailwindcss/oxide-linux-x64-musl": "4.2.4", + "@tailwindcss/oxide-wasm32-wasi": "4.2.4", + "@tailwindcss/oxide-win32-arm64-msvc": "4.2.4", + "@tailwindcss/oxide-win32-x64-msvc": "4.2.4" } }, "node_modules/@tailwindcss/oxide-android-arm64": { - "version": "4.2.1", - "resolved": 
"https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.2.1.tgz", - "integrity": "sha512-eZ7G1Zm5EC8OOKaesIKuw77jw++QJ2lL9N+dDpdQiAB/c/B2wDh0QPFHbkBVrXnwNugvrbJFk1gK2SsVjwWReg==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.2.4.tgz", + "integrity": "sha512-e7MOr1SAn9U8KlZzPi1ZXGZHeC5anY36qjNwmZv9pOJ8E4Q6jmD1vyEHkQFmNOIN7twGPEMXRHmitN4zCMN03g==", "cpu": [ "arm64" ], @@ -2223,9 +2216,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-arm64": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.2.1.tgz", - "integrity": "sha512-q/LHkOstoJ7pI1J0q6djesLzRvQSIfEto148ppAd+BVQK0JYjQIFSK3JgYZJa+Yzi0DDa52ZsQx2rqytBnf8Hw==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.2.4.tgz", + "integrity": "sha512-tSC/Kbqpz/5/o/C2sG7QvOxAKqyd10bq+ypZNf+9Fi2TvbVbv1zNpcEptcsU7DPROaSbVgUXmrzKhurFvo5eDg==", "cpu": [ "arm64" ], @@ -2239,9 +2232,9 @@ } }, "node_modules/@tailwindcss/oxide-darwin-x64": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.2.1.tgz", - "integrity": "sha512-/f/ozlaXGY6QLbpvd/kFTro2l18f7dHKpB+ieXz+Cijl4Mt9AI2rTrpq7V+t04nK+j9XBQHnSMdeQRhbGyt6fw==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.2.4.tgz", + "integrity": "sha512-yPyUXn3yO/ufR6+Kzv0t4fCg2qNr90jxXc5QqBpjlPNd0NqyDXcmQb/6weunH/MEDXW5dhyEi+agTDiqa3WsGg==", "cpu": [ "x64" ], @@ -2255,9 +2248,9 @@ } }, "node_modules/@tailwindcss/oxide-freebsd-x64": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.2.1.tgz", - "integrity": "sha512-5e/AkgYJT/cpbkys/OU2Ei2jdETCLlifwm7ogMC7/hksI2fC3iiq6OcXwjibcIjPung0kRtR3TxEITkqgn0TcA==", + "version": "4.2.4", + "resolved": 
"https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.2.4.tgz", + "integrity": "sha512-BoMIB4vMQtZsXdGLVc2z+P9DbETkiopogfWZKbWwM8b/1Vinbs4YcUwo+kM/KeLkX3Ygrf4/PsRndKaYhS8Eiw==", "cpu": [ "x64" ], @@ -2271,9 +2264,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.2.1.tgz", - "integrity": "sha512-Uny1EcVTTmerCKt/1ZuKTkb0x8ZaiuYucg2/kImO5A5Y/kBz41/+j0gxUZl+hTF3xkWpDmHX+TaWhOtba2Fyuw==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.2.4.tgz", + "integrity": "sha512-7pIHBLTHYRAlS7V22JNuTh33yLH4VElwKtB3bwchK/UaKUPpQ0lPQiOWcbm4V3WP2I6fNIJ23vABIvoy2izdwA==", "cpu": [ "arm" ], @@ -2287,9 +2280,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.2.1.tgz", - "integrity": "sha512-CTrwomI+c7n6aSSQlsPL0roRiNMDQ/YzMD9EjcR+H4f0I1SQ8QqIuPnsVp7QgMkC1Qi8rtkekLkOFjo7OlEFRQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.2.4.tgz", + "integrity": "sha512-+E4wxJ0ZGOzSH325reXTWB48l42i93kQqMvDyz5gqfRzRZ7faNhnmvlV4EPGJU3QJM/3Ab5jhJ5pCRUsKn6OQw==", "cpu": [ "arm64" ], @@ -2303,9 +2296,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-arm64-musl": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.2.1.tgz", - "integrity": "sha512-WZA0CHRL/SP1TRbA5mp9htsppSEkWuQ4KsSUumYQnyl8ZdT39ntwqmz4IUHGN6p4XdSlYfJwM4rRzZLShHsGAQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.2.4.tgz", + "integrity": 
"sha512-bBADEGAbo4ASnppIziaQJelekCxdMaxisrk+fB7Thit72IBnALp9K6ffA2G4ruj90G9XRS2VQ6q2bCKbfFV82g==", "cpu": [ "arm64" ], @@ -2319,9 +2312,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-gnu": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.2.1.tgz", - "integrity": "sha512-qMFzxI2YlBOLW5PhblzuSWlWfwLHaneBE0xHzLrBgNtqN6mWfs+qYbhryGSXQjFYB1Dzf5w+LN5qbUTPhW7Y5g==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.2.4.tgz", + "integrity": "sha512-7Mx25E4WTfnht0TVRTyC00j3i0M+EeFe7wguMDTlX4mRxafznw0CA8WJkFjWYH5BlgELd1kSjuU2JiPnNZbJDA==", "cpu": [ "x64" ], @@ -2335,9 +2328,9 @@ } }, "node_modules/@tailwindcss/oxide-linux-x64-musl": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.2.1.tgz", - "integrity": "sha512-5r1X2FKnCMUPlXTWRYpHdPYUY6a1Ar/t7P24OuiEdEOmms5lyqjDRvVY1yy9Rmioh+AunQ0rWiOTPE8F9A3v5g==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.2.4.tgz", + "integrity": "sha512-2wwJRF7nyhOR0hhHoChc04xngV3iS+akccHTGtz965FwF0up4b2lOdo6kI1EbDaEXKgvcrFBYcYQQ/rrnWFVfA==", "cpu": [ "x64" ], @@ -2351,9 +2344,9 @@ } }, "node_modules/@tailwindcss/oxide-wasm32-wasi": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.2.1.tgz", - "integrity": "sha512-MGFB5cVPvshR85MTJkEvqDUnuNoysrsRxd6vnk1Lf2tbiqNlXpHYZqkqOQalydienEWOHHFyyuTSYRsLfxFJ2Q==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.2.4.tgz", + "integrity": "sha512-FQsqApeor8Fo6gUEklzmaa9994orJZZDBAlQpK2Mq+DslRKFJeD6AjHpBQ0kZFQohVr8o85PPh8eOy86VlSCmw==", "bundleDependencies": [ "@napi-rs/wasm-runtime", "@emnapi/core", @@ -2380,9 +2373,9 @@ } }, "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { 
- "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.2.1.tgz", - "integrity": "sha512-YlUEHRHBGnCMh4Nj4GnqQyBtsshUPdiNroZj8VPkvTZSoHsilRCwXcVKnG9kyi0ZFAS/3u+qKHBdDc81SADTRA==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.2.4.tgz", + "integrity": "sha512-L9BXqxC4ToVgwMFqj3pmZRqyHEztulpUJzCxUtLjobMCzTPsGt1Fa9enKbOpY2iIyVtaHNeNvAK8ERP/64sqGQ==", "cpu": [ "arm64" ], @@ -2396,9 +2389,9 @@ } }, "node_modules/@tailwindcss/oxide-win32-x64-msvc": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.2.1.tgz", - "integrity": "sha512-rbO34G5sMWWyrN/idLeVxAZgAKWrn5LiR3/I90Q9MkA67s6T1oB0xtTe+0heoBvHSpbU9Mk7i6uwJnpo4u21XQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.2.4.tgz", + "integrity": "sha512-ESlKG0EpVJQwRjXDDa9rLvhEAh0mhP1sF7sap9dNZT0yyl9SAG6T7gdP09EH0vIv0UNTlo6jPWyujD6559fZvw==", "cpu": [ "x64" ], @@ -2412,17 +2405,17 @@ } }, "node_modules/@tailwindcss/vite": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/@tailwindcss/vite/-/vite-4.2.1.tgz", - "integrity": "sha512-TBf2sJjYeb28jD2U/OhwdW0bbOsxkWPwQ7SrqGf9sVcoYwZj7rkXljroBO9wKBut9XnmQLXanuDUeqQK0lGg/w==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/@tailwindcss/vite/-/vite-4.2.4.tgz", + "integrity": "sha512-pCvohwOCspk3ZFn6eJzrrX3g4n2JY73H6MmYC87XfGPyTty4YsCjYTMArRZm/zOI8dIt3+EcrLHAFPe5A4bgtw==", "license": "MIT", "dependencies": { - "@tailwindcss/node": "4.2.1", - "@tailwindcss/oxide": "4.2.1", - "tailwindcss": "4.2.1" + "@tailwindcss/node": "4.2.4", + "@tailwindcss/oxide": "4.2.4", + "tailwindcss": "4.2.4" }, "peerDependencies": { - "vite": "^5.2.0 || ^6 || ^7" + "vite": "^5.2.0 || ^6 || ^7 || ^8" } }, "node_modules/@types/babel__core": { @@ -2484,11 +2477,12 @@ "license": "MIT" 
}, "node_modules/@types/node": { - "version": "24.12.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-24.12.0.tgz", - "integrity": "sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==", + "version": "24.12.2", + "resolved": "https://registry.npmjs.org/@types/node/-/node-24.12.2.tgz", + "integrity": "sha512-A1sre26ke7HDIuY/M23nd9gfB+nrmhtYyMINbjI1zHJxYteKR6qSMX56FsmjMcDb3SMcjJg5BiRRgOCC/yBD0g==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -2498,6 +2492,7 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -2508,6 +2503,7 @@ "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", "devOptional": true, "license": "MIT", + "peer": true, "peerDependencies": { "@types/react": "^19.2.0" } @@ -2528,20 +2524,20 @@ "license": "MIT" }, "node_modules/@typescript-eslint/eslint-plugin": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.57.0.tgz", - "integrity": "sha512-qeu4rTHR3/IaFORbD16gmjq9+rEs9fGKdX0kF6BKSfi+gCuG3RCKLlSBYzn/bGsY9Tj7KE/DAQStbp8AHJGHEQ==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.0.tgz", + "integrity": "sha512-HyAZtpdkgZwpq8Sz3FSUvCR4c+ScbuWa9AksK2Jweub7w4M3yTz4O11AqVJzLYjy/B9ZWPyc81I+mOdJU/bDQw==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/regexpp": "^4.12.2", - "@typescript-eslint/scope-manager": "8.57.0", - "@typescript-eslint/type-utils": "8.57.0", - "@typescript-eslint/utils": "8.57.0", - "@typescript-eslint/visitor-keys": "8.57.0", + "@typescript-eslint/scope-manager": "8.59.0", + 
"@typescript-eslint/type-utils": "8.59.0", + "@typescript-eslint/utils": "8.59.0", + "@typescript-eslint/visitor-keys": "8.59.0", "ignore": "^7.0.5", "natural-compare": "^1.4.0", - "ts-api-utils": "^2.4.0" + "ts-api-utils": "^2.5.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2551,9 +2547,9 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "@typescript-eslint/parser": "^8.57.0", + "@typescript-eslint/parser": "^8.59.0", "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/eslint-plugin/node_modules/ignore": { @@ -2567,16 +2563,17 @@ } }, "node_modules/@typescript-eslint/parser": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.57.0.tgz", - "integrity": "sha512-XZzOmihLIr8AD1b9hL9ccNMzEMWt/dE2u7NyTY9jJG6YNiNthaD5XtUHVF2uCXZ15ng+z2hT3MVuxnUYhq6k1g==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.0.tgz", + "integrity": "sha512-TI1XGwKbDpo9tRW8UDIXCOeLk55qe9ZFGs8MTKU6/M08HWTw52DD/IYhfQtOEhEdPhLMT26Ka/x7p70nd3dzDg==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { - "@typescript-eslint/scope-manager": "8.57.0", - "@typescript-eslint/types": "8.57.0", - "@typescript-eslint/typescript-estree": "8.57.0", - "@typescript-eslint/visitor-keys": "8.57.0", + "@typescript-eslint/scope-manager": "8.59.0", + "@typescript-eslint/types": "8.59.0", + "@typescript-eslint/typescript-estree": "8.59.0", + "@typescript-eslint/visitor-keys": "8.59.0", "debug": "^4.4.3" }, "engines": { @@ -2588,18 +2585,18 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/project-service": { - "version": "8.57.0", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.57.0.tgz", - "integrity": "sha512-pR+dK0BlxCLxtWfaKQWtYr7MhKmzqZxuii+ZjuFlZlIGRZm22HnXFqa2eY+90MUz8/i80YJmzFGDUsi8dMOV5w==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.0.tgz", + "integrity": "sha512-Lw5ITrR5s5TbC19YSvlr63ZfLaJoU6vtKTHyB0GQOpX0W7d5/Ir6vUahWi/8Sps/nOukZQ0IB3SmlxZnjaKVnw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/tsconfig-utils": "^8.57.0", - "@typescript-eslint/types": "^8.57.0", + "@typescript-eslint/tsconfig-utils": "^8.59.0", + "@typescript-eslint/types": "^8.59.0", "debug": "^4.4.3" }, "engines": { @@ -2610,18 +2607,18 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.57.0.tgz", - "integrity": "sha512-nvExQqAHF01lUM66MskSaZulpPL5pgy5hI5RfrxviLgzZVffB5yYzw27uK/ft8QnKXI2X0LBrHJFr1TaZtAibw==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.0.tgz", + "integrity": "sha512-UzR16Ut8IpA3Mc4DbgAShlPPkVm8xXMWafXxB0BocaVRHs8ZGakAxGRskF7FId3sdk9lgGD73GSFaWmWFDE4dg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.57.0", - "@typescript-eslint/visitor-keys": "8.57.0" + "@typescript-eslint/types": "8.59.0", + "@typescript-eslint/visitor-keys": "8.59.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2632,9 +2629,9 @@ } }, "node_modules/@typescript-eslint/tsconfig-utils": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.57.0.tgz", - "integrity": 
"sha512-LtXRihc5ytjJIQEH+xqjB0+YgsV4/tW35XKX3GTZHpWtcC8SPkT/d4tqdf1cKtesryHm2bgp6l555NYcT2NLvA==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.0.tgz", + "integrity": "sha512-91Sbl3s4Kb3SybliIY6muFBmHVv+pYXfybC4Oolp3dvk8BvIE3wOPc+403CWIT7mJNkfQRGtdqghzs2+Z91Tqg==", "dev": true, "license": "MIT", "engines": { @@ -2645,21 +2642,21 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/type-utils": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.57.0.tgz", - "integrity": "sha512-yjgh7gmDcJ1+TcEg8x3uWQmn8ifvSupnPfjP21twPKrDP/pTHlEQgmKcitzF/rzPSmv7QjJ90vRpN4U+zoUjwQ==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.0.tgz", + "integrity": "sha512-3TRiZaQSltGqGeNrJzzr1+8YcEobKH9rHnqIp/1psfKFmhRQDNMGP5hBufanYTGznwShzVLs3Mz+gDN7HkWfXg==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.57.0", - "@typescript-eslint/typescript-estree": "8.57.0", - "@typescript-eslint/utils": "8.57.0", + "@typescript-eslint/types": "8.59.0", + "@typescript-eslint/typescript-estree": "8.59.0", + "@typescript-eslint/utils": "8.59.0", "debug": "^4.4.3", - "ts-api-utils": "^2.4.0" + "ts-api-utils": "^2.5.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2670,13 +2667,13 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/types": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.57.0.tgz", - "integrity": "sha512-dTLI8PEXhjUC7B9Kre+u0XznO696BhXcTlOn0/6kf1fHaQW8+VjJAVHJ3eTI14ZapTxdkOmc80HblPQLaEeJdg==", + "version": "8.59.0", + "resolved": 
"https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.0.tgz", + "integrity": "sha512-nLzdsT1gdOgFxxxwrlNVUBzSNBEEHJ86bblmk4QAS6stfig7rcJzWKqCyxFy3YRRHXDWEkb2NralA1nOYkkm/A==", "dev": true, "license": "MIT", "engines": { @@ -2688,21 +2685,21 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.57.0.tgz", - "integrity": "sha512-m7faHcyVg0BT3VdYTlX8GdJEM7COexXxS6KqGopxdtkQRvBanK377QDHr4W/vIPAR+ah9+B/RclSW5ldVniO1Q==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.0.tgz", + "integrity": "sha512-O9Re9P1BmBLFJyikRbQpLku/QA3/AueZNO9WePLBwQrvkixTmDe8u76B6CYUAITRl/rHawggEqUGn5QIkVRLMw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/project-service": "8.57.0", - "@typescript-eslint/tsconfig-utils": "8.57.0", - "@typescript-eslint/types": "8.57.0", - "@typescript-eslint/visitor-keys": "8.57.0", + "@typescript-eslint/project-service": "8.59.0", + "@typescript-eslint/tsconfig-utils": "8.59.0", + "@typescript-eslint/types": "8.59.0", + "@typescript-eslint/visitor-keys": "8.59.0", "debug": "^4.4.3", "minimatch": "^10.2.2", "semver": "^7.7.3", "tinyglobby": "^0.2.15", - "ts-api-utils": "^2.4.0" + "ts-api-utils": "^2.5.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -2712,7 +2709,7 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/typescript-estree/node_modules/balanced-match": { @@ -2726,9 +2723,9 @@ } }, "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", - "integrity": 
"sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz", + "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==", "dev": true, "license": "MIT", "dependencies": { @@ -2739,13 +2736,13 @@ } }, "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { - "version": "10.2.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", - "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", + "version": "10.2.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz", + "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==", "dev": true, "license": "BlueOak-1.0.0", "dependencies": { - "brace-expansion": "^5.0.2" + "brace-expansion": "^5.0.5" }, "engines": { "node": "18 || 20 || >=22" @@ -2768,16 +2765,16 @@ } }, "node_modules/@typescript-eslint/utils": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.57.0.tgz", - "integrity": "sha512-5iIHvpD3CZe06riAsbNxxreP+MuYgVUsV0n4bwLH//VJmgtt54sQeY2GszntJ4BjYCpMzrfVh2SBnUQTtys2lQ==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.0.tgz", + "integrity": "sha512-I1R/K7V07XsMJ12Oaxg/O9GfrysGTmCRhvZJBv0RE0NcULMzjqVpR5kRRQjHsz3J/bElU7HwCO7zkqL+MSUz+g==", "dev": true, "license": "MIT", "dependencies": { "@eslint-community/eslint-utils": "^4.9.1", - "@typescript-eslint/scope-manager": "8.57.0", - "@typescript-eslint/types": "8.57.0", - "@typescript-eslint/typescript-estree": "8.57.0" + "@typescript-eslint/scope-manager": "8.59.0", + "@typescript-eslint/types": "8.59.0", + "@typescript-eslint/typescript-estree": "8.59.0" }, "engines": { "node": "^18.18.0 || 
^20.9.0 || >=21.1.0" @@ -2788,17 +2785,17 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/@typescript-eslint/visitor-keys": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.57.0.tgz", - "integrity": "sha512-zm6xx8UT/Xy2oSr2ZXD0pZo7Jx2XsCoID2IUh9YSTFRu7z+WdwYTRk6LhUftm1crwqbuoF6I8zAFeCMw0YjwDg==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.0.tgz", + "integrity": "sha512-/uejZt4dSere1bx12WLlPfv8GktzcaDtuJ7s42/HEZ5zGj9oxRaD4bj7qwSunXkf+pbAhFt2zjpHYUiT5lHf0Q==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/types": "8.57.0", + "@typescript-eslint/types": "8.59.0", "eslint-visitor-keys": "^5.0.0" }, "engines": { @@ -2861,12 +2858,46 @@ "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/@xterm/addon-fit": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@xterm/addon-fit/-/addon-fit-0.11.0.tgz", + "integrity": "sha512-jYcgT6xtVYhnhgxh3QgYDnnNMYTcf8ElbxxFzX0IZo+vabQqSPAjC3c1wJrKB5E19VwQei89QCiZZP86DCPF7g==", + "license": "MIT" + }, + "node_modules/@xterm/addon-unicode11": { + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/@xterm/addon-unicode11/-/addon-unicode11-0.9.0.tgz", + "integrity": "sha512-FxDnYcyuXhNl+XSqGZL/t0U9eiNb/q3EWT5rYkQT/zuig8Gz/VagnQANKHdDWFM2lTMk9ly0EFQxxxtZUoRetw==", + "license": "MIT" + }, + "node_modules/@xterm/addon-web-links": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@xterm/addon-web-links/-/addon-web-links-0.12.0.tgz", + "integrity": "sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==", + "license": "MIT" + }, + "node_modules/@xterm/addon-webgl": { + "version": "0.19.0", + "resolved": 
"https://registry.npmjs.org/@xterm/addon-webgl/-/addon-webgl-0.19.0.tgz", + "integrity": "sha512-b3fMOsyLVuCeNJWxolACEUED0vm7qC0cy4wRvf3oURSzDTYVQiGPhTnhWZwIHdvC48Y+oLhvYXnY4XDXPoJo6A==", + "license": "MIT" + }, + "node_modules/@xterm/xterm": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-6.0.0.tgz", + "integrity": "sha512-TQwDdQGtwwDt+2cgKDLn0IRaSxYu1tSUjgKarSDkUM0ZNiSRXFpjxEsvc/Zgc5kq5omJ+V0a8/kIM2WD3sMOYg==", + "license": "MIT", + "workspaces": [ + "addons/*" + ] + }, "node_modules/acorn": { "version": "8.16.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.16.0.tgz", "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2885,9 +2916,9 @@ } }, "node_modules/ajv": { - "version": "6.14.0", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.14.0.tgz", - "integrity": "sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw==", + "version": "6.15.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.15.0.tgz", + "integrity": "sha512-fgFx7Hfoq60ytK2c7DhnF8jIvzYgOMxfugjLOSMHjLIPgenqa7S7oaagATUq99mV6IYvN2tRmC0wnTYX6iPbMw==", "dev": true, "license": "MIT", "dependencies": { @@ -2970,9 +3001,9 @@ "license": "MIT" }, "node_modules/baseline-browser-mapping": { - "version": "2.10.7", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.7.tgz", - "integrity": "sha512-1ghYO3HnxGec0TCGBXiDLVns4eCSx4zJpxnHrlqFQajmhfKMQBzUGDdkMK7fUW7PTHTeLf+j87aTuKuuwWzMGw==", + "version": "2.10.21", + "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.21.tgz", + "integrity": "sha512-Q+rUQ7Uz8AHM7DEaNdwvfFCTq7a43lNTzuS94eiWqwyxfV/wJv+oUivef51T91mmRY4d4A1u9rcSvkeufCVXlA==", "dev": true, "license": "Apache-2.0", "bin": { @@ -2989,9 +3020,9 @@ "license": "MIT" }, 
"node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "1.1.14", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.14.tgz", + "integrity": "sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==", "dev": true, "license": "MIT", "dependencies": { @@ -3000,9 +3031,9 @@ } }, "node_modules/browserslist": { - "version": "4.28.1", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.1.tgz", - "integrity": "sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==", + "version": "4.28.2", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz", + "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==", "dev": true, "funding": [ { @@ -3019,12 +3050,13 @@ } ], "license": "MIT", + "peer": true, "dependencies": { - "baseline-browser-mapping": "^2.9.0", - "caniuse-lite": "^1.0.30001759", - "electron-to-chromium": "^1.5.263", - "node-releases": "^2.0.27", - "update-browserslist-db": "^1.2.0" + "baseline-browser-mapping": "^2.10.12", + "caniuse-lite": "^1.0.30001782", + "electron-to-chromium": "^1.5.328", + "node-releases": "^2.0.36", + "update-browserslist-db": "^1.2.3" }, "bin": { "browserslist": "cli.js" @@ -3068,9 +3100,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001778", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001778.tgz", - "integrity": "sha512-PN7uxFL+ExFJO61aVmP1aIEG4i9whQd4eoSCebav62UwDyp5OHh06zN4jqKSMePVgxHifCw1QJxdRkA1Pisekg==", + "version": "1.0.30001790", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001790.tgz", + "integrity": 
"sha512-bOoxfJPyYo+ds6W0YfptaCWbFnJYjh2Y1Eow5lRv+vI2u8ganPZqNm1JwNh0t2ELQCqIWg4B3dWEusgAmsoyOw==", "dev": true, "funding": [ { @@ -3526,6 +3558,7 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -3739,20 +3772,20 @@ } }, "node_modules/electron-to-chromium": { - "version": "1.5.313", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.313.tgz", - "integrity": "sha512-QBMrTWEf00GXZmJyx2lbYD45jpI3TUFnNIzJ5BBc8piGUDwMPa1GV6HJWTZVvY/eiN3fSopl7NRbgGp9sZ9LTA==", + "version": "1.5.344", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.344.tgz", + "integrity": "sha512-4MxfbmNDm+KPh066EZy+eUnkcDPcZ35wNmOWzFuh/ijvHsve6kbLTLURy88uCNK5FbpN+yk2nQY6BYh1GEt+wg==", "dev": true, "license": "ISC" }, "node_modules/enhanced-resolve": { - "version": "5.20.0", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.20.0.tgz", - "integrity": "sha512-/ce7+jQ1PQ6rVXwe+jKEg5hW5ciicHwIQUagZkp6IufBoY3YDgdTTY1azVs0qoRgVmvsNB+rbjLJxDAeHHtwsQ==", + "version": "5.21.0", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.21.0.tgz", + "integrity": "sha512-otxSQPw4lkOZWkHpB3zaEQs6gWYEsmX4xQF68ElXC/TWvGxGMSGOvoNbaLXm6/cS/fSfHtsEdw90y20PCd+sCA==", "license": "MIT", "dependencies": { "graceful-fs": "^4.2.4", - "tapable": "^2.3.0" + "tapable": "^2.3.3" }, "engines": { "node": ">=10.13.0" @@ -3771,9 +3804,9 @@ } }, "node_modules/esbuild": { - "version": "0.27.4", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.4.tgz", - "integrity": "sha512-Rq4vbHnYkK5fws5NF7MYTU68FPRE1ajX7heQ/8QXXWqNgqqJ/GkmmyxIzUnf2Sr/bakf8l54716CcMGHYhMrrQ==", + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": 
"sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", "hasInstallScript": true, "license": "MIT", "bin": { @@ -3783,32 +3816,32 @@ "node": ">=18" }, "optionalDependencies": { - "@esbuild/aix-ppc64": "0.27.4", - "@esbuild/android-arm": "0.27.4", - "@esbuild/android-arm64": "0.27.4", - "@esbuild/android-x64": "0.27.4", - "@esbuild/darwin-arm64": "0.27.4", - "@esbuild/darwin-x64": "0.27.4", - "@esbuild/freebsd-arm64": "0.27.4", - "@esbuild/freebsd-x64": "0.27.4", - "@esbuild/linux-arm": "0.27.4", - "@esbuild/linux-arm64": "0.27.4", - "@esbuild/linux-ia32": "0.27.4", - "@esbuild/linux-loong64": "0.27.4", - "@esbuild/linux-mips64el": "0.27.4", - "@esbuild/linux-ppc64": "0.27.4", - "@esbuild/linux-riscv64": "0.27.4", - "@esbuild/linux-s390x": "0.27.4", - "@esbuild/linux-x64": "0.27.4", - "@esbuild/netbsd-arm64": "0.27.4", - "@esbuild/netbsd-x64": "0.27.4", - "@esbuild/openbsd-arm64": "0.27.4", - "@esbuild/openbsd-x64": "0.27.4", - "@esbuild/openharmony-arm64": "0.27.4", - "@esbuild/sunos-x64": "0.27.4", - "@esbuild/win32-arm64": "0.27.4", - "@esbuild/win32-ia32": "0.27.4", - "@esbuild/win32-x64": "0.27.4" + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + "@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", 
+ "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" } }, "node_modules/escalade": { @@ -3839,6 +3872,7 @@ "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -3894,9 +3928,9 @@ } }, "node_modules/eslint-plugin-react-hooks": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-7.0.1.tgz", - "integrity": "sha512-O0d0m04evaNzEPoSW+59Mezf8Qt0InfgGIBJnpC0h3NH/WjUAR7BIKUfysC6todmtiZ/A0oUVS8Gce0WhBrHsA==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-7.1.1.tgz", + "integrity": "sha512-f2I7Gw6JbvCexzIInuSbZpfdQ44D7iqdWX01FKLvrPgqxoE7oMj8clOfto8U6vYiz4yd5oKu39rRSVOe1zRu0g==", "dev": true, "license": "MIT", "dependencies": { @@ -3910,7 +3944,7 @@ "node": ">=18" }, "peerDependencies": { - "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0" + "eslint": "^3.0.0 || ^4.0.0 || ^5.0.0 || ^6.0.0 || ^7.0.0 || ^8.0.0-0 || ^9.0.0 || ^10.0.0" } }, "node_modules/eslint-plugin-react-refresh": { @@ -4133,9 +4167,9 @@ } }, "node_modules/flatted": { - "version": "3.4.1", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.4.1.tgz", - "integrity": "sha512-IxfVbRFVlV8V/yRaGzk0UVIcsKKHMSfYw66T/u4nTwlWteQePsxe//LjudR1AMX4tZW3WFCh3Zqa/sjlqpbURQ==", + "version": "3.4.2", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.4.2.tgz", + "integrity": "sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==", "dev": true, "license": "ISC" }, @@ -4195,9 +4229,9 @@ } }, "node_modules/globals": { - "version": "17.4.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-17.4.0.tgz", - "integrity": 
"sha512-hjrNztw/VajQwOLsMNT1cbJiH2muO3OROCHnbehc8eY5JyD2gqz4AcMHPqgaOR59DjgUjYAYLeH699g/eWi2jw==", + "version": "17.5.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-17.5.0.tgz", + "integrity": "sha512-qoV+HK2yFl/366t2/Cb3+xxPUo5BuMynomoDmiaZBIdbs+0pYbjfZU+twLhGKp4uCZ/+NbtpVepH5bGCxRyy2g==", "dev": true, "license": "MIT", "engines": { @@ -4217,7 +4251,8 @@ "version": "3.15.0", "resolved": "https://registry.npmjs.org/gsap/-/gsap-3.15.0.tgz", "integrity": "sha512-dMW4CWBTUK1AEEDeZc1g4xpPGIrSf9fJF960qbTZmN/QwZIWY5wgliS6JWl9/25fpTGJrMRtSjGtOmPnfjZB+A==", - "license": "Standard 'no charge' license: https://gsap.com/standard-license." + "license": "Standard 'no charge' license: https://gsap.com/standard-license.", + "peer": true }, "node_modules/has-flag": { "version": "4.0.0", @@ -4364,18 +4399,6 @@ "node": ">=0.10.0" } }, - "node_modules/is-extendable/node_modules/is-plain-object": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", - "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", - "license": "MIT", - "dependencies": { - "isobject": "^3.0.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -4400,10 +4423,13 @@ } }, "node_modules/is-plain-object": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", - "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", + "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", "license": "MIT", + "dependencies": { + "isobject": "^3.0.1" + }, "engines": { "node": ">=0.10.0" } @@ -4532,6 
+4558,7 @@ "resolved": "https://registry.npmjs.org/leva/-/leva-0.10.1.tgz", "integrity": "sha512-BcjnfUX8jpmwZUz2L7AfBtF9vn4ggTH33hmeufDULbP3YgNZ/C+ss/oO3stbrqRQyaOmRwy70y7BGTGO81S3rA==", "license": "MIT", + "peer": true, "dependencies": { "@radix-ui/react-portal": "^1.1.4", "@radix-ui/react-tooltip": "^1.1.8", @@ -4550,6 +4577,23 @@ "react-dom": "^18.0.0 || ^19.0.0" } }, + "node_modules/leva/node_modules/zustand": { + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/zustand/-/zustand-3.7.2.tgz", + "integrity": "sha512-PIJDIZKtokhof+9+60cpockVOq05sJzHCriyvaLBmEJixseQ1a5Kdov6fWZfWOu5SK9c+FhH1jU0tntLxRJYMA==", + "license": "MIT", + "engines": { + "node": ">=12.7.0" + }, + "peerDependencies": { + "react": ">=16.8" + }, + "peerDependenciesMeta": { + "react": { + "optional": true + } + } + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -4565,9 +4609,9 @@ } }, "node_modules/lightningcss": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.31.1.tgz", - "integrity": "sha512-l51N2r93WmGUye3WuFoN5k10zyvrVs0qfKBhyC5ogUQ6Ew6JUSswh78mbSO+IU3nTWsyOArqPCcShdQSadghBQ==", + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz", + "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==", "license": "MPL-2.0", "dependencies": { "detect-libc": "^2.0.3" @@ -4580,23 +4624,23 @@ "url": "https://opencollective.com/parcel" }, "optionalDependencies": { - "lightningcss-android-arm64": "1.31.1", - "lightningcss-darwin-arm64": "1.31.1", - "lightningcss-darwin-x64": "1.31.1", - "lightningcss-freebsd-x64": "1.31.1", - "lightningcss-linux-arm-gnueabihf": "1.31.1", - "lightningcss-linux-arm64-gnu": "1.31.1", - "lightningcss-linux-arm64-musl": "1.31.1", - "lightningcss-linux-x64-gnu": "1.31.1", - "lightningcss-linux-x64-musl": "1.31.1", - "lightningcss-win32-arm64-msvc": 
"1.31.1", - "lightningcss-win32-x64-msvc": "1.31.1" + "lightningcss-android-arm64": "1.32.0", + "lightningcss-darwin-arm64": "1.32.0", + "lightningcss-darwin-x64": "1.32.0", + "lightningcss-freebsd-x64": "1.32.0", + "lightningcss-linux-arm-gnueabihf": "1.32.0", + "lightningcss-linux-arm64-gnu": "1.32.0", + "lightningcss-linux-arm64-musl": "1.32.0", + "lightningcss-linux-x64-gnu": "1.32.0", + "lightningcss-linux-x64-musl": "1.32.0", + "lightningcss-win32-arm64-msvc": "1.32.0", + "lightningcss-win32-x64-msvc": "1.32.0" } }, - "node_modules/lightningcss/node_modules/lightningcss-android-arm64": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.31.1.tgz", - "integrity": "sha512-HXJF3x8w9nQ4jbXRiNppBCqeZPIAfUo8zE/kOEGbW5NZvGc/K7nMxbhIr+YlFlHW5mpbg/YFPdbnCh1wAXCKFg==", + "node_modules/lightningcss-android-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz", + "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==", "cpu": [ "arm64" ], @@ -4613,10 +4657,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-darwin-arm64": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.31.1.tgz", - "integrity": "sha512-02uTEqf3vIfNMq3h/z2cJfcOXnQ0GRwQrkmPafhueLb2h7mqEidiCzkE4gBMEH65abHRiQvhdcQ+aP0D0g67sg==", + "node_modules/lightningcss-darwin-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz", + "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==", "cpu": [ "arm64" ], @@ -4633,10 +4677,10 @@ "url": "https://opencollective.com/parcel" } }, - 
"node_modules/lightningcss/node_modules/lightningcss-darwin-x64": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.31.1.tgz", - "integrity": "sha512-1ObhyoCY+tGxtsz1lSx5NXCj3nirk0Y0kB/g8B8DT+sSx4G9djitg9ejFnjb3gJNWo7qXH4DIy2SUHvpoFwfTA==", + "node_modules/lightningcss-darwin-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz", + "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==", "cpu": [ "x64" ], @@ -4653,10 +4697,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-freebsd-x64": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.31.1.tgz", - "integrity": "sha512-1RINmQKAItO6ISxYgPwszQE1BrsVU5aB45ho6O42mu96UiZBxEXsuQ7cJW4zs4CEodPUioj/QrXW1r9pLUM74A==", + "node_modules/lightningcss-freebsd-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz", + "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==", "cpu": [ "x64" ], @@ -4673,10 +4717,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-linux-arm-gnueabihf": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.31.1.tgz", - "integrity": "sha512-OOCm2//MZJ87CdDK62rZIu+aw9gBv4azMJuA8/KB74wmfS3lnC4yoPHm0uXZ/dvNNHmnZnB8XLAZzObeG0nS1g==", + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz", + "integrity": 
"sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==", "cpu": [ "arm" ], @@ -4693,10 +4737,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-linux-arm64-gnu": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.31.1.tgz", - "integrity": "sha512-WKyLWztD71rTnou4xAD5kQT+982wvca7E6QoLpoawZ1gP9JM0GJj4Tp5jMUh9B3AitHbRZ2/H3W5xQmdEOUlLg==", + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz", + "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==", "cpu": [ "arm64" ], @@ -4713,10 +4757,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-linux-arm64-musl": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.31.1.tgz", - "integrity": "sha512-mVZ7Pg2zIbe3XlNbZJdjs86YViQFoJSpc41CbVmKBPiGmC4YrfeOyz65ms2qpAobVd7WQsbW4PdsSJEMymyIMg==", + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz", + "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==", "cpu": [ "arm64" ], @@ -4733,10 +4777,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-linux-x64-gnu": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.31.1.tgz", - "integrity": "sha512-xGlFWRMl+0KvUhgySdIaReQdB4FNudfUTARn7q0hh/V67PVGCs3ADFjw+6++kG1RNd0zdGRlEKa+T13/tQjPMA==", + 
"node_modules/lightningcss-linux-x64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz", + "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==", "cpu": [ "x64" ], @@ -4753,10 +4797,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-linux-x64-musl": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.31.1.tgz", - "integrity": "sha512-eowF8PrKHw9LpoZii5tdZwnBcYDxRw2rRCyvAXLi34iyeYfqCQNA9rmUM0ce62NlPhCvof1+9ivRaTY6pSKDaA==", + "node_modules/lightningcss-linux-x64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz", + "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==", "cpu": [ "x64" ], @@ -4773,10 +4817,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-win32-arm64-msvc": { - "version": "1.31.1", - "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.31.1.tgz", - "integrity": "sha512-aJReEbSEQzx1uBlQizAOBSjcmr9dCdL3XuC/6HLXAxmtErsj2ICo5yYggg1qOODQMtnjNQv2UHb9NpOuFtYe4w==", + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz", + "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==", "cpu": [ "arm64" ], @@ -4793,10 +4837,10 @@ "url": "https://opencollective.com/parcel" } }, - "node_modules/lightningcss/node_modules/lightningcss-win32-x64-msvc": { - "version": "1.31.1", - "resolved": 
"https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.31.1.tgz", - "integrity": "sha512-I9aiFrbd7oYHwlnQDqr1Roz+fTz61oDDJX7n9tYF9FJymH1cIN1DtKw3iYt6b8WZgEjoNwVSncwF4wx/ZedMhw==", + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz", + "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==", "cpu": [ "x64" ], @@ -4953,6 +4997,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": "^20.0.0 || >=22.0.0" } @@ -4965,9 +5010,9 @@ "license": "MIT" }, "node_modules/node-releases": { - "version": "2.0.36", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", - "integrity": "sha512-TdC8FSgHz8Mwtw9g5L4gR/Sh9XhSP/0DEkQxfEFXOpiul5IiHgHan2VhYYb6agDSfp4KuvltmGApc8HMgUrIkA==", + "version": "2.0.38", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz", + "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==", "dev": true, "license": "MIT" }, @@ -5076,10 +5121,11 @@ "license": "ISC" }, "node_modules/picomatch": { - "version": "4.0.3", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", - "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", + "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5088,9 +5134,9 @@ } }, "node_modules/postcss": { - "version": "8.5.8", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz", - "integrity": 
"sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==", + "version": "8.5.10", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz", + "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==", "funding": [ { "type": "opencollective", @@ -5147,10 +5193,11 @@ } }, "node_modules/react": { - "version": "19.2.4", - "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", - "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", + "version": "19.2.5", + "resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz", + "integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -5166,15 +5213,16 @@ } }, "node_modules/react-dom": { - "version": "19.2.4", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", - "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", + "version": "19.2.5", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.5.tgz", + "integrity": "sha512-J5bAZz+DXMMwW/wV3xzKke59Af6CHY7G4uYLN1OvBcKEsWOs4pQExj86BBKamxl/Ik5bx9whOrvBlSDfWzgSag==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, "peerDependencies": { - "react": "^19.2.4" + "react": "^19.2.5" } }, "node_modules/react-dropzone": { @@ -5211,9 +5259,9 @@ } }, "node_modules/react-router": { - "version": "7.14.1", - "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.1.tgz", - "integrity": "sha512-5BCvFskyAAVumqhEKh/iPhLOIkfxcEUz8WqFIARCkMg8hZZzDYX9CtwxXA0e+qT8zAxmMC0x3Ckb9iMONwc5jg==", + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.2.tgz", + "integrity": 
"sha512-yCqNne6I8IB6rVCH7XUvlBK7/QKyqypBFGv+8dj4QBFJiiRX+FG7/nkdAvGElyvVZ/HQP5N19wzteuTARXi5Gw==", "license": "MIT", "dependencies": { "cookie": "^1.0.1", @@ -5233,12 +5281,12 @@ } }, "node_modules/react-router-dom": { - "version": "7.14.1", - "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.1.tgz", - "integrity": "sha512-ZkrQuwwhGibjQLqH1eCdyiZyLWglPxzxdl5tgwgKEyCSGC76vmAjleGocRe3J/MLfzMUIKwaFJWpFVJhK3d2xA==", + "version": "7.14.2", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.2.tgz", + "integrity": "sha512-YZcM5ES8jJSM+KrJ9BdvHHqlnGTg5tH3sC5ChFRj4inosKctdyzBDhOyyHdGk597q2OT6NTrCA1OvB/YDwfekQ==", "license": "MIT", "dependencies": { - "react-router": "7.14.1" + "react-router": "7.14.2" }, "engines": { "node": ">=20.0.0" @@ -5280,9 +5328,9 @@ "license": "Unlicense" }, "node_modules/rollup": { - "version": "4.59.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.59.0.tgz", - "integrity": "sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==", + "version": "4.60.2", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.2.tgz", + "integrity": "sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ==", "license": "MIT", "dependencies": { "@types/estree": "1.0.8" @@ -5295,31 +5343,31 @@ "npm": ">=8.0.0" }, "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.59.0", - "@rollup/rollup-android-arm64": "4.59.0", - "@rollup/rollup-darwin-arm64": "4.59.0", - "@rollup/rollup-darwin-x64": "4.59.0", - "@rollup/rollup-freebsd-arm64": "4.59.0", - "@rollup/rollup-freebsd-x64": "4.59.0", - "@rollup/rollup-linux-arm-gnueabihf": "4.59.0", - "@rollup/rollup-linux-arm-musleabihf": "4.59.0", - "@rollup/rollup-linux-arm64-gnu": "4.59.0", - "@rollup/rollup-linux-arm64-musl": "4.59.0", - "@rollup/rollup-linux-loong64-gnu": "4.59.0", - "@rollup/rollup-linux-loong64-musl": "4.59.0", - 
"@rollup/rollup-linux-ppc64-gnu": "4.59.0", - "@rollup/rollup-linux-ppc64-musl": "4.59.0", - "@rollup/rollup-linux-riscv64-gnu": "4.59.0", - "@rollup/rollup-linux-riscv64-musl": "4.59.0", - "@rollup/rollup-linux-s390x-gnu": "4.59.0", - "@rollup/rollup-linux-x64-gnu": "4.59.0", - "@rollup/rollup-linux-x64-musl": "4.59.0", - "@rollup/rollup-openbsd-x64": "4.59.0", - "@rollup/rollup-openharmony-arm64": "4.59.0", - "@rollup/rollup-win32-arm64-msvc": "4.59.0", - "@rollup/rollup-win32-ia32-msvc": "4.59.0", - "@rollup/rollup-win32-x64-gnu": "4.59.0", - "@rollup/rollup-win32-x64-msvc": "4.59.0", + "@rollup/rollup-android-arm-eabi": "4.60.2", + "@rollup/rollup-android-arm64": "4.60.2", + "@rollup/rollup-darwin-arm64": "4.60.2", + "@rollup/rollup-darwin-x64": "4.60.2", + "@rollup/rollup-freebsd-arm64": "4.60.2", + "@rollup/rollup-freebsd-x64": "4.60.2", + "@rollup/rollup-linux-arm-gnueabihf": "4.60.2", + "@rollup/rollup-linux-arm-musleabihf": "4.60.2", + "@rollup/rollup-linux-arm64-gnu": "4.60.2", + "@rollup/rollup-linux-arm64-musl": "4.60.2", + "@rollup/rollup-linux-loong64-gnu": "4.60.2", + "@rollup/rollup-linux-loong64-musl": "4.60.2", + "@rollup/rollup-linux-ppc64-gnu": "4.60.2", + "@rollup/rollup-linux-ppc64-musl": "4.60.2", + "@rollup/rollup-linux-riscv64-gnu": "4.60.2", + "@rollup/rollup-linux-riscv64-musl": "4.60.2", + "@rollup/rollup-linux-s390x-gnu": "4.60.2", + "@rollup/rollup-linux-x64-gnu": "4.60.2", + "@rollup/rollup-linux-x64-musl": "4.60.2", + "@rollup/rollup-openbsd-x64": "4.60.2", + "@rollup/rollup-openharmony-arm64": "4.60.2", + "@rollup/rollup-win32-arm64-msvc": "4.60.2", + "@rollup/rollup-win32-ia32-msvc": "4.60.2", + "@rollup/rollup-win32-x64-gnu": "4.60.2", + "@rollup/rollup-win32-x64-msvc": "4.60.2", "fsevents": "~2.3.2" } }, @@ -5349,6 +5397,15 @@ "postcss": "^8.3.11" } }, + "node_modules/sanitize-html/node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + 
"integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/scheduler": { "version": "0.27.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", @@ -5395,18 +5452,6 @@ "node": ">=0.10.0" } }, - "node_modules/set-value/node_modules/is-plain-object": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-2.0.4.tgz", - "integrity": "sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==", - "license": "MIT", - "dependencies": { - "isobject": "^3.0.1" - }, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -5510,15 +5555,15 @@ } }, "node_modules/tailwindcss": { - "version": "4.2.1", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.1.tgz", - "integrity": "sha512-/tBrSQ36vCleJkAOsy9kbNTgaxvGbyOamC30PRePTQe/o1MFwEKHQk4Cn7BNGaPtjp+PuUrByJehM1hgxfq4sw==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.2.4.tgz", + "integrity": "sha512-HhKppgO81FQof5m6TEnuBWCZGgfRAWbaeOaGT00KOy/Pf/j6oUihdvBpA7ltCeAvZpFhW3j0PTclkxsd4IXYDA==", "license": "MIT" }, "node_modules/tapable": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.0.tgz", - "integrity": "sha512-g9ljZiwki/LfxmQADO3dEY1CbpmXT5Hm2fJ+QaGKwSXUylMybePR7/67YW7jOrrvjEgL1Fmz5kzyAjWVWLlucg==", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.3.tgz", + "integrity": "sha512-uxc/zpqFg6x7C8vOE7lh6Lbda8eEL9zmVm/PLeTPBRhh1xCgdWaQ+J1CUieGpIfm2HdtsUpRv+HshiasBMcc6A==", "license": "MIT", "engines": { "node": ">=6" @@ -5532,16 +5577,17 @@ "version": "0.180.0", "resolved": "https://registry.npmjs.org/three/-/three-0.180.0.tgz", 
"integrity": "sha512-o+qycAMZrh+TsE01GqWUxUIKR1AL0S8pq7zDkYOQw8GqfX8b8VoCKYUoHbhiX5j+7hr8XsuHDVU6+gkQJQKg9w==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/tinyglobby": { - "version": "0.2.15", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", - "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", + "version": "0.2.16", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz", + "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==", "license": "MIT", "dependencies": { "fdir": "^6.5.0", - "picomatch": "^4.0.3" + "picomatch": "^4.0.4" }, "engines": { "node": ">=12.0.0" @@ -5551,9 +5597,9 @@ } }, "node_modules/ts-api-utils": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", - "integrity": "sha512-3TaVTaAv2gTiMB35i3FiGJaRfwb3Pyn/j3m/bfAvGe8FB7CF6u+LMYqYlDh7reQf7UNvoTvdfAqHGmPGOSsPmA==", + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz", + "integrity": "sha512-OJ/ibxhPlqrMM0UiNHJ/0CKQkoKF243/AEmplt3qpRgkW8VG7IfOS41h7V8TjITqdByHzrjcS/2si+y4lIh8NA==", "dev": true, "license": "MIT", "engines": { @@ -5597,6 +5643,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -5606,16 +5653,16 @@ } }, "node_modules/typescript-eslint": { - "version": "8.57.0", - "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.57.0.tgz", - "integrity": "sha512-W8GcigEMEeB07xEZol8oJ26rigm3+bfPHxHvwbYUlu1fUDsGuQ7Hiskx5xGW/xM4USc9Ephe3jtv7ZYPQntHeA==", + "version": "8.59.0", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-8.59.0.tgz", + "integrity": 
"sha512-BU3ONW9X+v90EcCH9ZS6LMackcVtxRLlI3XrYyqZIwVSHIk7Qf7bFw1z0M9Q0IUxhTMZCf8piY9hTYaNEIASrw==", "dev": true, "license": "MIT", "dependencies": { - "@typescript-eslint/eslint-plugin": "8.57.0", - "@typescript-eslint/parser": "8.57.0", - "@typescript-eslint/typescript-estree": "8.57.0", - "@typescript-eslint/utils": "8.57.0" + "@typescript-eslint/eslint-plugin": "8.59.0", + "@typescript-eslint/parser": "8.59.0", + "@typescript-eslint/typescript-estree": "8.59.0", + "@typescript-eslint/utils": "8.59.0" }, "engines": { "node": "^18.18.0 || ^20.9.0 || >=21.1.0" @@ -5626,7 +5673,7 @@ }, "peerDependencies": { "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0", - "typescript": ">=4.8.4 <6.0.0" + "typescript": ">=4.8.4 <6.1.0" } }, "node_modules/undici-types": { @@ -5682,6 +5729,7 @@ "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", "license": "MIT", + "peer": true, "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } @@ -5693,10 +5741,11 @@ "license": "MIT" }, "node_modules/vite": { - "version": "7.3.1", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", - "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", + "version": "7.3.2", + "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.2.tgz", + "integrity": "sha512-Bby3NOsna2jsjfLVOHKes8sGwgl4TT0E6vvpYgnAYDIF/tie7MRaFthmKuHx1NSXjiTueXH3do80FMQgvEktRg==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -5818,6 +5867,7 @@ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -5836,19 +5886,31 @@ } }, "node_modules/zustand": { - "version": "3.7.2", - 
"resolved": "https://registry.npmjs.org/zustand/-/zustand-3.7.2.tgz", - "integrity": "sha512-PIJDIZKtokhof+9+60cpockVOq05sJzHCriyvaLBmEJixseQ1a5Kdov6fWZfWOu5SK9c+FhH1jU0tntLxRJYMA==", + "version": "5.0.12", + "resolved": "https://registry.npmjs.org/zustand/-/zustand-5.0.12.tgz", + "integrity": "sha512-i77ae3aZq4dhMlRhJVCYgMLKuSiZAaUPAct2AksxQ+gOtimhGMdXljRT21P5BNpeT4kXlLIckvkPM029OljD7g==", "license": "MIT", "engines": { - "node": ">=12.7.0" + "node": ">=12.20.0" }, "peerDependencies": { - "react": ">=16.8" + "@types/react": ">=18.0.0", + "immer": ">=9.0.6", + "react": ">=18.0.0", + "use-sync-external-store": ">=1.2.0" }, "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "immer": { + "optional": true + }, "react": { "optional": true + }, + "use-sync-external-store": { + "optional": true } } } diff --git a/web/package.json b/web/package.json index 8882c5c1c..8dfac7866 100644 --- a/web/package.json +++ b/web/package.json @@ -13,10 +13,15 @@ "preview": "vite preview" }, "dependencies": { - "@nous-research/ui": "^0.3.0", + "@nous-research/ui": "^0.4.0", "@observablehq/plot": "^0.6.17", "@react-three/fiber": "^9.6.0", "@tailwindcss/vite": "^4.2.1", + "@xterm/addon-fit": "^0.11.0", + "@xterm/addon-unicode11": "^0.9.0", + "@xterm/addon-web-links": "^0.12.0", + "@xterm/addon-webgl": "^0.19.0", + "@xterm/xterm": "^6.0.0", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "gsap": "^3.15.0", diff --git a/web/public/fonts-terminal/JetBrainsMono-Bold.woff2 b/web/public/fonts-terminal/JetBrainsMono-Bold.woff2 new file mode 100644 index 000000000..81c5a219d Binary files /dev/null and b/web/public/fonts-terminal/JetBrainsMono-Bold.woff2 differ diff --git a/web/public/fonts-terminal/JetBrainsMono-Italic.woff2 b/web/public/fonts-terminal/JetBrainsMono-Italic.woff2 new file mode 100644 index 000000000..4103d3910 Binary files /dev/null and b/web/public/fonts-terminal/JetBrainsMono-Italic.woff2 differ diff --git 
a/web/public/fonts-terminal/JetBrainsMono-Regular.woff2 b/web/public/fonts-terminal/JetBrainsMono-Regular.woff2 new file mode 100644 index 000000000..66c54672c Binary files /dev/null and b/web/public/fonts-terminal/JetBrainsMono-Regular.woff2 differ diff --git a/web/src/App.tsx b/web/src/App.tsx index 9c6e3c337..f4285a21b 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -1,50 +1,86 @@ -import { useMemo } from "react"; -import { Routes, Route, NavLink, Navigate } from "react-router-dom"; +import { + useCallback, + useEffect, + useMemo, + useState, + type ComponentType, + type ReactNode, +} from "react"; +import { + Routes, + Route, + NavLink, + Navigate, + useLocation, + useNavigate, +} from "react-router-dom"; import { Activity, BarChart3, + BookOpen, Clock, + Code, + Database, + Download, + Eye, FileText, + Globe, + Heart, KeyRound, + Loader2, + Menu, MessageSquare, Package, - Settings, Puzzle, - Sparkles, - Terminal, - Globe, - Database, + RotateCw, + Settings, Shield, - Wrench, - Zap, - Heart, + Sparkles, Star, - Code, - Eye, + Terminal, + Wrench, + X, + Zap, } from "lucide-react"; -import { Cell, Grid, SelectionSwitcher, Typography } from "@nous-research/ui"; +import { SelectionSwitcher, Typography } from "@nous-research/ui"; import { cn } from "@/lib/utils"; import { Backdrop } from "@/components/Backdrop"; -import StatusPage from "@/pages/StatusPage"; +import { SidebarFooter } from "@/components/SidebarFooter"; +import { SidebarStatusStrip } from "@/components/SidebarStatusStrip"; +import { PageHeaderProvider } from "@/contexts/PageHeaderProvider"; +import { useSystemActions } from "@/contexts/useSystemActions"; +import type { SystemAction } from "@/contexts/system-actions-context"; import ConfigPage from "@/pages/ConfigPage"; +import DocsPage from "@/pages/DocsPage"; import EnvPage from "@/pages/EnvPage"; import SessionsPage from "@/pages/SessionsPage"; import LogsPage from "@/pages/LogsPage"; import AnalyticsPage from "@/pages/AnalyticsPage"; import 
CronPage from "@/pages/CronPage"; import SkillsPage from "@/pages/SkillsPage"; +import ChatPage from "@/pages/ChatPage"; import { LanguageSwitcher } from "@/components/LanguageSwitcher"; import { ThemeSwitcher } from "@/components/ThemeSwitcher"; import { useI18n } from "@/i18n"; -import { PluginSlot, usePlugins } from "@/plugins"; -import type { RegisteredPlugin } from "@/plugins"; +import { PluginPage, PluginSlot, usePlugins } from "@/plugins"; +import type { PluginManifest } from "@/plugins"; import { useTheme } from "@/themes"; +import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags"; -/** Built-in route → default page component. Used both for standard routing - * and for resolving plugin `tab.override` values. Keys must match the - * `path` in `BUILTIN_NAV` so `/path` lookups stay consistent. */ -const BUILTIN_ROUTES: Record = { - "/": StatusPage, +function RootRedirect() { + return ; +} + +const CHAT_NAV_ITEM: NavItem = { + path: "/chat", + labelKey: "chat", + label: "Chat", + icon: Terminal, +}; + +/** Built-in routes except /chat (only with `hermes dashboard --tui`). 
*/ +const BUILTIN_ROUTES_CORE: Record = { + "/": RootRedirect, "/sessions": SessionsPage, "/analytics": AnalyticsPage, "/logs": LogsPage, @@ -52,10 +88,10 @@ const BUILTIN_ROUTES: Record = { "/skills": SkillsPage, "/config": ConfigPage, "/env": EnvPage, + "/docs": DocsPage, }; -const BUILTIN_NAV: NavItem[] = [ - { path: "/", labelKey: "status", label: "Status", icon: Activity }, +const BUILTIN_NAV_REST: NavItem[] = [ { path: "/sessions", labelKey: "sessions", @@ -73,11 +109,15 @@ const BUILTIN_NAV: NavItem[] = [ { path: "/skills", labelKey: "skills", label: "Skills", icon: Package }, { path: "/config", labelKey: "config", label: "Config", icon: Settings }, { path: "/env", labelKey: "keys", label: "Keys", icon: KeyRound }, + { + path: "/docs", + labelKey: "documentation", + label: "Documentation", + icon: BookOpen, + }, ]; -// Plugins can reference any of these by name in their manifest — keeps bundle -// size sane vs. importing the full lucide-react set. -const ICON_MAP: Record> = { +const ICON_MAP: Record> = { Activity, BarChart3, Clock, @@ -100,24 +140,15 @@ const ICON_MAP: Record> = { Eye, }; -function resolveIcon( - name: string, -): React.ComponentType<{ className?: string }> { +function resolveIcon(name: string): ComponentType<{ className?: string }> { return ICON_MAP[name] ?? Puzzle; } -function buildNavItems( - builtIn: NavItem[], - plugins: RegisteredPlugin[], -): NavItem[] { +function buildNavItems(builtIn: NavItem[], manifests: PluginManifest[]): NavItem[] { const items = [...builtIn]; - for (const { manifest } of plugins) { - // Plugins that replace a built-in route don't add a new tab entry — - // they reuse the existing tab. The nav just lights up the original - // built-in entry when the user visits `/`. + for (const manifest of manifests) { if (manifest.tab.override) continue; - // Hidden plugins register their component + slots but skip the nav. 
if (manifest.tab.hidden) continue; const pluginItem: NavItem = { @@ -145,54 +176,61 @@ function buildNavItems( return items; } -/** Build the final route table, letting plugins override built-in pages. - * - * Returns (path, Component, key) tuples. Plugins with `tab.override` - * win over both built-ins and other plugins (last registration wins if - * two plugins claim the same override, but we warn in dev). Plugins with - * a regular `tab.path` register alongside built-ins as standalone - * routes. */ function buildRoutes( - plugins: RegisteredPlugin[], -): Array<{ key: string; path: string; Component: React.ComponentType }> { - const overrides = new Map(); - const addons: RegisteredPlugin[] = []; + builtinRoutes: Record, + manifests: PluginManifest[], +): Array<{ + key: string; + path: string; + element: ReactNode; +}> { + const byOverride = new Map(); + const addons: PluginManifest[] = []; - for (const p of plugins) { - if (p.manifest.tab.override) { - overrides.set(p.manifest.tab.override, p); + for (const m of manifests) { + if (m.tab.override) { + byOverride.set(m.tab.override, m); } else { - addons.push(p); + addons.push(m); } } const routes: Array<{ key: string; path: string; - Component: React.ComponentType; + element: ReactNode; }> = []; - for (const [path, Component] of Object.entries(BUILTIN_ROUTES)) { - const override = overrides.get(path); - if (override) { + for (const [path, Component] of Object.entries(builtinRoutes)) { + const om = byOverride.get(path); + if (om) { routes.push({ - key: `override:${override.manifest.name}`, + key: `override:${om.name}`, path, - Component: override.component, + element: , }); } else { - routes.push({ key: `builtin:${path}`, path, Component }); + routes.push({ key: `builtin:${path}`, path, element: }); } } - for (const addon of addons) { - // Don't double-register a plugin that shadows a built-in path via - // `tab.path` — `override` is the supported mechanism for that. 
- if (BUILTIN_ROUTES[addon.manifest.tab.path]) continue; + for (const m of addons) { + if (m.tab.hidden) continue; + if (builtinRoutes[m.tab.path]) continue; routes.push({ - key: `plugin:${addon.manifest.name}`, - path: addon.manifest.tab.path, - Component: addon.component, + key: `plugin:${m.name}`, + path: m.tab.path, + element: , + }); + } + + for (const m of manifests) { + if (!m.tab.hidden) continue; + if (builtinRoutes[m.tab.path] || m.tab.override) continue; + routes.push({ + key: `plugin:hidden:${m.name}`, + path: m.tab.path, + element: , }); } @@ -201,154 +239,145 @@ function buildRoutes( export default function App() { const { t } = useI18n(); - const { plugins } = usePlugins(); + const { pathname } = useLocation(); + const { manifests } = usePlugins(); const { theme } = useTheme(); + const [mobileOpen, setMobileOpen] = useState(false); + const closeMobile = useCallback(() => setMobileOpen(false), []); + const isDocsRoute = pathname === "/docs" || pathname === "/docs/"; + const normalizedPath = pathname.replace(/\/$/, "") || "/"; + const isChatRoute = normalizedPath === "/chat"; + const embeddedChat = isDashboardEmbeddedChatEnabled(); + + const builtinRoutes = useMemo( + () => ({ + ...BUILTIN_ROUTES_CORE, + ...(embeddedChat ? { "/chat": ChatPage } : {}), + }), + [embeddedChat], + ); + + const builtinNav = useMemo( + () => + embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST, + [embeddedChat], + ); const navItems = useMemo( - () => buildNavItems(BUILTIN_NAV, plugins), - [plugins], + () => buildNavItems(builtinNav, manifests), + [builtinNav, manifests], + ); + const routes = useMemo( + () => buildRoutes(builtinRoutes, manifests), + [builtinRoutes, manifests], + ); + const pluginTabMeta = useMemo( + () => + manifests + .filter((m) => !m.tab.hidden) + .map((m) => ({ + path: m.tab.override ?? 
m.tab.path, + label: m.label, + })), + [manifests], ); - const routes = useMemo(() => buildRoutes(plugins), [plugins]); const layoutVariant = theme.layoutVariant ?? "standard"; - const showSidebar = layoutVariant === "cockpit"; - // Tiled layout drops the 1600px clamp so pages can use the full viewport; - // standard + cockpit keep the centered reading width. - const mainMaxWidth = layoutVariant === "tiled" ? "max-w-none" : "max-w-[1600px]"; + + useEffect(() => { + if (!mobileOpen) return; + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") setMobileOpen(false); + }; + document.addEventListener("keydown", onKey); + const prevOverflow = document.body.style.overflow; + document.body.style.overflow = "hidden"; + return () => { + document.removeEventListener("keydown", onKey); + document.body.style.overflow = prevOverflow; + }; + }, [mobileOpen]); + + useEffect(() => { + const mql = window.matchMedia("(min-width: 1024px)"); + const onChange = (e: MediaQueryListEvent) => { + if (e.matches) setMobileOpen(false); + }; + mql.addEventListener("change", onChange); + return () => mql.removeEventListener("change", onChange); + }, []); return (
- {/* Themes can style backdrop chrome via `componentStyles.backdrop.*` - CSS vars read by . Plugins can also inject full - components into the backdrop layer via the `backdrop` slot — - useful for scanlines, parallax stars, hero artwork, etc. */}
-
- -
- - - - Hermes -
- Agent -
-
+ - {navItems.map(({ path, label, labelKey, icon: Icon }) => ( - - - cn( - "group relative flex h-full w-full items-center gap-1.5", - "px-2.5 sm:px-4 py-2", - "font-mondwest text-[0.65rem] sm:text-[0.8rem] tracking-[0.12em]", - "whitespace-nowrap transition-colors cursor-pointer", - "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground", - isActive - ? "text-midground" - : "opacity-60 hover:opacity-100", - ) - } - style={{ - clipPath: "var(--component-tab-clip-path)", - }} - > - {({ isActive }) => ( - <> - - - {labelKey - ? ((t.app.nav as Record)[ - labelKey - ] ?? label) - : label} - - - - - {isActive && ( - - )} - - )} - - - ))} -
-
- - - - - - - - {t.app.webUi} - - - -
+ + {t.app.brand} +
- {/* Full-width banner slot under the nav, outside the main clamp — - useful for marquee/alert/status strips themes want to show - above page content. */} + {mobileOpen && ( + +
+ + + + + + + +
+
+ + + +
+
+ + + + + +
+ +
+ + {routes.map(({ key, path, element }) => ( + + ))} + } + /> + +
+ +
+
+ -
- - - - {t.app.footer.name} - - } - /> - - - - {t.app.footer.org} - - } - /> - - -
- - {/* Fixed-position overlay plugins (scanlines, vignettes, etc.) render - above everything else. Each plugin is responsible for its own - pointer-events and z-index. */} ); } +function SidebarSystemActions({ onNavigate }: { onNavigate: () => void }) { + const { t } = useI18n(); + const navigate = useNavigate(); + const { activeAction, isBusy, isRunning, pendingAction, runAction } = + useSystemActions(); + + const items: SystemActionItem[] = [ + { + action: "restart", + icon: RotateCw, + label: t.status.restartGateway, + runningLabel: t.status.restartingGateway, + spin: true, + }, + { + action: "update", + icon: Download, + label: t.status.updateHermes, + runningLabel: t.status.updatingHermes, + spin: false, + }, + ]; + + const handleClick = (action: SystemAction) => { + if (isBusy) return; + void runAction(action); + navigate("/sessions"); + onNavigate(); + }; + + return ( +
+ + {t.app.system} + + + + +
    + {items.map(({ action, icon: Icon, label, runningLabel, spin }) => { + const isPending = pendingAction === action; + const isActionRunning = + activeAction === action && isRunning && !isPending; + const busy = isPending || isActionRunning; + const displayLabel = isActionRunning ? runningLabel : label; + const disabled = isBusy && !busy; + + return ( +
  • + +
  • + ); + })} +
+
+ ); +} + interface NavItem { - icon: React.ComponentType<{ className?: string }>; + icon: ComponentType<{ className?: string }>; label: string; labelKey?: string; path: string; } + +interface SystemActionItem { + action: SystemAction; + icon: ComponentType<{ className?: string }>; + label: string; + runningLabel: string; + spin: boolean; +} diff --git a/web/src/components/ChatSidebar.tsx b/web/src/components/ChatSidebar.tsx new file mode 100644 index 000000000..6bfac9cfa --- /dev/null +++ b/web/src/components/ChatSidebar.tsx @@ -0,0 +1,379 @@ +/** + * ChatSidebar — structured-events panel that sits next to the xterm.js + * terminal in the dashboard Chat tab. + * + * Two WebSockets, one per concern: + * + * 1. **JSON-RPC sidecar** (`GatewayClient` → /api/ws) — drives the + * sidebar's own slot of the dashboard's in-process gateway. Owns + * the model badge / picker / connection state / error banner. + * Independent of the PTY pane's session by design — those are the + * pieces the sidebar needs to be able to drive directly (model + * switch via slash.exec, etc.). + * + * 2. **Event subscriber** (/api/events?channel=…) — passive, receives + * every dispatcher emit from the PTY-side `tui_gateway.entry` that + * the dashboard fanned out. This is how `tool.start/progress/ + * complete` from the agent loop reach the sidebar even though the + * PTY child runs three processes deep from us. The `channel` id + * ties this listener to the same chat tab's PTY child — see + * `ChatPage.tsx` for where the id is generated. + * + * Best-effort throughout: WS failures show in the badge / banner, the + * terminal pane keeps working unimpaired. 
+ */ + +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Card } from "@/components/ui/card"; + +import { ModelPickerDialog } from "@/components/ModelPickerDialog"; +import { ToolCall, type ToolEntry } from "@/components/ToolCall"; +import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient"; + +import { cn } from "@/lib/utils"; +import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react"; +import { useCallback, useEffect, useMemo, useState } from "react"; + +interface SessionInfo { + cwd?: string; + model?: string; + provider?: string; + credential_warning?: string; +} + +interface RpcEnvelope { + method?: string; + params?: { type?: string; payload?: unknown }; +} + +const TOOL_LIMIT = 20; + +const STATE_LABEL: Record = { + idle: "idle", + connecting: "connecting", + open: "live", + closed: "closed", + error: "error", +}; + +const STATE_TONE: Record = { + idle: "bg-muted text-muted-foreground", + connecting: "bg-primary/10 text-primary", + open: "bg-emerald-500/10 text-emerald-500 dark:text-emerald-400", + closed: "bg-muted text-muted-foreground", + error: "bg-destructive/10 text-destructive", +}; + +interface ChatSidebarProps { + channel: string; + className?: string; +} + +export function ChatSidebar({ channel, className }: ChatSidebarProps) { + // `version` bumps on reconnect; gw is derived so we never call setState + // for it inside an effect (React 19's set-state-in-effect rule). The + // counter is the dependency on purpose — it's not read in the memo body, + // it's the signal that says "rebuild the client". 
+ const [version, setVersion] = useState(0); + // eslint-disable-next-line react-hooks/exhaustive-deps + const gw = useMemo(() => new GatewayClient(), [version]); + + const [state, setState] = useState("idle"); + const [sessionId, setSessionId] = useState(null); + const [info, setInfo] = useState({}); + const [tools, setTools] = useState([]); + const [modelOpen, setModelOpen] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + let cancelled = false; + const offState = gw.onState(setState); + + const offSessionInfo = gw.on("session.info", (ev) => { + if (ev.session_id) { + setSessionId(ev.session_id); + } + + if (ev.payload) { + setInfo((prev) => ({ ...prev, ...ev.payload })); + } + }); + + const offError = gw.on<{ message?: string }>("error", (ev) => { + const message = ev.payload?.message; + + if (message) { + setError(message); + } + }); + + // Adopt whichever session the gateway hands us. session.create on the + // sidecar is independent of the PTY pane's session by design — we + // only need a sid to drive the model picker's slash.exec calls. + gw.connect() + .then(() => { + if (cancelled) { + return; + } + return gw.request<{ session_id: string }>("session.create", {}); + }) + .then((created) => { + if (cancelled || !created?.session_id) { + return; + } + setSessionId(created.session_id); + }) + .catch((e: Error) => { + if (!cancelled) { + setError(e.message); + } + }); + + return () => { + cancelled = true; + offState(); + offSessionInfo(); + offError(); + gw.close(); + }; + }, [gw]); + + // Event subscriber WebSocket — receives the rebroadcast of every + // dispatcher emit from the PTY child's gateway. See /api/pub + + // /api/events in hermes_cli/web_server.py for the broadcast hop. 
+ // + // Failures (auth/loopback rejection, server too old to expose the + // endpoint, transient drops) surface in the same banner as the + // JSON-RPC sidecar so the sidebar matches its documented best-effort + // UX and the user always has a reconnect affordance. + useEffect(() => { + const token = window.__HERMES_SESSION_TOKEN__; + + if (!token || !channel) { + return; + } + + const proto = window.location.protocol === "https:" ? "wss:" : "ws:"; + const qs = new URLSearchParams({ token, channel }); + const ws = new WebSocket( + `${proto}//${window.location.host}/api/events?${qs.toString()}`, + ); + + // `unmounting` suppresses the banner during cleanup — `ws.close()` + // from the effect's return fires a close event with code 1005 that + // would otherwise look like an unexpected drop. + const DISCONNECTED = "events feed disconnected — tool calls may not appear"; + let unmounting = false; + const surface = (msg: string) => !unmounting && setError(msg); + + ws.addEventListener("error", () => surface(DISCONNECTED)); + + ws.addEventListener("close", (ev) => { + if (ev.code === 4401 || ev.code === 4403) { + surface(`events feed rejected (${ev.code}) — reload the page`); + } else if (ev.code !== 1000) { + surface(DISCONNECTED); + } + }); + + ws.addEventListener("message", (ev) => { + let frame: RpcEnvelope; + + try { + frame = JSON.parse(ev.data); + } catch { + return; + } + + if (frame.method !== "event" || !frame.params) { + return; + } + + const { type, payload } = frame.params; + + if (type === "tool.start") { + const p = payload as + | { tool_id?: string; name?: string; context?: string } + | undefined; + const toolId = p?.tool_id; + + if (!toolId) { + return; + } + + setTools((prev) => + [ + ...prev, + { + kind: "tool" as const, + id: `tool-${toolId}-${prev.length}`, + tool_id: toolId, + name: p?.name ?? 
"tool", + context: p?.context, + status: "running" as const, + startedAt: Date.now(), + }, + ].slice(-TOOL_LIMIT), + ); + } else if (type === "tool.progress") { + const p = payload as + | { name?: string; preview?: string } + | undefined; + + if (!p?.name || !p.preview) { + return; + } + + setTools((prev) => + prev.map((t) => + t.status === "running" && t.name === p.name + ? { ...t, preview: p.preview } + : t, + ), + ); + } else if (type === "tool.complete") { + const p = payload as + | { + tool_id?: string; + summary?: string; + error?: string; + inline_diff?: string; + } + | undefined; + + if (!p?.tool_id) { + return; + } + + setTools((prev) => + prev.map((t) => + t.tool_id === p.tool_id + ? { + ...t, + status: p.error ? "error" : "done", + summary: p.summary, + error: p.error, + inline_diff: p.inline_diff, + completedAt: Date.now(), + } + : t, + ), + ); + } + }); + + return () => { + unmounting = true; + ws.close(); + }; + }, [channel, version]); + + const reconnect = useCallback(() => { + setError(null); + setTools([]); + setVersion((v) => v + 1); + }, []); + + // Picker hands us a fully-formed slash command (e.g. "/model anthropic/..."). + // Fire-and-forget through `slash.exec`; the TUI pane will render the result + // via PTY, so the sidebar doesn't need to surface output of its own. + const onModelSubmit = useCallback( + (slashCommand: string) => { + if (!sessionId) { + return; + } + + void gw.request("slash.exec", { + session_id: sessionId, + command: slashCommand, + }); + setModelOpen(false); + }, + [gw, sessionId], + ); + + const canPickModel = state === "open" && !!sessionId; + const modelLabel = (info.model ?? "—").split("/").slice(-1)[0] ?? "—"; + const banner = error ?? info.credential_warning ?? 
null; + + return ( + + ); +} diff --git a/web/src/components/DeleteConfirmDialog.tsx b/web/src/components/DeleteConfirmDialog.tsx new file mode 100644 index 000000000..9e2e82c68 --- /dev/null +++ b/web/src/components/DeleteConfirmDialog.tsx @@ -0,0 +1,40 @@ +import { ConfirmDialog } from "@/components/ui/confirm-dialog"; +import { useI18n } from "@/i18n"; + +export function DeleteConfirmDialog({ + cancelLabel, + confirmLabel, + description, + loading, + onCancel, + onConfirm, + open, + title, +}: DeleteConfirmDialogProps) { + const { t } = useI18n(); + + return ( + + ); +} + +interface DeleteConfirmDialogProps { + cancelLabel?: string; + confirmLabel?: string; + description?: string; + loading: boolean; + onCancel: () => void; + onConfirm: () => void; + open: boolean; + title: string; +} diff --git a/web/src/components/Markdown.tsx b/web/src/components/Markdown.tsx index b796ff0a7..bef0804e7 100644 --- a/web/src/components/Markdown.tsx +++ b/web/src/components/Markdown.tsx @@ -1,22 +1,50 @@ -import { useMemo } from "react"; +import { useMemo, type ReactNode } from "react"; /** * Lightweight markdown renderer for LLM output. * Handles: code blocks, inline code, bold, italic, headers, links, lists, horizontal rules. * NOT a full CommonMark parser — optimized for typical assistant message patterns. + * + * `streaming` renders a blinking caret at the tail of the last block so it + * appears to hug the final character instead of wrapping onto a new line + * after a block element (paragraph/list/code/…). */ -export function Markdown({ content, highlightTerms }: { content: string; highlightTerms?: string[] }) { +export function Markdown({ + content, + highlightTerms, + streaming, +}: { + content: string; + highlightTerms?: string[]; + streaming?: boolean; +}) { const blocks = useMemo(() => parseBlocks(content), [content]); + const caret = streaming ? : null; return (
{blocks.map((block, i) => ( - + ))} + {blocks.length === 0 && caret}
); } +function StreamingCaret() { + return ( + + ); +} + /* ------------------------------------------------------------------ */ /* Types */ /* ------------------------------------------------------------------ */ @@ -58,7 +86,11 @@ function parseBlocks(text: string): BlockNode[] { // Heading const headingMatch = line.match(/^(#{1,4})\s+(.+)/); if (headingMatch) { - blocks.push({ type: "heading", level: headingMatch[1].length, content: headingMatch[2] }); + blocks.push({ + type: "heading", + level: headingMatch[1].length, + content: headingMatch[2], + }); i++; continue; } @@ -124,12 +156,23 @@ function parseBlocks(text: string): BlockNode[] { /* Block renderer */ /* ------------------------------------------------------------------ */ -function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: string[] }) { +function Block({ + block, + highlightTerms, + caret, +}: { + block: BlockNode; + highlightTerms?: string[]; + caret?: ReactNode; +}) { switch (block.type) { case "code": return (
-          {block.content}
+          
+            {block.content}
+            {caret}
+          
         
); @@ -141,25 +184,46 @@ function Block({ block, highlightTerms }: { block: BlockNode; highlightTerms?: s h3: "text-sm font-semibold", h4: "text-sm font-medium", }; - return ; + return ( + + + {caret} + + ); } case "hr": - return
; + return ( + <> +
+ {caret} + + ); case "list": { const Tag = block.ordered ? "ol" : "ul"; + const last = block.items.length - 1; return ( - + {block.items.map((item, i) => ( -
  • +
  • + + {i === last ? caret : null} +
  • ))}
    ); } case "paragraph": - return

    ; + return ( +

    + + {caret} +

    + ); } } @@ -178,7 +242,8 @@ type InlineNode = function parseInline(text: string): InlineNode[] { const nodes: InlineNode[] = []; // Pattern priority: code > link > bold > italic > bare URL > line break - const pattern = /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g; + const pattern = + /(`[^`]+`)|(\[([^\]]+)\]\(([^)]+)\))|(\*\*([^*]+)\*\*)|(\*([^*]+)\*)|(\bhttps?:\/\/[^\s<>)\]]+)|(\n)/g; let lastIndex = 0; let match: RegExpExecArray | null; @@ -217,7 +282,13 @@ function parseInline(text: string): InlineNode[] { return nodes; } -function InlineContent({ text, highlightTerms }: { text: string; highlightTerms?: string[] }) { +function InlineContent({ + text, + highlightTerms, +}: { + text: string; + highlightTerms?: string[]; +}) { const nodes = useMemo(() => parseInline(text), [text]); return ( @@ -225,17 +296,34 @@ function InlineContent({ text, highlightTerms }: { text: string; highlightTerms? {nodes.map((node, i) => { switch (node.type) { case "text": - return ; + return ( + + ); case "code": return ( - + {node.content} ); case "bold": - return ; + return ( + + + + ); case "italic": - return ; + return ( + + + + ); case "link": return ( {parts.map((part, i) => regex.test(part) ? ( - {part} + + {part} + ) : ( {part} - ) + ), )} ); diff --git a/web/src/components/ModelPickerDialog.tsx b/web/src/components/ModelPickerDialog.tsx new file mode 100644 index 000000000..d30fb8dd6 --- /dev/null +++ b/web/src/components/ModelPickerDialog.tsx @@ -0,0 +1,392 @@ +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import type { GatewayClient } from "@/lib/gatewayClient"; +import { Check, Loader2, Search, X } from "lucide-react"; +import { useEffect, useMemo, useRef, useState } from "react"; + +/** + * Two-stage model picker modal. 
+ * + * Mirrors ui-tui/src/components/modelPicker.tsx: + * Stage 1: pick provider (authenticated providers only) + * Stage 2: pick model within that provider + * + * On confirm, emits `/model --provider [--global]` through + * the parent callback so ChatPage can dispatch it via the existing slash + * pipeline. That keeps persistence + actual switch logic in one place. + */ + +interface ModelOptionProvider { + name: string; + slug: string; + models?: string[]; + total_models?: number; + is_current?: boolean; + warning?: string; +} + +interface ModelOptionsResponse { + model?: string; + provider?: string; + providers?: ModelOptionProvider[]; +} + +interface Props { + gw: GatewayClient; + sessionId: string; + onClose(): void; + /** Parent runs the resulting slash command through slashExec. */ + onSubmit(slashCommand: string): void; +} + +export function ModelPickerDialog({ gw, sessionId, onClose, onSubmit }: Props) { + const [providers, setProviders] = useState([]); + const [currentModel, setCurrentModel] = useState(""); + const [currentProviderSlug, setCurrentProviderSlug] = useState(""); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [selectedSlug, setSelectedSlug] = useState(""); + const [selectedModel, setSelectedModel] = useState(""); + const [query, setQuery] = useState(""); + const [persistGlobal, setPersistGlobal] = useState(false); + const closedRef = useRef(false); + + // Load providers + models on open. + useEffect(() => { + closedRef.current = false; + + gw.request( + "model.options", + sessionId ? { session_id: sessionId } : {}, + ) + .then((r) => { + if (closedRef.current) return; + const next = r?.providers ?? []; + setProviders(next); + setCurrentModel(String(r?.model ?? "")); + setCurrentProviderSlug(String(r?.provider ?? "")); + setSelectedSlug( + (next.find((p) => p.is_current) ?? next[0])?.slug ?? 
"", + ); + setSelectedModel(""); + setLoading(false); + }) + .catch((e) => { + if (closedRef.current) return; + setError(e instanceof Error ? e.message : String(e)); + setLoading(false); + }); + + return () => { + closedRef.current = true; + }; + }, [gw, sessionId]); + + // Esc closes. + useEffect(() => { + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") { + e.preventDefault(); + onClose(); + } + }; + window.addEventListener("keydown", onKey); + return () => window.removeEventListener("keydown", onKey); + }, [onClose]); + + const selectedProvider = useMemo( + () => providers.find((p) => p.slug === selectedSlug) ?? null, + [providers, selectedSlug], + ); + + const models = useMemo( + () => selectedProvider?.models ?? [], + [selectedProvider], + ); + + const needle = query.trim().toLowerCase(); + + const filteredProviders = useMemo( + () => + !needle + ? providers + : providers.filter( + (p) => + p.name.toLowerCase().includes(needle) || + p.slug.toLowerCase().includes(needle) || + (p.models ?? []).some((m) => m.toLowerCase().includes(needle)), + ), + [providers, needle], + ); + + const filteredModels = useMemo( + () => + !needle ? models : models.filter((m) => m.toLowerCase().includes(needle)), + [models, needle], + ); + + const canConfirm = !!selectedProvider && !!selectedModel; + + const confirm = () => { + if (!canConfirm) return; + const global = persistGlobal ? " --global" : ""; + onSubmit( + `/model ${selectedModel} --provider ${selectedProvider.slug}${global}`, + ); + onClose(); + }; + + return ( +
    e.target === e.currentTarget && onClose()} + role="dialog" + aria-modal="true" + aria-labelledby="model-picker-title" + > +
    + + +
    +

    + Switch Model +

    +

    + current: {currentModel || "(unknown)"} + {currentProviderSlug && ` · ${currentProviderSlug}`} +

    +
    + +
    +
    + + setQuery(e.target.value)} + className="pl-7 h-8 text-sm" + /> +
    +
    + +
    + { + setSelectedSlug(slug); + setSelectedModel(""); + }} + /> + + { + setSelectedModel(m); + // Confirm on next tick so state settles. + window.setTimeout(confirm, 0); + }} + /> +
    + +
    + + +
    + + +
    +
    +
    +
    + ); +} + +/* ------------------------------------------------------------------ */ +/* Provider column */ +/* ------------------------------------------------------------------ */ + +function ProviderColumn({ + loading, + error, + providers, + total, + selectedSlug, + query, + onSelect, +}: { + loading: boolean; + error: string | null; + providers: ModelOptionProvider[]; + total: number; + selectedSlug: string; + query: string; + onSelect(slug: string): void; +}) { + return ( +
    + {loading && ( +
    + loading… +
    + )} + + {error &&
    {error}
    } + + {!loading && !error && providers.length === 0 && ( +
    + {query + ? "no matches" + : total === 0 + ? "no authenticated providers" + : "no matches"} +
    + )} + + {providers.map((p) => { + const active = p.slug === selectedSlug; + return ( + + ); + })} +
    + ); +} + +/* ------------------------------------------------------------------ */ +/* Model column */ +/* ------------------------------------------------------------------ */ + +function ModelColumn({ + provider, + models, + allModels, + selectedModel, + currentModel, + currentProviderSlug, + onSelect, + onConfirm, +}: { + provider: ModelOptionProvider | null; + models: string[]; + allModels: string[]; + selectedModel: string; + currentModel: string; + currentProviderSlug: string; + onSelect(model: string): void; + onConfirm(model: string): void; +}) { + if (!provider) { + return ( +
    +
    + pick a provider → +
    +
    + ); + } + + return ( +
    + {provider.warning && ( +
    + {provider.warning} +
    + )} + + {models.length === 0 ? ( +
    + {allModels.length + ? "no models match your filter" + : "no models listed for this provider"} +
    + ) : ( + models.map((m) => { + const active = m === selectedModel; + const isCurrent = + m === currentModel && provider.slug === currentProviderSlug; + + return ( + + ); + }) + )} +
    + ); +} + +function CurrentTag() { + return ( + + current + + ); +} diff --git a/web/src/components/PlatformsCard.tsx b/web/src/components/PlatformsCard.tsx new file mode 100644 index 000000000..c0412e400 --- /dev/null +++ b/web/src/components/PlatformsCard.tsx @@ -0,0 +1,97 @@ +import { AlertTriangle, Radio, Wifi, WifiOff } from "lucide-react"; +import type { PlatformStatus } from "@/lib/api"; +import { isoTimeAgo } from "@/lib/utils"; +import { Badge } from "@/components/ui/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { useI18n } from "@/i18n"; + +export function PlatformsCard({ platforms }: PlatformsCardProps) { + const { t } = useI18n(); + const platformStateBadge: Record< + string, + { variant: "success" | "warning" | "destructive"; label: string } + > = { + connected: { variant: "success", label: t.status.connected }, + disconnected: { variant: "warning", label: t.status.disconnected }, + fatal: { variant: "destructive", label: t.status.error }, + }; + + return ( + + +
    + + + {t.status.connectedPlatforms} + +
    +
    + + + {platforms.map(([name, info]) => { + const display = platformStateBadge[info.state] ?? { + variant: "outline" as const, + label: info.state, + }; + const IconComponent = + info.state === "connected" + ? Wifi + : info.state === "fatal" + ? AlertTriangle + : WifiOff; + + return ( +
    +
    + + +
    + + {name} + + + {info.error_message && ( + + {info.error_message} + + )} + + {info.updated_at && ( + + {t.status.lastUpdate}: {isoTimeAgo(info.updated_at)} + + )} +
    +
    + + + {display.variant === "success" && ( + + )} + {display.label} + +
    + ); + })} +
    +
    + ); +} + +interface PlatformsCardProps { + platforms: [string, PlatformStatus][]; +} diff --git a/web/src/components/SidebarFooter.tsx b/web/src/components/SidebarFooter.tsx new file mode 100644 index 000000000..e28623d72 --- /dev/null +++ b/web/src/components/SidebarFooter.tsx @@ -0,0 +1,40 @@ +import { Typography } from "@nous-research/ui"; +import { useSidebarStatus } from "@/hooks/useSidebarStatus"; +import { cn } from "@/lib/utils"; +import { useI18n } from "@/i18n"; + +export function SidebarFooter() { + const status = useSidebarStatus(); + const { t } = useI18n(); + + return ( +
    + ); +} diff --git a/web/src/components/SidebarStatusStrip.tsx b/web/src/components/SidebarStatusStrip.tsx new file mode 100644 index 000000000..b96603cec --- /dev/null +++ b/web/src/components/SidebarStatusStrip.tsx @@ -0,0 +1,70 @@ +import { Link } from "react-router-dom"; +import type { StatusResponse } from "@/lib/api"; +import { useSidebarStatus } from "@/hooks/useSidebarStatus"; +import { cn } from "@/lib/utils"; +import { useI18n } from "@/i18n"; + +/** Gateway + session summary for the System sidebar block (no separate strip chrome). */ +export function SidebarStatusStrip() { + const status = useSidebarStatus(); + const { t } = useI18n(); + + if (status === null) { + return ( +
    +
    +
    + ); + } + + const gw = gatewayLine(status, t); + const { activeSessionsLabel, gatewayStatusLabel } = t.app; + + return ( + +
    +

    + {gatewayStatusLabel}{" "} + {gw.label} +

    + +

    + {activeSessionsLabel}{" "} + + {status.active_sessions} + +

    +
    + + ); +} + +function gatewayLine( + status: StatusResponse, + t: ReturnType["t"], +): { label: string; tone: string } { + const g = t.app.gatewayStrip; + const byState: Record = { + running: { label: g.running, tone: "text-success" }, + starting: { label: g.starting, tone: "text-warning" }, + startup_failed: { label: g.failed, tone: "text-destructive" }, + stopped: { label: g.stopped, tone: "text-muted-foreground" }, + }; + if (status.gateway_state && byState[status.gateway_state]) { + return byState[status.gateway_state]; + } + return status.gateway_running + ? { label: g.running, tone: "text-success" } + : { label: g.off, tone: "text-muted-foreground" }; +} diff --git a/web/src/components/SlashPopover.tsx b/web/src/components/SlashPopover.tsx new file mode 100644 index 000000000..1c4b273b3 --- /dev/null +++ b/web/src/components/SlashPopover.tsx @@ -0,0 +1,174 @@ +import type { GatewayClient } from "@/lib/gatewayClient"; +import { ChevronRight } from "lucide-react"; +import { + forwardRef, + useCallback, + useEffect, + useImperativeHandle, + useRef, + useState, +} from "react"; + +/** + * Slash-command autocomplete popover, rendered above the composer in ChatPage. + * Mirrors the completion UX of the Ink TUI — type `/`, see matching commands, + * arrow keys or click to select, Tab to apply, Enter to submit. + * + * The parent owns all keyboard handling via `ref.handleKey`, which returns + * true when the popover consumed the event, so the composer's Enter/arrow + * logic stays in one place. + */ + +export interface CompletionItem { + display: string; + text: string; + meta?: string; +} + +export interface SlashPopoverHandle { + /** Returns true if the key was consumed by the popover. 
*/ + handleKey(e: React.KeyboardEvent): boolean; +} + +interface Props { + input: string; + gw: GatewayClient | null; + onApply(nextInput: string): void; +} + +interface CompletionResponse { + items?: CompletionItem[]; + replace_from?: number; +} + +const DEBOUNCE_MS = 60; + +export const SlashPopover = forwardRef( + function SlashPopover({ input, gw, onApply }, ref) { + const [items, setItems] = useState([]); + const [selected, setSelected] = useState(0); + const [replaceFrom, setReplaceFrom] = useState(1); + const lastInputRef = useRef(""); + + // Debounced completion fetch. We never clear `items` in the effect body + // (doing so would flag react-hooks/set-state-in-effect); instead the + // render guard below hides stale items once the input stops matching. + useEffect(() => { + const trimmed = input ?? ""; + + if (!gw || !trimmed.startsWith("/") || trimmed === lastInputRef.current) { + if (!trimmed.startsWith("/")) lastInputRef.current = ""; + return; + } + lastInputRef.current = trimmed; + + const timer = window.setTimeout(async () => { + if (lastInputRef.current !== trimmed) return; + try { + const r = await gw.request("complete.slash", { + text: trimmed, + }); + if (lastInputRef.current !== trimmed) return; + setItems(r?.items ?? []); + setReplaceFrom(r?.replace_from ?? 1); + setSelected(0); + } catch { + if (lastInputRef.current === trimmed) setItems([]); + } + }, DEBOUNCE_MS); + + return () => window.clearTimeout(timer); + }, [input, gw]); + + const apply = useCallback( + (item: CompletionItem) => { + onApply(input.slice(0, replaceFrom) + item.text); + }, + [input, replaceFrom, onApply], + ); + + // Only consume keys when the popover is actually visible. Stale items from + // a previous slash prefix are ignored once the user deletes the "/". 
+ const visible = items.length > 0 && input.startsWith("/"); + + useImperativeHandle( + ref, + () => ({ + handleKey: (e) => { + if (!visible) return false; + + switch (e.key) { + case "ArrowDown": + e.preventDefault(); + setSelected((s) => (s + 1) % items.length); + return true; + + case "ArrowUp": + e.preventDefault(); + setSelected((s) => (s - 1 + items.length) % items.length); + return true; + + case "Tab": { + e.preventDefault(); + const item = items[selected]; + if (item) apply(item); + return true; + } + + case "Escape": + e.preventDefault(); + setItems([]); + return true; + + default: + return false; + } + }, + }), + [visible, items, selected, apply], + ); + + if (!visible) return null; + + return ( +
    + {items.map((it, i) => { + const active = i === selected; + + return ( + + ); + })} +
    + ); + }, +); diff --git a/web/src/components/ThemeSwitcher.tsx b/web/src/components/ThemeSwitcher.tsx index b3475bf46..778afc21e 100644 --- a/web/src/components/ThemeSwitcher.tsx +++ b/web/src/components/ThemeSwitcher.tsx @@ -11,8 +11,12 @@ import { cn } from "@/lib/utils"; * glow) so users can preview the palette before committing. User-defined * themes from `~/.hermes/dashboard-themes/*.yaml` that aren't in * `BUILTIN_THEMES` render without swatches and apply the default palette. + * + * When placed at the bottom of a container (e.g. the sidebar rail), pass + * `dropUp` so the menu opens above the trigger instead of clipping below + * the viewport. */ -export function ThemeSwitcher() { +export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { const { themeName, availableThemes, setTheme } = useTheme(); const { t } = useI18n(); const [open, setOpen] = useState(false); @@ -73,7 +77,8 @@ export function ThemeSwitcher() { role="listbox" aria-label={t.theme?.title ?? "Theme"} className={cn( - "absolute right-0 top-full mt-1 z-50 min-w-[240px]", + "absolute z-50 min-w-[240px]", + dropUp ? "left-0 bottom-full mb-1" : "right-0 top-full mt-1", "border border-current/20 bg-background-base/95 backdrop-blur-sm", "shadow-[0_12px_32px_-8px_rgba(0,0,0,0.6)]", )} @@ -166,3 +171,7 @@ function PlaceholderSwatch() { /> ); } + +interface ThemeSwitcherProps { + dropUp?: boolean; +} diff --git a/web/src/components/ToolCall.tsx b/web/src/components/ToolCall.tsx new file mode 100644 index 000000000..8ac1ebce6 --- /dev/null +++ b/web/src/components/ToolCall.tsx @@ -0,0 +1,228 @@ +import { + AlertCircle, + Check, + ChevronDown, + ChevronRight, + Zap, +} from "lucide-react"; +import { useEffect, useState } from "react"; + +/** + * Expandable tool call row — the web equivalent of Ink's ToolTrail node. 
+ * + * Renders one `tool.start` + `tool.complete` pair (plus any `tool.progress` + * in between) as a single collapsible item in the transcript: + * + * ▸ ● read_file(path=/foo) 2.3s + * + * Click the header to reveal a preformatted body with context (args), the + * streaming preview (while running), and the final summary or error. Error + * rows auto-expand so failures aren't silently collapsed. + */ + +export interface ToolEntry { + kind: "tool"; + id: string; + tool_id: string; + name: string; + context?: string; + preview?: string; + summary?: string; + error?: string; + inline_diff?: string; + status: "running" | "done" | "error"; + startedAt: number; + completedAt?: number; +} + +const STATUS_TONE: Record = { + running: "border-primary/40 bg-primary/[0.04]", + done: "border-border bg-muted/20", + error: "border-destructive/50 bg-destructive/[0.04]", +}; + +const BULLET_TONE: Record = { + running: "text-primary", + done: "text-primary/80", + error: "text-destructive", +}; + +const TICK_MS = 500; + +export function ToolCall({ tool }: { tool: ToolEntry }) { + // `open` is derived: errors default-expanded, everything else collapsed. + // `null` means "follow the default"; any explicit bool is the user's override. + // This lets a running tool flip to expanded automatically when it errors, + // without mirroring state in an effect. + const [userOverride, setUserOverride] = useState(null); + const open = userOverride ?? tool.status === "error"; + + // Tick `now` while the tool is running so the elapsed label updates live. + const [now, setNow] = useState(() => Date.now()); + useEffect(() => { + if (tool.status !== "running") return; + const id = window.setInterval(() => setNow(() => Date.now()), TICK_MS); + return () => window.clearInterval(id); + }, [tool.status]); + + // Historical tools (hydrated from session.resume) signal missing timestamps + // with `startedAt === 0`; we hide the elapsed badge for those rather than + // rendering a misleading "0ms". 
+ const hasTimestamps = tool.startedAt > 0; + const elapsed = hasTimestamps + ? fmtElapsed((tool.completedAt ?? now) - tool.startedAt) + : null; + + const hasBody = !!( + tool.context || + tool.preview || + tool.summary || + tool.error || + tool.inline_diff + ); + + const Chevron = open ? ChevronDown : ChevronRight; + + return ( +
    + + + {open && hasBody && ( +
    + {tool.context &&
    {tool.context}
    } + + {tool.preview && tool.status === "running" && ( +
    + {tool.preview} + +
    + )} + + {tool.inline_diff && ( +
    +
    +                {colorizeDiff(tool.inline_diff)}
    +              
    +
    + )} + + {tool.summary && ( +
    + + {tool.summary} + +
    + )} + + {tool.error && ( +
    + + {tool.error} + +
    + )} +
    + )} +
    + ); +} + +function Section({ + label, + children, + tone, +}: { + label: string; + children: React.ReactNode; + tone?: "error"; +}) { + return ( +
    + + {label} + + +
    {children}
    +
    + ); +} + +function fmtElapsed(ms: number): string { + const sec = Math.max(0, ms) / 1000; + if (sec < 1) return `${Math.round(ms)}ms`; + if (sec < 10) return `${sec.toFixed(1)}s`; + if (sec < 60) return `${Math.round(sec)}s`; + + const m = Math.floor(sec / 60); + const s = Math.round(sec % 60); + return s ? `${m}m ${s}s` : `${m}m`; +} + +/** Colorize unified-diff lines for the inline diff section. */ +function colorizeDiff(diff: string): React.ReactNode { + return diff.split("\n").map((line, i) => ( +
    + {line || "\u00A0"} +
    + )); +} + +function diffLineClass(line: string): string { + if (line.startsWith("+") && !line.startsWith("+++")) + return "text-emerald-500 dark:text-emerald-400"; + if (line.startsWith("-") && !line.startsWith("---")) + return "text-destructive"; + if (line.startsWith("@@")) return "text-primary"; + return "text-muted-foreground/80"; +} diff --git a/web/src/components/ui/button.tsx b/web/src/components/ui/button.tsx index f8e10a6cf..8f2f27206 100644 --- a/web/src/components/ui/button.tsx +++ b/web/src/components/ui/button.tsx @@ -1,7 +1,7 @@ import { cva, type VariantProps } from "class-variance-authority"; import { cn } from "@/lib/utils"; -const buttonVariants = cva( +export const buttonVariants = cva( "inline-flex items-center justify-center gap-2 whitespace-nowrap font-mondwest text-xs tracking-[0.1em] uppercase transition-colors cursor-pointer" + " disabled:pointer-events-none disabled:opacity-50", { diff --git a/web/src/components/ui/confirm-dialog.tsx b/web/src/components/ui/confirm-dialog.tsx new file mode 100644 index 000000000..48e58264f --- /dev/null +++ b/web/src/components/ui/confirm-dialog.tsx @@ -0,0 +1,138 @@ +import { useEffect, useRef } from "react"; +import { createPortal } from "react-dom"; +import { AlertTriangle } from "lucide-react"; +import { cn } from "@/lib/utils"; +import { Button } from "@/components/ui/button"; + +export function ConfirmDialog({ + cancelLabel = "Cancel", + confirmLabel = "Confirm", + description, + destructive = false, + loading = false, + onCancel, + onConfirm, + open, + title, +}: ConfirmDialogProps) { + const dialogRef = useRef(null); + + // Focus the confirm button when opened; trap ESC to cancel. 
+ useEffect(() => { + if (!open) return; + + const prevActive = document.activeElement as HTMLElement | null; + dialogRef.current + ?.querySelector("[data-confirm]") + ?.focus(); + + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") { + e.preventDefault(); + onCancel(); + } + }; + + document.addEventListener("keydown", onKey); + const prevOverflow = document.body.style.overflow; + document.body.style.overflow = "hidden"; + + return () => { + document.removeEventListener("keydown", onKey); + document.body.style.overflow = prevOverflow; + prevActive?.focus?.(); + }; + }, [open, onCancel]); + + if (!open) return null; + + return createPortal( +
    { + if (e.target === e.currentTarget) onCancel(); + }} + className={cn( + "fixed inset-0 z-50 flex items-center justify-center", + "bg-black/60 backdrop-blur-sm", + "animate-[fade-in_150ms_ease-out]", + )} + > +
    +
    + {destructive && ( +
    + +
    + )} + +
    +

    + {title} +

    + + {description && ( +

    + {description} +

    + )} +
    +
    + +
    + + +
    +
    +
    , + document.body, + ); +} + +interface ConfirmDialogProps { + cancelLabel?: string; + confirmLabel?: string; + description?: string; + destructive?: boolean; + loading?: boolean; + onCancel: () => void; + onConfirm: () => void; + open: boolean; + title: string; +} diff --git a/web/src/components/ui/segmented.tsx b/web/src/components/ui/segmented.tsx new file mode 100644 index 000000000..eb4346e9e --- /dev/null +++ b/web/src/components/ui/segmented.tsx @@ -0,0 +1,80 @@ +import { cn } from "@/lib/utils"; + +export function Segmented({ + className, + onChange, + options, + size = "sm", + value, +}: SegmentedProps) { + return ( +
    + {options.map((opt) => { + const active = opt.value === value; + + return ( + + ); + })} +
    + ); +} + +export function FilterGroup({ + children, + className, + label, +}: FilterGroupProps) { + return ( +
    + + {label} + + {children} +
    + ); +} + +interface FilterGroupProps { + children: React.ReactNode; + className?: string; + label: string; +} + +interface SegmentedOption { + label: string; + value: T; +} + +interface SegmentedProps { + className?: string; + onChange: (value: T) => void; + options: SegmentedOption[]; + size?: "sm" | "md"; + value: T; +} diff --git a/web/src/components/ui/switch.tsx b/web/src/components/ui/switch.tsx index fe36c7755..ad2031277 100644 --- a/web/src/components/ui/switch.tsx +++ b/web/src/components/ui/switch.tsx @@ -5,15 +5,18 @@ export function Switch({ onCheckedChange, className, disabled, + id, }: { checked: boolean; onCheckedChange: (v: boolean) => void; className?: string; disabled?: boolean; + id?: string; }) { return ( + ))} +
    + + , + ); + return () => { + setAfterTitle(null); + setEnd(null); + }; + }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]); + useEffect(() => { load(); }, [load]); return (
    - {/* Period selector */} -
    - {t.analytics.period} - {PERIODS.map((p) => ( - - ))} -
    - {loading && !data && (
    diff --git a/web/src/pages/ChatPage.tsx b/web/src/pages/ChatPage.tsx new file mode 100644 index 000000000..639c6324f --- /dev/null +++ b/web/src/pages/ChatPage.tsx @@ -0,0 +1,743 @@ +/** + * ChatPage — embeds `hermes --tui` inside the dashboard. + * + *
    (dashboard chrome) . + * └─
    (rounded, dark bg, padded — the "terminal window" . + * look that gives the page a distinct visual identity) . + * └─ @xterm/xterm Terminal (WebGL renderer, Unicode 11 widths) . + * │ onData keystrokes → WebSocket → PTY master . + * │ onResize terminal resize → `\x1b[RESIZE:cols;rows]` . + * │ write(data) PTY output bytes → VT100 parser . + * ▼ . + * WebSocket /api/pty?token= . + * ▼ . + * FastAPI pty_ws (hermes_cli/web_server.py) . + * ▼ . + * POSIX PTY → `node ui-tui/dist/entry.js` → tui_gateway + AIAgent . + */ + +import { FitAddon } from "@xterm/addon-fit"; +import { Unicode11Addon } from "@xterm/addon-unicode11"; +import { WebLinksAddon } from "@xterm/addon-web-links"; +import { WebglAddon } from "@xterm/addon-webgl"; +import { Terminal } from "@xterm/xterm"; +import "@xterm/xterm/css/xterm.css"; +import { Typography } from "@nous-research/ui"; +import { cn } from "@/lib/utils"; +import { Copy, PanelRight, X } from "lucide-react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { createPortal } from "react-dom"; +import { useSearchParams } from "react-router-dom"; + +import { ChatSidebar } from "@/components/ChatSidebar"; +import { usePageHeader } from "@/contexts/usePageHeader"; +import { useI18n } from "@/i18n"; + +function buildWsUrl( + token: string, + resume: string | null, + channel: string, +): string { + const proto = window.location.protocol === "https:" ? "wss:" : "ws:"; + const qs = new URLSearchParams({ token, channel }); + if (resume) qs.set("resume", resume); + return `${proto}//${window.location.host}/api/pty?${qs.toString()}`; +} + +// Channel id ties this chat tab's PTY child (publisher) to its sidebar +// (subscriber). Generated once per mount so a tab refresh starts a fresh +// channel — the previous PTY child terminates with the old WS, and its +// channel auto-evicts when no subscribers remain. 
+function generateChannelId(): string { + if (typeof crypto !== "undefined" && "randomUUID" in crypto) { + return crypto.randomUUID(); + } + return `chat-${Math.random().toString(36).slice(2)}-${Date.now().toString(36)}`; +} + +// Colors for the terminal body. Matches the dashboard's dark teal canvas +// with cream foreground — we intentionally don't pick monokai or a loud +// theme, because the TUI's skin engine already paints the content; the +// terminal chrome just needs to sit quietly inside the dashboard. +const TERMINAL_THEME = { + background: "#0d2626", + foreground: "#f0e6d2", + cursor: "#f0e6d2", + cursorAccent: "#0d2626", + selectionBackground: "#f0e6d244", +}; + +/** + * CSS width for xterm font tiers. + * + * Prefer the terminal host's `clientWidth` — Chrome DevTools device mode often + * keeps `window.innerWidth` at the full desktop value while the *drawn* layout + * is phone-sized, which made us pick desktop font sizes (~14px) and look huge. + */ +function terminalTierWidthPx(host: HTMLElement | null): number { + if (typeof window === "undefined") return 1280; + const fromHost = host?.clientWidth ?? 0; + if (fromHost > 2) return Math.round(fromHost); + const doc = document.documentElement?.clientWidth ?? 0; + const vv = window.visualViewport; + const inner = window.innerWidth; + const vvw = vv?.width ?? inner; + const layout = Math.min(inner, vvw, doc > 0 ? doc : inner); + return Math.max(1, Math.round(layout)); +} + +function terminalFontSizeForWidth(layoutWidthPx: number): number { + if (layoutWidthPx < 300) return 7; + if (layoutWidthPx < 360) return 8; + if (layoutWidthPx < 420) return 9; + if (layoutWidthPx < 520) return 10; + if (layoutWidthPx < 720) return 11; + if (layoutWidthPx < 1024) return 12; + return 14; +} + +function terminalLineHeightForWidth(layoutWidthPx: number): number { + return layoutWidthPx < 1024 ? 
1.02 : 1.15; +} + +export default function ChatPage() { + const hostRef = useRef(null); + const termRef = useRef(null); + const fitRef = useRef(null); + const wsRef = useRef(null); + const [searchParams] = useSearchParams(); + // Lazy-init: the missing-token check happens at construction so the effect + // body doesn't have to setState (React 19's set-state-in-effect rule). + const [banner, setBanner] = useState(() => + typeof window !== "undefined" && !window.__HERMES_SESSION_TOKEN__ + ? "Session token unavailable. Open this page through `hermes dashboard`, not directly." + : null, + ); + const [copyState, setCopyState] = useState<"idle" | "copied">("idle"); + const copyResetRef = useRef | null>(null); + const [mobilePanelOpen, setMobilePanelOpen] = useState(false); + const { setEnd } = usePageHeader(); + const { t } = useI18n(); + const closeMobilePanel = useCallback(() => setMobilePanelOpen(false), []); + const modelToolsLabel = useMemo( + () => `${t.app.modelToolsSheetTitle} ${t.app.modelToolsSheetSubtitle}`, + [t.app.modelToolsSheetSubtitle, t.app.modelToolsSheetTitle], + ); + const [portalRoot] = useState(() => + typeof document !== "undefined" ? document.body : null, + ); + const [narrow, setNarrow] = useState(() => + typeof window !== "undefined" + ? 
window.matchMedia("(max-width: 1023px)").matches + : false, + ); + + const resumeRef = useRef(searchParams.get("resume")); + const channel = useMemo(() => generateChannelId(), []); + + useEffect(() => { + const mql = window.matchMedia("(max-width: 1023px)"); + const sync = () => setNarrow(mql.matches); + sync(); + mql.addEventListener("change", sync); + return () => mql.removeEventListener("change", sync); + }, []); + + useEffect(() => { + if (!mobilePanelOpen) return; + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") closeMobilePanel(); + }; + document.addEventListener("keydown", onKey); + const prevOverflow = document.body.style.overflow; + document.body.style.overflow = "hidden"; + return () => { + document.removeEventListener("keydown", onKey); + document.body.style.overflow = prevOverflow; + }; + }, [mobilePanelOpen, closeMobilePanel]); + + useEffect(() => { + const mql = window.matchMedia("(min-width: 1024px)"); + const onChange = (e: MediaQueryListEvent) => { + if (e.matches) setMobilePanelOpen(false); + }; + mql.addEventListener("change", onChange); + return () => mql.removeEventListener("change", onChange); + }, []); + + useEffect(() => { + if (!narrow) { + setEnd(null); + return; + } + setEnd( + , + ); + return () => setEnd(null); + }, [narrow, mobilePanelOpen, modelToolsLabel, setEnd]); + + const handleCopyLast = () => { + const ws = wsRef.current; + if (!ws || ws.readyState !== WebSocket.OPEN) return; + // Send the slash as a burst, wait long enough for Ink's tokenizer to + // emit a keypress event for each character (not coalesce them into a + // paste), then send Return as its own event. The timing here is + // empirical — 100ms is safely past Node's default stdin coalescing + // window and well inside UI responsiveness. 
+ ws.send("/copy"); + setTimeout(() => { + const s = wsRef.current; + if (s && s.readyState === WebSocket.OPEN) s.send("\r"); + }, 100); + setCopyState("copied"); + if (copyResetRef.current) clearTimeout(copyResetRef.current); + copyResetRef.current = setTimeout(() => setCopyState("idle"), 1500); + termRef.current?.focus(); + }; + + useEffect(() => { + const host = hostRef.current; + if (!host) return; + + const token = window.__HERMES_SESSION_TOKEN__; + // Banner already initialised above; just bail before wiring xterm/WS. + if (!token) { + return; + } + + const tierW0 = terminalTierWidthPx(host); + const term = new Terminal({ + allowProposedApi: true, + cursorBlink: true, + fontFamily: + "'JetBrains Mono', 'Cascadia Mono', 'Fira Code', 'MesloLGS NF', 'Source Code Pro', Menlo, Consolas, 'DejaVu Sans Mono', monospace", + fontSize: terminalFontSizeForWidth(tierW0), + lineHeight: terminalLineHeightForWidth(tierW0), + letterSpacing: 0, + fontWeight: "400", + fontWeightBold: "700", + macOptionIsMeta: true, + scrollback: 0, + theme: TERMINAL_THEME, + }); + termRef.current = term; + + // --- Clipboard integration --------------------------------------- + // + // Three independent paths all route to the system clipboard: + // + // 1. **Selection → Ctrl+C (or Cmd+C on macOS).** Ink's own handler + // in useInputHandlers.ts turns Ctrl+C into a copy when the + // terminal has a selection, then emits an OSC 52 escape. Our + // OSC 52 handler below decodes that escape and writes to the + // browser clipboard — so the flow works just like it does in + // `hermes --tui`. + // + // 2. **Ctrl/Cmd+Shift+C.** Belt-and-suspenders shortcut that + // operates directly on xterm's selection, useful if the TUI + // ever stops listening (e.g. overlays / pickers) or if the user + // has selected with the mouse outside of Ink's selection model. + // + // 3. **Ctrl/Cmd+Shift+V.** Reads the system clipboard and feeds + // it to the terminal as keyboard input. 
xterm's paste() wraps + it with bracketed-paste if the host has that mode enabled. + // + // OSC 52 reads (terminal asking to read the clipboard) are not + // supported — that would let any content the TUI renders exfiltrate + // the user's clipboard. + term.parser.registerOscHandler(52, (data) => { + // Format: "<selection>;<base64 payload>" + const semi = data.indexOf(";"); + if (semi < 0) return false; + const payload = data.slice(semi + 1); + if (payload === "?" || payload === "") return false; // read/clear — ignore + try { + // atob returns a binary string (one byte per char); we need UTF-8 + // decode so multi-byte codepoints (≥, →, emoji, CJK) round-trip + // correctly. Without this step, the three UTF-8 bytes of `≥` + // would land in the clipboard as the three separate Latin-1 + // characters `â‰¥`. + const binary = atob(payload); + const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0)); + const text = new TextDecoder("utf-8").decode(bytes); + navigator.clipboard.writeText(text).catch(() => {}); + } catch { + // Malformed base64 — silently drop. + } + return true; + }); + + const isMac = + typeof navigator !== "undefined" && /Mac/i.test(navigator.platform); + + term.attachCustomKeyEventHandler((ev) => { + if (ev.type !== "keydown") return true; + + const copyModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey; + const pasteModifier = isMac ? 
ev.metaKey : ev.ctrlKey && ev.shiftKey; + + if (copyModifier && ev.key.toLowerCase() === "c") { + const sel = term.getSelection(); + if (sel) { + navigator.clipboard.writeText(sel).catch(() => {}); + ev.preventDefault(); + return false; + } + } + + if (pasteModifier && ev.key.toLowerCase() === "v") { + navigator.clipboard + .readText() + .then((text) => { + if (text) term.paste(text); + }) + .catch(() => {}); + ev.preventDefault(); + return false; + } + + return true; + }); + + const fit = new FitAddon(); + fitRef.current = fit; + term.loadAddon(fit); + + const unicode11 = new Unicode11Addon(); + term.loadAddon(unicode11); + term.unicode.activeVersion = "11"; + + term.loadAddon(new WebLinksAddon()); + + term.open(host); + + // WebGL draws from a texture atlas sized with device pixels. On phones and + // in DevTools device mode that often produces *visually* much larger cells + // than `fontSize` suggests — users see "huge" text even at 7–9px settings. + // The canvas/DOM renderer tracks `fontSize` faithfully; use it for narrow + // hosts. Wide layouts still get WebGL for crisp box-drawing. + const useWebgl = terminalTierWidthPx(host) >= 768; + if (useWebgl) { + try { + const webgl = new WebglAddon(); + webgl.onContextLoss(() => webgl.dispose()); + term.loadAddon(webgl); + } catch (err) { + console.warn( + "[hermes-chat] WebGL renderer unavailable; falling back to default", + err, + ); + } + } + + // Initial fit + resize observer. fit.fit() reads the container's + // current bounding box and resizes the terminal grid to match. + // + // The subtle bit: the dashboard has CSS transitions on the container + // (backdrop fade-in, rounded corners settling as fonts load). If we + // call fit() at mount time, the bounding box we measure is often 1-2 + // cell widths off from the final size. 
ResizeObserver *does* fire + // when the container settles, but if the pixel delta happens to be + // smaller than one cell's width, fit() computes the same integer + // (cols, rows) as before and doesn't emit onResize — so the PTY + // never learns the final size. Users see truncated long lines until + // they resize the browser window. + // + // We force one extra fit + explicit RESIZE send after two animation + // frames. rAF→rAF guarantees one layout commit between the two + // callbacks, giving CSS transitions and font metrics time to finalize + // before we take the authoritative measurement. + let hostSyncRaf = 0; + const scheduleHostSync = () => { + if (hostSyncRaf) return; + hostSyncRaf = requestAnimationFrame(() => { + hostSyncRaf = 0; + syncTerminalMetrics(); + }); + }; + + let metricsDebounce: ReturnType | null = null; + const syncTerminalMetrics = () => { + const w = terminalTierWidthPx(host); + const nextSize = terminalFontSizeForWidth(w); + const nextLh = terminalLineHeightForWidth(w); + const fontChanged = + term.options.fontSize !== nextSize || + term.options.lineHeight !== nextLh; + if (fontChanged) { + term.options.fontSize = nextSize; + term.options.lineHeight = nextLh; + } + try { + fit.fit(); + } catch { + return; + } + if (fontChanged && term.rows > 0) { + try { + term.refresh(0, term.rows - 1); + } catch { + /* ignore */ + } + } + if ( + fontChanged && + wsRef.current && + wsRef.current.readyState === WebSocket.OPEN + ) { + wsRef.current.send(`\x1b[RESIZE:${term.cols};${term.rows}]`); + } + }; + + const scheduleSyncTerminalMetrics = () => { + if (metricsDebounce) clearTimeout(metricsDebounce); + metricsDebounce = setTimeout(() => { + metricsDebounce = null; + syncTerminalMetrics(); + }, 60); + }; + + const ro = new ResizeObserver(() => scheduleHostSync()); + ro.observe(host); + + window.addEventListener("resize", scheduleSyncTerminalMetrics); + window.visualViewport?.addEventListener("resize", scheduleSyncTerminalMetrics); + 
window.visualViewport?.addEventListener("scroll", scheduleSyncTerminalMetrics); + scheduleHostSync(); + requestAnimationFrame(() => scheduleHostSync()); + + // Double-rAF authoritative fit. On the second frame the layout has + // committed at least once since mount; fit.fit() then reads the + // stable container size. We always send a RESIZE escape afterwards + // (even if fit's cols/rows didn't change, so the PTY has the same + // dims registered as our JS state — prevents a drift where Ink + // thinks the terminal is one col bigger than what's on screen). + let settleRaf1 = 0; + let settleRaf2 = 0; + settleRaf1 = requestAnimationFrame(() => { + settleRaf1 = 0; + settleRaf2 = requestAnimationFrame(() => { + settleRaf2 = 0; + syncTerminalMetrics(); + }); + }); + + // WebSocket + const url = buildWsUrl(token, resumeRef.current, channel); + const ws = new WebSocket(url); + ws.binaryType = "arraybuffer"; + wsRef.current = ws; + // Suppress banner/terminal side-effects when cleanup() calls `ws.close()` + // (React StrictMode remount, route change) so we never write to a + // disposed xterm or setState on an unmounted tree. + let unmounting = false; + + ws.onopen = () => { + setBanner(null); + // Send the initial RESIZE immediately so Ink has *a* size to lay + // out against on its first paint. The double-rAF block above will + // follow up with the authoritative measurement — at worst Ink + // reflows once after the PTY boots, which is imperceptible. + ws.send(`\x1b[RESIZE:${term.cols};${term.rows}]`); + }; + + ws.onmessage = (ev) => { + if (typeof ev.data === "string") { + term.write(ev.data); + } else { + term.write(new Uint8Array(ev.data as ArrayBuffer)); + } + }; + + ws.onclose = (ev) => { + wsRef.current = null; + if (unmounting) { + return; + } + if (ev.code === 4401) { + setBanner("Auth failed. 
Reload the page to refresh the session token."); + return; + } + if (ev.code === 4403) { + setBanner("Chat is only reachable from localhost."); + return; + } + if (ev.code === 1011) { + // Server already wrote an ANSI error frame. + return; + } + term.write("\r\n\x1b[90m[session ended]\x1b[0m\r\n"); + }; + + // Keystrokes + mouse events → PTY, with cell-level dedup for motion. + // + // Ink enables `\x1b[?1003h` (any-motion tracking), which asks the + // terminal to report every mouse-move as an SGR mouse event even with + // no button held. xterm.js happily emits one report per pixel of + // mouse motion; without deduping, a casual mouse-over floods Ink with + // hundreds of redraw-triggering reports and the UI goes laggy + // (scrolling stutters, clicks land on stale positions by the time + // Ink finishes processing the motion backlog). + // + // We keep track of the last cell we reported a motion for. Press, + // release, and wheel events always pass through; motion events only + // pass through if the cell changed. Parsing is cheap — SGR reports + // are short literal strings. + // eslint-disable-next-line no-control-regex -- intentional ESC byte in xterm SGR mouse report parser + const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/; + let lastMotionCell = { col: -1, row: -1 }; + let lastMotionCb = -1; + const onDataDisposable = term.onData((data) => { + if (ws.readyState !== WebSocket.OPEN) return; + + const m = SGR_MOUSE_RE.exec(data); + if (m) { + const cb = parseInt(m[1], 10); + const col = parseInt(m[2], 10); + const row = parseInt(m[3], 10); + const released = m[4] === "m"; + // Motion events have bit 0x20 (32) set in the button code. + // Wheel events have bit 0x40 (64); always forward wheel. 
+ const isMotion = (cb & 0x20) !== 0 && (cb & 0x40) === 0; + const isWheel = (cb & 0x40) !== 0; + if (isMotion && !isWheel && !released) { + if ( + col === lastMotionCell.col && + row === lastMotionCell.row && + cb === lastMotionCb + ) { + return; // same cell + same button state; skip redundant report + } + lastMotionCell = { col, row }; + lastMotionCb = cb; + } else { + // Non-motion event (press, release, wheel) — reset dedup state + // so the next motion after this always reports. + lastMotionCell = { col: -1, row: -1 }; + lastMotionCb = -1; + } + } + + ws.send(data); + }); + + const onResizeDisposable = term.onResize(({ cols, rows }) => { + if (ws.readyState === WebSocket.OPEN) { + ws.send(`\x1b[RESIZE:${cols};${rows}]`); + } + }); + + term.focus(); + + return () => { + unmounting = true; + onDataDisposable.dispose(); + onResizeDisposable.dispose(); + if (metricsDebounce) clearTimeout(metricsDebounce); + window.removeEventListener("resize", scheduleSyncTerminalMetrics); + window.visualViewport?.removeEventListener( + "resize", + scheduleSyncTerminalMetrics, + ); + window.visualViewport?.removeEventListener( + "scroll", + scheduleSyncTerminalMetrics, + ); + ro.disconnect(); + if (hostSyncRaf) cancelAnimationFrame(hostSyncRaf); + if (settleRaf1) cancelAnimationFrame(settleRaf1); + if (settleRaf2) cancelAnimationFrame(settleRaf2); + ws.close(); + wsRef.current = null; + term.dispose(); + termRef.current = null; + fitRef.current = null; + if (copyResetRef.current) { + clearTimeout(copyResetRef.current); + copyResetRef.current = null; + } + }; + }, [channel]); + + // Layout: + // outer flex column — sits inside the dashboard's content area + // row split — terminal pane (flex-1) + sidebar (fixed width, lg+) + // terminal wrapper — rounded, dark, padded — the "terminal window" + // floating copy button — bottom-right corner, transparent with a + // subtle border; stays out of the way until hovered. 
Sends + `/copy` then a separate Return (`\r`) to Ink, which emits OSC 52 → our clipboard handler. + // sidebar — ChatSidebar opens its own JSON-RPC sidecar; renders + // model badge, tool-call list, model picker. Best-effort: if the + // sidecar fails to connect the terminal pane keeps working. + // + // `normal-case` opts out of the dashboard's global `uppercase` rule on + // the root `
    ` in App.tsx — terminal output must preserve case. + // + // Mobile model/tools sheet is portaled to `document.body` so it stacks + // above the app sidebar (`z-50`) and mobile chrome (`z-40`). The main + // dashboard column uses `relative z-2`, which traps `position:fixed` + // descendants below those layers (see Toast.tsx). + const mobileModelToolsPortal = + narrow && + portalRoot && + createPortal( + <> + {mobilePanelOpen && ( + +
    + +
    + +
    +
    + , + portalRoot, + ); + + return ( +
    + {mobileModelToolsPortal} + + {banner && ( +
    + {banner} +
    + )} + +
    +
    +
    + + +
    + + {!narrow && ( + + )} +
    +
    + ); +} + +declare global { + interface Window { + __HERMES_SESSION_TOKEN__?: string; + } +} diff --git a/web/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx index b72f0dcdb..80cef29e4 100644 --- a/web/src/pages/ConfigPage.tsx +++ b/web/src/pages/ConfigPage.tsx @@ -1,4 +1,4 @@ -import { useEffect, useRef, useState, useMemo } from "react"; +import { useEffect, useLayoutEffect, useRef, useState, useMemo } from "react"; import { Code, Download, @@ -8,7 +8,6 @@ import { Search, Upload, X, - ChevronRight, Settings2, FileText, Settings, @@ -27,6 +26,7 @@ import { MessageCircle, Wrench, FileQuestion, + Filter, } from "lucide-react"; import { api } from "@/lib/api"; import { getNestedValue, setNestedValue } from "@/lib/nested"; @@ -38,6 +38,7 @@ import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Badge } from "@/components/ui/badge"; import { useI18n } from "@/i18n"; +import { usePageHeader } from "@/contexts/usePageHeader"; /* ------------------------------------------------------------------ */ /* Helpers */ @@ -85,6 +86,35 @@ export default function ConfigPage() { const { toast, showToast } = useToast(); const fileInputRef = useRef(null); const { t } = useI18n(); + const { setEnd } = usePageHeader(); + + useLayoutEffect(() => { + if (!config || !schema) { + setEnd(null); + return; + } + setEnd( +
    + + setSearchQuery(e.target.value)} + /> + {searchQuery && ( + + )} +
    , + ); + return () => setEnd(null); + }, [config, schema, searchQuery, setEnd, t.common.search]); function prettyCategoryName(cat: string): string { const key = cat as keyof typeof t.config.categories; @@ -366,62 +396,66 @@ export default function ConfigPage() { ) : ( /* ═══════════════ Form Mode ═══════════════ */ -
    - {/* ---- Sidebar — horizontal scroll on mobile, fixed column on sm+ ---- */} -
    -
    - {/* Search */} -
    - - setSearchQuery(e.target.value)} - /> - {searchQuery && ( - - )} -
    +
    + {/* ---- Filter panel ---- */} + {/* ---- Content ---- */}
    diff --git a/web/src/pages/CronPage.tsx b/web/src/pages/CronPage.tsx index 5db9bac41..10fba6913 100644 --- a/web/src/pages/CronPage.tsx +++ b/web/src/pages/CronPage.tsx @@ -1,9 +1,11 @@ -import { useEffect, useState } from "react"; +import { useCallback, useEffect, useState } from "react"; import { Clock, Pause, Play, Plus, Trash2, Zap } from "lucide-react"; import { H2 } from "@nous-research/ui"; import { api } from "@/lib/api"; import type { CronJob } from "@/lib/api"; +import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog"; import { useToast } from "@/hooks/useToast"; +import { useConfirmDelete } from "@/hooks/useConfirmDelete"; import { Toast } from "@/components/Toast"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; @@ -40,17 +42,17 @@ export default function CronPage() { const [deliver, setDeliver] = useState("local"); const [creating, setCreating] = useState(false); - const loadJobs = () => { + const loadJobs = useCallback(() => { api .getCronJobs() .then(setJobs) .catch(() => showToast(t.common.loading, "error")) .finally(() => setLoading(false)); - }; + }, [showToast, t.common.loading]); useEffect(() => { loadJobs(); - }, []); + }, [loadJobs]); const handleCreate = async () => { if (!prompt.trim() || !schedule.trim()) { @@ -113,18 +115,25 @@ export default function CronPage() { } }; - const handleDelete = async (job: CronJob) => { - try { - await api.deleteCronJob(job.id); - showToast( - `${t.common.delete}: "${job.name || job.prompt.slice(0, 30)}"`, - "success", - ); - loadJobs(); - } catch (e) { - showToast(`${t.status.error}: ${e}`, "error"); - } - }; + const jobDelete = useConfirmDelete({ + onDelete: useCallback( + async (id: string) => { + const job = jobs.find((j) => j.id === id); + try { + await api.deleteCronJob(id); + showToast( + `${t.common.delete}: "${job?.name || (job?.prompt ?? 
"").slice(0, 30) || id}"`, + "success", + ); + loadJobs(); + } catch (e) { + showToast(`${t.status.error}: ${e}`, "error"); + throw e; + } + }, + [jobs, loadJobs, showToast, t.common.delete, t.status.error], + ), + }); if (loading) { return ( @@ -134,10 +143,27 @@ export default function CronPage() { ); } + const pendingJob = jobDelete.pendingId + ? jobs.find((j) => j.id === jobDelete.pendingId) + : null; + return (
    + + {/* Create new job form */} @@ -311,7 +337,7 @@ export default function CronPage() { size="icon" title={t.common.delete} aria-label={t.common.delete} - onClick={() => handleDelete(job)} + onClick={() => jobDelete.requestDelete(job.id)} > diff --git a/web/src/pages/DocsPage.tsx b/web/src/pages/DocsPage.tsx new file mode 100644 index 000000000..5861aeccc --- /dev/null +++ b/web/src/pages/DocsPage.tsx @@ -0,0 +1,54 @@ +import { useLayoutEffect } from "react"; +import { ExternalLink } from "lucide-react"; +import { useI18n } from "@/i18n"; +import { usePageHeader } from "@/contexts/usePageHeader"; +import { buttonVariants } from "@/components/ui/button"; +import { cn } from "@/lib/utils"; + +export const HERMES_DOCS_URL = "https://hermes-agent.nousresearch.com/docs/"; + +export default function DocsPage() { + const { t } = useI18n(); + const { setEnd } = usePageHeader(); + + useLayoutEffect(() => { + setEnd( + + + {t.app.openDocumentation} + , + ); + return () => { + setEnd(null); + }; + }, [setEnd, t]); + + return ( +
    +