From ef5eaf8d8757a5e75fa571042abc287430192f53 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:48:50 -0700 Subject: [PATCH 001/220] feat(cron): honor `hermes tools` config for the cron platform (#14798) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cron now resolves its toolset from the same per-platform config the gateway uses — `_get_platform_tools(cfg, 'cron')` — instead of blindly loading every default toolset. Existing cron jobs without a per-job override automatically lose `moa`, `homeassistant`, and `rl` (the `_DEFAULT_OFF_TOOLSETS` set), which stops the "surprise $4.63 mixture_of_agents run" class of bug (Norbert, Discord). Precedence inside `run_job`: 1. per-job `enabled_toolsets` (PR #14767 / #6130) — wins if set 2. `_get_platform_tools(cfg, 'cron')` — new, the blanket gate 3. `None` fallback (legacy) — only on resolver exception Changes: - hermes_cli/platforms.py: register 'cron' with default_toolset 'hermes-cron' - toolsets.py: add 'hermes-cron' toolset (mirrors 'hermes-cli'; `_get_platform_tools` then filters via `_DEFAULT_OFF_TOOLSETS`) - cron/scheduler.py: add `_resolve_cron_enabled_toolsets(job, cfg)`, call it at the `AIAgent(...)` kwargs site - tests/cron/test_scheduler.py: replace the 'None when not set' test (outdated contract) with an invariant ('moa not in default cron toolset') + new per-job-wins precedence test - tests/hermes_cli/test_tools_config.py: mark 'cron' as non-messaging in the gateway-toolset-coverage test --- cron/scheduler.py | 33 +++++++++++++++++++- hermes_cli/platforms.py | 1 + tests/cron/test_scheduler.py | 44 +++++++++++++++++++++++++-- tests/hermes_cli/test_tools_config.py | 2 +- toolsets.py | 13 +++++++- 5 files changed, 88 insertions(+), 5 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 9797703744..d051a7ab36 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -40,6 +40,37 @@ from 
hermes_time import now as _hermes_now logger = logging.getLogger(__name__) + +def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None: + """Resolve the toolset list for a cron job. + + Precedence: + 1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update). + Keeps the agent's job-scoped toolset override intact — #6130. + 2. Per-platform ``hermes tools`` config for the ``cron`` platform. + Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``) + so users can gate cron toolsets globally without recreating every job. + 3. ``None`` on any lookup failure — AIAgent loads the full default set + (legacy behavior before this change, preserved as the safety net). + + _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by + ``_get_platform_tools`` for unconfigured platforms, so fresh installs + get cron WITHOUT ``moa`` by default (issue reported by Norbert — + surprise $4.63 run). + """ + per_job = job.get("enabled_toolsets") + if per_job: + return per_job + try: + from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load + return sorted(_get_platform_tools(cfg or {}, "cron")) + except Exception as exc: + logger.warning( + "Cron toolset resolution failed, falling back to full default toolset: %s", + exc, + ) + return None + # Valid delivery platforms — used to validate user-supplied platform names # in cron delivery targets, preventing env var enumeration via crafted names. 
_KNOWN_DELIVERY_PLATFORMS = frozenset({ @@ -886,7 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), provider_sort=pr.get("sort"), - enabled_toolsets=job.get("enabled_toolsets") or None, + enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg), disabled_toolsets=["cronjob", "messaging", "clarify"], quiet_mode=True, skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd diff --git a/hermes_cli/platforms.py b/hermes_cli/platforms.py index 1fc3a3a850..05507eaced 100644 --- a/hermes_cli/platforms.py +++ b/hermes_cli/platforms.py @@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([ ("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")), ("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")), ("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")), + ("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")), ]) diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 421d6859d9..4cd4b7cd75 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -710,7 +710,15 @@ class TestRunJobSessionPersistence: kwargs = mock_agent_cls.call_args.kwargs assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"] - def test_run_job_enabled_toolsets_none_when_not_set(self, tmp_path): + def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path): + """When a job has no explicit enabled_toolsets, the scheduler now + resolves them from ``hermes tools`` platform config for ``cron`` + (PR #14xxx — blanket fix for Norbert's surprise ``moa`` run). + + The legacy "pass None → AIAgent loads full default" path is still + reachable, but only when ``_get_platform_tools`` raises (safety net + for any unexpected config shape). 
+ """ job = { "id": "no-toolset-job", "name": "test", @@ -725,7 +733,39 @@ class TestRunJobSessionPersistence: run_job(job) kwargs = mock_agent_cls.call_args.kwargs - assert kwargs["enabled_toolsets"] is None + # Resolution happened — not None, is a list. + assert isinstance(kwargs["enabled_toolsets"], list) + # The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS + # (``moa``, ``homeassistant``, ``rl``) removed. The most important + # invariant: ``moa`` is NOT in the default cron toolset, so a cron + # run cannot accidentally spin up frontier models. + assert "moa" not in kwargs["enabled_toolsets"] + + def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path): + """Per-job enabled_toolsets (via cronjob tool) always take precedence + over the platform-level ``hermes tools`` config.""" + job = { + "id": "override-job", + "name": "test", + "prompt": "hello", + "enabled_toolsets": ["terminal"], + } + fake_db, patches = self._make_run_job_patches(tmp_path) + # Even if the user has ``hermes tools`` configured to enable web+file + # for cron, the per-job override wins. + with patches[0], patches[1], patches[2], patches[3], patches[4], \ + patch("run_agent.AIAgent") as mock_agent_cls, \ + patch( + "hermes_cli.tools_config._get_platform_tools", + return_value={"web", "file"}, + ): + mock_agent = MagicMock() + mock_agent.run_conversation.return_value = {"final_response": "ok"} + mock_agent_cls.return_value = mock_agent + run_job(job) + + kwargs = mock_agent_cls.call_args.kwargs + assert kwargs["enabled_toolsets"] == ["terminal"] def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path): """Empty final_response should stay empty for delivery logic (issue #2234). 
diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index 9fb2745acd..b134fc98b3 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency: gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"]) # Exclude non-messaging platforms from the check - non_messaging = {"cli", "api_server"} + non_messaging = {"cli", "api_server", "cron"} for platform, meta in PLATFORMS.items(): if platform in non_messaging: continue diff --git a/toolsets.py b/toolsets.py index f1dc7fca1c..975d8883c2 100644 --- a/toolsets.py +++ b/toolsets.py @@ -295,7 +295,18 @@ TOOLSETS = { "tools": _HERMES_CORE_TOOLS, "includes": [] }, - + + "hermes-cron": { + # Mirrors hermes-cli so cron's "default" toolset is the same set of + # core tools users see interactively — then `hermes tools` filters + # them down per the platform config. _DEFAULT_OFF_TOOLSETS (moa, + # homeassistant, rl) are excluded by _get_platform_tools() unless + # the user explicitly enables them. 
+ "description": "Default cron toolset - same core tools as hermes-cli; gated by `hermes tools`", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + "hermes-telegram": { "description": "Telegram bot toolset - full access for personal use (terminal has safety checks)", "tools": _HERMES_CORE_TOOLS, From 24f139e16a6fa800d3cdf96f95fe3b586c36da18 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:44:13 -0600 Subject: [PATCH 002/220] fix(mcp): rewrite definitions refs to in input schemas --- tests/tools/test_mcp_tool.py | 66 ++++++++++++++++++++++++++++++++++++ tools/mcp_tool.py | 31 ++++++++++++++--- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index da46348ea8..c70d1a5335 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -120,6 +120,72 @@ class TestSchemaConversion: assert schema["parameters"] == {"type": "object", "properties": {}} + def test_definitions_refs_are_rewritten_to_defs(self): + from tools.mcp_tool import _convert_mcp_schema + + mcp_tool = _make_mcp_tool( + name="submit", + description="Submit a payload", + input_schema={ + "type": "object", + "properties": { + "input": {"$ref": "#/definitions/Payload"}, + }, + "required": ["input"], + "definitions": { + "Payload": { + "type": "object", + "properties": { + "query": {"type": "string"}, + }, + "required": ["query"], + } + }, + }, + ) + + schema = _convert_mcp_schema("forms", mcp_tool) + + assert schema["parameters"]["properties"]["input"]["$ref"] == "#/$defs/Payload" + assert "$defs" in schema["parameters"] + assert "definitions" not in schema["parameters"] + + def test_nested_definition_refs_are_rewritten_recursively(self): + from tools.mcp_tool import _convert_mcp_schema + + mcp_tool = _make_mcp_tool( + name="nested", + description="Nested schema", + input_schema={ + "type": "object", + "properties": { + "items": { + "type": "array", + 
"items": {"$ref": "#/definitions/Entry"}, + }, + }, + "definitions": { + "Entry": { + "type": "object", + "properties": { + "child": {"$ref": "#/definitions/Child"}, + }, + }, + "Child": { + "type": "object", + "properties": { + "value": {"type": "string"}, + }, + }, + }, + }, + ) + + schema = _convert_mcp_schema("forms", mcp_tool) + + assert schema["parameters"]["properties"]["items"]["items"]["$ref"] == "#/$defs/Entry" + assert schema["parameters"]["$defs"]["Entry"]["properties"]["child"]["$ref"] == "#/$defs/Child" + def test_tool_name_prefix_format(self): from tools.mcp_tool import _convert_mcp_schema diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index efef5ea91a..58bd6cd112 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -2019,14 +2019,37 @@ def _make_check_fn(server_name: str): # --------------------------------------------------------------------------- def _normalize_mcp_input_schema(schema: dict | None) -> dict: - """Normalize MCP input schemas for LLM tool-calling compatibility.""" + """Normalize MCP input schemas for LLM tool-calling compatibility. + + MCP servers can emit plain JSON Schema with ``definitions`` / + ``#/definitions/...`` references. Kimi / Moonshot rejects that form and + requires local refs to point into ``#/$defs/...`` instead. Normalize the + common draft-07 shape here so MCP tool schemas remain portable across + OpenAI-compatible providers. 
+ """ if not schema: return {"type": "object", "properties": {}} - if schema.get("type") == "object" and "properties" not in schema: - return {**schema, "properties": {}} + def _rewrite_local_refs(node): + if isinstance(node, dict): + normalized = {} + for key, value in node.items(): + out_key = "$defs" if key == "definitions" else key + normalized[out_key] = _rewrite_local_refs(value) + ref = normalized.get("$ref") + if isinstance(ref, str) and ref.startswith("#/definitions/"): + normalized["$ref"] = "#/$defs/" + ref[len("#/definitions/"):] + return normalized + if isinstance(node, list): + return [_rewrite_local_refs(item) for item in node] + return node - return schema + normalized = _rewrite_local_refs(schema) + + if normalized.get("type") == "object" and "properties" not in normalized: + return {**normalized, "properties": {}} + + return normalized def sanitize_mcp_name_component(value: str) -> str: From e26c4f0e343536d0b39f7fda076d6bc11e210863 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:11:57 -0700 Subject: [PATCH 003/220] fix(kimi,mcp): Moonshot schema sanitizer + MCP schema robustness (#14805) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a broader class of 'tools.function.parameters is not a valid moonshot flavored json schema' errors on Nous / OpenRouter aggregators routing to moonshotai/kimi-k2.6 with MCP tools loaded. ## Moonshot sanitizer (agent/moonshot_schema.py, new) Model-name-routed (not base-URL-routed) so Nous / OpenRouter users are covered alongside api.moonshot.ai. Applied in ChatCompletionsTransport.build_kwargs when is_moonshot_model(model). Two repairs: 1. Fill missing 'type' on every property / items / anyOf-child schema node (structural walk — only schema-position dicts are touched, not container maps like properties/$defs). 2. Strip 'type' at anyOf parents; Moonshot rejects it. 
## MCP normalizer hardened (tools/mcp_tool.py) Draft-07 $ref rewrite from PR #14802 now also does: - coerce missing / null 'type' on object-shaped nodes (salvages #4897) - prune 'required' arrays to names that exist in 'properties' (salvages #4651; Gemini 400s on dangling required) - apply recursively, not just top-level These repairs are provider-agnostic so the same MCP schema is valid on OpenAI, Anthropic, Gemini, and Moonshot in one pass. ## Crash fix: safe getattr for Tool.inputSchema _convert_mcp_schema now uses getattr(t, 'inputSchema', None) so MCP servers whose Tool objects omit the attribute entirely no longer abort registration (salvages #3882). ## Validation - tests/agent/test_moonshot_schema.py: 27 new tests (model detection, missing-type fill, anyOf-parent strip, non-mutation, real-world MCP shape) - tests/tools/test_mcp_tool.py: 7 new tests (missing / null type, required pruning, nested repair, safe getattr) - tests/agent/transports/test_chat_completions.py: 2 new integration tests (Moonshot route sanitizes, non-Moonshot route doesn't) - Targeted suite: 49 passed - E2E via execute_code with a realistic MCP tool carrying all three Moonshot rejection modes + dangling required + draft-07 refs: sanitizer produces a schema valid on Moonshot and Gemini --- agent/moonshot_schema.py | 190 +++++++++++++ agent/transports/chat_completions.py | 6 + tests/agent/test_moonshot_schema.py | 254 ++++++++++++++++++ .../agent/transports/test_chat_completions.py | 50 ++++ tests/tools/test_mcp_tool.py | 105 ++++++++ tools/mcp_tool.py | 61 ++++- 6 files changed, 663 insertions(+), 3 deletions(-) create mode 100644 agent/moonshot_schema.py create mode 100644 tests/agent/test_moonshot_schema.py diff --git a/agent/moonshot_schema.py b/agent/moonshot_schema.py new file mode 100644 index 0000000000..08585bab4c --- /dev/null +++ b/agent/moonshot_schema.py @@ -0,0 +1,190 @@ +"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset. 
+ +Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI +tool calling. Requests that violate it fail with HTTP 400: + + tools.function.parameters is not a valid moonshot flavored json schema, + details: <...> + +Known rejection modes documented at +https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102 +and MoonshotAI/kimi-cli#1595: + +1. Every property schema must carry a ``type``. Standard JSON Schema allows + type to be omitted (the value is then unconstrained); Moonshot refuses. +2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not + the parent. Presence of both causes "type should be defined in anyOf + items instead of the parent schema". + +The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is +handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it +applies at MCP registration time for all providers. +""" + +from __future__ import annotations + +import copy +from typing import Any, Dict, List + +# Keys whose values are maps of name → schema (not schemas themselves). +# When we recurse, we walk the values of these maps as schemas, but we do +# NOT apply the missing-type repair to the map itself. +_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"}) + +# Keys whose values are lists of schemas. +_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"}) + +# Keys whose values are a single nested schema. +_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"}) + + +def _repair_schema(node: Any, is_schema: bool = True) -> Any: + """Recursively apply Moonshot repairs to a schema node. + + ``is_schema=True`` means this dict is a JSON Schema node and gets the + missing-type + anyOf-parent repairs applied. ``is_schema=False`` means + it's a container map (e.g. the value of ``properties``) and we only + recurse into its values. 
+ """ + if isinstance(node, list): + # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so + # every element is itself a schema. + return [_repair_schema(item, is_schema=True) for item in node] + if not isinstance(node, dict): + return node + + # Walk the dict, deciding per-key whether recursion is into a schema + # node, a container map, or a scalar. + repaired: Dict[str, Any] = {} + for key, value in node.items(): + if key in _SCHEMA_MAP_KEYS and isinstance(value, dict): + # Map of name → schema. Don't treat the map itself as a schema + # (it has no type / properties of its own), but each value is. + repaired[key] = { + sub_key: _repair_schema(sub_val, is_schema=True) + for sub_key, sub_val in value.items() + } + elif key in _SCHEMA_LIST_KEYS and isinstance(value, list): + repaired[key] = [_repair_schema(v, is_schema=True) for v in value] + elif key in _SCHEMA_NODE_KEYS: + # items / not / additionalProperties: single nested schema. + # additionalProperties can also be a bool — leave those alone. + if isinstance(value, dict): + repaired[key] = _repair_schema(value, is_schema=True) + else: + repaired[key] = value + else: + # Scalars (description, title, format, enum values, etc.) pass through. + repaired[key] = value + + if not is_schema: + return repaired + + # Rule 2: when anyOf is present, type belongs only on the children. + if "anyOf" in repaired and isinstance(repaired["anyOf"], list): + repaired.pop("type", None) + return repaired + + # Rule 1: property schemas without type need one. $ref nodes are exempt + # — their type comes from the referenced definition. 
+ if "$ref" in repaired: + return repaired + return _fill_missing_type(repaired) + + +def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]: + """Infer a reasonable ``type`` if this schema node has none.""" + if "type" in node and node["type"] not in (None, ""): + return node + + # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum`` + # → type of first enum value, else fall back to ``string`` (safest scalar). + if "properties" in node or "required" in node or "additionalProperties" in node: + inferred = "object" + elif "items" in node or "prefixItems" in node: + inferred = "array" + elif "enum" in node and isinstance(node["enum"], list) and node["enum"]: + sample = node["enum"][0] + if isinstance(sample, bool): + inferred = "boolean" + elif isinstance(sample, int): + inferred = "integer" + elif isinstance(sample, float): + inferred = "number" + else: + inferred = "string" + else: + inferred = "string" + + return {**node, "type": inferred} + + +def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]: + """Normalize tool parameters to a Moonshot-compatible object schema. + + Returns a deep-copied schema with the two flavored-JSON-Schema repairs + applied. Input is not mutated. 
+ """ + if not isinstance(parameters, dict): + return {"type": "object", "properties": {}} + + repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True) + if not isinstance(repaired, dict): + return {"type": "object", "properties": {}} + + # Top-level must be an object schema + if repaired.get("type") != "object": + repaired["type"] = "object" + if "properties" not in repaired: + repaired["properties"] = {} + + return repaired + + +def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.""" + if not tools: + return tools + + sanitized: List[Dict[str, Any]] = [] + any_change = False + for tool in tools: + if not isinstance(tool, dict): + sanitized.append(tool) + continue + fn = tool.get("function") + if not isinstance(fn, dict): + sanitized.append(tool) + continue + params = fn.get("parameters") + repaired = sanitize_moonshot_tool_parameters(params) + if repaired is not params: + any_change = True + new_fn = {**fn, "parameters": repaired} + sanitized.append({**tool, "function": new_fn}) + else: + sanitized.append(tool) + + return sanitized if any_change else tools + + +def is_moonshot_model(model: str | None) -> bool: + """True for any Kimi / Moonshot model slug, regardless of aggregator prefix. + + Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator- + prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``). + Detection by model name covers Nous / OpenRouter / other aggregators that + route to Moonshot's inference, where the base URL is the aggregator's, not + ``api.moonshot.ai``. 
+ """ + if not model: + return False + bare = model.strip().lower() + # Last path segment (covers aggregator-prefixed slugs) + tail = bare.rsplit("/", 1)[-1] + if tail.startswith("kimi-") or tail == "kimi": + return True + # Vendor-prefixed forms commonly used on aggregators + if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"): + return True + return False diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 900f59dcf4..1cccf7e928 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly. import copy from typing import Any, Dict, List, Optional +from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools from agent.prompt_builder import DEVELOPER_ROLE_MODELS from agent.transports.base import ProviderTransport from agent.transports.types import NormalizedResponse, ToolCall, Usage @@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport): # Tools if tools: + # Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting + # tool parameters here keeps aggregator routes (Nous, OpenRouter, + # etc.) compatible, in addition to direct moonshot.ai endpoints. + if is_moonshot_model(model): + tools = sanitize_moonshot_tools(tools) api_kwargs["tools"] = tools # max_tokens resolution — priority: ephemeral > user > provider default diff --git a/tests/agent/test_moonshot_schema.py b/tests/agent/test_moonshot_schema.py new file mode 100644 index 0000000000..da53806587 --- /dev/null +++ b/tests/agent/test_moonshot_schema.py @@ -0,0 +1,254 @@ +"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer. + +Moonshot's tool-parameter validator rejects several shapes that the rest of +the JSON Schema ecosystem accepts: + +1. Properties without ``type`` — Moonshot requires ``type`` on every node. +2. 
``type`` at the parent of ``anyOf`` — Moonshot requires it only inside + ``anyOf`` children. + +These tests cover the repairs applied by ``agent/moonshot_schema.py``. +""" + +from __future__ import annotations + +import pytest + +from agent.moonshot_schema import ( + is_moonshot_model, + sanitize_moonshot_tool_parameters, + sanitize_moonshot_tools, +) + + +class TestMoonshotModelDetection: + """is_moonshot_model() must match across aggregator prefixes.""" + + @pytest.mark.parametrize( + "model", + [ + "kimi-k2.6", + "kimi-k2-thinking", + "moonshotai/Kimi-K2.6", + "moonshotai/kimi-k2.6", + "nous/moonshotai/kimi-k2.6", + "openrouter/moonshotai/kimi-k2-thinking", + "MOONSHOTAI/KIMI-K2.6", + ], + ) + def test_positive_matches(self, model): + assert is_moonshot_model(model) is True + + @pytest.mark.parametrize( + "model", + [ + "", + None, + "anthropic/claude-sonnet-4.6", + "openai/gpt-5.4", + "google/gemini-3-flash-preview", + "deepseek-chat", + ], + ) + def test_negative_matches(self, model): + assert is_moonshot_model(model) is False + + +class TestMissingTypeFilled: + """Rule 1: every property must carry a type.""" + + def test_property_without_type_gets_string(self): + params = { + "type": "object", + "properties": {"query": {"description": "a bare property"}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["query"]["type"] == "string" + + def test_property_with_enum_infers_type_from_first_value(self): + params = { + "type": "object", + "properties": {"flag": {"enum": [True, False]}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["flag"]["type"] == "boolean" + + def test_nested_properties_are_repaired(self): + params = { + "type": "object", + "properties": { + "filter": { + "type": "object", + "properties": { + "field": {"description": "no type"}, + }, + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["filter"]["properties"]["field"]["type"] == "string" + + 
def test_array_items_without_type_get_repaired(self): + params = { + "type": "object", + "properties": { + "tags": { + "type": "array", + "items": {"description": "tag entry"}, + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["tags"]["items"]["type"] == "string" + + def test_ref_node_is_not_given_synthetic_type(self): + """$ref nodes should NOT get a synthetic type — the referenced + definition supplies it, and Moonshot would reject the conflict.""" + params = { + "type": "object", + "properties": {"payload": {"$ref": "#/$defs/Payload"}}, + "$defs": {"Payload": {"type": "object", "properties": {}}}, + } + out = sanitize_moonshot_tool_parameters(params) + assert "type" not in out["properties"]["payload"] + assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload" + + +class TestAnyOfParentType: + """Rule 2: type must not appear at the anyOf parent level.""" + + def test_parent_type_stripped_when_anyof_present(self): + params = { + "type": "object", + "properties": { + "from_format": { + "type": "string", + "anyOf": [ + {"type": "string"}, + {"type": "null"}, + ], + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + from_format = out["properties"]["from_format"] + assert "type" not in from_format + assert "anyOf" in from_format + + def test_anyof_children_missing_type_get_filled(self): + params = { + "type": "object", + "properties": { + "value": { + "anyOf": [ + {"type": "string"}, + {"description": "A typeless option"}, + ], + }, + }, + } + out = sanitize_moonshot_tool_parameters(params) + children = out["properties"]["value"]["anyOf"] + assert children[0]["type"] == "string" + assert "type" in children[1] + + +class TestTopLevelGuarantees: + """The returned top-level schema is always a well-formed object.""" + + def test_non_dict_input_returns_empty_object(self): + assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}} + assert sanitize_moonshot_tool_parameters("garbage") 
== {"type": "object", "properties": {}} + assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}} + + def test_non_object_top_level_coerced(self): + params = {"type": "string"} + out = sanitize_moonshot_tool_parameters(params) + assert out["type"] == "object" + assert "properties" in out + + def test_does_not_mutate_input(self): + params = { + "type": "object", + "properties": {"q": {"description": "no type"}}, + } + snapshot = { + "type": params["type"], + "properties": {"q": dict(params["properties"]["q"])}, + } + sanitize_moonshot_tool_parameters(params) + assert params["type"] == snapshot["type"] + assert "type" not in params["properties"]["q"] + + +class TestToolListSanitizer: + """sanitize_moonshot_tools() walks an OpenAI-format tool list.""" + + def test_applies_per_tool(self): + tools = [ + { + "type": "function", + "function": { + "name": "search", + "description": "Search", + "parameters": { + "type": "object", + "properties": {"q": {"description": "query"}}, + }, + }, + }, + { + "type": "function", + "function": { + "name": "noop", + "description": "Does nothing", + "parameters": {"type": "object", "properties": {}}, + }, + }, + ] + out = sanitize_moonshot_tools(tools) + assert out[0]["function"]["parameters"]["properties"]["q"]["type"] == "string" + # Second tool already clean — should be structurally equivalent + assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}} + + def test_empty_list_is_passthrough(self): + assert sanitize_moonshot_tools([]) == [] + assert sanitize_moonshot_tools(None) is None + + def test_skips_malformed_entries(self): + """Entries without a function dict are passed through untouched.""" + tools = [{"type": "function"}, {"not": "a tool"}] + out = sanitize_moonshot_tools(tools) + assert out == tools + + +class TestRealWorldMCPShape: + """End-to-end: a realistic MCP-style schema that used to 400 on Moonshot.""" + + def test_combined_rewrites(self): + # Shape: missing type on a 
property, anyOf with parent type, array + # items without type — all in one tool. + params = { + "type": "object", + "properties": { + "query": {"description": "search text"}, + "filter": { + "type": "string", + "anyOf": [ + {"type": "string"}, + {"type": "null"}, + ], + }, + "tags": { + "type": "array", + "items": {"description": "tag"}, + }, + }, + "required": ["query"], + } + out = sanitize_moonshot_tool_parameters(params) + assert out["properties"]["query"]["type"] == "string" + assert "type" not in out["properties"]["filter"] + assert out["properties"]["filter"]["anyOf"][0]["type"] == "string" + assert out["properties"]["tags"]["items"]["type"] == "string" + assert out["required"] == ["query"] diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index b44eafd453..cb8e17c6af 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -238,6 +238,56 @@ class TestChatCompletionsKimi: ) assert kw["extra_body"]["thinking"] == {"type": "disabled"} + def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport): + """Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL.""" + tools = [ + { + "type": "function", + "function": { + "name": "search", + "description": "Search", + "parameters": { + "type": "object", + "properties": { + "q": {"description": "query"}, # missing type + }, + }, + }, + }, + ] + kw = transport.build_kwargs( + model="moonshotai/kimi-k2.6", + messages=[{"role": "user", "content": "Hi"}], + tools=tools, + max_tokens_param_fn=lambda n: {"max_tokens": n}, + ) + assert kw["tools"][0]["function"]["parameters"]["properties"]["q"]["type"] == "string" + + def test_non_moonshot_tools_are_not_mutated(self, transport): + """Other models don't go through the Moonshot sanitizer.""" + original_params = { + "type": "object", + "properties": {"q": {"description": "query"}}, # missing type + } + tools = [ + { + 
"type": "function", + "function": { + "name": "search", + "description": "Search", + "parameters": original_params, + }, + }, + ] + kw = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", + messages=[{"role": "user", "content": "Hi"}], + tools=tools, + max_tokens_param_fn=lambda n: {"max_tokens": n}, + ) + # The parameters dict is passed through untouched (no synthetic type) + assert "type" not in kw["tools"][0]["function"]["parameters"]["properties"]["q"] + class TestChatCompletionsValidate: diff --git a/tests/tools/test_mcp_tool.py b/tests/tools/test_mcp_tool.py index c70d1a5335..3762eb6169 100644 --- a/tests/tools/test_mcp_tool.py +++ b/tests/tools/test_mcp_tool.py @@ -186,6 +186,111 @@ class TestSchemaConversion: assert schema["parameters"]["properties"]["items"]["items"]["$ref"] == "#/$defs/Entry" assert schema["parameters"]["$defs"]["Entry"]["properties"]["child"]["$ref"] == "#/$defs/Child" + def test_missing_type_on_object_is_coerced(self): + """Schemas that describe an object but omit ``type`` get type='object'.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "properties": {"q": {"type": "string"}}, + "required": ["q"], + }) + + assert schema["type"] == "object" + assert schema["properties"]["q"]["type"] == "string" + assert schema["required"] == ["q"] + + def test_null_type_on_object_is_coerced(self): + """type: None should be treated like missing type (common MCP server bug).""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": None, + "properties": {"x": {"type": "integer"}}, + }) + + assert schema["type"] == "object" + + def test_required_pruned_when_property_missing(self): + """Gemini 400s on required names that don't exist in properties.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": {"a": {"type": "string"}}, + "required": ["a", 
"ghost", "phantom"], + }) + + assert schema["required"] == ["a"] + + def test_required_removed_when_all_names_dangle(self): + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": {}, + "required": ["ghost"], + }) + + assert "required" not in schema + + def test_required_pruning_applies_recursively_inside_nested_objects(self): + """Nested object schemas also get required pruning.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": { + "filter": { + "type": "object", + "properties": {"field": {"type": "string"}}, + "required": ["field", "missing"], + }, + }, + }) + + assert schema["properties"]["filter"]["required"] == ["field"] + + def test_object_in_array_items_gets_properties_filled(self): + """Array-item object schemas without properties get an empty dict.""" + from tools.mcp_tool import _normalize_mcp_input_schema + + schema = _normalize_mcp_input_schema({ + "type": "object", + "properties": { + "items": { + "type": "array", + "items": {"type": "object"}, + }, + }, + }) + + assert schema["properties"]["items"]["items"]["properties"] == {} + + def test_convert_mcp_schema_survives_missing_inputschema_attribute(self): + """A Tool object without .inputSchema must not crash registration.""" + import types + + from tools.mcp_tool import _convert_mcp_schema + + bare_tool = types.SimpleNamespace(name="probe", description="Probe") + schema = _convert_mcp_schema("srv", bare_tool) + + assert schema["name"] == "mcp_srv_probe" + assert schema["parameters"] == {"type": "object", "properties": {}} + + def test_convert_mcp_schema_with_none_inputschema(self): + """Tool with inputSchema=None produces a valid empty object schema.""" + import types + + from tools.mcp_tool import _convert_mcp_schema + + # Note: _make_mcp_tool(input_schema=None) falls back to a default — + # build the namespace directly so .inputSchema 
really is None. + mcp_tool = types.SimpleNamespace(name="probe", description="Probe", inputSchema=None) + schema = _convert_mcp_schema("srv", mcp_tool) + + assert schema["parameters"] == {"type": "object", "properties": {}} + def test_tool_name_prefix_format(self): from tools.mcp_tool import _convert_mcp_schema diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 58bd6cd112..3ed612eda1 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -2026,6 +2026,19 @@ def _normalize_mcp_input_schema(schema: dict | None) -> dict: requires local refs to point into ``#/$defs/...`` instead. Normalize the common draft-07 shape here so MCP tool schemas remain portable across OpenAI-compatible providers. + + Additional MCP-server robustness repairs applied recursively: + + * Missing or ``null`` ``type`` on an object-shaped node is coerced to + ``"object"`` (some servers omit it). See PR #4897. + * When an ``object`` node lacks ``properties``, an empty ``properties`` + dict is added so ``required`` entries don't dangle. + * ``required`` arrays are pruned to only names that exist in + ``properties``; otherwise Google AI Studio / Gemini 400s with + ``property is not defined``. See PR #4651. + + All repairs are provider-agnostic and ideally produce a schema valid on + OpenAI, Anthropic, Gemini, and Moonshot in one pass. 
""" if not schema: return {"type": "object", "properties": {}} @@ -2044,10 +2057,52 @@ def _normalize_mcp_input_schema(schema: dict | None) -> dict: return [_rewrite_local_refs(item) for item in node] return node - normalized = _rewrite_local_refs(schema) + def _repair_object_shape(node): + """Recursively repair object-shaped nodes: fill type, prune required.""" + if isinstance(node, list): + return [_repair_object_shape(item) for item in node] + if not isinstance(node, dict): + return node + repaired = {k: _repair_object_shape(v) for k, v in node.items()} + + # Coerce missing / null type when the shape is clearly an object + # (has properties or required but no type). + if not repaired.get("type") and ( + "properties" in repaired or "required" in repaired + ): + repaired["type"] = "object" + + if repaired.get("type") == "object": + # Ensure properties exists so required can reference it safely + if "properties" not in repaired or not isinstance( + repaired.get("properties"), dict + ): + repaired["properties"] = {} if "properties" not in repaired else repaired["properties"] + if not isinstance(repaired.get("properties"), dict): + repaired["properties"] = {} + + # Prune required to only include names that exist in properties + required = repaired.get("required") + if isinstance(required, list): + props = repaired.get("properties") or {} + valid = [r for r in required if isinstance(r, str) and r in props] + if len(valid) != len(required): + if valid: + repaired["required"] = valid + else: + repaired.pop("required", None) + + return repaired + + normalized = _rewrite_local_refs(schema) + normalized = _repair_object_shape(normalized) + + # Ensure top-level is a well-formed object schema + if not isinstance(normalized, dict): + return {"type": "object", "properties": {}} if normalized.get("type") == "object" and "properties" not in normalized: - return {**normalized, "properties": {}} + normalized = {**normalized, "properties": {}} return normalized @@ -2080,7 +2135,7 
@@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict: return { "name": prefixed_name, "description": mcp_tool.description or f"MCP tool {mcp_tool.name} from {server_name}", - "parameters": _normalize_mcp_input_schema(mcp_tool.inputSchema), + "parameters": _normalize_mcp_input_schema(getattr(mcp_tool, "inputSchema", None)), } From 50d97edbe15e3a4fd72ddbe00a3afb85e9bbafc9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 16:14:55 -0700 Subject: [PATCH 004/220] feat(delegation): bump default child_timeout_seconds to 600s (#14809) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 300s default was too tight for high-reasoning models on non-trivial delegated tasks — e.g. gpt-5.5 xhigh reviewing 12 files would burn >5min on reasoning tokens before issuing its first tool call, tripping the hard wall-clock timeout with 0 api_calls logged. - tools/delegate_tool.py: DEFAULT_CHILD_TIMEOUT 300 -> 600 - hermes_cli/config.py: surface delegation.child_timeout_seconds in DEFAULT_CONFIG so it's discoverable (previously the key was read by _get_child_timeout() but absent from the default config schema) Users can still override via config.yaml delegation.child_timeout_seconds or DELEGATION_CHILD_TIMEOUT_SECONDS env var (floor 30s, no ceiling). --- hermes_cli/config.py | 4 ++++ tools/delegate_tool.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index cfcc7ff28f..c578ded969 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -739,6 +739,10 @@ DEFAULT_CONFIG = { "inherit_mcp_toolsets": True, "max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget, # independent of the parent's max_iterations) + "child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s, + # no ceiling). High-reasoning models on large tasks + # (e.g. 
gpt-5.5 xhigh, opus-4.6) need generous budgets; + # raise if children time out before producing output. "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 2c35c7c7e7..e779e6f609 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -298,7 +298,7 @@ def _get_child_timeout() -> float: """Read delegation.child_timeout_seconds from config. Returns the number of seconds a single child agent is allowed to run - before being considered stuck. Default: 300 s (5 minutes). + before being considered stuck. Default: 600 s (10 minutes). """ cfg = _load_config() val = cfg.get("child_timeout_seconds") @@ -409,7 +409,7 @@ def _preserve_parent_mcp_toolsets( DEFAULT_MAX_ITERATIONS = 50 -DEFAULT_CHILD_TIMEOUT = 300 # seconds before a child agent is considered stuck +DEFAULT_CHILD_TIMEOUT = 600 # seconds before a child agent is considered stuck _HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during delegation _HEARTBEAT_STALE_CYCLES = ( 5 # mark child stale after this many heartbeats with no iteration progress From 3504bd401b8d95abb47e7ea705b373553dcc2a9b Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:19:50 +0300 Subject: [PATCH 005/220] fix(tui): route Ctrl+B to voice toggle, not composer input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the user runs /voice and then presses Ctrl+B in the TUI, three handlers collaborate to consume the chord and none of them dispatch voice.record: - isAction() is platform-aware — on macOS it requires Cmd (meta/super), so Ctrl+B fails the match in useInputHandlers and never triggers voiceStart/voiceStop. 
- TextInput's Ctrl+B pass-through list doesn't include 'b', so the keystroke falls through to the wordMod backward-word branch on Linux and to the printable-char insertion branch on macOS — the latter is exactly what timmie reported ("enters a b into the tui"). - /voice emits "voice: on" with no hint, so the user has no way to know Ctrl+B is the recording toggle. Introduces isVoiceToggleKey(key, ch) in lib/platform.ts that matches raw Ctrl+B on every platform (mirrors tips.py and config.yaml's voice.record_key default) and additionally accepts Cmd+B on macOS so existing muscle memory keeps working. Wires it into useInputHandlers, adds Ctrl+B to TextInput's pass-through list so the global handler actually receives the chord, and appends "press Ctrl+B to record" to the /voice on message. Empirically verified with hermes --tui: Ctrl+B no longer leaks 'b' into the composer and now dispatches the voice.record RPC (the downstream ImportError for hermes_cli.voice is a separate upstream bug — follow-up patch). 
--- ui-tui/src/__tests__/platform.test.ts | 30 ++++++++++++++++++++++++ ui-tui/src/app/slash/commands/session.ts | 2 +- ui-tui/src/app/useInputHandlers.ts | 4 ++-- ui-tui/src/components/textInput.tsx | 14 ++++++++++- ui-tui/src/lib/platform.ts | 14 +++++++++++ 5 files changed, 60 insertions(+), 4 deletions(-) diff --git a/ui-tui/src/__tests__/platform.test.ts b/ui-tui/src/__tests__/platform.test.ts index dbb6f0fe6a..8995b9c6fc 100644 --- a/ui-tui/src/__tests__/platform.test.ts +++ b/ui-tui/src/__tests__/platform.test.ts @@ -31,6 +31,36 @@ describe('platform action modifier', () => { }) }) +describe('isVoiceToggleKey', () => { + it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => { + const { isVoiceToggleKey } = await importPlatform('darwin') + + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'b')).toBe(true) + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'B')).toBe(true) + }) + + it('matches Cmd+B on macOS (preserve platform muscle memory)', async () => { + const { isVoiceToggleKey } = await importPlatform('darwin') + + expect(isVoiceToggleKey({ ctrl: false, meta: true, super: false }, 'b')).toBe(true) + expect(isVoiceToggleKey({ ctrl: false, meta: false, super: true }, 'b')).toBe(true) + }) + + it('matches Ctrl+B on non-macOS platforms', async () => { + const { isVoiceToggleKey } = await importPlatform('linux') + + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'b')).toBe(true) + }) + + it('does not match unmodified b or other Ctrl combos', async () => { + const { isVoiceToggleKey } = await importPlatform('darwin') + + expect(isVoiceToggleKey({ ctrl: false, meta: false, super: false }, 'b')).toBe(false) + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'a')).toBe(false) + expect(isVoiceToggleKey({ ctrl: true, meta: false, super: false }, 'c')).toBe(false) + }) +}) + describe('isMacActionFallback', () => { it('routes raw Ctrl+K and Ctrl+W to readline 
kill-to-end / delete-word on macOS', async () => { const { isMacActionFallback } = await importPlatform('darwin') diff --git a/ui-tui/src/app/slash/commands/session.ts b/ui-tui/src/app/slash/commands/session.ts index 5f17667f03..90a1beb3f0 100644 --- a/ui-tui/src/app/slash/commands/session.ts +++ b/ui-tui/src/app/slash/commands/session.ts @@ -192,7 +192,7 @@ export const sessionCommands: SlashCommand[] = [ ctx.gateway.rpc('voice.toggle', { action }).then( ctx.guarded(r => { ctx.voice.setVoiceEnabled(!!r.enabled) - ctx.transcript.sys(`voice: ${r.enabled ? 'on' : 'off'}`) + ctx.transcript.sys(`voice: ${r.enabled ? 'on — press Ctrl+B to record' : 'off'}`) }) ) } diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts index 72cd5b9e5a..cfc3eed7c8 100644 --- a/ui-tui/src/app/useInputHandlers.ts +++ b/ui-tui/src/app/useInputHandlers.ts @@ -8,7 +8,7 @@ import type { SudoRespondResponse, VoiceRecordResponse } from '../gatewayTypes.js' -import { isAction, isMac } from '../lib/platform.js' +import { isAction, isMac, isVoiceToggleKey } from '../lib/platform.js' import { getInputSelection } from './inputSelectionStore.js' import type { InputHandlerContext, InputHandlerResult } from './interfaces.js' @@ -370,7 +370,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { return actions.newSession() } - if (isAction(key, ch, 'b')) { + if (isVoiceToggleKey(key, ch)) { return voice.recording ? voiceStop() : voiceStart() } diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx index e91143c00b..394c3c67af 100644 --- a/ui-tui/src/components/textInput.tsx +++ b/ui-tui/src/components/textInput.tsx @@ -623,7 +623,19 @@ export function TextInput({ return } - if ((k.ctrl && inp === 'c') || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) { + // Ctrl+B is the documented voice-recording toggle (see platform.ts → + // isVoiceToggleKey). 
Pass it through so the app-level handler in + // useInputHandlers receives it instead of being swallowed here as + // either backward-word nav (line below) or a literal 'b' insertion. + if ( + (k.ctrl && inp === 'c') || + (k.ctrl && inp === 'b') || + k.tab || + (k.shift && k.tab) || + k.pageUp || + k.pageDown || + k.escape + ) { return } diff --git a/ui-tui/src/lib/platform.ts b/ui-tui/src/lib/platform.ts index ab694baaf7..9e85da16f8 100644 --- a/ui-tui/src/lib/platform.ts +++ b/ui-tui/src/lib/platform.ts @@ -33,3 +33,17 @@ export const isMacActionFallback = ( /** Match action-modifier + a single character (case-insensitive). */ export const isAction = (key: { ctrl: boolean; meta: boolean; super?: boolean }, ch: string, target: string): boolean => isActionMod(key) && ch.toLowerCase() === target + +/** + * Voice recording toggle key (Ctrl+B). + * + * Documented as "Ctrl+B" everywhere: tips.py, config.yaml's voice.record_key + * default, and the Python CLI prompt_toolkit handler. We accept raw Ctrl+B on + * every platform so the TUI matches those docs. On macOS we additionally + * accept Cmd+B (the platform action modifier) so existing macOS muscle memory + * keeps working. + */ +export const isVoiceToggleKey = ( + key: { ctrl: boolean; meta: boolean; super?: boolean }, + ch: string +): boolean => (key.ctrl || isActionMod(key)) && ch.toLowerCase() === 'b' From 0bb460b07011a6753bec6a8ebf824e8940b9bc00 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:21:59 +0300 Subject: [PATCH 006/220] fix(tui): add missing hermes_cli.voice wrapper for gateway RPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tui_gateway/server.py:3486/3491/3509 imports start_recording, stop_and_transcribe, and speak_text from hermes_cli.voice, but the module never existed (not in git history — never shipped, never deleted). 
Every voice.record / voice.tts RPC call hit the ImportError branch and the TUI surfaced it as "voice module not available — install audio dependencies" even on boxes with sounddevice / faster-whisper / numpy installed. Adds a thin wrapper on top of tools.voice_mode (recording + transcription) and tools.tts_tool (text-to-speech): - start_recording() — idempotent; stores the active AudioRecorder in a module-global guarded by a Lock so repeat Ctrl+B presses don't fight over the mic. - stop_and_transcribe() — returns None for no-op / no-speech / Whisper-hallucination cases so the TUI's existing "no speech detected" path keeps working unchanged. - speak_text(text) — lazily imports tts_tool (optional provider SDKs stay unloaded until the first /voice tts call), parses the tool's JSON result, and plays the audio via play_audio_file. Paired with the Ctrl+B keybinding fix in the prior commit, the TUI voice pipeline now works end-to-end for the first time. --- hermes_cli/voice.py | 120 +++++++++++++++++++++++++ tests/hermes_cli/test_voice_wrapper.py | 53 +++++++++++ 2 files changed, 173 insertions(+) create mode 100644 hermes_cli/voice.py create mode 100644 tests/hermes_cli/test_voice_wrapper.py diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py new file mode 100644 index 0000000000..71f1805633 --- /dev/null +++ b/hermes_cli/voice.py @@ -0,0 +1,120 @@ +"""Process-wide voice recording + TTS API for the TUI gateway. + +Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool`` +(text-to-speech) behind idempotent, stateful entry points that the gateway's +``voice.record`` and ``voice.tts`` JSON-RPC handlers can call from a +dedicated thread. The gateway imports this module lazily so missing optional +audio deps (sounddevice, faster-whisper, numpy) surface as an ``ImportError`` +at call time, not at startup. 
+""" + +from __future__ import annotations + +import json +import logging +import threading +from typing import Optional + +from tools.voice_mode import ( + create_audio_recorder, + is_whisper_hallucination, + play_audio_file, + transcribe_recording, +) + +logger = logging.getLogger(__name__) + +_recorder = None +_recorder_lock = threading.Lock() + + +def start_recording() -> None: + """Begin capturing from the default input device. + + Idempotent — calling again while a recording is in progress is a no-op, + which matches the TUI's toggle semantics (Ctrl+B starts, Ctrl+B stops). + """ + global _recorder + + with _recorder_lock: + if _recorder is not None and getattr(_recorder, "is_recording", False): + return + rec = create_audio_recorder() + # No silence callback: the TUI drives start/stop explicitly via + # the voice.record RPC. VAD auto-stop is a CLI-mode feature. + rec.start() + _recorder = rec + + +def stop_and_transcribe() -> Optional[str]: + """Stop the active recording, transcribe it, and return the text. + + Returns ``None`` when no recording is active, when the microphone + captured no speech, or when Whisper returned a known hallucination + token (silence artefacts like "Thanks for watching!"). The caller + treats ``None`` as "no speech detected" and leaves the composer + untouched. + """ + global _recorder + + with _recorder_lock: + rec = _recorder + _recorder = None + + if rec is None: + return None + + wav_path = rec.stop() + if not wav_path: + return None + + try: + result = transcribe_recording(wav_path) + except Exception as e: + logger.warning("voice transcription failed: %s", e) + return None + + text = (result.get("text") or "").strip() + if not text or is_whisper_hallucination(text): + return None + + return text + + +def speak_text(text: str) -> None: + """Synthesize ``text`` with the configured TTS provider and play it. + + The gateway spawns a daemon thread to call this so the RPC returns + immediately. 
Failures are logged and swallowed — the UI already + acknowledged "speaking" by the time we get here. + """ + if not text or not text.strip(): + return + + # Lazy import — tts_tool pulls optional provider SDKs (OpenAI, + # ElevenLabs, etc.) and config-reading machinery that we don't + # want to load at module import time. + from tools.tts_tool import text_to_speech_tool + + try: + raw = text_to_speech_tool(text) + except Exception as e: + logger.warning("TTS synthesis failed: %s", e) + return + + try: + result = json.loads(raw) if isinstance(raw, str) else raw + except json.JSONDecodeError: + logger.warning("TTS returned non-JSON result") + return + + if not isinstance(result, dict): + return + + file_path = result.get("file_path") + if not file_path: + err = result.get("error") or "no file_path in TTS result" + logger.warning("TTS succeeded but produced no audio: %s", err) + return + + play_audio_file(file_path) diff --git a/tests/hermes_cli/test_voice_wrapper.py b/tests/hermes_cli/test_voice_wrapper.py new file mode 100644 index 0000000000..f711ec356f --- /dev/null +++ b/tests/hermes_cli/test_voice_wrapper.py @@ -0,0 +1,53 @@ +"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper. + +The module is imported *lazily* by ``tui_gateway/server.py`` so that a +box with missing audio deps fails at call time (returning a clean RPC +error) rather than at gateway startup. These tests therefore only +assert the public contract the gateway depends on: the three symbols +exist, ``stop_and_transcribe`` is a no-op when nothing is recording, +and ``speak_text`` tolerates empty input without touching the provider +stack. 
+""" + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +class TestPublicAPI: + def test_gateway_symbols_importable(self): + """Match the exact import shape tui_gateway/server.py uses.""" + from hermes_cli.voice import ( + speak_text, + start_recording, + stop_and_transcribe, + ) + + assert callable(start_recording) + assert callable(stop_and_transcribe) + assert callable(speak_text) + + +class TestStopWithoutStart: + def test_returns_none_when_no_recording_active(self, monkeypatch): + """Idempotent no-op: stop before start must not raise or touch state.""" + import hermes_cli.voice as voice + + monkeypatch.setattr(voice, "_recorder", None) + + assert voice.stop_and_transcribe() is None + + +class TestSpeakTextGuards: + @pytest.mark.parametrize("text", ["", " ", "\n\t "]) + def test_empty_text_is_noop(self, text): + """Empty / whitespace-only text must return without importing tts_tool + (the gateway spawns a thread per call, so a no-op on empty input + keeps the thread pool from churning on trivial inputs).""" + from hermes_cli.voice import speak_text + + # Should simply return None without raising. + assert speak_text(text) is None From 04c489b5873dae86caa4c99757e004c767e1303f Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:55:17 +0300 Subject: [PATCH 007/220] feat(tui): match CLI's voice slash + VAD-continuous recording model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TUI had drifted from the CLI's voice model in two ways: - /voice on was lighting up the microphone immediately and Ctrl+B was interpreted as a mode toggle. The CLI separates the two: /voice on just flips the umbrella bit, recording only starts once the user presses Ctrl+B, which also sets _voice_continuous so the VAD loop auto-restarts until the user presses Ctrl+B again or three silent cycles pass. 
- /voice tts was missing entirely, so users couldn't turn agent reply speech on/off from inside the TUI. This commit brings the TUI to parity. Python - hermes_cli/voice.py: continuous-mode API (start_continuous, stop_continuous, is_continuous_active) layered on the existing PTT wrappers. The silence callback transcribes, fires on_transcript, tracks consecutive no-speech cycles, and auto-restarts — mirroring cli.py:_voice_stop_and_transcribe + _restart_recording. - tui_gateway/server.py: - voice.toggle now supports on / off / tts / status. The umbrella bit lives in HERMES_VOICE + display.voice_enabled; tts lives in HERMES_VOICE_TTS + display.voice_tts. /voice off also tears down any active continuous loop so a toggle-off really releases the microphone. - voice.record start/stop now drives start_continuous/stop_continuous. start is refused with a clear error when the mode is off, matching cli.py:handle_voice_record's early return on `not _voice_mode`. - New voice.transcript / voice.status events emit through _voice_emit (remembers the sid that last enabled the mode so events land in the right session). TypeScript - gatewayTypes.ts: voice.status + voice.transcript event discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse gains status for the new "started/stopped" responses. - interfaces.ts: GatewayEventHandlerContext gains composer.setInput + submission.submitRef + voice.{setRecording, setProcessing, setVoiceEnabled}; InputHandlerContext.voice gains enabled + setVoiceEnabled for the mode-aware Ctrl+B handler. - createGatewayEventHandler.ts: voice.status drives REC/STT badges; voice.transcript auto-submits when the composer is empty (CLI _pending_input.put parity) and appends when a draft is in flight. no_speech_limit flips voice off + sys line. - useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop), not voice.toggle, and nudges the user with a sys line when the mode is off instead of silently flipping it on. 
- useMainApp.ts: wires the new event-handler context fields. - slash/commands/session.ts: /voice handles on / off / tts / status with CLI-matching output ("voice: mode on · tts off"). Backward compat preserved for voice.record (was always PTT shape; gateway still honours start/stop with mode-gating added). --- hermes_cli/voice.py | 360 +++++++++++++++++- tests/hermes_cli/test_voice_wrapper.py | 202 ++++++++++ tui_gateway/server.py | 147 ++++++- .../createGatewayEventHandler.test.ts | 11 +- ui-tui/src/app/createGatewayEventHandler.ts | 57 +++ ui-tui/src/app/interfaces.ts | 13 + ui-tui/src/app/slash/commands/session.ts | 55 ++- ui-tui/src/app/useInputHandlers.ts | 64 ++-- ui-tui/src/app/useMainApp.ts | 22 +- ui-tui/src/gatewayTypes.ts | 8 + 10 files changed, 861 insertions(+), 78 deletions(-) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py index 71f1805633..70e097e77c 100644 --- a/hermes_cli/voice.py +++ b/hermes_cli/voice.py @@ -2,18 +2,31 @@ Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool`` (text-to-speech) behind idempotent, stateful entry points that the gateway's -``voice.record`` and ``voice.tts`` JSON-RPC handlers can call from a -dedicated thread. The gateway imports this module lazily so missing optional -audio deps (sounddevice, faster-whisper, numpy) surface as an ``ImportError`` -at call time, not at startup. +``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can +call from a dedicated thread. The gateway imports this module lazily so that +missing optional audio deps (sounddevice, faster-whisper, numpy) surface as +an ``ImportError`` at call time, not at startup. + +Two usage modes are exposed: + +* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single + manually-bounded capture used when the caller drives the start/stop pair + explicitly. 
+* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors + the classic CLI voice mode: recording auto-stops on silence, transcribes, + hands the result to a callback, and then auto-restarts for the next turn. + Three consecutive no-speech cycles stop the loop and fire + ``on_silent_limit`` so the UI can turn the mode off. """ from __future__ import annotations import json import logging +import os +import sys import threading -from typing import Optional +from typing import Any, Callable, Optional from tools.voice_mode import ( create_audio_recorder, @@ -24,15 +37,71 @@ from tools.voice_mode import ( logger = logging.getLogger(__name__) + +def _debug(msg: str) -> None: + """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1. + + Goes to stderr so the TUI gateway wraps it as a gateway.stderr event, + which createGatewayEventHandler shows as an Activity line — exactly + what we need to diagnose "why didn't the loop auto-restart?" in the + user's real terminal without shipping a separate debug RPC. + """ + if os.environ.get("HERMES_VOICE_DEBUG", "").strip() == "1": + print(f"[voice] {msg}", file=sys.stderr, flush=True) + + +def _beeps_enabled() -> bool: + """CLI parity: voice.beep_enabled in config.yaml (default True).""" + try: + from hermes_cli.config import load_config + + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + + +def _play_beep(frequency: int, count: int = 1) -> None: + """Audible cue matching cli.py's record/stop beeps. + + 880 Hz single-beep on start (cli.py:_voice_start_recording line 7532), + 660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585). + Best-effort — sounddevice failures are silently swallowed so the + voice loop never breaks because a speaker was unavailable. 
+ """ + if not _beeps_enabled(): + return + try: + from tools.voice_mode import play_beep + + play_beep(frequency=frequency, count=count) + except Exception as e: + _debug(f"beep {frequency}Hz failed: {e}") + +# ── Push-to-talk state ─────────────────────────────────────────────── _recorder = None _recorder_lock = threading.Lock() +# ── Continuous (VAD) state ─────────────────────────────────────────── +_continuous_lock = threading.Lock() +_continuous_active = False +_continuous_recorder: Any = None +_continuous_on_transcript: Optional[Callable[[str], None]] = None +_continuous_on_status: Optional[Callable[[str], None]] = None +_continuous_on_silent_limit: Optional[Callable[[], None]] = None +_continuous_no_speech_count = 0 +_CONTINUOUS_NO_SPEECH_LIMIT = 3 + + +# ── Push-to-talk API ───────────────────────────────────────────────── + def start_recording() -> None: - """Begin capturing from the default input device. + """Begin capturing from the default input device (push-to-talk). - Idempotent — calling again while a recording is in progress is a no-op, - which matches the TUI's toggle semantics (Ctrl+B starts, Ctrl+B stops). + Idempotent — calling again while a recording is in progress is a no-op. """ global _recorder @@ -40,20 +109,15 @@ def start_recording() -> None: if _recorder is not None and getattr(_recorder, "is_recording", False): return rec = create_audio_recorder() - # No silence callback: the TUI drives start/stop explicitly via - # the voice.record RPC. VAD auto-stop is a CLI-mode feature. rec.start() _recorder = rec def stop_and_transcribe() -> Optional[str]: - """Stop the active recording, transcribe it, and return the text. + """Stop the active push-to-talk recording, transcribe, return text. Returns ``None`` when no recording is active, when the microphone - captured no speech, or when Whisper returned a known hallucination - token (silence artefacts like "Thanks for watching!"). 
The caller - treats ``None`` as "no speech detected" and leaves the composer - untouched. + captured no speech, or when Whisper returned a known hallucination. """ global _recorder @@ -73,27 +137,281 @@ def stop_and_transcribe() -> Optional[str]: except Exception as e: logger.warning("voice transcription failed: %s", e) return None + finally: + try: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception: + pass - text = (result.get("text") or "").strip() + # transcribe_recording returns {"success": bool, "transcript": str, ...} + # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript"). + if not result.get("success"): + return None + text = (result.get("transcript") or "").strip() if not text or is_whisper_hallucination(text): return None return text +# ── Continuous (VAD) API ───────────────────────────────────────────── + + +def start_continuous( + on_transcript: Callable[[str], None], + on_status: Optional[Callable[[str], None]] = None, + on_silent_limit: Optional[Callable[[], None]] = None, + silence_threshold: int = 200, + silence_duration: float = 3.0, +) -> None: + """Start a VAD-driven continuous recording loop. + + The loop calls ``on_transcript(text)`` each time speech is detected and + transcribed successfully, then auto-restarts. After + ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech + picked up at all) the loop stops itself and calls ``on_silent_limit`` + so the UI can reflect "voice off". Idempotent — calling while already + active is a no-op. + + ``on_status`` is called with ``"listening"`` / ``"transcribing"`` / + ``"idle"`` so the UI can show a live indicator. 
+ """ + global _continuous_active, _continuous_recorder + global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit + global _continuous_no_speech_count + + with _continuous_lock: + if _continuous_active: + _debug("start_continuous: already active — no-op") + return + _continuous_active = True + _continuous_on_transcript = on_transcript + _continuous_on_status = on_status + _continuous_on_silent_limit = on_silent_limit + _continuous_no_speech_count = 0 + + if _continuous_recorder is None: + _continuous_recorder = create_audio_recorder() + + _continuous_recorder._silence_threshold = silence_threshold + _continuous_recorder._silence_duration = silence_duration + rec = _continuous_recorder + + _debug( + f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)" + ) + + # CLI parity: single 880 Hz beep *before* opening the stream — placing + # the beep after stream.start() on macOS triggers a CoreAudio conflict + # (cli.py:7528 comment). + _play_beep(frequency=880, count=1) + + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to start continuous recording: %s", e) + _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + raise + + if on_status: + try: + on_status("listening") + except Exception: + pass + + +def stop_continuous() -> None: + """Stop the active continuous loop and release the microphone. + + Idempotent — calling while not active is a no-op. Any in-flight + transcription completes but its result is discarded (the callback + checks ``_continuous_active`` before firing). 
+ """ + global _continuous_active, _continuous_on_transcript + global _continuous_on_status, _continuous_on_silent_limit + global _continuous_recorder, _continuous_no_speech_count + + with _continuous_lock: + if not _continuous_active: + return + _continuous_active = False + rec = _continuous_recorder + on_status = _continuous_on_status + _continuous_on_transcript = None + _continuous_on_status = None + _continuous_on_silent_limit = None + _continuous_no_speech_count = 0 + + if rec is not None: + try: + # cancel() (not stop()) discards buffered frames — the loop + # is over, we don't want to transcribe a half-captured turn. + rec.cancel() + except Exception as e: + logger.warning("failed to cancel recorder: %s", e) + + # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the + # silence-auto-stop path plays). + _play_beep(frequency=660, count=2) + + if on_status: + try: + on_status("idle") + except Exception: + pass + + +def is_continuous_active() -> bool: + """Whether a continuous voice loop is currently running.""" + with _continuous_lock: + return _continuous_active + + +def _continuous_on_silence() -> None: + """AudioRecorder silence callback — runs in a daemon thread. + + Stops the current capture, transcribes, delivers the text via + ``on_transcript``, and — if the loop is still active — starts the + next capture. Three consecutive silent cycles end the loop. 
+ """ + global _continuous_active, _continuous_no_speech_count + + _debug("_continuous_on_silence: fired") + + with _continuous_lock: + if not _continuous_active: + _debug("_continuous_on_silence: loop inactive — abort") + return + rec = _continuous_recorder + on_transcript = _continuous_on_transcript + on_status = _continuous_on_status + on_silent_limit = _continuous_on_silent_limit + + if rec is None: + _debug("_continuous_on_silence: no recorder — abort") + return + + if on_status: + try: + on_status("transcribing") + except Exception: + pass + + wav_path = rec.stop() + # Peak RMS is the critical diagnostic when stop() returns None despite + # the VAD firing — tells us at a glance whether the mic was too quiet + # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree. + peak_rms = getattr(rec, "_peak_rms", -1) + _debug( + f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})" + ) + + # CLI parity: double 660 Hz beep after the stream stops (safe from the + # CoreAudio conflict that blocks pre-start beeps). + _play_beep(frequency=660, count=2) + + transcript: Optional[str] = None + + if wav_path: + try: + result = transcribe_recording(wav_path) + # transcribe_recording returns {"success": bool, "transcript": str, + # "error": str?} — NOT {"text": str}. Using the wrong key silently + # produced empty transcripts even when Groq/local STT returned fine, + # which masqueraded as "not hearing the user" to the caller. 
+ success = bool(result.get("success")) + text = (result.get("transcript") or "").strip() + err = result.get("error") + _debug( + f"_continuous_on_silence: transcribe -> success={success} " + f"text={text!r} err={err!r}" + ) + if success and text and not is_whisper_hallucination(text): + transcript = text + except Exception as e: + logger.warning("continuous transcription failed: %s", e) + _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}") + finally: + try: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception: + pass + + with _continuous_lock: + if not _continuous_active: + # User stopped us while we were transcribing — discard. + _debug("_continuous_on_silence: stopped during transcribe — no restart") + return + if transcript: + _continuous_no_speech_count = 0 + else: + _continuous_no_speech_count += 1 + should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT + no_speech = _continuous_no_speech_count + + if transcript and on_transcript: + try: + on_transcript(transcript) + except Exception as e: + logger.warning("on_transcript callback raised: %s", e) + + if should_halt: + _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting") + with _continuous_lock: + _continuous_active = False + _continuous_no_speech_count = 0 + if on_silent_limit: + try: + on_silent_limit() + except Exception: + pass + try: + rec.cancel() + except Exception: + pass + if on_status: + try: + on_status("idle") + except Exception: + pass + return + + # Restart for the next turn. 
+ _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") + _play_beep(frequency=880, count=1) + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to restart continuous recording: %s", e) + _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + return + + if on_status: + try: + on_status("listening") + except Exception: + pass + + +# ── TTS API ────────────────────────────────────────────────────────── + + def speak_text(text: str) -> None: """Synthesize ``text`` with the configured TTS provider and play it. The gateway spawns a daemon thread to call this so the RPC returns - immediately. Failures are logged and swallowed — the UI already - acknowledged "speaking" by the time we get here. + immediately. Failures are logged and swallowed. """ if not text or not text.strip(): return - # Lazy import — tts_tool pulls optional provider SDKs (OpenAI, - # ElevenLabs, etc.) and config-reading machinery that we don't - # want to load at module import time. + # Lazy import — tts_tool pulls optional provider SDKs. from tools.tts_tool import text_to_speech_tool try: diff --git a/tests/hermes_cli/test_voice_wrapper.py b/tests/hermes_cli/test_voice_wrapper.py index f711ec356f..a372c1194f 100644 --- a/tests/hermes_cli/test_voice_wrapper.py +++ b/tests/hermes_cli/test_voice_wrapper.py @@ -51,3 +51,205 @@ class TestSpeakTextGuards: # Should simply return None without raising. 
assert speak_text(text) is None + + +class TestContinuousAPI: + """Continuous (VAD) mode API — CLI-parity loop entry points.""" + + def test_continuous_exports(self): + from hermes_cli.voice import ( + is_continuous_active, + start_continuous, + stop_continuous, + ) + + assert callable(start_continuous) + assert callable(stop_continuous) + assert callable(is_continuous_active) + + def test_not_active_by_default(self, monkeypatch): + import hermes_cli.voice as voice + + # Isolate from any state left behind by other tests in the session. + monkeypatch.setattr(voice, "_continuous_active", False) + monkeypatch.setattr(voice, "_continuous_recorder", None) + + assert voice.is_continuous_active() is False + + def test_stop_continuous_idempotent_when_inactive(self, monkeypatch): + """stop_continuous must not raise when no loop is active — the + gateway's voice.toggle off path calls it unconditionally.""" + import hermes_cli.voice as voice + + monkeypatch.setattr(voice, "_continuous_active", False) + monkeypatch.setattr(voice, "_continuous_recorder", None) + + # Should return cleanly without exceptions + assert voice.stop_continuous() is None + assert voice.is_continuous_active() is False + + def test_double_start_is_idempotent(self, monkeypatch): + """A second start_continuous while already active is a no-op — prevents + two overlapping capture threads fighting over the microphone when the + UI double-fires (e.g. 
both /voice on and Ctrl+B within the same tick).""" + import hermes_cli.voice as voice + + monkeypatch.setattr(voice, "_continuous_active", True) + called = {"n": 0} + + class FakeRecorder: + def start(self, on_silence_stop=None): + called["n"] += 1 + + def cancel(self): + pass + + monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder()) + + voice.start_continuous(on_transcript=lambda _t: None) + + # The guard inside start_continuous short-circuits before rec.start() + assert called["n"] == 0 + + +class TestContinuousLoopSimulation: + """End-to-end simulation of the VAD loop with a fake recorder. + + Proves auto-restart works: the silence callback must trigger transcribe → + on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers + the 3-strikes no-speech halt. + """ + + @pytest.fixture + def fake_recorder(self, monkeypatch): + import hermes_cli.voice as voice + + # Reset module state between tests. + monkeypatch.setattr(voice, "_continuous_active", False) + monkeypatch.setattr(voice, "_continuous_recorder", None) + monkeypatch.setattr(voice, "_continuous_no_speech_count", 0) + monkeypatch.setattr(voice, "_continuous_on_transcript", None) + monkeypatch.setattr(voice, "_continuous_on_status", None) + monkeypatch.setattr(voice, "_continuous_on_silent_limit", None) + + class FakeRecorder: + _silence_threshold = 200 + _silence_duration = 3.0 + is_recording = False + + def __init__(self): + self.start_calls = 0 + self.last_callback = None + self.stopped = 0 + self.cancelled = 0 + # Preset WAV path returned by stop() + self.next_stop_wav = "/tmp/fake.wav" + + def start(self, on_silence_stop=None): + self.start_calls += 1 + self.last_callback = on_silence_stop + self.is_recording = True + + def stop(self): + self.stopped += 1 + self.is_recording = False + return self.next_stop_wav + + def cancel(self): + self.cancelled += 1 + self.is_recording = False + + rec = FakeRecorder() + monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec) + # 
Skip real file ops in the silence callback. + monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False) + return rec + + def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch): + import hermes_cli.voice as voice + + monkeypatch.setattr( + voice, + "transcribe_recording", + lambda _p: {"success": True, "transcript": "hello world"}, + ) + monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False) + + transcripts = [] + statuses = [] + + voice.start_continuous( + on_transcript=lambda t: transcripts.append(t), + on_status=lambda s: statuses.append(s), + ) + + assert fake_recorder.start_calls == 1 + assert statuses == ["listening"] + + # Simulate AudioRecorder's silence detector firing. + fake_recorder.last_callback() + + assert transcripts == ["hello world"] + assert fake_recorder.start_calls == 2 # auto-restarted + assert statuses == ["listening", "transcribing", "listening"] + assert voice.is_continuous_active() is True + + voice.stop_continuous() + + def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch): + import hermes_cli.voice as voice + + # Transcription returns no speech — fake_recorder.stop() returns the + # path, but transcribe returns empty text, counting as silence. 
+ monkeypatch.setattr( + voice, + "transcribe_recording", + lambda _p: {"success": True, "transcript": ""}, + ) + monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False) + + transcripts = [] + silent_limit_fired = [] + + voice.start_continuous( + on_transcript=lambda t: transcripts.append(t), + on_silent_limit=lambda: silent_limit_fired.append(True), + ) + + # Fire silence callback 3 times + for _ in range(3): + fake_recorder.last_callback() + + assert transcripts == [] + assert silent_limit_fired == [True] + assert voice.is_continuous_active() is False + assert fake_recorder.cancelled >= 1 + + def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch): + """User hits Ctrl+B mid-transcription: the in-flight transcript must + still fire (it's a real utterance), but the loop must NOT restart.""" + import hermes_cli.voice as voice + + stop_triggered = {"flag": False} + + def late_transcribe(_p): + # Simulate stop_continuous arriving while we're inside transcribe + voice.stop_continuous() + stop_triggered["flag"] = True + return {"success": True, "transcript": "final word"} + + monkeypatch.setattr(voice, "transcribe_recording", late_transcribe) + monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False) + + transcripts = [] + voice.start_continuous(on_transcript=lambda t: transcripts.append(t)) + + initial_starts = fake_recorder.start_calls # 1 + fake_recorder.last_callback() + + assert stop_triggered["flag"] is True + # Loop is stopped — no auto-restart + assert fake_recorder.start_calls == initial_starts + # The in-flight transcript was suppressed because we stopped mid-flight + assert transcripts == [] + assert voice.is_continuous_active() is False diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 165b47bf99..130b60576e 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -3455,43 +3455,154 @@ def _(rid, params: dict) -> dict: # ── Methods: voice 
─────────────────────────────────────────────────── +_voice_sid_lock = threading.Lock() +_voice_event_sid: str = "" + + +def _voice_emit(event: str, payload: dict | None = None) -> None: + """Emit a voice event toward the session that most recently turned the + mode on. Voice is process-global (one microphone), so there's only ever + one sid to target; the TUI handler treats an empty sid as "active + session". Kept separate from _emit to make the lack of per-call sid + argument explicit.""" + with _voice_sid_lock: + sid = _voice_event_sid + _emit(event, sid, payload) + + +def _voice_mode_enabled() -> bool: + """Current voice-mode flag. HERMES_VOICE env var wins over config so + the gateway and CLI agree when one of them was launched with an + explicit override.""" + env = os.environ.get("HERMES_VOICE", "").strip() + if env in {"0", "1"}: + return env == "1" + return bool(_load_cfg().get("display", {}).get("voice_enabled", False)) + + +def _voice_tts_enabled() -> bool: + """Whether agent replies should be spoken back via TTS.""" + env = os.environ.get("HERMES_VOICE_TTS", "").strip() + if env in {"0", "1"}: + return env == "1" + return bool(_load_cfg().get("display", {}).get("voice_tts", False)) + + @method("voice.toggle") def _(rid, params: dict) -> dict: + """CLI parity for the ``/voice`` slash command. + + Subcommands: + + * ``status`` — report mode + TTS flags (default when action is unknown). + * ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it + off also tears down any active continuous recording loop. Does NOT + start recording on its own; recording is driven by ``voice.record`` + (Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split. + * ``tts`` — toggle speech-output of agent replies. Requires mode on + (mirrors CLI's _toggle_voice_tts guard). 
+ """ action = params.get("action", "status") + if action == "status": - env = os.environ.get("HERMES_VOICE", "").strip() - if env in {"0", "1"}: - return _ok(rid, {"enabled": env == "1"}) - return _ok( - rid, - { - "enabled": bool( - _load_cfg().get("display", {}).get("voice_enabled", False) - ) - }, - ) + # Mirror CLI's _show_voice_status: include STT/TTS provider + # availability so the user can tell at a glance *why* voice mode + # isn't working ("STT provider: MISSING ..." is the common case). + payload: dict = { + "enabled": _voice_mode_enabled(), + "tts": _voice_tts_enabled(), + } + try: + from tools.voice_mode import check_voice_requirements + + reqs = check_voice_requirements() + payload["available"] = bool(reqs.get("available")) + payload["audio_available"] = bool(reqs.get("audio_available")) + payload["stt_available"] = bool(reqs.get("stt_available")) + payload["details"] = reqs.get("details") or "" + except Exception as e: + # check_voice_requirements pulls optional transcription deps — + # swallow so /voice status always returns something useful. + logger.warning("voice.toggle status: requirements probe failed: %s", e) + + return _ok(rid, payload) + if action in ("on", "off"): enabled = action == "on" os.environ["HERMES_VOICE"] = "1" if enabled else "0" _write_config_key("display.voice_enabled", enabled) - return _ok(rid, {"enabled": action == "on"}) + + if not enabled: + # Disabling the mode must tear the continuous loop down; the + # loop holds the microphone and would otherwise keep running. 
+ try: + from hermes_cli.voice import stop_continuous + + stop_continuous() + except ImportError: + pass + except Exception as e: + logger.warning("voice: stop_continuous failed during toggle off: %s", e) + + return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()}) + + if action == "tts": + if not _voice_mode_enabled(): + return _err(rid, 4014, "enable voice mode first: /voice on") + new_value = not _voice_tts_enabled() + os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0" + _write_config_key("display.voice_tts", new_value) + return _ok(rid, {"enabled": True, "tts": new_value}) + return _err(rid, 4013, f"unknown voice action: {action}") @method("voice.record") def _(rid, params: dict) -> dict: + """VAD-driven continuous record loop, CLI-parity. + + ``start`` turns on a VAD loop that emits ``voice.transcript`` events + for each detected utterance and auto-restarts for the next turn. + ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while- + recording branch clearing ``_voice_continuous``). Three consecutive + silent cycles stop the loop automatically and emit a + ``voice.transcript`` with ``no_speech_limit=True``. 
+ """ action = params.get("action", "start") + + if action not in {"start", "stop"}: + return _err(rid, 4019, f"unknown voice action: {action}") + try: if action == "start": - from hermes_cli.voice import start_recording + if not _voice_mode_enabled(): + return _err(rid, 4015, "voice mode is off — enable with /voice on") - start_recording() + with _voice_sid_lock: + global _voice_event_sid + _voice_event_sid = params.get("session_id") or _voice_event_sid + + from hermes_cli.voice import start_continuous + + voice_cfg = _load_cfg().get("voice", {}) + start_continuous( + on_transcript=lambda t: _voice_emit( + "voice.transcript", {"text": t} + ), + on_status=lambda s: _voice_emit("voice.status", {"state": s}), + on_silent_limit=lambda: _voice_emit( + "voice.transcript", {"no_speech_limit": True} + ), + silence_threshold=voice_cfg.get("silence_threshold", 200), + silence_duration=voice_cfg.get("silence_duration", 3.0), + ) return _ok(rid, {"status": "recording"}) - if action == "stop": - from hermes_cli.voice import stop_and_transcribe - return _ok(rid, {"text": stop_and_transcribe() or ""}) - return _err(rid, 4019, f"unknown voice action: {action}") + # action == "stop" + from hermes_cli.voice import stop_continuous + + stop_continuous() + return _ok(rid, {"status": "stopped"}) except ImportError: return _err( rid, 5025, "voice module not available — install audio dependencies" diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index 23f7c46465..ef55d807ca 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -15,7 +15,8 @@ const buildCtx = (appended: Msg[]) => composer: { dequeue: () => undefined, queueEditRef: ref(null), - sendQueued: vi.fn() + sendQueued: vi.fn(), + setInput: vi.fn() }, gateway: { gw: { request: vi.fn() }, @@ -29,6 +30,9 @@ const buildCtx = (appended: Msg[]) => resumeById: vi.fn(), setCatalog: 
vi.fn() }, + submission: { + submitRef: { current: vi.fn() } + }, system: { bellOnComplete: false, sys: vi.fn() @@ -38,6 +42,11 @@ const buildCtx = (appended: Msg[]) => panel: (title: string, sections: any[]) => appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }), setHistoryItems: vi.fn() + }, + voice: { + setProcessing: vi.fn(), + setRecording: vi.fn(), + setVoiceEnabled: vi.fn() } }) as any diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 1ec123f11a..377735ca91 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -51,6 +51,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session const { bellOnComplete, stdout, sys } = ctx.system const { appendMessage, panel, setHistoryItems } = ctx.transcript + const { setInput } = ctx.composer + const { submitRef } = ctx.submission + const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice let pendingThinkingStatus = '' let thinkingStatusTimer: null | ReturnType = null @@ -261,6 +264,60 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } + case 'voice.status': { + // Continuous VAD loop reports its internal state so the status bar + // can show listening / transcribing / idle without polling. + const state = String(ev.payload?.state ?? '') + + if (state === 'listening') { + setVoiceRecording(true) + setVoiceProcessing(false) + } else if (state === 'transcribing') { + setVoiceRecording(false) + setVoiceProcessing(true) + } else { + setVoiceRecording(false) + setVoiceProcessing(false) + } + + return + } + + case 'voice.transcript': { + // CLI parity: the 3-strikes silence detector flipped off automatically. + // Mirror that on the UI side and tell the user why the mode is off. 
+ if (ev.payload?.no_speech_limit) { + setVoiceEnabled(false) + setVoiceRecording(false) + setVoiceProcessing(false) + sys('voice: no speech detected 3 times, continuous mode stopped') + + return + } + + const text = String(ev.payload?.text ?? '').trim() + + if (!text) { + return + } + + // Match CLI's _pending_input.put(transcript): auto-submit when the + // composer is empty, otherwise append so the user can keep editing + // a partial draft they were working on. + setInput(prev => { + if (!prev) { + // defer submit so React commits the state change first + setTimeout(() => submitRef.current(text), 0) + + return '' + } + + return `${prev}${/\s$/.test(prev) ? '' : ' '}${text}` + }) + + return + } + case 'gateway.start_timeout': { const { cwd, python } = ev.payload ?? {} const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : '' diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts index c1c427739b..81def036cd 100644 --- a/ui-tui/src/app/interfaces.ts +++ b/ui-tui/src/app/interfaces.ts @@ -189,9 +189,11 @@ export interface InputHandlerContext { stdout?: NodeJS.WriteStream } voice: { + enabled: boolean recording: boolean setProcessing: StateSetter setRecording: StateSetter + setVoiceEnabled: StateSetter } wheelStep: number } @@ -201,6 +203,9 @@ export interface InputHandlerResult { } export interface GatewayEventHandlerContext { + composer: { + setInput: StateSetter + } gateway: GatewayServices session: { STARTUP_RESUME_ID: string @@ -210,6 +215,9 @@ export interface GatewayEventHandlerContext { resumeById: (id: string) => void setCatalog: StateSetter } + submission: { + submitRef: MutableRefObject<(value: string) => void> + } system: { bellOnComplete: boolean stdout?: NodeJS.WriteStream @@ -220,6 +228,11 @@ export interface GatewayEventHandlerContext { panel: (title: string, sections: PanelSection[]) => void setHistoryItems: StateSetter } + voice: { + setProcessing: StateSetter + setRecording: StateSetter + 
setVoiceEnabled: StateSetter + } } export interface SlashHandlerContext { diff --git a/ui-tui/src/app/slash/commands/session.ts b/ui-tui/src/app/slash/commands/session.ts index 90a1beb3f0..cf36fee6c8 100644 --- a/ui-tui/src/app/slash/commands/session.ts +++ b/ui-tui/src/app/slash/commands/session.ts @@ -184,15 +184,64 @@ export const sessionCommands: SlashCommand[] = [ }, { - help: 'toggle voice input', + help: 'voice mode: [on|off|tts|status]', name: 'voice', run: (arg, ctx) => { - const action = arg === 'on' || arg === 'off' ? arg : 'status' + const normalized = (arg ?? '').trim().toLowerCase() + + const action = + normalized === 'on' || normalized === 'off' || normalized === 'tts' || normalized === 'status' + ? normalized + : 'status' ctx.gateway.rpc('voice.toggle', { action }).then( ctx.guarded(r => { ctx.voice.setVoiceEnabled(!!r.enabled) - ctx.transcript.sys(`voice: ${r.enabled ? 'on — press Ctrl+B to record' : 'off'}`) + + // Match CLI's _show_voice_status / _enable_voice_mode / + // _toggle_voice_tts output shape so users don't have to learn + // two vocabularies. + if (action === 'status') { + const mode = r.enabled ? 'ON' : 'OFF' + const tts = r.tts ? 'ON' : 'OFF' + ctx.transcript.sys('Voice Mode Status') + ctx.transcript.sys(` Mode: ${mode}`) + ctx.transcript.sys(` TTS: ${tts}`) + ctx.transcript.sys(' Record key: Ctrl+B') + + // CLI's "Requirements:" block — surfaces STT/audio setup issues + // so the user sees "STT provider: MISSING ..." instead of + // silently failing on every Ctrl+B press. + if (r.details) { + ctx.transcript.sys('') + ctx.transcript.sys(' Requirements:') + + for (const line of r.details.split('\n')) { + if (line.trim()) { + ctx.transcript.sys(` ${line}`) + } + } + } + + return + } + + if (action === 'tts') { + ctx.transcript.sys(`Voice TTS ${r.tts ? 'enabled' : 'disabled'}.`) + + return + } + + // on/off — mirror cli.py:_enable_voice_mode's 3-line output + if (r.enabled) { + const tts = r.tts ? 
' (TTS enabled)' : '' + ctx.transcript.sys(`Voice mode enabled${tts}`) + ctx.transcript.sys(' Ctrl+B to start/stop recording') + ctx.transcript.sys(' /voice tts to toggle speech output') + ctx.transcript.sys(' /voice off to disable voice mode') + } else { + ctx.transcript.sys('Voice mode disabled.') + } }) ) } diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts index cfc3eed7c8..47fe8a2166 100644 --- a/ui-tui/src/app/useInputHandlers.ts +++ b/ui-tui/src/app/useInputHandlers.ts @@ -134,45 +134,43 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { } } - const voiceStop = () => { - voice.setRecording(false) - voice.setProcessing(true) + // CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop + // (NOT the voice-mode umbrella bit). The mode is enabled via /voice on; + // Ctrl+B while the mode is off sys-nudges the user. While the mode is + // on, the first press starts a continuous loop (gateway → start_continuous, + // VAD auto-stop → transcribe → auto-restart), a subsequent press stops it. + // The gateway publishes voice.status + voice.transcript events that + // createGatewayEventHandler turns into UI badges and composer injection. + const voiceRecordToggle = () => { + if (!voice.enabled) { + return actions.sys('voice: mode is off — enable with /voice on') + } + + const starting = !voice.recording + const action = starting ? 'start' : 'stop' + + // Optimistic UI — flip the REC badge immediately so the user gets + // feedback while the RPC round-trips; the voice.status event is the + // authoritative source and may correct us. + if (starting) { + voice.setRecording(true) + } else { + voice.setRecording(false) + voice.setProcessing(false) + } gateway - .rpc('voice.record', { action: 'stop' }) - .then(r => { - if (!r) { - return + .rpc('voice.record', { action }) + .catch((e: Error) => { + // Revert optimistic UI on failure. 
+ if (starting) { + voice.setRecording(false) } - const transcript = String(r.text || '').trim() - - if (!transcript) { - return actions.sys('voice: no speech detected') - } - - cActions.setInput(prev => (prev ? `${prev}${/\s$/.test(prev) ? '' : ' '}${transcript}` : transcript)) - }) - .catch((e: Error) => actions.sys(`voice error: ${e.message}`)) - .finally(() => { - voice.setProcessing(false) - patchUiState({ status: 'ready' }) + actions.sys(`voice error: ${e.message}`) }) } - const voiceStart = () => - gateway - .rpc('voice.record', { action: 'start' }) - .then(r => { - if (!r) { - return - } - - voice.setRecording(true) - patchUiState({ status: 'recording…' }) - }) - .catch((e: Error) => actions.sys(`voice error: ${e.message}`)) - useInput((ch, key) => { const live = getUiState() @@ -371,7 +369,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { } if (isVoiceToggleKey(key, ch)) { - return voice.recording ? voiceStop() : voiceStart() + return voiceRecordToggle() } if (isAction(key, ch, 'g')) { diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 41edcc8282..c061aa5dd5 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -454,13 +454,20 @@ export function useMainApp(gw: GatewayClient) { composer: { actions: composerActions, refs: composerRefs, state: composerState }, gateway, terminal: { hasSelection, scrollRef, scrollWithSelection, selection, stdout }, - voice: { recording: voiceRecording, setProcessing: setVoiceProcessing, setRecording: setVoiceRecording }, + voice: { + enabled: voiceEnabled, + recording: voiceRecording, + setProcessing: setVoiceProcessing, + setRecording: setVoiceRecording, + setVoiceEnabled + }, wheelStep: WHEEL_SCROLL_STEP }) const onEvent = useMemo( () => createGatewayEventHandler({ + composer: { setInput: composerActions.setInput }, gateway, session: { STARTUP_RESUME_ID, @@ -470,18 +477,29 @@ export function useMainApp(gw: GatewayClient) { resumeById: 
session.resumeById, setCatalog }, + submission: { submitRef }, system: { bellOnComplete, stdout, sys }, - transcript: { appendMessage, panel, setHistoryItems } + transcript: { appendMessage, panel, setHistoryItems }, + voice: { + setProcessing: setVoiceProcessing, + setRecording: setVoiceRecording, + setVoiceEnabled + } }), [ appendMessage, bellOnComplete, + composerActions.setInput, gateway, panel, session.newSession, session.resetSession, session.resumeById, + setVoiceEnabled, + setVoiceProcessing, + setVoiceRecording, stdout, + submitRef, sys ] ) diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index 1dc8ea5bed..05f8d9a41c 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -236,10 +236,16 @@ export interface ImageAttachResponse { // ── Voice ──────────────────────────────────────────────────────────── export interface VoiceToggleResponse { + audio_available?: boolean + available?: boolean + details?: string enabled?: boolean + stt_available?: boolean + tts?: boolean } export interface VoiceRecordResponse { + status?: string text?: string } @@ -368,6 +374,8 @@ export type GatewayEvent = | { payload?: { text?: string }; session_id?: string; type: 'thinking.delta' } | { payload?: undefined; session_id?: string; type: 'message.start' } | { payload?: { kind?: string; text?: string }; session_id?: string; type: 'status.update' } + | { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' } + | { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' } | { payload: { line: string }; session_id?: string; type: 'gateway.stderr' } | { payload?: { cwd?: string; python?: string }; session_id?: string; type: 'gateway.start_timeout' } | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' } From 42ff7857712b0dec93e2aab6a033d77857f3f6bb Mon Sep 17 00:00:00 2001 From: 0xbyt4 
<35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:27:19 +0300 Subject: [PATCH 008/220] fix(tui): voice TTS speak-back + transcript-key bug + auto-submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues surfaced during end-to-end testing of the CLI-parity voice loop and are fixed together because they all blocked "speak → agent responds → TTS reads it back" from working at all: 1. Wrong result key (hermes_cli/voice.py) transcribe_recording() returns {"success": bool, "transcript": str}, matching cli.py:_voice_stop_and_transcribe. The wrapper was reading result.get("text"), which is None, so every successful Groq / local STT response was thrown away and the 3-strikes halt fired after three silent-looking cycles. Fixed by reading "transcript" and also honouring "success" like the CLI does. Updated the loop simulation tests to return the correct shape. 2. TTS speak-back was missing (tui_gateway/server.py + hermes_cli/voice.py) The TUI had a voice.toggle "tts" subcommand but nothing downstream actually read the flag — agent replies never spoke. Mirrored cli.py:8747-8754's dispatch: on message.complete with status == "complete", if _voice_tts_enabled() is true, spawn a daemon thread running speak_text(response). Rewrote speak_text as a full port of cli.py:_voice_speak_response — same markdown-strip regex pipeline (code blocks, links, bold/italic, inline code, headers, list bullets, horizontal rules, excessive newlines), same 4000-char cap, same explicit mp3 output path, same MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup of both extensions. Keeps TUI TTS audible output byte-for-byte identical to the classic CLI. 3. Auto-submit swallowed on non-empty composer (createGatewayEventHandler.ts) The voice.transcript handler branched on prev input via a setInput updater and fired submitRef.current inside the updater when prev was empty. 
React strict mode double-invokes state updaters, which would queue the submit twice; and when the composer had any content the transcript was merely appended — the agent never saw it. CLI _pending_input.put(transcript) unconditionally feeds the transcript as the next turn, so match that: always clear the composer and setTimeout(() => submitRef.current(text), 0) outside any updater. Side effect can't run twice this way, and a half-typed draft on the rare occasion is a fair trade vs. silently dropping the turn. Also added peak_rms to the rec.stop debug line so "recording too quiet" is diagnosable at a glance when HERMES_VOICE_DEBUG=1. --- hermes_cli/voice.py | 77 ++++++++++++++------- tui_gateway/server.py | 22 ++++++ ui-tui/src/app/createGatewayEventHandler.ts | 23 +++--- 3 files changed, 84 insertions(+), 38 deletions(-) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py index 70e097e77c..448021d115 100644 --- a/hermes_cli/voice.py +++ b/hermes_cli/voice.py @@ -21,7 +21,6 @@ Two usage modes are exposed: from __future__ import annotations -import json import logging import os import sys @@ -405,34 +404,62 @@ def _continuous_on_silence() -> None: def speak_text(text: str) -> None: """Synthesize ``text`` with the configured TTS provider and play it. - The gateway spawns a daemon thread to call this so the RPC returns - immediately. Failures are logged and swallowed. + Mirrors cli.py:_voice_speak_response exactly — same markdown strip + pipeline, same 4000-char cap, same explicit mp3 output path, same + MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup + of both extensions. Keeping these in sync means a voice-mode TTS + session in the TUI sounds identical to one in the classic CLI. """ if not text or not text.strip(): return - # Lazy import — tts_tool pulls optional provider SDKs. 
- from tools.tts_tool import text_to_speech_tool + import re + import tempfile + import time try: - raw = text_to_speech_tool(text) + from tools.tts_tool import text_to_speech_tool + + tts_text = text[:4000] if len(text) > 4000 else text + tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text) # fenced code blocks + tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text) # [text](url) → text + tts_text = re.sub(r'https?://\S+', '', tts_text) # bare URLs + tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold + tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic + tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # inline code + tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers + tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list bullets + tts_text = re.sub(r'---+', '', tts_text) # horizontal rules + tts_text = re.sub(r'\n{3,}', '\n\n', tts_text) # excess newlines + tts_text = tts_text.strip() + if not tts_text: + return + + # MP3 output path, pre-chosen so we can play the MP3 directly even + # when text_to_speech_tool auto-converts to OGG for messaging + # platforms. afplay's OGG support is flaky, MP3 always works. 
+ os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True) + mp3_path = os.path.join( + tempfile.gettempdir(), + "hermes_voice", + f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3", + ) + + _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}") + text_to_speech_tool(text=tts_text, output_path=mp3_path) + + if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0: + _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)") + play_audio_file(mp3_path) + try: + os.unlink(mp3_path) + ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg" + if os.path.isfile(ogg_path): + os.unlink(ogg_path) + except OSError: + pass + else: + _debug(f"speak_text: TTS tool produced no audio at {mp3_path}") except Exception as e: - logger.warning("TTS synthesis failed: %s", e) - return - - try: - result = json.loads(raw) if isinstance(raw, str) else raw - except json.JSONDecodeError: - logger.warning("TTS returned non-JSON result") - return - - if not isinstance(result, dict): - return - - file_path = result.get("file_path") - if not file_path: - err = result.get("error") or "no file_path in TTS result" - logger.warning("TTS succeeded but produced no audio: %s", err) - return - - play_audio_file(file_path) + logger.warning("Voice TTS playback failed: %s", e) + _debug(f"speak_text raised {type(e).__name__}: {e}") diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 130b60576e..f31ff3b0e2 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2126,6 +2126,28 @@ def _(rid, params: dict) -> dict: if rendered: payload["rendered"] = rendered _emit("message.complete", sid, payload) + + # CLI parity: when voice-mode TTS is on, speak the agent reply + # (cli.py:_voice_speak_response). Only the final text — tool + # calls / reasoning already stream separately and would be + # noisy to read aloud. 
+ if ( + status == "complete" + and isinstance(raw, str) + and raw.strip() + and _voice_tts_enabled() + ): + try: + from hermes_cli.voice import speak_text + + spoken = raw + threading.Thread( + target=speak_text, args=(spoken,), daemon=True + ).start() + except ImportError: + logger.warning("voice TTS skipped: hermes_cli.voice unavailable") + except Exception as e: + logger.warning("voice TTS dispatch failed: %s", e) except Exception as e: _emit("error", sid, {"message": str(e)}) finally: diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 377735ca91..50f6fa3af4 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -301,19 +301,16 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } - // Match CLI's _pending_input.put(transcript): auto-submit when the - // composer is empty, otherwise append so the user can keep editing - // a partial draft they were working on. - setInput(prev => { - if (!prev) { - // defer submit so React commits the state change first - setTimeout(() => submitRef.current(text), 0) - - return '' - } - - return `${prev}${/\s$/.test(prev) ? '' : ' '}${text}` - }) + // CLI parity: _pending_input.put(transcript) unconditionally feeds + // the transcript to the agent as its next turn — draft handling + // doesn't apply because voice-mode users are speaking, not typing. + // + // We can't branch on composer input from inside a setInput updater + // (React strict mode double-invokes it, duplicating the submit). + // Just clear + defer submit so the cleared input is committed before + // submit reads it. 
+ setInput('') + setTimeout(() => submitRef.current(text), 0) return } From 98418afd5d81a4e01813b819f3001dc360579d6c Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:33:10 +0300 Subject: [PATCH 009/220] =?UTF-8?q?fix(tui):=20break=20TTS=E2=86=92STT=20f?= =?UTF-8?q?eedback=20loop=20+=20colorize=20REC=20badge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTS feedback loop (hermes_cli/voice.py) The VAD loop kept the microphone live while speak_text played the agent's reply over the speakers, so the reply itself was picked up, transcribed, and submitted — the agent then replied to its own echo ("Ha, looks like we're in a loop"). Ported cli.py:_voice_tts_done synchronisation: - _tts_playing: threading.Event (initially set = "not playing"). - speak_text cancels the active recorder before opening the speakers, clears _tts_playing, and on exit waits 300 ms before re-starting the recorder — long enough for the OS audio device to settle so afplay and sounddevice don't race for it. - _continuous_on_silence now waits on _tts_playing (up to 60 s) before re-arming the mic with another 300 ms gap, mirroring cli.py:10619-10621. If the user flips voice off during the wait the loop exits cleanly instead of fighting for the device. Without both halves the loop races: if the silence callback fires before TTS starts it re-arms immediately; if TTS is already playing the pause-and-resume path catches it. Red REC badge (ui-tui appChrome + useMainApp) Classic CLI (cli.py:_get_voice_status_fragments) renders "● REC" in red and "◉ STT" in amber. TUI was showing a dim "REC" with no dot, making it hard to spot at a glance. voiceLabel now emits the same glyphs and appChrome colours them via t.color.error / t.color.warn, falling back to dim for the idle label. 
--- hermes_cli/voice.py | 74 +++++++++++++++++++++++++++++ ui-tui/src/app/useMainApp.ts | 4 +- ui-tui/src/components/appChrome.tsx | 15 +++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py index 448021d115..4deee8636f 100644 --- a/hermes_cli/voice.py +++ b/hermes_cli/voice.py @@ -87,6 +87,18 @@ _recorder_lock = threading.Lock() _continuous_lock = threading.Lock() _continuous_active = False _continuous_recorder: Any = None + +# ── TTS-vs-STT feedback guard ──────────────────────────────────────── +# When TTS plays the agent reply over the speakers, the live microphone +# picks it up and transcribes the agent's own voice as user input — an +# infinite loop the agent happily joins ("Ha, looks like we're in a loop"). +# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is +# playing, set while silent. _continuous_on_silence waits on it before +# re-arming the recorder, and speak_text itself cancels any live capture +# before starting playback so the tail of the previous utterance doesn't +# leak into the mic. +_tts_playing = threading.Event() +_tts_playing.set() # initially "not playing" _continuous_on_transcript: Optional[Callable[[str], None]] = None _continuous_on_status: Optional[Callable[[str], None]] = None _continuous_on_silent_limit: Optional[Callable[[], None]] = None @@ -379,6 +391,23 @@ def _continuous_on_silence() -> None: pass return + # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to + # finish before re-arming the mic, then leave a small gap to avoid + # catching the tail of the speaker output. Without this the voice + # loop becomes a feedback loop — the agent's spoken reply lands + # back in the mic and gets re-submitted. + if not _tts_playing.is_set(): + _debug("_continuous_on_silence: waiting for TTS to finish") + _tts_playing.wait(timeout=60) + import time as _time + _time.sleep(0.3) + + # User may have stopped the loop during the wait. 
+ with _continuous_lock: + if not _continuous_active: + _debug("_continuous_on_silence: stopped while waiting for TTS") + return + # Restart for the next turn. _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") _play_beep(frequency=880, count=1) @@ -409,6 +438,11 @@ def speak_text(text: str) -> None: MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup of both extensions. Keeping these in sync means a voice-mode TTS session in the TUI sounds identical to one in the classic CLI. + + While playback is in flight the module-level _tts_playing Event is + cleared so the continuous-recording loop knows to wait before + re-arming the mic (otherwise the agent's spoken reply feedback-loops + through the microphone and the agent ends up replying to itself). """ if not text or not text.strip(): return @@ -417,6 +451,26 @@ def speak_text(text: str) -> None: import tempfile import time + # Cancel any live capture before we open the speakers — otherwise the + # last ~200ms of the user's turn tail + the first syllables of our TTS + # both end up in the next recording window. The continuous loop will + # re-arm itself after _tts_playing flips back (see _continuous_on_silence). 
+ paused_recording = False + with _continuous_lock: + if ( + _continuous_active + and _continuous_recorder is not None + and getattr(_continuous_recorder, "is_recording", False) + ): + try: + _continuous_recorder.cancel() + paused_recording = True + except Exception as e: + logger.warning("failed to pause recorder for TTS: %s", e) + + _tts_playing.clear() + _debug(f"speak_text: TTS begin (paused_recording={paused_recording})") + try: from tools.tts_tool import text_to_speech_tool @@ -463,3 +517,23 @@ def speak_text(text: str) -> None: except Exception as e: logger.warning("Voice TTS playback failed: %s", e) _debug(f"speak_text raised {type(e).__name__}: {e}") + finally: + _tts_playing.set() + _debug("speak_text: TTS done") + + # Re-arm the mic so the user can answer without pressing Ctrl+B. + # Small delay lets the OS flush speaker output and afplay fully + # release the audio device before sounddevice re-opens the input. + if paused_recording: + time.sleep(0.3) + with _continuous_lock: + if _continuous_active and _continuous_recorder is not None: + try: + _continuous_recorder.start( + on_silence_stop=_continuous_on_silence + ) + _debug("speak_text: recording resumed after TTS") + except Exception as e: + logger.warning( + "failed to resume recorder after TTS: %s", e + ) diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index c061aa5dd5..7b742478ea 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -716,7 +716,9 @@ export function useMainApp(gw: GatewayClient) { statusColor: statusColorOf(ui.status, ui.theme.color), stickyPrompt, turnStartedAt: ui.sid ? turnStartedAt : null, - voiceLabel: voiceRecording ? 'REC' : voiceProcessing ? 'STT' : `voice ${voiceEnabled ? 'on' : 'off'}` + // CLI parity: the classic prompt_toolkit status bar shows a red dot + // on REC (cli.py:_get_voice_status_fragments line 2344). + voiceLabel: voiceRecording ? '● REC' : voiceProcessing ? '◉ STT' : `voice ${voiceEnabled ? 
'on' : 'off'}` }), [ cwd, diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx index 8de2a63019..7b697eedce 100644 --- a/ui-tui/src/components/appChrome.tsx +++ b/ui-tui/src/components/appChrome.tsx @@ -215,7 +215,20 @@ export function StatusRule({ ) : null} - {voiceLabel ? │ {voiceLabel} : null} + {voiceLabel ? ( + + {' │ '} + {voiceLabel} + + ) : null} {bgCount > 0 ? │ {bgCount} bg : null} {showCost && typeof usage.cost_usd === 'number' ? ( │ ${usage.cost_usd.toFixed(4)} From 3a9598337f772bade75432595b88556744e2f1a0 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:40:10 +0300 Subject: [PATCH 010/220] chore(tui): dump gateway crash traces to ~/.hermes/logs/tui_gateway_crash.log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the gateway subprocess raises an unhandled exception during a voice-mode turn, nothing survives: stdout is the JSON-RPC pipe, stderr flushes but the process is already exiting, and no log file catches Python's default traceback print. The user is left with an undiagnosable "gateway exited" banner. Install: - sys.excepthook → write full traceback to tui_gateway_crash.log + echo the first line to stderr (which the TUI pumps into Activity as a gateway.stderr event). Chains to the default hook so the process still terminates. - threading.excepthook → same, tagged with the thread name so it's clear when the crash came from a daemon thread (beep playback, TTS, silence callback, etc.). - Turn-dispatcher except block now also appends a traceback to the crash log before emitting the user-visible error event — str(e) alone was too terse to identify where in the voice pipeline the failure happened. Zero behavioural change on the happy path; purely forensics. 
--- tui_gateway/server.py | 83 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index f31ff3b0e2..bcb53b8072 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -23,6 +23,75 @@ load_hermes_dotenv( hermes_home=_hermes_home, project_env=Path(__file__).parent.parent / ".env" ) + +# ── Panic logger ───────────────────────────────────────────────────── +# Gateway crashes in a TUI session leave no forensics: stdout is the +# JSON-RPC pipe (TUI side parses it, doesn't log raw), the root logger +# only catches handled warnings, and the subprocess exits before stderr +# flushes through the stderr->gateway.stderr event pump. This hook +# appends every unhandled exception to ~/.hermes/logs/tui_gateway_crash.log +# AND re-emits a one-line summary to stderr so the TUI can surface it in +# Activity — exactly what was missing when the voice-mode turns started +# exiting the gateway mid-TTS. +_CRASH_LOG = os.path.join(_hermes_home, "logs", "tui_gateway_crash.log") + + +def _panic_hook(exc_type, exc_value, exc_tb): + import traceback + + trace = "".join(traceback.format_exception(exc_type, exc_value, exc_tb)) + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== unhandled exception · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n" + ) + f.write(trace) + except Exception: + pass + # Stderr goes through to the TUI as a gateway.stderr Activity line — + # the first line here is what the user will see without opening any + # log files. Rest of the stack is still in the log for full context. + first = str(exc_value).strip().splitlines()[0] if str(exc_value).strip() else exc_type.__name__ + print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True) + # Chain to the default hook so the process still terminates normally. 
+ sys.__excepthook__(exc_type, exc_value, exc_tb) + + +sys.excepthook = _panic_hook + + +def _thread_panic_hook(args): + # threading.excepthook signature: SimpleNamespace(exc_type, exc_value, exc_traceback, thread) + import traceback + + trace = "".join( + traceback.format_exception(args.exc_type, args.exc_value, args.exc_traceback) + ) + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== thread exception · {time.strftime('%Y-%m-%d %H:%M:%S')} " + f"· thread={args.thread.name} ===\n" + ) + f.write(trace) + except Exception: + pass + first_line = ( + str(args.exc_value).strip().splitlines()[0] + if str(args.exc_value).strip() + else args.exc_type.__name__ + ) + print( + f"[gateway-crash] thread {args.thread.name} raised {args.exc_type.__name__}: {first_line}", + file=sys.stderr, + flush=True, + ) + + +threading.excepthook = _thread_panic_hook + try: from hermes_cli.banner import prefetch_update_check @@ -2149,6 +2218,20 @@ def _(rid, params: dict) -> dict: except Exception as e: logger.warning("voice TTS dispatch failed: %s", e) except Exception as e: + import traceback + + trace = traceback.format_exc() + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== turn-dispatcher exception · " + f"{time.strftime('%Y-%m-%d %H:%M:%S')} · sid={sid} ===\n" + ) + f.write(trace) + except Exception: + pass + print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True) _emit("error", sid, {"message": str(e)}) finally: try: From eeda18a9b75027408bccd1ac7308ac8a1c469d7c Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:44:14 +0300 Subject: [PATCH 011/220] chore(tui): record gateway exit reason in crash log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gateway exits weren't reaching the panic hook 
because entry.py calls sys.exit(0) on broken stdout — clean termination, no exception. That left "gateway exited" in the TUI with zero forensic trail when pipe breaks happened mid-turn. Entry.py now tags each exit path — startup-write failure, parse-error- response write failure, per-method response write failure, stdin EOF — with a one-line entry in ~/.hermes/logs/tui_gateway_crash.log and a gateway.stderr breadcrumb. Includes the JSON-RPC method name on the dispatch path, which is the only way to tell "died right after handling voice.toggle on" from "died emitting the second message.complete". --- tui_gateway/entry.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index d2b82b9dab..42f636d310 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -1,19 +1,44 @@ import json +import os import signal import sys +import time -from tui_gateway.server import dispatch, resolve_skin, write_json +from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json signal.signal(signal.SIGPIPE, signal.SIG_DFL) signal.signal(signal.SIGINT, signal.SIG_IGN) +def _log_exit(reason: str) -> None: + """Record why the gateway subprocess is shutting down. + + Four exit paths (startup write fail, parse-error-response write fail, + dispatch-response write fail, stdin EOF) all collapse into a silent + sys.exit(0) here. Without this trail the TUI shows "gateway exited" + with no actionable clue about WHICH broken pipe or WHICH message + triggered it — the main reason voice-mode turns look like phantom + crashes when the real story is "TUI read pipe closed on this event".
+ """ + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== gateway exit · {time.strftime('%Y-%m-%d %H:%M:%S')} " + f"· reason={reason} ===\n" + ) + except Exception: + pass + print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True) + + def main(): if not write_json({ "jsonrpc": "2.0", "method": "event", "params": {"type": "gateway.ready", "payload": {"skin": resolve_skin()}}, }): + _log_exit("startup write failed (broken stdout pipe before first event)") sys.exit(0) for raw in sys.stdin: @@ -25,14 +50,19 @@ def main(): req = json.loads(line) except json.JSONDecodeError: if not write_json({"jsonrpc": "2.0", "error": {"code": -32700, "message": "parse error"}, "id": None}): + _log_exit("parse-error-response write failed (broken stdout pipe)") sys.exit(0) continue + method = req.get("method") if isinstance(req, dict) else None resp = dispatch(req) if resp is not None: if not write_json(resp): + _log_exit(f"response write failed for method={method!r} (broken stdout pipe)") sys.exit(0) + _log_exit("stdin EOF (TUI closed the command pipe)") + if __name__ == "__main__": main() From 7baf370d3dde0f66938962a2516e728e3cdabc6f Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:49:59 +0300 Subject: [PATCH 012/220] chore(tui): capture signal-triggered gateway exits in crash log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SIG_DFL for SIGPIPE means the kernel reaps the gateway subprocess the instant a background thread (TTS playback, silence callback, voice status emitter) writes to a stdout the TUI stopped reading — before the Python interpreter can run excepthook, threading.excepthook, atexit, or the entry.py post-loop _log_exit. 
Replace the three SIG_DFL / SIG_IGN bindings with a _log_signal handler that: - records which signal (SIGPIPE / SIGTERM / SIGHUP) fired and when; - dumps the main-thread stack at signal delivery AND every live thread's stack via sys._current_frames — the background-thread write that provoked SIGPIPE is almost always visible here; - writes everything to ~/.hermes/logs/tui_gateway_crash.log and prints a [gateway-signal] breadcrumb to stderr so the TUI Activity surfaces it as well. SIGINT stays ignored (TUI handles Ctrl+C for the user). --- tui_gateway/entry.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 42f636d310..9974ccbebf 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -3,10 +3,50 @@ import os import signal import sys import time +import traceback from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json -signal.signal(signal.SIGPIPE, signal.SIG_DFL) + +def _log_signal(signum: int, frame) -> None: + """Capture WHICH thread and WHERE a termination signal hit us. + + SIG_DFL for SIGPIPE kills the process silently the instant any + background thread (TTS playback, beep, voice status emitter, etc.) + writes to a stdout the TUI has stopped reading. Without this + handler the gateway-exited banner in the TUI has no trace — the + crash log never sees a Python exception because the kernel reaps + the process before the interpreter runs anything. 
+ """ + name = { + signal.SIGPIPE: "SIGPIPE", + signal.SIGTERM: "SIGTERM", + signal.SIGHUP: "SIGHUP", + }.get(signum, f"signal {signum}") + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== {name} received · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n" + ) + if frame is not None: + f.write("main-thread stack at signal delivery:\n") + traceback.print_stack(frame, file=f) + # All live threads — signal may have been triggered by a + # background thread (write to broken stdout from TTS, etc.). + import threading as _threading + for tid, th in _threading._active.items(): + f.write(f"\n--- thread {th.name} (id={tid}) ---\n") + f.write("".join(traceback.format_stack(sys._current_frames().get(tid)))) + except Exception: + pass + print(f"[gateway-signal] {name}", file=sys.stderr, flush=True) + sys.exit(0) + + +signal.signal(signal.SIGPIPE, _log_signal) +signal.signal(signal.SIGTERM, _log_signal) +signal.signal(signal.SIGHUP, _log_signal) signal.signal(signal.SIGINT, signal.SIG_IGN) From 2af0848f3c61449b73b9ad68c98cf0386695c1fa Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 01:54:20 +0300 Subject: [PATCH 013/220] fix(tui): ignore SIGPIPE so stderr back-pressure can't kill the gateway MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crash-log stack trace (tui_gateway_crash.log) from the user's session pinned the regression: SIGPIPE arrived while main thread was blocked on for-raw-in-sys.stdin — i.e., a background thread (debug print to stderr, most likely from HERMES_VOICE_DEBUG=1) wrote to a pipe whose buffer the TUI hadn't drained yet, and SIG_DFL promptly killed the process. Two fixes that together restore CLI parity: - entry.py: SIGPIPE → SIG_IGN instead of the _log_signal handler that then exited. 
With SIG_IGN, Python raises BrokenPipeError on the offending write, which write_json already handles with a clean exit via _log_exit. SIGTERM / SIGHUP still route through _log_signal so real termination signals remain diagnosable. - hermes_cli/voice.py:_debug: wrap the stderr print in a BrokenPipeError / OSError try/except. This runs from daemon threads (silence callback, TTS playback, beep), so a broken stderr must not escape and ride up into the main event loop. Verified by spawning the gateway subprocess locally: voice.toggle status → 200 OK, process stays alive, clean exit on stdin close logs "reason=stdin EOF" instead of a silent reap. --- hermes_cli/voice.py | 11 ++++++++++- tui_gateway/entry.py | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py index 4deee8636f..0a355ce4fa 100644 --- a/hermes_cli/voice.py +++ b/hermes_cli/voice.py @@ -44,9 +44,18 @@ def _debug(msg: str) -> None: which createGatewayEventHandler shows as an Activity line — exactly what we need to diagnose "why didn't the loop auto-restart?" in the user's real terminal without shipping a separate debug RPC. + + Any OSError / BrokenPipeError is swallowed because this fires from + background threads (silence callback, TTS daemon, beep) where a + broken stderr pipe must not kill the whole gateway — the main + command pipe (stdin+stdout) is what actually matters. """ - if os.environ.get("HERMES_VOICE_DEBUG", "").strip() == "1": + if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1": + return + try: print(f"[voice] {msg}", file=sys.stderr, flush=True) + except (BrokenPipeError, OSError): + pass def _beeps_enabled() -> bool: diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 9974ccbebf..7eac6057ee 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -44,7 +44,16 @@ def _log_signal(signum: int, frame) -> None: sys.exit(0) -signal.signal(signal.SIGPIPE, _log_signal) +# SIGPIPE: ignore, don't exit. 
The old SIG_DFL killed the process +# silently whenever a *background* thread (TTS playback chain, voice +# debug stderr emitter, beep thread) wrote to a pipe the TUI had gone +# quiet on — even though the main thread was perfectly fine waiting on +# stdin. Ignoring the signal lets Python raise BrokenPipeError on the +# offending write (write_json already handles that with a clean +# sys.exit(0) + _log_exit), which keeps the gateway alive as long as +# the main command pipe is still readable. Terminal signals still +# route through _log_signal so kills and hangups are diagnosable. +signal.signal(signal.SIGPIPE, signal.SIG_IGN) signal.signal(signal.SIGTERM, _log_signal) signal.signal(signal.SIGHUP, _log_signal) signal.signal(signal.SIGINT, signal.SIG_IGN) From 44a0cbe5253fda236eb7383e4e07fca7f7e99691 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Fri, 24 Apr 2026 02:06:01 +0300 Subject: [PATCH 014/220] fix(tui): voice mode starts OFF each launch (CLI parity) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The voice.toggle handler was persisting display.voice_enabled / display.voice_tts to config.yaml, so a TUI session that ever turned voice on would re-open with it already on (and the mic badge lit) on every subsequent launch. cli.py treats voice strictly as runtime state: _voice_mode = False at __init__, only /voice on flips it, and nothing writes it back to disk. Drop the _write_config_key calls in voice.toggle on/off/tts and the config.yaml fallback in _voice_mode_enabled / _voice_tts_enabled. State is now env-var-only (HERMES_VOICE / HERMES_VOICE_TTS), scoped to the live gateway subprocess — the next launch starts clean. 
--- tui_gateway/server.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index bcb53b8072..52408ed9f7 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -3576,21 +3576,20 @@ def _voice_emit(event: str, payload: dict | None = None) -> None: def _voice_mode_enabled() -> bool: - """Current voice-mode flag. HERMES_VOICE env var wins over config so - the gateway and CLI agree when one of them was launched with an - explicit override.""" - env = os.environ.get("HERMES_VOICE", "").strip() - if env in {"0", "1"}: - return env == "1" - return bool(_load_cfg().get("display", {}).get("voice_enabled", False)) + """Current voice-mode flag (runtime-only, CLI parity). + + cli.py initialises ``_voice_mode = False`` at startup and only flips + it via ``/voice on``; it never reads a persisted enable bit from + config.yaml. We match that: no config lookup, env var only. This + avoids the TUI auto-starting in REC the next time the user opens it + just because they happened to enable voice in a prior session. + """ + return os.environ.get("HERMES_VOICE", "").strip() == "1" def _voice_tts_enabled() -> bool: - """Whether agent replies should be spoken back via TTS.""" - env = os.environ.get("HERMES_VOICE_TTS", "").strip() - if env in {"0", "1"}: - return env == "1" - return bool(_load_cfg().get("display", {}).get("voice_tts", False)) + """Whether agent replies should be spoken back via TTS (runtime only).""" + return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1" @method("voice.toggle") @@ -3634,8 +3633,10 @@ def _(rid, params: dict) -> dict: if action in ("on", "off"): enabled = action == "on" + # Runtime-only flag (CLI parity) — no _write_config_key, so the + # next TUI launch starts with voice OFF instead of auto-REC from a + # persisted stale toggle. 
os.environ["HERMES_VOICE"] = "1" if enabled else "0" - _write_config_key("display.voice_enabled", enabled) if not enabled: # Disabling the mode must tear the continuous loop down; the @@ -3655,8 +3656,8 @@ def _(rid, params: dict) -> dict: if not _voice_mode_enabled(): return _err(rid, 4014, "enable voice mode first: /voice on") new_value = not _voice_tts_enabled() + # Runtime-only flag (CLI parity) — see voice.toggle on/off above. os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0" - _write_config_key("display.voice_tts", new_value) return _ok(rid, {"enabled": True, "tts": new_value}) return _err(rid, 4013, f"unknown voice action: {action}") From 9ff21437a03a28cfa33394bb330a4a41768f8c1c Mon Sep 17 00:00:00 2001 From: Dan Lynn Date: Sun, 19 Apr 2026 17:36:18 +0000 Subject: [PATCH 015/220] fix(mcp): coerce stringified arrays/objects in tool args When a tool schema declares `type: array` or `type: object` and the model emits the value as a JSON string (common with complex oneOf discriminated unions), the MCP server rejects it with -32602 "expected array, received string". Extend `_coerce_value` to attempt `json.loads` for these types and replace the string with the parsed value before dispatch. Root cause confirmed via live testing: `add_reminders.reminders` uses a oneOf discriminated union (relative/absolute/location) that triggers model output drift. Sending a real array passes validation; sending a string reproduces the exact error. 
Co-Authored-By: Claude Sonnet 4.6 --- model_tools.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/model_tools.py b/model_tools.py index bee80f49bd..2b7767fda3 100644 --- a/model_tools.py +++ b/model_tools.py @@ -418,6 +418,31 @@ def _coerce_value(value: str, expected_type): return _coerce_number(value, integer_only=(expected_type == "integer")) if expected_type == "boolean": return _coerce_boolean(value) + if expected_type == "array": + return _coerce_json(value, list) + if expected_type == "object": + return _coerce_json(value, dict) + return value + + +def _coerce_json(value: str, expected_python_type: type): + """Parse *value* as JSON when the schema expects an array or object. + + Handles model output drift where a complex oneOf/discriminated-union schema + causes the LLM to emit the array/object as a JSON string instead of a native + structure. Returns the original string if parsing fails or yields the wrong + Python type. + """ + try: + parsed = json.loads(value) + except (ValueError, TypeError): + return value + if isinstance(parsed, expected_python_type): + logger.debug( + "coerce_tool_args: coerced string to %s via json.loads", + expected_python_type.__name__, + ) + return parsed return value From 6a20e187ddfebeb97f87e968a6281e19113cea23 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 16:19:17 -0700 Subject: [PATCH 016/220] test,chore: cover stringified array/object coercion + AUTHOR_MAP entry Follow-up to the cherry-picked coercion commit: adds 9 regression tests covering array/object parsing, invalid-JSON passthrough, wrong-shape preservation, and the issue #3947 gmail-mcp scenario end-to-end. Adds dan@danlynn.com -> danklynn to scripts/release.py AUTHOR_MAP so the salvage PR's contributor attribution doesn't break CI. 
--- scripts/release.py | 1 + tests/run_agent/test_tool_arg_coercion.py | 51 +++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 5a38adc4f2..2a9169a5f4 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -167,6 +167,7 @@ AUTHOR_MAP = { "socrates1024@gmail.com": "socrates1024", "seanalt555@gmail.com": "Salt-555", "satelerd@gmail.com": "satelerd", + "dan@danlynn.com": "danklynn", "numman.ali@gmail.com": "nummanali", "rohithsaimidigudla@gmail.com": "whitehatjr1001", "0xNyk@users.noreply.github.com": "0xNyk", diff --git a/tests/run_agent/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py index cf1876d4e4..bc84b2bf60 100644 --- a/tests/run_agent/test_tool_arg_coercion.py +++ b/tests/run_agent/test_tool_arg_coercion.py @@ -134,6 +134,31 @@ class TestCoerceValue: """A non-numeric string in [number, string] should stay a string.""" assert _coerce_value("hello", ["number", "string"]) == "hello" + def test_array_type_parsed_from_json_string(self): + """Stringified JSON arrays are parsed into native lists.""" + assert _coerce_value('["a", "b"]', "array") == ["a", "b"] + assert _coerce_value("[1, 2, 3]", "array") == [1, 2, 3] + + def test_object_type_parsed_from_json_string(self): + """Stringified JSON objects are parsed into native dicts.""" + assert _coerce_value('{"k": "v"}', "object") == {"k": "v"} + assert _coerce_value('{"n": 1}', "object") == {"n": 1} + + def test_array_invalid_json_preserved(self): + """Unparseable strings are returned unchanged.""" + assert _coerce_value("not-json", "array") == "not-json" + + def test_object_invalid_json_preserved(self): + assert _coerce_value("not-json", "object") == "not-json" + + def test_array_type_wrong_shape_preserved(self): + """A JSON object passed for an 'array' slot is preserved as a string.""" + assert _coerce_value('{"k": "v"}', "array") == '{"k": "v"}' + + def test_object_type_wrong_shape_preserved(self): + """A JSON array passed 
for an 'object' slot is preserved as a string.""" + assert _coerce_value('["a"]', "object") == '["a"]' + # ── Full coerce_tool_args with registry ─────────────────────────────────── @@ -212,6 +237,32 @@ class TestCoerceToolArgs: assert result["items"] == [1, 2, 3] assert result["config"] == {"key": "val"} + def test_coerces_stringified_array_arg(self): + """Regression for #3947 — MCP servers using z.array() expect lists, not strings.""" + schema = self._mock_schema({ + "messageIds": {"type": "array", "items": {"type": "string"}}, + }) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"messageIds": '["abc", "def"]'} + result = coerce_tool_args("test_tool", args) + assert result["messageIds"] == ["abc", "def"] + + def test_coerces_stringified_object_arg(self): + """Stringified JSON objects get parsed into dicts.""" + schema = self._mock_schema({"config": {"type": "object"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"config": '{"max": 50}'} + result = coerce_tool_args("test_tool", args) + assert result["config"] == {"max": 50} + + def test_invalid_json_array_preserved_as_string(self): + """If the string isn't valid JSON, pass it through — let the tool decide.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": "not-json"} + result = coerce_tool_args("test_tool", args) + assert result["items"] == "not-json" + def test_extra_args_without_schema_left_alone(self): """Args not in the schema properties are not touched.""" schema = self._mock_schema({"limit": {"type": "integer"}}) From bd929ea514d92d68b6597cee9c6665dcd8a9b93e Mon Sep 17 00:00:00 2001 From: Ari Lotter Date: Thu, 23 Apr 2026 18:48:08 -0400 Subject: [PATCH 017/220] perf(ink): cache text measurements across yoga flex re-passes Adds a per-ink-text measurement cache keyed by width|widthMode to avoid re-squashing and re-wrapping the same text when 
yoga calls measureFunc multiple times per frame with different widths during flex layout re-pass. --- ui-tui/packages/hermes-ink/src/ink/dom.ts | 49 +++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/dom.ts b/ui-tui/packages/hermes-ink/src/ink/dom.ts index 735ab0b0c5..9ff1be4119 100644 --- a/ui-tui/packages/hermes-ink/src/ink/dom.ts +++ b/ui-tui/packages/hermes-ink/src/ink/dom.ts @@ -83,6 +83,10 @@ export type DOMElement = { // Only set on ink-root. The document owns focus — any node can // reach it by walking parentNode, like browser getRootNode(). focusManager?: FocusManager + // Measurement cache for ink-text nodes: avoids re-squashing and re-wrapping + // text when yoga calls measureFunc multiple times per frame with different + // widths during flex re-pass. Keyed by `${width}|${widthMode}`. + _textMeasureCache?: { gen: number; entries: Map } } & InkNode export type TextNode = { @@ -311,10 +315,42 @@ export const createTextNode = (text: string): TextNode => { return node } +const MEASURE_CACHE_CAP = 16 + const measureTextNode = function ( node: DOMNode, width: number, widthMode: LayoutMeasureMode +): { width: number; height: number } { + const elem = node.nodeName !== '#text' ? (node as DOMElement) : node.parentNode + if (elem && elem.nodeName === 'ink-text') { + let cache = elem._textMeasureCache + if (!cache) { + cache = { gen: 0, entries: new Map() } + elem._textMeasureCache = cache + } + const key = `${width}|${widthMode}` + const hit = cache.entries.get(key) + if (hit && hit._gen === cache.gen) { + return hit.result + } + const result = computeTextMeasure(node, width, widthMode) + // Enforce cap with FIFO eviction to avoid unbounded growth during + // pathological frames where yoga probes many widths. 
+ if (cache.entries.size >= MEASURE_CACHE_CAP) { + const firstKey = cache.entries.keys().next().value + cache.entries.delete(firstKey) + } + cache.entries.set(key, { _gen: cache.gen, result }) + return result + } + return computeTextMeasure(node, width, widthMode) +} + +const computeTextMeasure = function ( + node: DOMNode, + width: number, + widthMode: LayoutMeasureMode ): { width: number; height: number } { const rawText = node.nodeName === '#text' ? node.nodeValue : squashTextNodes(node) @@ -378,13 +414,19 @@ export const markDirty = (node?: DOMNode): void => { while (current) { if (current.nodeName !== '#text') { - ;(current as DOMElement).dirty = true + const elem = current as DOMElement + elem.dirty = true // Only mark yoga dirty on leaf nodes that have measure functions - if (!markedYoga && (current.nodeName === 'ink-text' || current.nodeName === 'ink-raw-ansi') && current.yogaNode) { - current.yogaNode.markDirty() + if (!markedYoga && (elem.nodeName === 'ink-text' || elem.nodeName === 'ink-raw-ansi') && elem.yogaNode) { + elem.yogaNode.markDirty() markedYoga = true } + + // Invalidate text measurement cache — child text or style changed. 
+ if (elem._textMeasureCache) { + elem._textMeasureCache.gen++ + } } current = current.parentNode @@ -433,6 +475,7 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => { for (const child of node.childNodes) { clearYogaNodeReferences(child) } + node._textMeasureCache = undefined } node.yogaNode = undefined From b08cbc7a79a8b66cfca8f900bd9d54d5801da43d Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 19:01:27 -0500 Subject: [PATCH 018/220] fix(tui): @ fuzzy-matches filenames across the repo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typing `@appChrome` in the composer should surface `ui-tui/src/components/appChrome.tsx` without requiring the user to first type the full directory path — matches the Cmd-P behaviour users expect from modern editors. The gateway's `complete.path` handler was doing a plain `os.listdir(".")` + `startswith` prefix match, so basenames only resolved inside the current working directory. This reworks it to: - enumerate repo files via `git ls-files -z --cached --others --exclude-standard` (fast, honours `.gitignore`); fall back to a bounded `os.walk` that skips common vendor / build dirs when the working dir isn't a git repo. Results cached per-root with a 5s TTL so rapid keystrokes don't respawn git processes. - rank basenames with a 5-tier scorer: exact → prefix → camelCase / word-boundary → substring → subsequence. Shorter basenames win ties; shorter rel paths break basename-length ties. - only take the fuzzy branch when the query is bare (no `/`), is a context reference (`@...`), and isn't `@folder:` — path-ish queries and folder tags fall through to the existing directory-listing path so explicit navigation intent is preserved. 
Completion rows now carry `display = basename`, `meta = directory`, so the picker renders `appChrome.tsx ui-tui/src/components` on one row (basename bold, directory dim) — the meta column was previously "dir" / "" and is a more useful signal for fuzzy hits. Reported by Ben Barclay during the TUI v2 blitz test. --- tests/gateway/test_complete_path_at_filter.py | 152 +++++- tui_gateway/server.py | 431 ++++++++---------- 2 files changed, 342 insertions(+), 241 deletions(-) diff --git a/tests/gateway/test_complete_path_at_filter.py b/tests/gateway/test_complete_path_at_filter.py index 9e5031c0d9..07fb507ea6 100644 --- a/tests/gateway/test_complete_path_at_filter.py +++ b/tests/gateway/test_complete_path_at_filter.py @@ -1,22 +1,28 @@ """Regression tests for the TUI gateway's `complete.path` handler. -Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder` -with no colon yet) still surfaced files alongside directories in the -TUI composer, because the gateway-side completion lives in -`tui_gateway/server.py` and was never touched by the earlier fix to -`hermes_cli/commands.py`. +Reported during the TUI v2 blitz retest: + - typing `@folder:` (and `@folder` with no colon yet) surfaced files + alongside directories — the gateway-side completion lives in + `tui_gateway/server.py` and was never touched by the earlier fix to + `hermes_cli/commands.py`. + - typing `@appChrome` required the full `@ui-tui/src/components/app…` + path to find the file — users expect Cmd-P-style fuzzy basename + matching across the repo, not a strict directory prefix filter. 
Covers: - `@folder:` only yields directories - `@file:` only yields regular files - Bare `@folder` / `@file` (no colon) lists cwd directly - Explicit prefix is preserved in the completion text + - `@` with no slash fuzzy-matches basenames anywhere in the tree """ from __future__ import annotations from pathlib import Path +import pytest + from tui_gateway import server @@ -33,6 +39,15 @@ def _items(word: str): return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]] +@pytest.fixture(autouse=True) +def _reset_fuzzy_cache(monkeypatch): + # Each test walks a fresh tmp dir; clear the cached listing so prior + # roots can't leak through the TTL window. + server._fuzzy_cache.clear() + yield + server._fuzzy_cache.clear() + + def test_at_folder_colon_only_dirs(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) _fixture(tmp_path) @@ -89,3 +104,130 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch): for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"): assert expected in texts, f"missing static ref {expected!r} in {texts!r}" + + +# ── Fuzzy basename matching ────────────────────────────────────────────── +# Users shouldn't have to know the full path — typing `@appChrome` should +# find `ui-tui/src/components/appChrome.tsx`. 
+ + +def _nested_fixture(tmp_path: Path): + (tmp_path / "readme.md").write_text("x") + (tmp_path / ".env").write_text("x") + (tmp_path / "ui-tui/src/components").mkdir(parents=True) + (tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x") + (tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x") + (tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x") + (tmp_path / "ui-tui/src/hooks").mkdir(parents=True) + (tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x") + (tmp_path / "tui_gateway").mkdir() + (tmp_path / "tui_gateway/server.py").write_text("x") + + +def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch): + """`@appChrome` — with no slash — should surface the nested file.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + entries = _items("@appChrome") + texts = [t for t, _, _ in entries] + + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + # Display is the basename, meta is the containing directory, so the + # picker can show `appChrome.tsx ui-tui/src/components` on one row. + row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx") + assert row[1] == "appChrome.tsx" + assert row[2] == "ui-tui/src/components" + + +def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch): + """Better matches sort before weaker matches regardless of path depth.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + (tmp_path / "server.py").write_text("x") # exact basename match at root + + texts = [t for t, _, _ in _items("@server")] + + # Exact `server.py` beats `tui_gateway/server.py` (prefix match) — both + # rank 1 on basename but exact basename wins on the sort key; shorter + # rel path breaks ties. 
+ assert texts[0] == "@file:server.py", texts + assert "@file:tui_gateway/server.py" in texts + + +def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch): + """Mid-basename camelCase pieces match without substring scanning.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@Chrome")] + + # `Chrome` starts a camelCase word inside `appChrome.tsx`. + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + +def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch): + """`@uCo` → `useCompletion.ts` via subsequence, last-resort tier.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@uCo")] + + assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts + + +def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch): + """Explicit `@file:` prefix still wins the completion tag.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@file:appChrome")] + + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + +def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch): + """Any `/` in the query = user is navigating; keep directory listing.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@ui-tui/src/components/app")] + + # Directory-listing mode prefixes with `@file:` / `@folder:` per entry. + # It should only surface direct children of the named dir — not the + # nested `useCompletion.ts`. 
+ assert any("appChrome.tsx" in t for t in texts), texts + assert not any("useCompletion.ts" in t for t in texts), texts + + +def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch): + """`@folder:` still lists directories — fuzzy scanner only walks + files (git-tracked + untracked), so defer to the dir-listing path.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@folder:ui")] + + # Root has `ui-tui/` as a directory; the listing branch should surface it. + assert any(t.startswith("@folder:ui-tui") for t in texts), texts + + +def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch): + """`.env` doesn't leak into `@env` but does show for `@.env`.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + assert not any(".env" in t for t, _, _ in _items("@env")) + assert any(t.endswith(".env") for t, _, _ in _items("@.env")) + + +def test_fuzzy_caps_results(tmp_path, monkeypatch): + """The 30-item cap survives a big tree.""" + monkeypatch.chdir(tmp_path) + for i in range(60): + (tmp_path / f"mod_{i:03d}.py").write_text("x") + + items = _items("@mod") + + assert len(items) == 30 diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 52408ed9f7..c3a0388cb3 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -23,75 +23,6 @@ load_hermes_dotenv( hermes_home=_hermes_home, project_env=Path(__file__).parent.parent / ".env" ) - -# ── Panic logger ───────────────────────────────────────────────────── -# Gateway crashes in a TUI session leave no forensics: stdout is the -# JSON-RPC pipe (TUI side parses it, doesn't log raw), the root logger -# only catches handled warnings, and the subprocess exits before stderr -# flushes through the stderr->gateway.stderr event pump. 
This hook -# appends every unhandled exception to ~/.hermes/logs/tui_gateway_crash.log -# AND re-emits a one-line summary to stderr so the TUI can surface it in -# Activity — exactly what was missing when the voice-mode turns started -# exiting the gateway mid-TTS. -_CRASH_LOG = os.path.join(_hermes_home, "logs", "tui_gateway_crash.log") - - -def _panic_hook(exc_type, exc_value, exc_tb): - import traceback - - trace = "".join(traceback.format_exception(exc_type, exc_value, exc_tb)) - try: - os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) - with open(_CRASH_LOG, "a", encoding="utf-8") as f: - f.write( - f"\n=== unhandled exception · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n" - ) - f.write(trace) - except Exception: - pass - # Stderr goes through to the TUI as a gateway.stderr Activity line — - # the first line here is what the user will see without opening any - # log files. Rest of the stack is still in the log for full context. - first = str(exc_value).strip().splitlines()[0] if str(exc_value).strip() else exc_type.__name__ - print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True) - # Chain to the default hook so the process still terminates normally. 
- sys.__excepthook__(exc_type, exc_value, exc_tb) - - -sys.excepthook = _panic_hook - - -def _thread_panic_hook(args): - # threading.excepthook signature: SimpleNamespace(exc_type, exc_value, exc_traceback, thread) - import traceback - - trace = "".join( - traceback.format_exception(args.exc_type, args.exc_value, args.exc_traceback) - ) - try: - os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) - with open(_CRASH_LOG, "a", encoding="utf-8") as f: - f.write( - f"\n=== thread exception · {time.strftime('%Y-%m-%d %H:%M:%S')} " - f"· thread={args.thread.name} ===\n" - ) - f.write(trace) - except Exception: - pass - first_line = ( - str(args.exc_value).strip().splitlines()[0] - if str(args.exc_value).strip() - else args.exc_type.__name__ - ) - print( - f"[gateway-crash] thread {args.thread.name} raised {args.exc_type.__name__}: {first_line}", - file=sys.stderr, - flush=True, - ) - - -threading.excepthook = _thread_panic_hook - try: from hermes_cli.banner import prefetch_update_check @@ -2195,43 +2126,7 @@ def _(rid, params: dict) -> dict: if rendered: payload["rendered"] = rendered _emit("message.complete", sid, payload) - - # CLI parity: when voice-mode TTS is on, speak the agent reply - # (cli.py:_voice_speak_response). Only the final text — tool - # calls / reasoning already stream separately and would be - # noisy to read aloud. 
- if ( - status == "complete" - and isinstance(raw, str) - and raw.strip() - and _voice_tts_enabled() - ): - try: - from hermes_cli.voice import speak_text - - spoken = raw - threading.Thread( - target=speak_text, args=(spoken,), daemon=True - ).start() - except ImportError: - logger.warning("voice TTS skipped: hermes_cli.voice unavailable") - except Exception as e: - logger.warning("voice TTS dispatch failed: %s", e) except Exception as e: - import traceback - - trace = traceback.format_exc() - try: - os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) - with open(_CRASH_LOG, "a", encoding="utf-8") as f: - f.write( - f"\n=== turn-dispatcher exception · " - f"{time.strftime('%Y-%m-%d %H:%M:%S')} · sid={sid} ===\n" - ) - f.write(trace) - except Exception: - pass - print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True) _emit("error", sid, {"message": str(e)}) finally: try: @@ -3256,6 +3151,145 @@ def _(rid, params: dict) -> dict: # ── Methods: complete ───────────────────────────────────────────────── +_FUZZY_CACHE_TTL_S = 5.0 +_FUZZY_CACHE_MAX_FILES = 20000 +_FUZZY_FALLBACK_EXCLUDES = frozenset( + { + ".git", + ".hg", + ".svn", + ".next", + ".cache", + ".venv", + "venv", + "node_modules", + "__pycache__", + "dist", + "build", + "target", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + } +) +_fuzzy_cache_lock = threading.Lock() +_fuzzy_cache: dict[str, tuple[float, list[str]]] = {} + + +def _list_repo_files(root: str) -> list[str]: + """Return repo-relative file paths rooted at ``root``. + + Uses ``git ls-files`` when available (fast, honours .gitignore) and falls + back to a bounded ``os.walk`` that skips common vendor/build dirs. The + result is cached per-root for ``_FUZZY_CACHE_TTL_S`` so rapid keystrokes + don't respawn git processes. 
+ """ + now = time.monotonic() + with _fuzzy_cache_lock: + cached = _fuzzy_cache.get(root) + if cached and now - cached[0] < _FUZZY_CACHE_TTL_S: + return cached[1] + + files: list[str] = [] + try: + result = subprocess.run( + ["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard"], + cwd=root, + capture_output=True, + timeout=2.0, + check=False, + ) + if result.returncode == 0: + files = [ + p + for p in result.stdout.decode("utf-8", "replace").split("\0") + if p + ][:_FUZZY_CACHE_MAX_FILES] + except (OSError, subprocess.TimeoutExpired): + pass + + if not files: + # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays + # tractable. Dotfiles themselves survive — the ranker decides based + # on whether the query starts with `.`. + try: + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + dirnames[:] = [ + d + for d in dirnames + if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".") + ] + rel_dir = os.path.relpath(dirpath, root) + for f in filenames: + rel = f if rel_dir == "." else f"{rel_dir}/{f}" + files.append(rel.replace(os.sep, "/")) + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + except OSError: + pass + + with _fuzzy_cache_lock: + _fuzzy_cache[root] = (now, files) + + return files + + +def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None: + """Rank ``name`` against ``query``; lower is better. Returns None to reject. + + Tiers (kind): + 0 — exact basename + 1 — basename prefix (e.g. `app` → `appChrome.tsx`) + 2 — word-boundary / camelCase hit (e.g. `chrome` → `appChrome.tsx`) + 3 — substring anywhere in basename + 4 — subsequence match (every query char appears in order) + + Secondary key is `len(name)` so shorter names win ties. 
+ """ + if not query: + return (3, len(name)) + + nl = name.lower() + ql = query.lower() + + if nl == ql: + return (0, len(name)) + + if nl.startswith(ql): + return (1, len(name)) + + # Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"]. + # camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation; + # falls through to substring/subsequence if it misses. + parts: list[str] = [] + buf = "" + for ch in name: + if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()): + if buf: + parts.append(buf) + buf = ch if ch not in "-_." else "" + else: + buf += ch + if buf: + parts.append(buf) + for p in parts: + if p.lower().startswith(ql): + return (2, len(name)) + + if ql in nl: + return (3, len(name)) + + i = 0 + for ch in nl: + if ch == ql[i]: + i += 1 + if i == len(ql): + return (4, len(name)) + + return None + @method("complete.path") def _(rid, params: dict) -> dict: @@ -3291,6 +3325,43 @@ def _(rid, params: dict) -> dict: prefix_tag = "" path_part = query if is_context else query + # Fuzzy basename search across the repo when the user types a bare + # name with no path separator — `@appChrome` surfaces every file + # whose basename matches, regardless of directory depth. Matches what + # editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with + # `/`, `./`, `~/`, `/abs`) fall through to the directory-listing + # path so explicit navigation intent is preserved. 
+ if ( + is_context + and path_part + and "/" not in path_part + and prefix_tag != "folder" + ): + root = os.getcwd() + ranked: list[tuple[tuple[int, int], str, str]] = [] + for rel in _list_repo_files(root): + basename = os.path.basename(rel) + if basename.startswith(".") and not path_part.startswith("."): + continue + rank = _fuzzy_basename_rank(basename, path_part) + if rank is None: + continue + ranked.append((rank, rel, basename)) + + ranked.sort(key=lambda r: (r[0], len(r[1]), r[1])) + tag = prefix_tag or "file" + for _, rel, basename in ranked[:30]: + directory = os.path.dirname(rel) + items.append( + { + "text": f"@{tag}:{rel}", + "display": basename, + "meta": directory, + } + ) + + return _ok(rid, {"items": items}) + expanded = _normalize_completion_path(path_part) if path_part else "." if expanded == "." or not expanded: search_dir, match = ".", "" @@ -3560,155 +3631,43 @@ def _(rid, params: dict) -> dict: # ── Methods: voice ─────────────────────────────────────────────────── -_voice_sid_lock = threading.Lock() -_voice_event_sid: str = "" - - -def _voice_emit(event: str, payload: dict | None = None) -> None: - """Emit a voice event toward the session that most recently turned the - mode on. Voice is process-global (one microphone), so there's only ever - one sid to target; the TUI handler treats an empty sid as "active - session". Kept separate from _emit to make the lack of per-call sid - argument explicit.""" - with _voice_sid_lock: - sid = _voice_event_sid - _emit(event, sid, payload) - - -def _voice_mode_enabled() -> bool: - """Current voice-mode flag (runtime-only, CLI parity). - - cli.py initialises ``_voice_mode = False`` at startup and only flips - it via ``/voice on``; it never reads a persisted enable bit from - config.yaml. We match that: no config lookup, env var only. This - avoids the TUI auto-starting in REC the next time the user opens it - just because they happened to enable voice in a prior session. 
- """ - return os.environ.get("HERMES_VOICE", "").strip() == "1" - - -def _voice_tts_enabled() -> bool: - """Whether agent replies should be spoken back via TTS (runtime only).""" - return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1" - - @method("voice.toggle") def _(rid, params: dict) -> dict: - """CLI parity for the ``/voice`` slash command. - - Subcommands: - - * ``status`` — report mode + TTS flags (default when action is unknown). - * ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it - off also tears down any active continuous recording loop. Does NOT - start recording on its own; recording is driven by ``voice.record`` - (Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split. - * ``tts`` — toggle speech-output of agent replies. Requires mode on - (mirrors CLI's _toggle_voice_tts guard). - """ action = params.get("action", "status") - if action == "status": - # Mirror CLI's _show_voice_status: include STT/TTS provider - # availability so the user can tell at a glance *why* voice mode - # isn't working ("STT provider: MISSING ..." is the common case). - payload: dict = { - "enabled": _voice_mode_enabled(), - "tts": _voice_tts_enabled(), - } - try: - from tools.voice_mode import check_voice_requirements - - reqs = check_voice_requirements() - payload["available"] = bool(reqs.get("available")) - payload["audio_available"] = bool(reqs.get("audio_available")) - payload["stt_available"] = bool(reqs.get("stt_available")) - payload["details"] = reqs.get("details") or "" - except Exception as e: - # check_voice_requirements pulls optional transcription deps — - # swallow so /voice status always returns something useful. 
- logger.warning("voice.toggle status: requirements probe failed: %s", e) - - return _ok(rid, payload) - + env = os.environ.get("HERMES_VOICE", "").strip() + if env in {"0", "1"}: + return _ok(rid, {"enabled": env == "1"}) + return _ok( + rid, + { + "enabled": bool( + _load_cfg().get("display", {}).get("voice_enabled", False) + ) + }, + ) if action in ("on", "off"): enabled = action == "on" - # Runtime-only flag (CLI parity) — no _write_config_key, so the - # next TUI launch starts with voice OFF instead of auto-REC from a - # persisted stale toggle. os.environ["HERMES_VOICE"] = "1" if enabled else "0" - - if not enabled: - # Disabling the mode must tear the continuous loop down; the - # loop holds the microphone and would otherwise keep running. - try: - from hermes_cli.voice import stop_continuous - - stop_continuous() - except ImportError: - pass - except Exception as e: - logger.warning("voice: stop_continuous failed during toggle off: %s", e) - - return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()}) - - if action == "tts": - if not _voice_mode_enabled(): - return _err(rid, 4014, "enable voice mode first: /voice on") - new_value = not _voice_tts_enabled() - # Runtime-only flag (CLI parity) — see voice.toggle on/off above. - os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0" - return _ok(rid, {"enabled": True, "tts": new_value}) - + _write_config_key("display.voice_enabled", enabled) + return _ok(rid, {"enabled": action == "on"}) return _err(rid, 4013, f"unknown voice action: {action}") @method("voice.record") def _(rid, params: dict) -> dict: - """VAD-driven continuous record loop, CLI-parity. - - ``start`` turns on a VAD loop that emits ``voice.transcript`` events - for each detected utterance and auto-restarts for the next turn. - ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while- - recording branch clearing ``_voice_continuous``). 
Three consecutive - silent cycles stop the loop automatically and emit a - ``voice.transcript`` with ``no_speech_limit=True``. - """ action = params.get("action", "start") - - if action not in {"start", "stop"}: - return _err(rid, 4019, f"unknown voice action: {action}") - try: if action == "start": - if not _voice_mode_enabled(): - return _err(rid, 4015, "voice mode is off — enable with /voice on") + from hermes_cli.voice import start_recording - with _voice_sid_lock: - global _voice_event_sid - _voice_event_sid = params.get("session_id") or _voice_event_sid - - from hermes_cli.voice import start_continuous - - voice_cfg = _load_cfg().get("voice", {}) - start_continuous( - on_transcript=lambda t: _voice_emit( - "voice.transcript", {"text": t} - ), - on_status=lambda s: _voice_emit("voice.status", {"state": s}), - on_silent_limit=lambda: _voice_emit( - "voice.transcript", {"no_speech_limit": True} - ), - silence_threshold=voice_cfg.get("silence_threshold", 200), - silence_duration=voice_cfg.get("silence_duration", 3.0), - ) + start_recording() return _ok(rid, {"status": "recording"}) + if action == "stop": + from hermes_cli.voice import stop_and_transcribe - # action == "stop" - from hermes_cli.voice import stop_continuous - - stop_continuous() - return _ok(rid, {"status": "stopped"}) + return _ok(rid, {"text": stop_and_transcribe() or ""}) + return _err(rid, 4019, f"unknown voice action: {action}") except ImportError: return _err( rid, 5025, "voice module not available — install audio dependencies" From 11b2942f1654a2366ccf77b3d4a5bd2f048b746a Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 19:02:44 -0500 Subject: [PATCH 019/220] fix(tui): anchor inline_diff to the segment where the edit happened MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revisits #13729. That PR buffered each `tool.complete`'s inline_diff and merged them into the final assistant message body as a fenced ```diff block. 
The merge-at-end placement reads as "the agent wrote this after the summary", even when the edit fired mid-turn — which is both misleading and (per blitz feedback) feels like noise tacked onto the end of every task. Segment-anchored placement instead: - On tool.complete with inline_diff, `pushInlineDiffSegment` calls `flushStreamingSegment` first (so any in-progress narration lands as its own segment), then pushes the ```diff block as its own segment into segmentMessages. The diff is now anchored BETWEEN the narration that preceded the edit and whatever the agent streams afterwards, which is where the edit actually happened. - `recordMessageComplete` no longer merges buffered diffs. The only remaining dedupe is "drop diff-only segments whose body the final assistant text narrates verbatim (or whose diff fence the final text already contains)" — same tradeoff as before, kept so an agent that narrates its own diff doesn't render two stacked copies. - Drops `pendingInlineDiffs` and `queueInlineDiff` — buffer + end- merge machinery is gone; segmentMessages is now the only source of truth. Side benefit: Ctrl+C interrupt (`interruptTurn`) iterates segmentMessages, so diff segments are now preserved in the transcript when the user cancels after an edit. Previously the pending buffer was silently dropped on interrupt. Reported by Teknium during blitz usage: "no diffs are ever at the end because it didn't make this file edit after the final message". 
--- .../createGatewayEventHandler.test.ts | 86 +++++++++---------- ui-tui/src/app/createGatewayEventHandler.ts | 65 ++------------ ui-tui/src/app/turnController.ts | 85 ++++++++++++------ 3 files changed, 105 insertions(+), 131 deletions(-) diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index ef55d807ca..07721d441e 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -15,8 +15,7 @@ const buildCtx = (appended: Msg[]) => composer: { dequeue: () => undefined, queueEditRef: ref(null), - sendQueued: vi.fn(), - setInput: vi.fn() + sendQueued: vi.fn() }, gateway: { gw: { request: vi.fn() }, @@ -30,9 +29,6 @@ const buildCtx = (appended: Msg[]) => resumeById: vi.fn(), setCatalog: vi.fn() }, - submission: { - submitRef: { current: vi.fn() } - }, system: { bellOnComplete: false, sys: vi.fn() @@ -42,11 +38,6 @@ const buildCtx = (appended: Msg[]) => panel: (title: string, sections: any[]) => appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }), setHistoryItems: vi.fn() - }, - voice: { - setProcessing: vi.fn(), - setRecording: vi.fn(), - setVoiceEnabled: vi.fn() } }) as any @@ -152,12 +143,16 @@ describe('createGatewayEventHandler', () => { expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer)) }) - it('attaches inline_diff to the assistant completion body', () => { + it('anchors inline_diff as its own segment where the edit happened', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new' const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' + // Narration → tool → tool-complete → more narration → message-complete. + // The diff MUST land between the two narration segments, not tacked + // onto the final one. 
+ onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any) onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' @@ -167,24 +162,27 @@ describe('createGatewayEventHandler', () => { type: 'tool.complete' } as any) - // Diff is buffered for message.complete and sanitized (ANSI stripped). + // Diff is already committed to segmentMessages as its own segment — + // nothing is "pending" anymore. The pre-tool narration is also flushed. expect(appended).toHaveLength(0) - expect(turnController.pendingInlineDiffs).toEqual([cleaned]) + expect(turnController.segmentMessages.map(m => m.text)).toEqual([ + 'Editing the file', + `\`\`\`diff\n${cleaned}\n\`\`\`` + ]) - onEvent({ - payload: { text: 'patch applied' }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any) - // Diff is rendered in the same assistant message body as the completion. - expect(appended).toHaveLength(1) - expect(appended[0]).toMatchObject({ role: 'assistant' }) - expect(appended[0]?.text).toContain('patch applied') - expect(appended[0]?.text).toContain('```diff') - expect(appended[0]?.text).toContain(cleaned) + // Three messages in the transcript, in order: pre-tool narration → + // diff → post-tool narration. The final message does NOT contain + // `diff` content. 
+ expect(appended).toHaveLength(3) + expect(appended[0]?.text).toBe('Editing the file') + expect(appended[1]?.text).toBe(`\`\`\`diff\n${cleaned}\n\`\`\``) + expect(appended[2]?.text).toBe('patch applied') + expect(appended[2]?.text).not.toContain('```diff') }) - it('does not append inline_diff twice when assistant text already contains it', () => { + it('drops the diff segment when the final assistant text narrates the same diff', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' @@ -194,17 +192,16 @@ describe('createGatewayEventHandler', () => { payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) - onEvent({ - payload: { text: assistantText }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any) + // Only the final message — diff-only segment dropped so we don't + // render two stacked copies of the same patch. expect(appended).toHaveLength(1) expect(appended[0]?.text).toBe(assistantText) expect((appended[0]?.text.match(/```diff/g) ?? 
[]).length).toBe(1) }) - it('strips the CLI "┊ review diff" header from queued inline diffs', () => { + it('strips the CLI "┊ review diff" header from inline diff segments', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const raw = ' \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' @@ -213,17 +210,16 @@ describe('createGatewayEventHandler', () => { payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) - onEvent({ - payload: { text: 'done' }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any) - expect(appended).toHaveLength(1) + // diff segment first, final narration second + expect(appended).toHaveLength(2) expect(appended[0]?.text).not.toContain('┊ review diff') expect(appended[0]?.text).toContain('--- a/foo.ts') + expect(appended[1]?.text).toBe('done') }) - it('suppresses inline_diff when assistant already wrote a diff fence', () => { + it('drops the diff segment when assistant writes its own ```diff fence', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' @@ -233,10 +229,7 @@ describe('createGatewayEventHandler', () => { payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) - onEvent({ - payload: { text: assistantText }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any) expect(appended).toHaveLength(1) expect(appended[0]?.text).toBe(assistantText) @@ -252,15 +245,16 @@ describe('createGatewayEventHandler', () => { payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' }, type: 'tool.complete' } as any) - onEvent({ - payload: { text: 'done' }, - type: 'message.complete' - } as any) + onEvent({ payload: { text: 
'done' }, type: 'message.complete' } as any) - expect(appended).toHaveLength(1) - expect(appended[0]?.tools?.[0]).toContain('Review Diff') - expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts') + // Two segments: diff block (no tool row), final narration (tool row + // belongs here since pendingSegmentTools carries across the flush). + expect(appended).toHaveLength(2) expect(appended[0]?.text).toContain('```diff') + expect(appended[0]?.tools ?? []).toEqual([]) + expect(appended[1]?.text).toBe('done') + expect(appended[1]?.tools?.[0]).toContain('Review Diff') + expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts') }) it('shows setup panel for missing provider startup error', () => { diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 50f6fa3af4..2d3b48d39a 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -51,9 +51,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session const { bellOnComplete, stdout, sys } = ctx.system const { appendMessage, panel, setHistoryItems } = ctx.transcript - const { setInput } = ctx.composer - const { submitRef } = ctx.submission - const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice let pendingThinkingStatus = '' let thinkingStatusTimer: null | ReturnType = null @@ -264,57 +261,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } - case 'voice.status': { - // Continuous VAD loop reports its internal state so the status bar - // can show listening / transcribing / idle without polling. - const state = String(ev.payload?.state ?? 
'') - - if (state === 'listening') { - setVoiceRecording(true) - setVoiceProcessing(false) - } else if (state === 'transcribing') { - setVoiceRecording(false) - setVoiceProcessing(true) - } else { - setVoiceRecording(false) - setVoiceProcessing(false) - } - - return - } - - case 'voice.transcript': { - // CLI parity: the 3-strikes silence detector flipped off automatically. - // Mirror that on the UI side and tell the user why the mode is off. - if (ev.payload?.no_speech_limit) { - setVoiceEnabled(false) - setVoiceRecording(false) - setVoiceProcessing(false) - sys('voice: no speech detected 3 times, continuous mode stopped') - - return - } - - const text = String(ev.payload?.text ?? '').trim() - - if (!text) { - return - } - - // CLI parity: _pending_input.put(transcript) unconditionally feeds - // the transcript to the agent as its next turn — draft handling - // doesn't apply because voice-mode users are speaking, not typing. - // - // We can't branch on composer input from inside a setInput updater - // (React strict mode double-invokes it, duplicating the submit). - // Just clear + defer submit so the cleared input is committed before - // submit reads it. - setInput('') - setTimeout(() => submitRef.current(text), 0) - - return - } - case 'gateway.start_timeout': { const { cwd, python } = ev.payload ?? {} const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : '' @@ -385,10 +331,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } - // Keep inline diffs attached to the assistant completion body so - // they render in the same message flow, not as a standalone system - // artifact that can look out-of-place around tool rows. - turnController.queueInlineDiff(inlineDiffText) + // Anchor the diff to the segment where the edit actually happened + // (between the narration that preceded the tool call and whatever + // the agent streams afterwards). 
The previous end-merge put the + // diff at the bottom of the final message even when the edit fired + // mid-turn, which read as "the agent wrote this after saying + // that" — misleading, and dropped for #14XXX. + turnController.pushInlineDiffSegment(inlineDiffText) return } diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index 804394bb19..5bcbb05f28 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -19,6 +19,21 @@ const INTERRUPT_COOLDOWN_MS = 1500 const ACTIVITY_LIMIT = 8 const TRAIL_LIMIT = 8 +// Matches segments produced by pushInlineDiffSegment — a bare ```diff fence +// wrapping the raw patch, no surrounding prose. Used at message.complete to +// dedupe against final assistant text that narrates the same patch. +const DIFF_SEGMENT_RE = /^```diff\n([\s\S]*?)\n```$/ + +const diffSegmentBody = (msg: Msg): null | string => { + if (msg.role !== 'assistant' || msg.tools?.length) { + return null + } + + const m = msg.text.match(DIFF_SEGMENT_RE) + + return m ? m[1]! : null +} + export interface InterruptDeps { appendMessage: (msg: Msg) => void gw: { request: (method: string, params?: Record) => Promise } @@ -40,7 +55,6 @@ class TurnController { bufRef = '' interrupted = false lastStatusNote = '' - pendingInlineDiffs: string[] = [] persistedToolLabels = new Set() persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise protocolWarned = false @@ -79,7 +93,6 @@ class TurnController { this.activeTools = [] this.streamTimer = clear(this.streamTimer) this.bufRef = '' - this.pendingInlineDiffs = [] this.pendingSegmentTools = [] this.segmentMessages = [] @@ -186,18 +199,35 @@ class TurnController { }, REASONING_PULSE_MS) } - queueInlineDiff(diffText: string) { + pushInlineDiffSegment(diffText: string) { // Strip CLI chrome the gateway emits before the unified diff (e.g. a // leading "┊ review diff" header written by `_emit_inline_diff` for the // terminal printer). 
That header only makes sense as stdout dressing, // not inside a markdown ```diff block. - const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim() + const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim() - if (!text || this.pendingInlineDiffs.includes(text)) { + if (!stripped) { return } - this.pendingInlineDiffs = [...this.pendingInlineDiffs, text] + // Flush any in-progress streaming text as its own segment first, so the + // diff lands BETWEEN the assistant narration that preceded the edit and + // whatever the agent streams afterwards — not glued onto the final + // message. This is the whole point of segment-anchored diffs: the diff + // renders where the edit actually happened. + this.flushStreamingSegment() + + const block = `\`\`\`diff\n${stripped}\n\`\`\`` + + // Skip consecutive duplicates (same tool firing tool.complete twice, or + // two edits producing the same patch). Keeping this cheap — deeper + // dedupe against the final assistant text happens at message.complete. + if (this.segmentMessages.at(-1)?.text === block) { + return + } + + this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text: block }] + patchTurnState({ streamSegments: this.segmentMessages }) } pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) { @@ -234,7 +264,6 @@ class TurnController { this.idle() this.clearReasoning() this.clearStatusTimer() - this.pendingInlineDiffs = [] this.pendingSegmentTools = [] this.segmentMessages = [] this.turnTools = [] @@ -245,31 +274,35 @@ class TurnController { const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart() const split = splitReasoning(rawText) const finalText = split.text - // Skip appending if the assistant already narrated the diff inside a - // markdown fence of its own — otherwise we render two stacked diff - // blocks for the same edit. 
- const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText) - - const remainingInlineDiffs = assistantAlreadyHasDiff - ? [] - : this.pendingInlineDiffs.filter(diff => !finalText.includes(diff)) - - const inlineDiffBlock = remainingInlineDiffs.length - ? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\`` - : '' - - const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n') const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim() const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n') const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0 const savedToolTokens = this.toolTokenAcc const tools = this.pendingSegmentTools - const finalMessages = [...this.segmentMessages] - if (mergedText) { + // Drop diff-only segments the agent is about to narrate in the final + // reply. Without this, a closing "here's the diff …" message would + // render two stacked copies of the same patch. Only touches segments + // whose entire body is a ```diff``` fence emitted by pushInlineDiff- + // Segment — real assistant narration stays put. + const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText) + + const segments = this.segmentMessages.filter(msg => { + const body = diffSegmentBody(msg) + + if (body === null) { + return true + } + + return !finalHasOwnDiffFence && !finalText.includes(body) + }) + + const finalMessages = [...segments] + + if (finalText) { finalMessages.push({ role: 'assistant', - text: mergedText, + text: finalText, thinking: savedReasoning || undefined, thinkingTokens: savedReasoning ? 
savedReasoningTokens : undefined, toolTokens: savedToolTokens || undefined, @@ -300,7 +333,7 @@ class TurnController { this.bufRef = '' patchTurnState({ activity: [], outcome: '' }) - return { finalMessages, finalText: mergedText, wasInterrupted } + return { finalMessages, finalText, wasInterrupted } } recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) { @@ -406,7 +439,6 @@ class TurnController { this.bufRef = '' this.interrupted = false this.lastStatusNote = '' - this.pendingInlineDiffs = [] this.pendingSegmentTools = [] this.protocolWarned = false this.segmentMessages = [] @@ -452,7 +484,6 @@ class TurnController { this.endReasoningPhase() this.clearReasoning() this.activeTools = [] - this.pendingInlineDiffs = [] this.turnTools = [] this.toolTokenAcc = 0 this.persistedToolLabels.clear() From 2258a181f01eb1616abfe8e8b20afe623d1a63ff Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 19:11:59 -0500 Subject: [PATCH 020/220] fix(tui): give inline_diff segments blank-line breathing room Visual polish on top of the segment-anchor change: diff blocks were butting up against the narration around them. Tag diff-only segments with `kind: 'diff'` (extended on Msg) and give them `marginTop={1}` + `marginBottom={1}` in MessageLine, matching the spacing we already use for user messages. Also swaps the regex-based `diffSegmentBody` check for an explicit `kind === 'diff'` guard so the dedupe path is clearer. 
--- .../createGatewayEventHandler.test.ts | 18 ++++++++++-------- ui-tui/src/app/turnController.ts | 15 +++++++-------- ui-tui/src/components/messageLine.tsx | 9 +++++++-- ui-tui/src/types.ts | 2 +- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index 07721d441e..289c9b7b21 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -165,19 +165,19 @@ describe('createGatewayEventHandler', () => { // Diff is already committed to segmentMessages as its own segment — // nothing is "pending" anymore. The pre-tool narration is also flushed. expect(appended).toHaveLength(0) - expect(turnController.segmentMessages.map(m => m.text)).toEqual([ - 'Editing the file', - `\`\`\`diff\n${cleaned}\n\`\`\`` + expect(turnController.segmentMessages).toEqual([ + { role: 'assistant', text: 'Editing the file' }, + { kind: 'diff', role: 'assistant', text: `\`\`\`diff\n${cleaned}\n\`\`\`` } ]) onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any) // Three messages in the transcript, in order: pre-tool narration → - // diff → post-tool narration. The final message does NOT contain - // `diff` content. + // diff (kind='diff' so MessageLine gives it blank-line breathing room) + // → post-tool narration. The final message does NOT contain a diff. 
expect(appended).toHaveLength(3) expect(appended[0]?.text).toBe('Editing the file') - expect(appended[1]?.text).toBe(`\`\`\`diff\n${cleaned}\n\`\`\``) + expect(appended[1]).toMatchObject({ kind: 'diff', text: `\`\`\`diff\n${cleaned}\n\`\`\`` }) expect(appended[2]?.text).toBe('patch applied') expect(appended[2]?.text).not.toContain('```diff') }) @@ -214,6 +214,7 @@ describe('createGatewayEventHandler', () => { // diff segment first, final narration second expect(appended).toHaveLength(2) + expect(appended[0]?.kind).toBe('diff') expect(appended[0]?.text).not.toContain('┊ review diff') expect(appended[0]?.text).toContain('--- a/foo.ts') expect(appended[1]?.text).toBe('done') @@ -247,9 +248,10 @@ describe('createGatewayEventHandler', () => { } as any) onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any) - // Two segments: diff block (no tool row), final narration (tool row - // belongs here since pendingSegmentTools carries across the flush). + // Two segments: diff block (kind='diff', no tool row), final narration + // (tool row belongs here since pendingSegmentTools carries across the flush). expect(appended).toHaveLength(2) + expect(appended[0]?.kind).toBe('diff') expect(appended[0]?.text).toContain('```diff') expect(appended[0]?.tools ?? []).toEqual([]) expect(appended[1]?.text).toBe('done') diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index 5bcbb05f28..31b65cb860 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -19,17 +19,16 @@ const INTERRUPT_COOLDOWN_MS = 1500 const ACTIVITY_LIMIT = 8 const TRAIL_LIMIT = 8 -// Matches segments produced by pushInlineDiffSegment — a bare ```diff fence -// wrapping the raw patch, no surrounding prose. Used at message.complete to -// dedupe against final assistant text that narrates the same patch. 
-const DIFF_SEGMENT_RE = /^```diff\n([\s\S]*?)\n```$/ - +// Extracts the raw patch from a diff-only segment produced by +// pushInlineDiffSegment. Used at message.complete to dedupe against final +// assistant text that narrates the same patch. Returns null for anything +// else so real assistant narration never gets touched. const diffSegmentBody = (msg: Msg): null | string => { - if (msg.role !== 'assistant' || msg.tools?.length) { + if (msg.kind !== 'diff') { return null } - const m = msg.text.match(DIFF_SEGMENT_RE) + const m = msg.text.match(/^```diff\n([\s\S]*?)\n```$/) return m ? m[1]! : null } @@ -226,7 +225,7 @@ class TurnController { return } - this.segmentMessages = [...this.segmentMessages, { role: 'assistant', text: block }] + this.segmentMessages = [...this.segmentMessages, { kind: 'diff', role: 'assistant', text: block }] patchTurnState({ streamSegments: this.segmentMessages }) } diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx index 8d77a49e57..635c119a04 100644 --- a/ui-tui/src/components/messageLine.tsx +++ b/ui-tui/src/components/messageLine.tsx @@ -81,11 +81,16 @@ export const MessageLine = memo(function MessageLine({ return {msg.text} })() + // Diff segments (emitted by pushInlineDiffSegment between narration + // segments) need a blank line on both sides so the patch doesn't butt up + // against the prose around it. 
+ const isDiffSegment = msg.kind === 'diff' + return ( {showDetails && ( diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index 63d6c6d4fe..191e639009 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -102,7 +102,7 @@ export interface ClarifyReq { export interface Msg { info?: SessionInfo - kind?: 'intro' | 'panel' | 'slash' | 'trail' + kind?: 'diff' | 'intro' | 'panel' | 'slash' | 'trail' panelData?: PanelData role: Role text: string From 4ae5b58cb10d98cac50c16e93419533253ada8f9 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 19:22:41 -0500 Subject: [PATCH 021/220] fix(tui): restore voice handlers + address copilot review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebase-artefact cleanup on this branch: - Restore `voice.status` and `voice.transcript` cases in createGatewayEventHandler plus the `voice` / `submission` / `composer.setInput` ctx destructuring. They were added to main in the 58-commit gap that this branch was originally cut behind; dropping them was unintentional. - Rebase the test ctx shape to match main (voice.* fakes, submission.submitRef, composer.setInput) and apply the same segment-anchor test rewrites on top. - Drop the `#14XXX` placeholder from the tool.complete comment; replace with a plain-English rationale. - Rewrite the broken mid-word "pushInlineDiff- Segment" in turnController's dedupe comment to refer to pushInlineDiffSegment and `kind: 'diff'` plainly. - Collapse the filter predicate in recordMessageComplete from a 4-line if/return into one boolean expression — same semantics, reads left-to-right as a single predicate. Copilot review threads resolved: #3134668789, #3134668805, #3134668822. 
--- .../createGatewayEventHandler.test.ts | 57 ++++++++-------- ui-tui/src/app/createGatewayEventHandler.ts | 65 +++++++++++++++++-- ui-tui/src/app/turnController.ts | 10 +-- 3 files changed, 88 insertions(+), 44 deletions(-) diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index 289c9b7b21..43a17e6692 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -15,7 +15,8 @@ const buildCtx = (appended: Msg[]) => composer: { dequeue: () => undefined, queueEditRef: ref(null), - sendQueued: vi.fn() + sendQueued: vi.fn(), + setInput: vi.fn() }, gateway: { gw: { request: vi.fn() }, @@ -29,6 +30,9 @@ const buildCtx = (appended: Msg[]) => resumeById: vi.fn(), setCatalog: vi.fn() }, + submission: { + submitRef: { current: vi.fn() } + }, system: { bellOnComplete: false, sys: vi.fn() @@ -38,6 +42,11 @@ const buildCtx = (appended: Msg[]) => panel: (title: string, sections: any[]) => appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }), setHistoryItems: vi.fn() + }, + voice: { + setProcessing: vi.fn(), + setRecording: vi.fn(), + setVoiceEnabled: vi.fn() } }) as any @@ -148,36 +157,30 @@ describe('createGatewayEventHandler', () => { const onEvent = createGatewayEventHandler(buildCtx(appended)) const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new' const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' + const block = `\`\`\`diff\n${cleaned}\n\`\`\`` // Narration → tool → tool-complete → more narration → message-complete. // The diff MUST land between the two narration segments, not tacked // onto the final one. 
onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any) - onEvent({ - payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, - type: 'tool.start' - } as any) - onEvent({ - payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) + onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' } as any) + onEvent({ payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) - // Diff is already committed to segmentMessages as its own segment — - // nothing is "pending" anymore. The pre-tool narration is also flushed. + // Diff is already committed to segmentMessages as its own segment. expect(appended).toHaveLength(0) expect(turnController.segmentMessages).toEqual([ { role: 'assistant', text: 'Editing the file' }, - { kind: 'diff', role: 'assistant', text: `\`\`\`diff\n${cleaned}\n\`\`\`` } + { kind: 'diff', role: 'assistant', text: block } ]) onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any) - // Three messages in the transcript, in order: pre-tool narration → - // diff (kind='diff' so MessageLine gives it blank-line breathing room) - // → post-tool narration. The final message does NOT contain a diff. + // Three transcript messages: pre-tool narration → diff (kind='diff', + // so MessageLine gives it blank-line breathing room) → post-tool + // narration. The final message does NOT contain a diff. 
expect(appended).toHaveLength(3) expect(appended[0]?.text).toBe('Editing the file') - expect(appended[1]).toMatchObject({ kind: 'diff', text: `\`\`\`diff\n${cleaned}\n\`\`\`` }) + expect(appended[1]).toMatchObject({ kind: 'diff', text: block }) expect(appended[2]?.text).toBe('patch applied') expect(appended[2]?.text).not.toContain('```diff') }) @@ -188,10 +191,7 @@ describe('createGatewayEventHandler', () => { const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' const assistantText = `Done. Here's the inline diff:\n\n\`\`\`diff\n${cleaned}\n\`\`\`` - onEvent({ - payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) + onEvent({ payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any) // Only the final message — diff-only segment dropped so we don't @@ -206,13 +206,10 @@ describe('createGatewayEventHandler', () => { const onEvent = createGatewayEventHandler(buildCtx(appended)) const raw = ' \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' - onEvent({ - payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) + onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any) - // diff segment first, final narration second + // diff segment first (kind='diff'), final narration second expect(appended).toHaveLength(2) expect(appended[0]?.kind).toBe('diff') expect(appended[0]?.text).not.toContain('┊ review diff') @@ -226,10 +223,7 @@ describe('createGatewayEventHandler', () => { const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' const assistantText = 'Done. 
Clean swap:\n\n```diff\n-old\n+new\n```' - onEvent({ - payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, - type: 'tool.complete' - } as any) + onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any) onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any) expect(appended).toHaveLength(1) @@ -248,8 +242,9 @@ describe('createGatewayEventHandler', () => { } as any) onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any) - // Two segments: diff block (kind='diff', no tool row), final narration - // (tool row belongs here since pendingSegmentTools carries across the flush). + // Two segments: the diff block (kind='diff', no tool row) and the final + // narration (tool row belongs here since pendingSegmentTools carries + // across the flushStreamingSegment call). expect(appended).toHaveLength(2) expect(appended[0]?.kind).toBe('diff') expect(appended[0]?.text).toContain('```diff') diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 2d3b48d39a..15cf00a5a9 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -51,6 +51,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session const { bellOnComplete, stdout, sys } = ctx.system const { appendMessage, panel, setHistoryItems } = ctx.transcript + const { setInput } = ctx.composer + const { submitRef } = ctx.submission + const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice let pendingThinkingStatus = '' let thinkingStatusTimer: null | ReturnType = null @@ -261,6 +264,57 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } + case 'voice.status': { + // Continuous VAD loop reports its internal state 
so the status bar + // can show listening / transcribing / idle without polling. + const state = String(ev.payload?.state ?? '') + + if (state === 'listening') { + setVoiceRecording(true) + setVoiceProcessing(false) + } else if (state === 'transcribing') { + setVoiceRecording(false) + setVoiceProcessing(true) + } else { + setVoiceRecording(false) + setVoiceProcessing(false) + } + + return + } + + case 'voice.transcript': { + // CLI parity: the 3-strikes silence detector flipped off automatically. + // Mirror that on the UI side and tell the user why the mode is off. + if (ev.payload?.no_speech_limit) { + setVoiceEnabled(false) + setVoiceRecording(false) + setVoiceProcessing(false) + sys('voice: no speech detected 3 times, continuous mode stopped') + + return + } + + const text = String(ev.payload?.text ?? '').trim() + + if (!text) { + return + } + + // CLI parity: _pending_input.put(transcript) unconditionally feeds + // the transcript to the agent as its next turn — draft handling + // doesn't apply because voice-mode users are speaking, not typing. + // + // We can't branch on composer input from inside a setInput updater + // (React strict mode double-invokes it, duplicating the submit). + // Just clear + defer submit so the cleared input is committed before + // submit reads it. + setInput('') + setTimeout(() => submitRef.current(text), 0) + + return + } + case 'gateway.start_timeout': { const { cwd, python } = ev.payload ?? {} const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : '' @@ -331,12 +385,11 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return } - // Anchor the diff to the segment where the edit actually happened - // (between the narration that preceded the tool call and whatever - // the agent streams afterwards). 
The previous end-merge put the - // diff at the bottom of the final message even when the edit fired - // mid-turn, which read as "the agent wrote this after saying - // that" — misleading, and dropped for #14XXX. + // Anchor the diff to where the edit happened in the turn — between + // the narration that preceded the tool call and whatever the agent + // streams afterwards. The previous end-merge put the diff at the + // bottom of the final message even when the edit fired mid-turn, + // which read as "the agent wrote this after saying that". turnController.pushInlineDiffSegment(inlineDiffText) return diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index 31b65cb860..cbb03b4441 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -282,18 +282,14 @@ class TurnController { // Drop diff-only segments the agent is about to narrate in the final // reply. Without this, a closing "here's the diff …" message would // render two stacked copies of the same patch. Only touches segments - // whose entire body is a ```diff``` fence emitted by pushInlineDiff- - // Segment — real assistant narration stays put. + // with `kind: 'diff'` emitted by pushInlineDiffSegment — real + // assistant narration stays put. 
const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText) const segments = this.segmentMessages.filter(msg => { const body = diffSegmentBody(msg) - if (body === null) { - return true - } - - return !finalHasOwnDiffFence && !finalText.includes(body) + return body === null || (!finalHasOwnDiffFence && !finalText.includes(body)) }) const finalMessages = [...segments] From 0a679cb7ad5261601b760c260f56af51154df1e5 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 19:38:33 -0500 Subject: [PATCH 022/220] fix(tui): restore voice/panic handlers + scope fuzzy paths to cwd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes on top of the fuzzy-@ branch: (1) Rebase artefact: re-apply only the fuzzy additions on top of fresh `tui_gateway/server.py`. The earlier commit was cut from a base 58 commits behind main and clobbered ~170 lines of voice.toggle / voice.record handlers and the gateway crash hooks (`_panic_hook`, `_thread_panic_hook`). Reset server.py to origin/main and re-add only: - `_FUZZY_*` constants + `_list_repo_files` + `_fuzzy_basename_rank` - the new fuzzy branch in the `complete.path` handler (2) Path scoping (Copilot review): `git ls-files` returns repo-root- relative paths, but completions need to resolve under the gateway's cwd. When hermes is launched from a subdirectory, the previous code surfaced `@file:apps/web/src/foo.tsx` even though the agent would resolve that relative to `apps/web/` and miss. Fix: - `git -C root rev-parse --show-toplevel` to get repo top - `git -C top ls-files …` for the listing - `os.path.relpath(top + p, root)` per result, dropping anything starting with `../` so the picker stays scoped to cwd-and-below (matches Cmd-P workspace semantics) `apps/web/src/foo.tsx` ends up as `@file:src/foo.tsx` from inside `apps/web/`, and sibling subtrees + parent-of-cwd files don't leak. 
New test `test_fuzzy_paths_relative_to_cwd_inside_subdir` builds a 3-package mono-repo, runs from `apps/web/`, and verifies completion paths are subtree-relative + outside-of-cwd files don't appear. Copilot review threads addressed: #3134675504 (path scoping), #3134675532 (`voice.toggle` regression), #3134675541 (`voice.record` regression — both were stale-base artefacts, not behavioural changes). --- tests/gateway/test_complete_path_at_filter.py | 46 +++ tui_gateway/server.py | 303 ++++++++++++++++-- 2 files changed, 314 insertions(+), 35 deletions(-) diff --git a/tests/gateway/test_complete_path_at_filter.py b/tests/gateway/test_complete_path_at_filter.py index 07fb507ea6..4a3e292b01 100644 --- a/tests/gateway/test_complete_path_at_filter.py +++ b/tests/gateway/test_complete_path_at_filter.py @@ -231,3 +231,49 @@ def test_fuzzy_caps_results(tmp_path, monkeypatch): items = _items("@mod") assert len(items) == 30 + + +def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch): + """When the gateway runs from a subdirectory of a git repo, fuzzy + completion paths must resolve under that cwd — not under the repo root. + + Without this, `@appChrome` from inside `apps/web/` would suggest + `@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would + look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every + `git ls-files` result back to a `relpath(root)` and drop anything + outside `root` so the completion contract stays "paths are cwd-relative". 
+ """ + import subprocess + + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True) + + (tmp_path / "apps" / "web" / "src").mkdir(parents=True) + (tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x") + (tmp_path / "apps" / "api" / "src").mkdir(parents=True) + (tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x") + (tmp_path / "README.md").write_text("x") + + subprocess.run(["git", "add", "."], cwd=tmp_path, check=True) + subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True) + + # Run from `apps/web/` — completions should be relative to here, and + # files outside this subtree (apps/api, README.md at root) shouldn't + # appear at all. + monkeypatch.chdir(tmp_path / "apps" / "web") + + texts = [t for t, _, _ in _items("@appChrome")] + + assert "@file:src/appChrome.tsx" in texts, texts + assert not any("apps/web/" in t for t in texts), texts + + server._fuzzy_cache.clear() + other_texts = [t for t, _, _ in _items("@server")] + + assert not any("server.ts" in t for t in other_texts), other_texts + + server._fuzzy_cache.clear() + readme_texts = [t for t, _, _ in _items("@README")] + + assert not any("README.md" in t for t in readme_texts), readme_texts diff --git a/tui_gateway/server.py b/tui_gateway/server.py index c3a0388cb3..f0a870b6e1 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -23,6 +23,75 @@ load_hermes_dotenv( hermes_home=_hermes_home, project_env=Path(__file__).parent.parent / ".env" ) + +# ── Panic logger ───────────────────────────────────────────────────── +# Gateway crashes in a TUI session leave no forensics: stdout is the +# JSON-RPC pipe (TUI side parses it, doesn't log raw), the root logger +# only catches handled warnings, and the subprocess exits before stderr +# flushes through the 
stderr->gateway.stderr event pump. This hook +# appends every unhandled exception to ~/.hermes/logs/tui_gateway_crash.log +# AND re-emits a one-line summary to stderr so the TUI can surface it in +# Activity — exactly what was missing when the voice-mode turns started +# exiting the gateway mid-TTS. +_CRASH_LOG = os.path.join(_hermes_home, "logs", "tui_gateway_crash.log") + + +def _panic_hook(exc_type, exc_value, exc_tb): + import traceback + + trace = "".join(traceback.format_exception(exc_type, exc_value, exc_tb)) + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== unhandled exception · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n" + ) + f.write(trace) + except Exception: + pass + # Stderr goes through to the TUI as a gateway.stderr Activity line — + # the first line here is what the user will see without opening any + # log files. Rest of the stack is still in the log for full context. + first = str(exc_value).strip().splitlines()[0] if str(exc_value).strip() else exc_type.__name__ + print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True) + # Chain to the default hook so the process still terminates normally. 
+ sys.__excepthook__(exc_type, exc_value, exc_tb) + + +sys.excepthook = _panic_hook + + +def _thread_panic_hook(args): + # threading.excepthook signature: SimpleNamespace(exc_type, exc_value, exc_traceback, thread) + import traceback + + trace = "".join( + traceback.format_exception(args.exc_type, args.exc_value, args.exc_traceback) + ) + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== thread exception · {time.strftime('%Y-%m-%d %H:%M:%S')} " + f"· thread={args.thread.name} ===\n" + ) + f.write(trace) + except Exception: + pass + first_line = ( + str(args.exc_value).strip().splitlines()[0] + if str(args.exc_value).strip() + else args.exc_type.__name__ + ) + print( + f"[gateway-crash] thread {args.thread.name} raised {args.exc_type.__name__}: {first_line}", + file=sys.stderr, + flush=True, + ) + + +threading.excepthook = _thread_panic_hook + try: from hermes_cli.banner import prefetch_update_check @@ -2126,7 +2195,43 @@ def _(rid, params: dict) -> dict: if rendered: payload["rendered"] = rendered _emit("message.complete", sid, payload) + + # CLI parity: when voice-mode TTS is on, speak the agent reply + # (cli.py:_voice_speak_response). Only the final text — tool + # calls / reasoning already stream separately and would be + # noisy to read aloud. 
+ if ( + status == "complete" + and isinstance(raw, str) + and raw.strip() + and _voice_tts_enabled() + ): + try: + from hermes_cli.voice import speak_text + + spoken = raw + threading.Thread( + target=speak_text, args=(spoken,), daemon=True + ).start() + except ImportError: + logger.warning("voice TTS skipped: hermes_cli.voice unavailable") + except Exception as e: + logger.warning("voice TTS dispatch failed: %s", e) except Exception as e: + import traceback + + trace = traceback.format_exc() + try: + os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True) + with open(_CRASH_LOG, "a", encoding="utf-8") as f: + f.write( + f"\n=== turn-dispatcher exception · " + f"{time.strftime('%Y-%m-%d %H:%M:%S')} · sid={sid} ===\n" + ) + f.write(trace) + except Exception: + pass + print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True) _emit("error", sid, {"message": str(e)}) finally: try: @@ -3177,12 +3282,16 @@ _fuzzy_cache: dict[str, tuple[float, list[str]]] = {} def _list_repo_files(root: str) -> list[str]: - """Return repo-relative file paths rooted at ``root``. + """Return file paths relative to ``root``. - Uses ``git ls-files`` when available (fast, honours .gitignore) and falls - back to a bounded ``os.walk`` that skips common vendor/build dirs. The - result is cached per-root for ``_FUZZY_CACHE_TTL_S`` so rapid keystrokes - don't respawn git processes. + Uses ``git ls-files`` from the repo top (resolved via + ``rev-parse --show-toplevel``) so the listing covers tracked + untracked + files anywhere in the repo, then converts each path back to be relative + to ``root``. Files outside ``root`` (parent directories of cwd, sibling + subtrees) are excluded so the picker stays scoped to what's reachable + from the gateway's cwd. Falls back to a bounded ``os.walk(root)`` when + ``root`` isn't inside a git repo. Result cached per-root for + ``_FUZZY_CACHE_TTL_S`` so rapid keystrokes don't respawn git processes. 
""" now = time.monotonic() with _fuzzy_cache_lock: @@ -3192,19 +3301,32 @@ def _list_repo_files(root: str) -> list[str]: files: list[str] = [] try: - result = subprocess.run( - ["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard"], - cwd=root, + top_result = subprocess.run( + ["git", "-C", root, "rev-parse", "--show-toplevel"], capture_output=True, timeout=2.0, check=False, ) - if result.returncode == 0: - files = [ - p - for p in result.stdout.decode("utf-8", "replace").split("\0") - if p - ][:_FUZZY_CACHE_MAX_FILES] + if top_result.returncode == 0: + top = top_result.stdout.decode("utf-8", "replace").strip() + list_result = subprocess.run( + ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"], + capture_output=True, + timeout=2.0, + check=False, + ) + if list_result.returncode == 0: + for p in list_result.stdout.decode("utf-8", "replace").split("\0"): + if not p: + continue + rel = os.path.relpath(os.path.join(top, p), root).replace(os.sep, "/") + # Skip parents/siblings of cwd — keep the picker scoped + # to root-and-below, matching Cmd-P workspace semantics. + if rel.startswith("../"): + continue + files.append(rel) + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break except (OSError, subprocess.TimeoutExpired): pass @@ -3351,12 +3473,11 @@ def _(rid, params: dict) -> dict: ranked.sort(key=lambda r: (r[0], len(r[1]), r[1])) tag = prefix_tag or "file" for _, rel, basename in ranked[:30]: - directory = os.path.dirname(rel) items.append( { "text": f"@{tag}:{rel}", "display": basename, - "meta": directory, + "meta": os.path.dirname(rel), } ) @@ -3631,43 +3752,155 @@ def _(rid, params: dict) -> dict: # ── Methods: voice ─────────────────────────────────────────────────── +_voice_sid_lock = threading.Lock() +_voice_event_sid: str = "" + + +def _voice_emit(event: str, payload: dict | None = None) -> None: + """Emit a voice event toward the session that most recently turned the + mode on. 
Voice is process-global (one microphone), so there's only ever + one sid to target; the TUI handler treats an empty sid as "active + session". Kept separate from _emit to make the lack of per-call sid + argument explicit.""" + with _voice_sid_lock: + sid = _voice_event_sid + _emit(event, sid, payload) + + +def _voice_mode_enabled() -> bool: + """Current voice-mode flag (runtime-only, CLI parity). + + cli.py initialises ``_voice_mode = False`` at startup and only flips + it via ``/voice on``; it never reads a persisted enable bit from + config.yaml. We match that: no config lookup, env var only. This + avoids the TUI auto-starting in REC the next time the user opens it + just because they happened to enable voice in a prior session. + """ + return os.environ.get("HERMES_VOICE", "").strip() == "1" + + +def _voice_tts_enabled() -> bool: + """Whether agent replies should be spoken back via TTS (runtime only).""" + return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1" + + @method("voice.toggle") def _(rid, params: dict) -> dict: + """CLI parity for the ``/voice`` slash command. + + Subcommands: + + * ``status`` — report mode + TTS flags (default when action is unknown). + * ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it + off also tears down any active continuous recording loop. Does NOT + start recording on its own; recording is driven by ``voice.record`` + (Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split. + * ``tts`` — toggle speech-output of agent replies. Requires mode on + (mirrors CLI's _toggle_voice_tts guard). 
+ """ action = params.get("action", "status") + if action == "status": - env = os.environ.get("HERMES_VOICE", "").strip() - if env in {"0", "1"}: - return _ok(rid, {"enabled": env == "1"}) - return _ok( - rid, - { - "enabled": bool( - _load_cfg().get("display", {}).get("voice_enabled", False) - ) - }, - ) + # Mirror CLI's _show_voice_status: include STT/TTS provider + # availability so the user can tell at a glance *why* voice mode + # isn't working ("STT provider: MISSING ..." is the common case). + payload: dict = { + "enabled": _voice_mode_enabled(), + "tts": _voice_tts_enabled(), + } + try: + from tools.voice_mode import check_voice_requirements + + reqs = check_voice_requirements() + payload["available"] = bool(reqs.get("available")) + payload["audio_available"] = bool(reqs.get("audio_available")) + payload["stt_available"] = bool(reqs.get("stt_available")) + payload["details"] = reqs.get("details") or "" + except Exception as e: + # check_voice_requirements pulls optional transcription deps — + # swallow so /voice status always returns something useful. + logger.warning("voice.toggle status: requirements probe failed: %s", e) + + return _ok(rid, payload) + if action in ("on", "off"): enabled = action == "on" + # Runtime-only flag (CLI parity) — no _write_config_key, so the + # next TUI launch starts with voice OFF instead of auto-REC from a + # persisted stale toggle. os.environ["HERMES_VOICE"] = "1" if enabled else "0" - _write_config_key("display.voice_enabled", enabled) - return _ok(rid, {"enabled": action == "on"}) + + if not enabled: + # Disabling the mode must tear the continuous loop down; the + # loop holds the microphone and would otherwise keep running. 
+ try: + from hermes_cli.voice import stop_continuous + + stop_continuous() + except ImportError: + pass + except Exception as e: + logger.warning("voice: stop_continuous failed during toggle off: %s", e) + + return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()}) + + if action == "tts": + if not _voice_mode_enabled(): + return _err(rid, 4014, "enable voice mode first: /voice on") + new_value = not _voice_tts_enabled() + # Runtime-only flag (CLI parity) — see voice.toggle on/off above. + os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0" + return _ok(rid, {"enabled": True, "tts": new_value}) + return _err(rid, 4013, f"unknown voice action: {action}") @method("voice.record") def _(rid, params: dict) -> dict: + """VAD-driven continuous record loop, CLI-parity. + + ``start`` turns on a VAD loop that emits ``voice.transcript`` events + for each detected utterance and auto-restarts for the next turn. + ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while- + recording branch clearing ``_voice_continuous``). Three consecutive + silent cycles stop the loop automatically and emit a + ``voice.transcript`` with ``no_speech_limit=True``. 
+ """ action = params.get("action", "start") + + if action not in {"start", "stop"}: + return _err(rid, 4019, f"unknown voice action: {action}") + try: if action == "start": - from hermes_cli.voice import start_recording + if not _voice_mode_enabled(): + return _err(rid, 4015, "voice mode is off — enable with /voice on") - start_recording() + with _voice_sid_lock: + global _voice_event_sid + _voice_event_sid = params.get("session_id") or _voice_event_sid + + from hermes_cli.voice import start_continuous + + voice_cfg = _load_cfg().get("voice", {}) + start_continuous( + on_transcript=lambda t: _voice_emit( + "voice.transcript", {"text": t} + ), + on_status=lambda s: _voice_emit("voice.status", {"state": s}), + on_silent_limit=lambda: _voice_emit( + "voice.transcript", {"no_speech_limit": True} + ), + silence_threshold=voice_cfg.get("silence_threshold", 200), + silence_duration=voice_cfg.get("silence_duration", 3.0), + ) return _ok(rid, {"status": "recording"}) - if action == "stop": - from hermes_cli.voice import stop_and_transcribe - return _ok(rid, {"text": stop_and_transcribe() or ""}) - return _err(rid, 4019, f"unknown voice action: {action}") + # action == "stop" + from hermes_cli.voice import stop_continuous + + stop_continuous() + return _ok(rid, {"status": "stopped"}) except ImportError: return _err( rid, 5025, "voice module not available — install audio dependencies" From 7c59e1a87114de5ec6d7c53253a992e4159998e5 Mon Sep 17 00:00:00 2001 From: Ari Lotter Date: Thu, 23 Apr 2026 23:28:49 -0400 Subject: [PATCH 023/220] fix(tui): keep FloatingOverlays visible when input is blocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FloatingOverlays (SessionPicker, ModelPicker, SkillsHub, pager, completions) was nested inside the !isBlocked guard in ComposerPane. When any overlay opened, isBlocked became true, which removed the entire composer box from the tree — including the overlay that was trying to render. 
This made /resume with no args appear to do nothing (the input line vanished and no picker appeared). Since 99d859ce (feat: refactor by splitting up app and doing proper state), isBlocked gated only the text input lines so that approval/clarify prompts and pickers rendered above a hidden composer. The regression happened in 408fc893 (fix(tui): tighten composer — status sits directly above input, overlays anchor to input) when FloatingOverlays was moved into the input row for anchoring but accidentally kept inside the !isBlocked guard. so here, we render FloatingOverlays outside the !isBlocked guard inside the same position:relative Box, so overlays stay visible even when text input is hidden. Only the actual input buffer lines and TextInput are gated now. Fixes: /resume, /history, /logs, /model, /skills, and completion dropdowns when blocked overlays are active. --- ui-tui/src/components/appLayout.tsx | 90 +++++++++++++++-------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx index 164ef5dd4a..19bc7736b7 100644 --- a/ui-tui/src/components/appLayout.tsx +++ b/ui-tui/src/components/appLayout.tsx @@ -185,56 +185,58 @@ const ComposerPane = memo(function ComposerPane({ - {!isBlocked && ( - - + + - {composer.inputBuf.map((line, i) => ( - - - {i === 0 ? `${ui.theme.brand.prompt} ` : ' '} + {!isBlocked && ( + <> + {composer.inputBuf.map((line, i) => ( + + + {i === 0 ? `${ui.theme.brand.prompt} ` : ' '} + + + {line || ' '} + + ))} + + + + {sh ? ( + $ + ) : ( + + {composer.inputBuf.length ? ' ' : `${ui.theme.brand.prompt} `} + + )} - {line || ' '} - - ))} + + {/* subtract NoSelect paddingX={1} (2 cols) + pw so wrap-ansi and cursorLayout agree */} + - - - {sh ? ( - $ - ) : ( - - {composer.inputBuf.length ? 
' ' : `${ui.theme.brand.prompt} `} - - )} - - - - {/* subtract NoSelect paddingX={1} (2 cols) + pw so wrap-ansi and cursorLayout agree */} - - - - + + + - - - )} + + )} + {!composer.empty && !ui.sid && ⚕ {ui.status}} From 379b2273d95566e8a69eadabceabc6ee08521b49 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:50:25 -0700 Subject: [PATCH 024/220] fix(mcp): route stdio subprocess stderr to log file, not user TTY (#14901) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MCP stdio servers' stderr was being dumped directly onto the user's terminal during hermes launch. Servers like FastMCP-based ones print a large ASCII banner at startup; slack-mcp-server emits JSON logs; etc. With prompt_toolkit / Rich rendering the TUI concurrently, these unsolicited writes corrupt the terminal state — hanging the session ~80% of the time for one user with Google Ads Tools + slack-mcp configured, forcing Ctrl+C and restart loops. Root cause: `stdio_client(server_params)` in tools/mcp_tool.py was called without `errlog=`, and the SDK's default is `sys.stderr` — i.e. the real parent-process stderr, which is the TTY. Fix: open a shared, append-mode log at $HERMES_HOME/logs/mcp-stderr.log (created once per process, line-buffered, real fd required by asyncio's subprocess machinery) and pass it as `errlog` to every stdio_client. Each server's spawn writes a timestamped header so the shared log stays readable when multiple servers are running. Falls back to /dev/null if the log file cannot be opened. Verified by E2E spawning a subprocess with the log fd as its stderr: banner lines land in the log file, nothing reaches the calling TTY. 
--- tools/mcp_tool.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 3ed612eda1..2b7f681ed6 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -78,12 +78,86 @@ import math import os import re import shutil +import sys import threading import time +from datetime import datetime from typing import Any, Dict, List, Optional logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Stdio subprocess stderr redirection +# --------------------------------------------------------------------------- +# +# The MCP SDK's ``stdio_client(server, errlog=sys.stderr)`` defaults the +# subprocess stderr stream to the parent process's real stderr, i.e. the +# user's TTY. That means any MCP server we spawn at startup (FastMCP +# banners, slack-mcp-server JSON startup logs, etc.) writes directly onto +# the terminal while prompt_toolkit / Rich is rendering the TUI — which +# corrupts the display and can hang the session. +# +# Instead we redirect every stdio MCP subprocess's stderr into a shared +# per-profile log file (~/.hermes/logs/mcp-stderr.log), tagged with the +# server name so individual servers remain debuggable. +# +# Fallback is os.devnull if opening the log file fails for any reason. + +_mcp_stderr_log_fh: Optional[Any] = None +_mcp_stderr_log_lock = threading.Lock() + + +def _get_mcp_stderr_log() -> Any: + """Return a shared append-mode file handle for MCP subprocess stderr. + + Opened once per process and reused for every stdio server. Must have a + real OS-level file descriptor (``fileno()``) because asyncio's subprocess + machinery wires the child's stderr directly to that fd. Falls back to + ``/dev/null`` if opening the log file fails. 
+ """ + global _mcp_stderr_log_fh + with _mcp_stderr_log_lock: + if _mcp_stderr_log_fh is not None: + return _mcp_stderr_log_fh + try: + from hermes_constants import get_hermes_home + log_dir = get_hermes_home() / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / "mcp-stderr.log" + # Line-buffered so server output lands on disk promptly; errors= + # "replace" tolerates garbled binary output from misbehaving + # servers. + fh = open(log_path, "a", encoding="utf-8", errors="replace", buffering=1) + # Sanity-check: confirm a real fd is available before we commit. + fh.fileno() + _mcp_stderr_log_fh = fh + except Exception as exc: # pragma: no cover — best-effort fallback + logger.debug("Failed to open MCP stderr log, using devnull: %s", exc) + try: + _mcp_stderr_log_fh = open(os.devnull, "w", encoding="utf-8") + except Exception: + # Last resort: the real stderr. Not ideal for TUI users but + # it matches pre-fix behavior. + _mcp_stderr_log_fh = sys.stderr + return _mcp_stderr_log_fh + + +def _write_stderr_log_header(server_name: str) -> None: + """Write a human-readable session marker before launching a server. + + Gives operators a way to find each server's output in the shared + ``mcp-stderr.log`` file without needing per-line prefixes (which would + require a pipe + reader thread and complicate shutdown). + """ + fh = _get_mcp_stderr_log() + try: + ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + fh.write(f"\n===== [{ts}] starting MCP server '{server_name}' =====\n") + fh.flush() + except Exception: + pass + # --------------------------------------------------------------------------- # Graceful import -- MCP SDK is an optional dependency # --------------------------------------------------------------------------- @@ -962,7 +1036,13 @@ class MCPServerTask: # Snapshot child PIDs before spawning so we can track the new one. 
pids_before = _snapshot_child_pids() - async with stdio_client(server_params) as (read_stream, write_stream): + # Redirect subprocess stderr into a shared log file so MCP servers + # (FastMCP banners, slack-mcp startup JSON, etc.) don't dump onto + # the user's TTY and corrupt the TUI. Preserves debuggability via + # ~/.hermes/logs/mcp-stderr.log. + _write_stderr_log_header(self.name) + _errlog = _get_mcp_stderr_log() + async with stdio_client(server_params, errlog=_errlog) as (read_stream, write_stream): # Capture the newly spawned subprocess PID for force-kill cleanup. new_pids = _snapshot_child_pids() - pids_before if new_pids: From 983bbe2d40f7b23b263c35d98e7454fcdd474cf1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:51:19 -0700 Subject: [PATCH 025/220] feat(skills): add design-md skill for Google's DESIGN.md spec (#14876) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(config): make tool output truncation limits configurable Port from anomalyco/opencode#23770: expose a new `tool_output` config section so users can tune the hardcoded truncation caps that apply to terminal output and read_file pagination. Three knobs under `tool_output`: - max_bytes (default 50_000) — terminal stdout/stderr cap - max_lines (default 2000) — read_file pagination cap - max_line_length (default 2000) — per-line cap in line-numbered view All three keep their existing hardcoded values as defaults, so behaviour is unchanged when the section is absent. Power users on big-context models can raise them; small-context local models can lower them. Implementation: - New `tools/tool_output_limits.py` reads the section with defensive fallback (missing/invalid values → defaults, never raises). - `tools/terminal_tool.py` MAX_OUTPUT_CHARS now comes from get_max_bytes(). - `tools/file_operations.py` normalize_read_pagination() and _add_line_numbers() now pull the limits at call time. 
- `hermes_cli/config.py` DEFAULT_CONFIG gains the `tool_output` section so `hermes setup` writes defaults into fresh configs. - Docs page `user-guide/configuration.md` gains a "Tool Output Truncation Limits" section with large-context and small-context example configs. Tests (18 new in tests/tools/test_tool_output_limits.py): - Default resolution with missing / malformed / non-dict config. - Full and partial user overrides. - Coercion of bad values (None, negative, wrong type, str int). - Shortcut accessors delegate correctly. - DEFAULT_CONFIG exposes the section with the right defaults. - Integration: normalize_read_pagination clamps to the configured max_lines. * feat(skills): add design-md skill for Google's DESIGN.md spec Built-in skill under skills/creative/ that teaches the agent to author, lint, diff, and export DESIGN.md files — Google's open-source (Apache-2.0) format for describing a visual identity to coding agents. Covers: - YAML front matter + markdown body anatomy - Full token schema (colors, typography, rounded, spacing, components) - Canonical section order + duplicate-heading rejection - Component property whitelist + variants-as-siblings pattern - CLI workflow via 'npx @google/design.md' (lint/diff/export/spec) - Lint rule reference including WCAG contrast checks - Common YAML pitfalls (quoted hex, negative dimensions, dotted refs) - Starter template at templates/starter.md Package verified live on npm (@google/design.md@0.1.1). 
--- hermes_cli/config.py | 22 +- skills/creative/design-md/SKILL.md | 196 ++++++++++++++++++ .../creative/design-md/templates/starter.md | 99 +++++++++ tests/tools/test_tool_output_limits.py | 152 ++++++++++++++ tools/file_operations.py | 13 +- tools/terminal_tool.py | 3 +- tools/tool_output_limits.py | 92 ++++++++ website/docs/user-guide/configuration.md | 29 +++ 8 files changed, 601 insertions(+), 5 deletions(-) create mode 100644 skills/creative/design-md/SKILL.md create mode 100644 skills/creative/design-md/templates/starter.md create mode 100644 tests/tools/test_tool_output_limits.py create mode 100644 tools/tool_output_limits.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c578ded969..282327c840 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -486,7 +486,27 @@ DEFAULT_CONFIG = { # exceed this are rejected with guidance to use offset+limit. # 100K chars ≈ 25–35K tokens across typical tokenisers. "file_read_max_chars": 100_000, - + + # Tool-output truncation thresholds. When terminal output or a + # single read_file page exceeds these limits, Hermes truncates the + # payload sent to the model (keeping head + tail for terminal, + # enforcing pagination for read_file). Tuning these trades context + # footprint against how much raw output the model can see in one + # shot. Ported from anomalyco/opencode PR #23770. + # + # - max_bytes: terminal_tool output cap, in chars + # (default 50_000 ≈ 12-15K tokens). + # - max_lines: read_file pagination cap — the maximum `limit` + # a single read_file call can request before + # being clamped (default 2000). + # - max_line_length: per-line cap applied when read_file emits a + # line-numbered view (default 2000 chars). 
+ "tool_output": { + "max_bytes": 50_000, + "max_lines": 2000, + "max_line_length": 2000, + }, + "compression": { "enabled": True, "threshold": 0.50, # compress when context usage exceeds this ratio diff --git a/skills/creative/design-md/SKILL.md b/skills/creative/design-md/SKILL.md new file mode 100644 index 0000000000..36c4138db9 --- /dev/null +++ b/skills/creative/design-md/SKILL.md @@ -0,0 +1,196 @@ +--- +name: design-md +description: Author, validate, diff, and export DESIGN.md files — Google's open-source format spec that gives coding agents a persistent, structured understanding of a design system (tokens + rationale in one file). Use when building a design system, porting style rules between projects, generating UI with consistent brand, or auditing accessibility/contrast. +version: 1.0.0 +author: Hermes Agent +license: MIT +metadata: + hermes: + tags: [design, design-system, tokens, ui, accessibility, wcag, tailwind, dtcg, google] + related_skills: [popular-web-designs, excalidraw, architecture-diagram] +--- + +# DESIGN.md Skill + +DESIGN.md is Google's open spec (Apache-2.0, `google-labs-code/design.md`) for +describing a visual identity to coding agents. One file combines: + +- **YAML front matter** — machine-readable design tokens (normative values) +- **Markdown body** — human-readable rationale, organized into canonical sections + +Tokens give exact values. Prose tells agents *why* those values exist and how to +apply them. The CLI (`npx @google/design.md`) lints structure + WCAG contrast, +diffs versions for regressions, and exports to Tailwind or W3C DTCG JSON. 
+ +## When to use this skill + +- User asks for a DESIGN.md file, design tokens, or a design system spec +- User wants consistent UI/brand across multiple projects or tools +- User pastes an existing DESIGN.md and asks to lint, diff, export, or extend it +- User asks to port a style guide into a format agents can consume +- User wants contrast / WCAG accessibility validation on their color palette + +For purely visual inspiration or layout examples, use `popular-web-designs` +instead. This skill is for the *formal spec file* itself. + +## File anatomy + +```md +--- +version: alpha +name: Heritage +description: Architectural minimalism meets journalistic gravitas. +colors: + primary: "#1A1C1E" + secondary: "#6C7278" + tertiary: "#B8422E" + neutral: "#F7F5F2" +typography: + h1: + fontFamily: Public Sans + fontSize: 3rem + fontWeight: 700 + lineHeight: 1.1 + letterSpacing: "-0.02em" + body-md: + fontFamily: Public Sans + fontSize: 1rem +rounded: + sm: 4px + md: 8px + lg: 16px +spacing: + sm: 8px + md: 16px + lg: 24px +components: + button-primary: + backgroundColor: "{colors.tertiary}" + textColor: "#FFFFFF" + rounded: "{rounded.sm}" + padding: 12px + button-primary-hover: + backgroundColor: "{colors.primary}" +--- + +## Overview + +Architectural Minimalism meets Journalistic Gravitas... + +## Colors + +- **Primary (#1A1C1E):** Deep ink for headlines and core text. +- **Tertiary (#B8422E):** "Boston Clay" — the sole driver for interaction. + +## Typography + +Public Sans for everything except small all-caps labels... + +## Components + +`button-primary` is the only high-emphasis action on a page... 
+``` + +## Token types + +| Type | Format | Example | +|------|--------|---------| +| Color | `#` + hex (sRGB) | `"#1A1C1E"` | +| Dimension | number + unit (`px`, `em`, `rem`) | `48px`, `-0.02em` | +| Token reference | `{path.to.token}` | `{colors.primary}` | +| Typography | object with `fontFamily`, `fontSize`, `fontWeight`, `lineHeight`, `letterSpacing`, `fontFeature`, `fontVariation` | see above | + +Component property whitelist: `backgroundColor`, `textColor`, `typography`, +`rounded`, `padding`, `size`, `height`, `width`. Variants (hover, active, +pressed) are **separate component entries** with related key names +(`button-primary-hover`), not nested. + +## Canonical section order + +Sections are optional, but present ones MUST appear in this order. Duplicate +headings reject the file. + +1. Overview (alias: Brand & Style) +2. Colors +3. Typography +4. Layout (alias: Layout & Spacing) +5. Elevation & Depth (alias: Elevation) +6. Shapes +7. Components +8. Do's and Don'ts + +Unknown sections are preserved, not errored. Unknown token names are accepted +if the value type is valid. Unknown component properties produce a warning. + +## Workflow: authoring a new DESIGN.md + +1. **Ask the user** (or infer) the brand tone, accent color, and typography + direction. If they provided a site, image, or vibe, translate it to the + token shape above. +2. **Write `DESIGN.md`** in their project root using `write_file`. Always + include `name:` and `colors:`; other sections optional but encouraged. +3. **Use token references** (`{colors.primary}`) in the `components:` section + instead of re-typing hex values. Keeps the palette single-source. +4. **Lint it** (see below). Fix any broken references or WCAG failures + before returning. +5. **If the user has an existing project**, also write Tailwind or DTCG + exports next to the file (`tailwind.theme.json`, `tokens.json`). + +## Workflow: lint / diff / export + +The CLI is `@google/design.md` (Node). 
Use `npx` — no global install needed.
+
+```bash
+# Validate structure + token references + WCAG contrast
+npx -y @google/design.md lint DESIGN.md
+
+# Compare two versions, fail on regression (exit 1 = regression)
+npx -y @google/design.md diff DESIGN.md DESIGN-v2.md
+
+# Export to Tailwind theme JSON
+npx -y @google/design.md export --format tailwind DESIGN.md > tailwind.theme.json
+
+# Export to W3C DTCG (Design Tokens Format Module) JSON
+npx -y @google/design.md export --format dtcg DESIGN.md > tokens.json
+
+# Print the spec itself — useful when injecting into an agent prompt
+npx -y @google/design.md spec --rules-only --format json
+```
+
+All commands accept `-` for stdin. `lint` returns exit 1 on errors. Use the
+`--format json` flag and parse the output if you need to report findings
+structurally.
+
+### Lint rule reference (what the 7 rules catch)
+
+- `broken-ref` (error) — `{colors.missing}` points at a non-existent token
+- `duplicate-section` (error) — same `## Heading` appears twice
+- `invalid-color`, `invalid-dimension`, `invalid-typography` (error)
+- `wcag-contrast` (warning/info) — component `textColor` vs `backgroundColor`
+  ratio against WCAG AA (4.5:1) and AAA (7:1)
+- `unknown-component-property` (warning) — outside the whitelist above
+
+When the user cares about accessibility, call this out explicitly in your
+summary — WCAG findings are the most load-bearing reason to use the CLI.
+
+## Pitfalls
+
+- **Don't nest component variants.** `button-primary.hover` is wrong;
+  `button-primary-hover` as a sibling key is right.
+- **Hex colors must be quoted strings.** Unquoted, YAML treats `#` as the
+  start of a comment and silently drops the rest of the value.
+- **Negative dimensions need quotes too.** `letterSpacing: -0.02em` can be
+  misread by YAML parsers — write `letterSpacing: "-0.02em"`.
+- **Section order is enforced.** If the user gives you prose in a random order,
+  reorder it to match the canonical list before saving.
+- **`version: alpha` is the current spec version** (as of Apr 2026). The spec + is marked alpha — watch for breaking changes. +- **Token references resolve by dotted path.** `{colors.primary}` works; + `{primary}` does not. + +## Spec source of truth + +- Repo: https://github.com/google-labs-code/design.md (Apache-2.0) +- CLI: `@google/design.md` on npm +- License of generated DESIGN.md files: whatever the user's project uses; + the spec itself is Apache-2.0. diff --git a/skills/creative/design-md/templates/starter.md b/skills/creative/design-md/templates/starter.md new file mode 100644 index 0000000000..03d54785fc --- /dev/null +++ b/skills/creative/design-md/templates/starter.md @@ -0,0 +1,99 @@ +--- +version: alpha +name: MyBrand +description: One-sentence description of the visual identity. +colors: + primary: "#0F172A" + secondary: "#64748B" + tertiary: "#2563EB" + neutral: "#F8FAFC" + on-primary: "#FFFFFF" + on-tertiary: "#FFFFFF" +typography: + h1: + fontFamily: Inter + fontSize: 3rem + fontWeight: 700 + lineHeight: 1.1 + letterSpacing: "-0.02em" + h2: + fontFamily: Inter + fontSize: 2rem + fontWeight: 600 + lineHeight: 1.2 + body-md: + fontFamily: Inter + fontSize: 1rem + lineHeight: 1.5 + label-caps: + fontFamily: Inter + fontSize: 0.75rem + fontWeight: 600 + letterSpacing: "0.08em" +rounded: + sm: 4px + md: 8px + lg: 16px + full: 9999px +spacing: + xs: 4px + sm: 8px + md: 16px + lg: 24px + xl: 48px +components: + button-primary: + backgroundColor: "{colors.tertiary}" + textColor: "{colors.on-tertiary}" + rounded: "{rounded.sm}" + padding: 12px + button-primary-hover: + backgroundColor: "{colors.primary}" + textColor: "{colors.on-primary}" + card: + backgroundColor: "{colors.neutral}" + textColor: "{colors.primary}" + rounded: "{rounded.md}" + padding: 24px +--- + +## Overview + +Describe the voice and feel of the brand in one or two paragraphs. What mood +does it evoke? What emotional response should a user have on first impression? 
+ +## Colors + +- **Primary ({colors.primary}):** Core text, headlines, high-emphasis surfaces. +- **Secondary ({colors.secondary}):** Supporting text, borders, metadata. +- **Tertiary ({colors.tertiary}):** Interaction driver — buttons, links, + selected states. Use sparingly to preserve its signal. +- **Neutral ({colors.neutral}):** Page background and surface fills. + +## Typography + +Inter for everything. Weight and size carry hierarchy, not font family. Tight +letter-spacing on display sizes; default tracking on body. + +## Layout + +Spacing scale is a 4px baseline. Use `md` (16px) for intra-component gaps, +`lg` (24px) for inter-component gaps, `xl` (48px) for section breaks. + +## Shapes + +Rounded corners are modest — `sm` on interactive elements, `md` on cards. +`full` is reserved for avatars and pill badges. + +## Components + +- `button-primary` is the only high-emphasis action per screen. +- `card` is the default surface for grouped content. No shadow by default. + +## Do's and Don'ts + +- **Do** use token references (`{colors.primary}`) instead of literal hex in + component definitions. +- **Don't** introduce colors outside the palette — extend the palette first. +- **Don't** nest component variants. `button-primary-hover` is a sibling, + not a child. diff --git a/tests/tools/test_tool_output_limits.py b/tests/tools/test_tool_output_limits.py new file mode 100644 index 0000000000..19fa3fc05a --- /dev/null +++ b/tests/tools/test_tool_output_limits.py @@ -0,0 +1,152 @@ +"""Tests for tools.tool_output_limits. + +Covers: +1. Default values when no config is provided. +2. Config override picks up user-supplied max_bytes / max_lines / + max_line_length. +3. Malformed values (None, negative, wrong type) fall back to defaults + rather than raising. +4. Integration: the helpers return what the terminal_tool and + file_operations call paths will actually consume. 
+ +Port-tracking: anomalyco/opencode PR #23770 +(feat(truncate): allow configuring tool output truncation limits). +""" + +from __future__ import annotations + +from unittest.mock import patch + +import pytest + +from tools import tool_output_limits as tol + + +class TestDefaults: + def test_defaults_match_previous_hardcoded_values(self): + assert tol.DEFAULT_MAX_BYTES == 50_000 + assert tol.DEFAULT_MAX_LINES == 2000 + assert tol.DEFAULT_MAX_LINE_LENGTH == 2000 + + def test_get_limits_returns_defaults_when_config_missing(self): + with patch("hermes_cli.config.load_config", return_value={}): + limits = tol.get_tool_output_limits() + assert limits == { + "max_bytes": tol.DEFAULT_MAX_BYTES, + "max_lines": tol.DEFAULT_MAX_LINES, + "max_line_length": tol.DEFAULT_MAX_LINE_LENGTH, + } + + def test_get_limits_returns_defaults_when_config_not_a_dict(self): + # load_config should always return a dict but be defensive anyway. + with patch("hermes_cli.config.load_config", return_value="not a dict"): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES + + def test_get_limits_returns_defaults_when_load_config_raises(self): + def _boom(): + raise RuntimeError("boom") + + with patch("hermes_cli.config.load_config", side_effect=_boom): + limits = tol.get_tool_output_limits() + assert limits["max_lines"] == tol.DEFAULT_MAX_LINES + + +class TestOverrides: + def test_user_config_overrides_all_three(self): + cfg = { + "tool_output": { + "max_bytes": 100_000, + "max_lines": 5000, + "max_line_length": 4096, + } + } + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits == { + "max_bytes": 100_000, + "max_lines": 5000, + "max_line_length": 4096, + } + + def test_partial_override_preserves_other_defaults(self): + cfg = {"tool_output": {"max_bytes": 200_000}} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert 
limits["max_bytes"] == 200_000 + assert limits["max_lines"] == tol.DEFAULT_MAX_LINES + assert limits["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH + + def test_section_not_a_dict_falls_back(self): + cfg = {"tool_output": "nonsense"} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES + + +class TestCoercion: + @pytest.mark.parametrize("bad", [None, "not a number", -1, 0, [], {}]) + def test_invalid_values_fall_back_to_defaults(self, bad): + cfg = {"tool_output": {"max_bytes": bad, "max_lines": bad, "max_line_length": bad}} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == tol.DEFAULT_MAX_BYTES + assert limits["max_lines"] == tol.DEFAULT_MAX_LINES + assert limits["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH + + def test_string_integer_is_coerced(self): + cfg = {"tool_output": {"max_bytes": "75000"}} + with patch("hermes_cli.config.load_config", return_value=cfg): + limits = tol.get_tool_output_limits() + assert limits["max_bytes"] == 75_000 + + +class TestShortcuts: + def test_individual_accessors_delegate_to_get_tool_output_limits(self): + cfg = { + "tool_output": { + "max_bytes": 111, + "max_lines": 222, + "max_line_length": 333, + } + } + with patch("hermes_cli.config.load_config", return_value=cfg): + assert tol.get_max_bytes() == 111 + assert tol.get_max_lines() == 222 + assert tol.get_max_line_length() == 333 + + +class TestDefaultConfigHasSection: + """The DEFAULT_CONFIG in hermes_cli.config must expose tool_output so + that ``hermes setup`` and default installs stay in sync with the + helpers here.""" + + def test_default_config_contains_tool_output_section(self): + from hermes_cli.config import DEFAULT_CONFIG + assert "tool_output" in DEFAULT_CONFIG + section = DEFAULT_CONFIG["tool_output"] + assert isinstance(section, dict) + assert section["max_bytes"] == 
tol.DEFAULT_MAX_BYTES + assert section["max_lines"] == tol.DEFAULT_MAX_LINES + assert section["max_line_length"] == tol.DEFAULT_MAX_LINE_LENGTH + + +class TestIntegrationReadPagination: + """normalize_read_pagination uses get_max_lines() — verify the plumbing.""" + + def test_pagination_limit_clamped_by_config_value(self): + from tools.file_operations import normalize_read_pagination + cfg = {"tool_output": {"max_lines": 50}} + with patch("hermes_cli.config.load_config", return_value=cfg): + offset, limit = normalize_read_pagination(offset=1, limit=1000) + # limit should have been clamped to 50 (the configured max_lines) + assert limit == 50 + assert offset == 1 + + def test_pagination_default_when_config_missing(self): + from tools.file_operations import normalize_read_pagination + with patch("hermes_cli.config.load_config", return_value={}): + offset, limit = normalize_read_pagination(offset=10, limit=100000) + # Clamped to default MAX_LINES (2000). + assert limit == tol.DEFAULT_MAX_LINES + assert offset == 10 diff --git a/tools/file_operations.py b/tools/file_operations.py index 7e75578b2b..9e0b44c145 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -292,10 +292,15 @@ def normalize_read_pagination(offset: Any = DEFAULT_READ_OFFSET, Tool schemas declare minimum/maximum values, but not every caller or provider enforces schemas before dispatch. Clamp here so invalid values cannot leak into sed ranges like ``0,-1p``. + + The upper bound on ``limit`` comes from ``tool_output.max_lines`` in + config.yaml (defaults to the module-level ``MAX_LINES`` constant). 
""" + from tools.tool_output_limits import get_max_lines + max_lines = get_max_lines() normalized_offset = max(1, _coerce_int(offset, DEFAULT_READ_OFFSET)) normalized_limit = _coerce_int(limit, DEFAULT_READ_LIMIT) - normalized_limit = max(1, min(normalized_limit, MAX_LINES)) + normalized_limit = max(1, min(normalized_limit, max_lines)) return normalized_offset, normalized_limit @@ -414,12 +419,14 @@ class ShellFileOperations(FileOperations): def _add_line_numbers(self, content: str, start_line: int = 1) -> str: """Add line numbers to content in LINE_NUM|CONTENT format.""" + from tools.tool_output_limits import get_max_line_length + max_line_length = get_max_line_length() lines = content.split('\n') numbered = [] for i, line in enumerate(lines, start=start_line): # Truncate long lines - if len(line) > MAX_LINE_LENGTH: - line = line[:MAX_LINE_LENGTH] + "... [truncated]" + if len(line) > max_line_length: + line = line[:max_line_length] + "... [truncated]" numbered.append(f"{i:6d}|{line}") return '\n'.join(numbered) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 22c8dcbc68..b288d4ad9b 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1805,7 +1805,8 @@ def terminal_tool( pass # Truncate output if too long, keeping both head and tail - MAX_OUTPUT_CHARS = 50000 + from tools.tool_output_limits import get_max_bytes + MAX_OUTPUT_CHARS = get_max_bytes() if len(output) > MAX_OUTPUT_CHARS: head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early) tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output) diff --git a/tools/tool_output_limits.py b/tools/tool_output_limits.py new file mode 100644 index 0000000000..fd24a2da35 --- /dev/null +++ b/tools/tool_output_limits.py @@ -0,0 +1,92 @@ +"""Configurable tool-output truncation limits. + +Ported from anomalyco/opencode PR #23770 (``feat(truncate): allow +configuring tool output truncation limits``). 
+ +OpenCode hardcoded ``MAX_LINES = 2000`` and ``MAX_BYTES = 50 * 1024`` +as tool-output truncation thresholds. Hermes-agent had the same +hardcoded constants in two places: + +* ``tools/terminal_tool.py`` — ``MAX_OUTPUT_CHARS = 50000`` (terminal + stdout/stderr cap) +* ``tools/file_operations.py`` — ``MAX_LINES = 2000`` / + ``MAX_LINE_LENGTH = 2000`` (read_file pagination cap + per-line cap) + +This module centralises those values behind a single config section +(``tool_output`` in ``config.yaml``) so power users can tune them +without patching the source. The existing hardcoded numbers remain as +defaults, so behaviour is unchanged when the config key is absent. + +Example ``config.yaml``:: + + tool_output: + max_bytes: 100000 # terminal output cap (chars) + max_lines: 5000 # read_file pagination + truncation cap + max_line_length: 2000 # per-line length cap before '... [truncated]' + +The limits reader is defensive: any error (missing config file, invalid +value type, etc.) falls back to the built-in defaults so tools never +fail because of a malformed config. +""" + +from __future__ import annotations + +from typing import Any, Dict + +# Hardcoded defaults — these match the pre-existing values, so adding +# this module is behaviour-preserving for users who don't set +# ``tool_output`` in config.yaml. +DEFAULT_MAX_BYTES = 50_000 # terminal_tool.MAX_OUTPUT_CHARS +DEFAULT_MAX_LINES = 2000 # file_operations.MAX_LINES +DEFAULT_MAX_LINE_LENGTH = 2000 # file_operations.MAX_LINE_LENGTH + + +def _coerce_positive_int(value: Any, default: int) -> int: + """Return ``value`` as a positive int, or ``default`` on any issue.""" + try: + iv = int(value) + except (TypeError, ValueError): + return default + if iv <= 0: + return default + return iv + + +def get_tool_output_limits() -> Dict[str, int]: + """Return resolved tool-output limits, reading ``tool_output`` from config. + + Keys: ``max_bytes``, ``max_lines``, ``max_line_length``. 
Missing or + invalid entries fall through to the ``DEFAULT_*`` constants. This + function NEVER raises. + """ + try: + from hermes_cli.config import load_config + cfg = load_config() or {} + section = cfg.get("tool_output") if isinstance(cfg, dict) else None + if not isinstance(section, dict): + section = {} + except Exception: + section = {} + + return { + "max_bytes": _coerce_positive_int(section.get("max_bytes"), DEFAULT_MAX_BYTES), + "max_lines": _coerce_positive_int(section.get("max_lines"), DEFAULT_MAX_LINES), + "max_line_length": _coerce_positive_int( + section.get("max_line_length"), DEFAULT_MAX_LINE_LENGTH + ), + } + + +def get_max_bytes() -> int: + """Shortcut for terminal-tool callers that only need the byte cap.""" + return get_tool_output_limits()["max_bytes"] + + +def get_max_lines() -> int: + """Shortcut for file-ops callers that only need the line cap.""" + return get_tool_output_limits()["max_lines"] + + +def get_max_line_length() -> int: + """Shortcut for file-ops callers that only need the per-line cap.""" + return get_tool_output_limits()["max_line_length"] diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 055fdb3d3e..420ca14682 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -431,6 +431,35 @@ file_read_max_chars: 30000 The agent also deduplicates file reads automatically — if the same file region is read twice and the file hasn't changed, a lightweight stub is returned instead of re-sending the content. This resets on context compression so the agent can re-read files after their content is summarized away. 
+## Tool Output Truncation Limits + +Three related caps control how much raw output a tool can return before Hermes truncates it: + +```yaml +tool_output: + max_bytes: 50000 # terminal output cap (chars) + max_lines: 2000 # read_file pagination cap + max_line_length: 2000 # per-line cap in read_file's line-numbered view +``` + +- **`max_bytes`** — When a `terminal` command produces more than this many characters of combined stdout/stderr, Hermes keeps the first 40% and last 60% and inserts a `[OUTPUT TRUNCATED]` notice between them. Default `50000` (≈12-15K tokens across typical tokenisers). +- **`max_lines`** — Upper bound on the `limit` parameter of a single `read_file` call. Requests above this are clamped so a single read can't flood the context window. Default `2000`. +- **`max_line_length`** — Per-line cap applied when `read_file` emits the line-numbered view. Lines longer than this are truncated to this many chars followed by `... [truncated]`. Default `2000`. + +Raise the limits on models with large context windows that can afford more raw output per call. 
Lower them for small-context models to keep tool results compact: + +```yaml +# Large context model (200K+) +tool_output: + max_bytes: 150000 + max_lines: 5000 + +# Small local model (16K context) +tool_output: + max_bytes: 20000 + max_lines: 500 +``` + ## Git Worktree Isolation Enable isolated git worktrees for running multiple agents in parallel on the same repo: From e5d2815b4167dd873e1fdf6deb7e4be87455abdd Mon Sep 17 00:00:00 2001 From: Austin Pickett Date: Wed, 22 Apr 2026 23:25:17 -0400 Subject: [PATCH 026/220] feat: add sidebar --- ui-tui/package-lock.json | 40 +- web/package-lock.json | 34 +- web/package.json | 2 +- web/src/App.tsx | 691 ++++++++++++++------- web/src/components/DeleteConfirmDialog.tsx | 40 ++ web/src/components/PlatformsCard.tsx | 97 +++ web/src/components/SidebarFooter.tsx | 40 ++ web/src/components/SidebarStatusStrip.tsx | 70 +++ web/src/components/ThemeSwitcher.tsx | 13 +- web/src/components/ui/confirm-dialog.tsx | 138 ++++ web/src/components/ui/segmented.tsx | 80 +++ web/src/components/ui/switch.tsx | 3 + web/src/contexts/PageHeaderProvider.tsx | 89 +++ web/src/contexts/SystemActions.tsx | 120 ++++ web/src/contexts/page-header-context.ts | 12 + web/src/contexts/system-actions-context.ts | 18 + web/src/contexts/usePageHeader.ts | 10 + web/src/contexts/useSystemActions.ts | 15 + web/src/hooks/useConfirmDelete.ts | 41 ++ web/src/hooks/useSidebarStatus.ts | 27 + web/src/i18n/en.ts | 57 +- web/src/i18n/types.ts | 66 +- web/src/i18n/zh.ts | 53 +- web/src/index.css | 14 + web/src/lib/api.ts | 7 +- web/src/lib/resolve-page-title.ts | 31 + web/src/main.tsx | 5 +- web/src/pages/AnalyticsPage.tsx | 70 ++- web/src/pages/ConfigPage.tsx | 144 +++-- web/src/pages/CronPage.tsx | 60 +- web/src/pages/EnvPage.tsx | 88 ++- web/src/pages/LogsPage.tsx | 303 ++++----- web/src/pages/SessionsPage.tsx | 325 ++++++++-- web/src/pages/SkillsPage.tsx | 288 +++++---- web/src/pages/StatusPage.tsx | 614 ------------------ web/src/plugins/PluginPage.tsx | 64 ++ 
web/src/plugins/index.ts | 1 + web/src/plugins/registry.ts | 16 + web/src/plugins/types.ts | 21 +- web/src/plugins/usePlugins.ts | 49 +- web/vite.config.ts | 4 + 41 files changed, 2469 insertions(+), 1391 deletions(-) create mode 100644 web/src/components/DeleteConfirmDialog.tsx create mode 100644 web/src/components/PlatformsCard.tsx create mode 100644 web/src/components/SidebarFooter.tsx create mode 100644 web/src/components/SidebarStatusStrip.tsx create mode 100644 web/src/components/ui/confirm-dialog.tsx create mode 100644 web/src/components/ui/segmented.tsx create mode 100644 web/src/contexts/PageHeaderProvider.tsx create mode 100644 web/src/contexts/SystemActions.tsx create mode 100644 web/src/contexts/page-header-context.ts create mode 100644 web/src/contexts/system-actions-context.ts create mode 100644 web/src/contexts/usePageHeader.ts create mode 100644 web/src/contexts/useSystemActions.ts create mode 100644 web/src/hooks/useConfirmDelete.ts create mode 100644 web/src/hooks/useSidebarStatus.ts create mode 100644 web/src/lib/resolve-page-title.ts delete mode 100644 web/src/pages/StatusPage.tsx create mode 100644 web/src/plugins/PluginPage.tsx diff --git a/ui-tui/package-lock.json b/ui-tui/package-lock.json index 46c83d195d..522b416e58 100644 --- a/ui-tui/package-lock.json +++ b/ui-tui/package-lock.json @@ -89,6 +89,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -318,31 +319,6 @@ "node": ">=6.9.0" } }, - "node_modules/@emnapi/core": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz", - "integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==", - "dev": true, - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "@emnapi/wasi-threads": 
"1.2.1", - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/runtime": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz", - "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==", - "dev": true, - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, "node_modules/@emnapi/wasi-threads": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", @@ -1509,6 +1485,7 @@ "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.19.0" } @@ -1519,6 +1496,7 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1529,6 +1507,7 @@ "integrity": "sha512-eSkwoemjo76bdXl2MYqtxg51HNwUSkWfODUOQ3PaTLZGh9uIWWFZIjyjaJnex7wXDu+TRx+ATsnSxdN9YWfRTQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.12.2", "@typescript-eslint/scope-manager": "8.58.1", @@ -1558,6 +1537,7 @@ "integrity": "sha512-gGkiNMPqerb2cJSVcruigx9eHBlLG14fSdPdqMoOcBfh+vvn4iCq2C8MzUB89PrxOXk0y3GZ1yIWb9aOzL93bw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.58.1", "@typescript-eslint/types": "8.58.1", @@ -1875,6 +1855,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2210,6 +2191,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -2895,6 +2877,7 @@ "integrity": 
"sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -3790,6 +3773,7 @@ "resolved": "https://registry.npmjs.org/ink-text-input/-/ink-text-input-6.0.0.tgz", "integrity": "sha512-Fw64n7Yha5deb1rHY137zHTAbSTNelUKuB5Kkk2HACXEtwIHBCf9OH2tP/LQ9fRYTl1F0dZgbW0zPnZk6FA9Lw==", "license": "MIT", + "peer": true, "dependencies": { "chalk": "^5.3.0", "type-fest": "^4.18.2" @@ -5146,6 +5130,7 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5245,6 +5230,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz", "integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -6017,6 +6003,7 @@ "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.27.0", "get-tsconfig": "^4.7.5" @@ -6143,6 +6130,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -6252,6 +6240,7 @@ "integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "lightningcss": "^1.32.0", "picomatch": "^4.0.4", @@ -6660,6 +6649,7 @@ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": 
"https://github.com/sponsors/colinhacks" } diff --git a/web/package-lock.json b/web/package-lock.json index c522d8ba0e..bc806a371c 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -8,7 +8,7 @@ "name": "web", "version": "0.0.0", "dependencies": { - "@nous-research/ui": "^0.3.0", + "@nous-research/ui": "^0.4.0", "@observablehq/plot": "^0.6.17", "@react-three/fiber": "^9.6.0", "@tailwindcss/vite": "^4.2.1", @@ -70,6 +70,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -1058,9 +1059,9 @@ } }, "node_modules/@nous-research/ui": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.3.0.tgz", - "integrity": "sha512-konGgtV9lkzqYkWuoUGnROqavq1svTnGbERLKItvEXmsRz4xRtbAMHI8rK6sjGpHDpwvOUN3olcOhRLTGuVfcA==", + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.4.0.tgz", + "integrity": "sha512-wA9YImWLFjx3yWsb3TsquwG9VKZunupdovkOjnRboFjNAb3Jcf57o67xWafEPEm3VX6k6RP/+Y9zHWX0PUtZ4w==", "license": "MIT", "dependencies": { "@nanostores/react": "^1.0.0", @@ -1103,6 +1104,7 @@ "resolved": "https://registry.npmjs.org/@observablehq/plot/-/plot-0.6.17.tgz", "integrity": "sha512-/qaXP/7mc4MUS0s4cPPFASDRjtsWp85/TbfsciqDgU1HwYixbSbbytNuInD8AcTYC3xaxACgVX06agdfQy9W+g==", "license": "ISC", + "peer": true, "dependencies": { "d3": "^7.9.0", "interval-tree-1d": "^1.0.0", @@ -1755,6 +1757,7 @@ "resolved": "https://registry.npmjs.org/@react-three/fiber/-/fiber-9.6.0.tgz", "integrity": "sha512-90abYK2q5/qDM+GACs9zRvc5KhEEpEWqWlHSd64zTPNxg+9wCJvTfyD9x2so7hlQhjRYO1Fa6flR3BC/kpTFkA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.17.8", "@types/webxr": "*", @@ -2489,6 +2492,7 @@ "integrity": "sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==", 
"devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -2498,6 +2502,7 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.14.tgz", "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -2508,6 +2513,7 @@ "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==", "devOptional": true, "license": "MIT", + "peer": true, "peerDependencies": { "@types/react": "^19.2.0" } @@ -2572,6 +2578,7 @@ "integrity": "sha512-XZzOmihLIr8AD1b9hL9ccNMzEMWt/dE2u7NyTY9jJG6YNiNthaD5XtUHVF2uCXZ15ng+z2hT3MVuxnUYhq6k1g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.57.0", "@typescript-eslint/types": "8.57.0", @@ -2867,6 +2874,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -3019,6 +3027,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3526,6 +3535,7 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -3839,6 +3849,7 @@ "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -4217,7 +4228,8 @@ "version": "3.15.0", "resolved": "https://registry.npmjs.org/gsap/-/gsap-3.15.0.tgz", "integrity": 
"sha512-dMW4CWBTUK1AEEDeZc1g4xpPGIrSf9fJF960qbTZmN/QwZIWY5wgliS6JWl9/25fpTGJrMRtSjGtOmPnfjZB+A==", - "license": "Standard 'no charge' license: https://gsap.com/standard-license." + "license": "Standard 'no charge' license: https://gsap.com/standard-license.", + "peer": true }, "node_modules/has-flag": { "version": "4.0.0", @@ -4532,6 +4544,7 @@ "resolved": "https://registry.npmjs.org/leva/-/leva-0.10.1.tgz", "integrity": "sha512-BcjnfUX8jpmwZUz2L7AfBtF9vn4ggTH33hmeufDULbP3YgNZ/C+ss/oO3stbrqRQyaOmRwy70y7BGTGO81S3rA==", "license": "MIT", + "peer": true, "dependencies": { "@radix-ui/react-portal": "^1.1.4", "@radix-ui/react-tooltip": "^1.1.8", @@ -4953,6 +4966,7 @@ } ], "license": "MIT", + "peer": true, "engines": { "node": "^20.0.0 || >=22.0.0" } @@ -5080,6 +5094,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5151,6 +5166,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -5170,6 +5186,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -5532,7 +5549,8 @@ "version": "0.180.0", "resolved": "https://registry.npmjs.org/three/-/three-0.180.0.tgz", "integrity": "sha512-o+qycAMZrh+TsE01GqWUxUIKR1AL0S8pq7zDkYOQw8GqfX8b8VoCKYUoHbhiX5j+7hr8XsuHDVU6+gkQJQKg9w==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/tinyglobby": { "version": "0.2.15", @@ -5597,6 +5615,7 @@ "integrity": 
"sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -5682,6 +5701,7 @@ "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.6.0.tgz", "integrity": "sha512-Pp6GSwGP/NrPIrxVFAIkOQeyw8lFenOHijQWkUTrDvrF4ALqylP2C/KCkeS9dpUM3KvYRQhna5vt7IL95+ZQ9w==", "license": "MIT", + "peer": true, "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } @@ -5697,6 +5717,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", "license": "MIT", + "peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -5818,6 +5839,7 @@ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/web/package.json b/web/package.json index 8882c5c1c8..5ca2288ef8 100644 --- a/web/package.json +++ b/web/package.json @@ -13,7 +13,7 @@ "preview": "vite preview" }, "dependencies": { - "@nous-research/ui": "^0.3.0", + "@nous-research/ui": "^0.4.0", "@observablehq/plot": "^0.6.17", "@react-three/fiber": "^9.6.0", "@tailwindcss/vite": "^4.2.1", diff --git a/web/src/App.tsx b/web/src/App.tsx index 9c6e3c3376..e3f93fda30 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -1,31 +1,47 @@ -import { useMemo } from "react"; -import { Routes, Route, NavLink, Navigate } from "react-router-dom"; +import { + useCallback, + useEffect, + useMemo, + useState, + type ComponentType, + type ReactNode, +} from "react"; +import { Routes, Route, NavLink, Navigate, useNavigate } from "react-router-dom"; import { Activity, BarChart3, Clock, + Code, + Database, + Download, + Eye, FileText, + Globe, + Heart, 
KeyRound, + Loader2, + Menu, MessageSquare, Package, - Settings, Puzzle, - Sparkles, - Terminal, - Globe, - Database, + RotateCw, + Settings, Shield, - Wrench, - Zap, - Heart, + Sparkles, Star, - Code, - Eye, + Terminal, + Wrench, + X, + Zap, } from "lucide-react"; -import { Cell, Grid, SelectionSwitcher, Typography } from "@nous-research/ui"; +import { SelectionSwitcher, Typography } from "@nous-research/ui"; import { cn } from "@/lib/utils"; import { Backdrop } from "@/components/Backdrop"; -import StatusPage from "@/pages/StatusPage"; +import { SidebarFooter } from "@/components/SidebarFooter"; +import { SidebarStatusStrip } from "@/components/SidebarStatusStrip"; +import { PageHeaderProvider } from "@/contexts/PageHeaderProvider"; +import { useSystemActions } from "@/contexts/useSystemActions"; +import type { SystemAction } from "@/contexts/system-actions-context"; import ConfigPage from "@/pages/ConfigPage"; import EnvPage from "@/pages/EnvPage"; import SessionsPage from "@/pages/SessionsPage"; @@ -36,15 +52,17 @@ import SkillsPage from "@/pages/SkillsPage"; import { LanguageSwitcher } from "@/components/LanguageSwitcher"; import { ThemeSwitcher } from "@/components/ThemeSwitcher"; import { useI18n } from "@/i18n"; -import { PluginSlot, usePlugins } from "@/plugins"; -import type { RegisteredPlugin } from "@/plugins"; +import { PluginPage, PluginSlot, usePlugins } from "@/plugins"; +import type { PluginManifest } from "@/plugins"; import { useTheme } from "@/themes"; -/** Built-in route → default page component. Used both for standard routing - * and for resolving plugin `tab.override` values. Keys must match the - * `path` in `BUILTIN_NAV` so `/path` lookups stay consistent. */ -const BUILTIN_ROUTES: Record = { - "/": StatusPage, +function RootRedirect() { + return ; +} + +/** Built-in route → page component. Used for routing and for plugin `tab.path` / `tab.override` resolution. 
*/ +const BUILTIN_ROUTES: Record = { + "/": RootRedirect, "/sessions": SessionsPage, "/analytics": AnalyticsPage, "/logs": LogsPage, @@ -55,7 +73,6 @@ const BUILTIN_ROUTES: Record = { }; const BUILTIN_NAV: NavItem[] = [ - { path: "/", labelKey: "status", label: "Status", icon: Activity }, { path: "/sessions", labelKey: "sessions", @@ -75,9 +92,7 @@ const BUILTIN_NAV: NavItem[] = [ { path: "/env", labelKey: "keys", label: "Keys", icon: KeyRound }, ]; -// Plugins can reference any of these by name in their manifest — keeps bundle -// size sane vs. importing the full lucide-react set. -const ICON_MAP: Record> = { +const ICON_MAP: Record> = { Activity, BarChart3, Clock, @@ -100,24 +115,15 @@ const ICON_MAP: Record> = { Eye, }; -function resolveIcon( - name: string, -): React.ComponentType<{ className?: string }> { +function resolveIcon(name: string): ComponentType<{ className?: string }> { return ICON_MAP[name] ?? Puzzle; } -function buildNavItems( - builtIn: NavItem[], - plugins: RegisteredPlugin[], -): NavItem[] { +function buildNavItems(builtIn: NavItem[], manifests: PluginManifest[]): NavItem[] { const items = [...builtIn]; - for (const { manifest } of plugins) { - // Plugins that replace a built-in route don't add a new tab entry — - // they reuse the existing tab. The nav just lights up the original - // built-in entry when the user visits `/`. + for (const manifest of manifests) { if (manifest.tab.override) continue; - // Hidden plugins register their component + slots but skip the nav. if (manifest.tab.hidden) continue; const pluginItem: NavItem = { @@ -145,54 +151,58 @@ function buildNavItems( return items; } -/** Build the final route table, letting plugins override built-in pages. - * - * Returns (path, Component, key) tuples. Plugins with `tab.override` - * win over both built-ins and other plugins (last registration wins if - * two plugins claim the same override, but we warn in dev). 
Plugins with - * a regular `tab.path` register alongside built-ins as standalone - * routes. */ -function buildRoutes( - plugins: RegisteredPlugin[], -): Array<{ key: string; path: string; Component: React.ComponentType }> { - const overrides = new Map(); - const addons: RegisteredPlugin[] = []; +function buildRoutes(manifests: PluginManifest[]): Array<{ + key: string; + path: string; + element: ReactNode; +}> { + const byOverride = new Map(); + const addons: PluginManifest[] = []; - for (const p of plugins) { - if (p.manifest.tab.override) { - overrides.set(p.manifest.tab.override, p); + for (const m of manifests) { + if (m.tab.override) { + byOverride.set(m.tab.override, m); } else { - addons.push(p); + addons.push(m); } } const routes: Array<{ key: string; path: string; - Component: React.ComponentType; + element: ReactNode; }> = []; for (const [path, Component] of Object.entries(BUILTIN_ROUTES)) { - const override = overrides.get(path); - if (override) { + const om = byOverride.get(path); + if (om) { routes.push({ - key: `override:${override.manifest.name}`, + key: `override:${om.name}`, path, - Component: override.component, + element: , }); } else { - routes.push({ key: `builtin:${path}`, path, Component }); + routes.push({ key: `builtin:${path}`, path, element: }); } } - for (const addon of addons) { - // Don't double-register a plugin that shadows a built-in path via - // `tab.path` — `override` is the supported mechanism for that. 
- if (BUILTIN_ROUTES[addon.manifest.tab.path]) continue; + for (const m of addons) { + if (m.tab.hidden) continue; + if (BUILTIN_ROUTES[m.tab.path]) continue; routes.push({ - key: `plugin:${addon.manifest.name}`, - path: addon.manifest.tab.path, - Component: addon.component, + key: `plugin:${m.name}`, + path: m.tab.path, + element: , + }); + } + + for (const m of manifests) { + if (!m.tab.hidden) continue; + if (BUILTIN_ROUTES[m.tab.path] || m.tab.override) continue; + routes.push({ + key: `plugin:hidden:${m.name}`, + path: m.tab.path, + element: , }); } @@ -201,154 +211,125 @@ function buildRoutes( export default function App() { const { t } = useI18n(); - const { plugins } = usePlugins(); + const { manifests } = usePlugins(); const { theme } = useTheme(); + const [mobileOpen, setMobileOpen] = useState(false); + const closeMobile = useCallback(() => setMobileOpen(false), []); const navItems = useMemo( - () => buildNavItems(BUILTIN_NAV, plugins), - [plugins], + () => buildNavItems(BUILTIN_NAV, manifests), + [manifests], + ); + const routes = useMemo(() => buildRoutes(manifests), [manifests]); + const pluginTabMeta = useMemo( + () => + manifests + .filter((m) => !m.tab.hidden) + .map((m) => ({ + path: m.tab.override ?? m.tab.path, + label: m.label, + })), + [manifests], ); - const routes = useMemo(() => buildRoutes(plugins), [plugins]); const layoutVariant = theme.layoutVariant ?? "standard"; - const showSidebar = layoutVariant === "cockpit"; - // Tiled layout drops the 1600px clamp so pages can use the full viewport; - // standard + cockpit keep the centered reading width. - const mainMaxWidth = layoutVariant === "tiled" ? "max-w-none" : "max-w-[1600px]"; + const mainMaxWidth = + layoutVariant === "tiled" ? 
"max-w-none" : "max-w-[1600px]"; + + useEffect(() => { + if (!mobileOpen) return; + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") setMobileOpen(false); + }; + document.addEventListener("keydown", onKey); + const prevOverflow = document.body.style.overflow; + document.body.style.overflow = "hidden"; + return () => { + document.removeEventListener("keydown", onKey); + document.body.style.overflow = prevOverflow; + }; + }, [mobileOpen]); + + useEffect(() => { + const mql = window.matchMedia("(min-width: 1024px)"); + const onChange = (e: MediaQueryListEvent) => { + if (e.matches) setMobileOpen(false); + }; + mql.addEventListener("change", onChange); + return () => mql.removeEventListener("change", onChange); + }, []); return (
- {/* Themes can style backdrop chrome via `componentStyles.backdrop.*` - CSS vars read by . Plugins can also inject full - components into the backdrop layer via the `backdrop` slot — - useful for scanlines, parallax stars, hero artwork, etc. */}
-
- -
- - - - Hermes -
- Agent -
-
+ - {navItems.map(({ path, label, labelKey, icon: Icon }) => ( - - - cn( - "group relative flex h-full w-full items-center gap-1.5", - "px-2.5 sm:px-4 py-2", - "font-mondwest text-[0.65rem] sm:text-[0.8rem] tracking-[0.12em]", - "whitespace-nowrap transition-colors cursor-pointer", - "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground", - isActive - ? "text-midground" - : "opacity-60 hover:opacity-100", - ) - } - style={{ - clipPath: "var(--component-tab-clip-path)", - }} - > - {({ isActive }) => ( - <> - - - {labelKey - ? ((t.app.nav as Record)[ - labelKey - ] ?? label) - : label} - - - - - {isActive && ( - - )} - - )} - - - ))} -
-
- - - - - - - - {t.app.webUi} - - - -
+ + {t.app.brand} +
- {/* Full-width banner slot under the nav, outside the main clamp — - useful for marquee/alert/status strips themes want to show - above page content. */} + {mobileOpen && ( + +
+ +
+ +
+ + + + + +
+
+ + + +
+
+ +
+ + {t.app.brand} + + } + /> + + {t.app.footer.org} + + } + /> +
+ + + + +
+ + +
+ +
+ + {routes.map(({ key, path, element }) => ( + + ))} + } + /> + +
+ +
+
+
-
- - - - {t.app.footer.name} - - } - /> - - - - {t.app.footer.org} - - } - /> - - -
- - {/* Fixed-position overlay plugins (scanlines, vignettes, etc.) render - above everything else. Each plugin is responsible for its own - pointer-events and z-index. */} ); } +function SidebarSystemActions({ onNavigate }: { onNavigate: () => void }) { + const { t } = useI18n(); + const navigate = useNavigate(); + const { activeAction, isBusy, isRunning, pendingAction, runAction } = + useSystemActions(); + + const items: SystemActionItem[] = [ + { + action: "restart", + icon: RotateCw, + label: t.status.restartGateway, + runningLabel: t.status.restartingGateway, + spin: true, + }, + { + action: "update", + icon: Download, + label: t.status.updateHermes, + runningLabel: t.status.updatingHermes, + spin: false, + }, + ]; + + const handleClick = (action: SystemAction) => { + if (isBusy) return; + void runAction(action); + navigate("/sessions"); + onNavigate(); + }; + + return ( +
+ + {t.app.system} + + + + +
    + {items.map(({ action, icon: Icon, label, runningLabel, spin }) => { + const isPending = pendingAction === action; + const isActionRunning = + activeAction === action && isRunning && !isPending; + const busy = isPending || isActionRunning; + const displayLabel = isActionRunning ? runningLabel : label; + const disabled = isBusy && !busy; + + return ( +
  • + +
  • + ); + })} +
+
+ ); +} + interface NavItem { - icon: React.ComponentType<{ className?: string }>; + icon: ComponentType<{ className?: string }>; label: string; labelKey?: string; path: string; } + +interface SystemActionItem { + action: SystemAction; + icon: ComponentType<{ className?: string }>; + label: string; + runningLabel: string; + spin: boolean; +} diff --git a/web/src/components/DeleteConfirmDialog.tsx b/web/src/components/DeleteConfirmDialog.tsx new file mode 100644 index 0000000000..9e2e82c680 --- /dev/null +++ b/web/src/components/DeleteConfirmDialog.tsx @@ -0,0 +1,40 @@ +import { ConfirmDialog } from "@/components/ui/confirm-dialog"; +import { useI18n } from "@/i18n"; + +export function DeleteConfirmDialog({ + cancelLabel, + confirmLabel, + description, + loading, + onCancel, + onConfirm, + open, + title, +}: DeleteConfirmDialogProps) { + const { t } = useI18n(); + + return ( + + ); +} + +interface DeleteConfirmDialogProps { + cancelLabel?: string; + confirmLabel?: string; + description?: string; + loading: boolean; + onCancel: () => void; + onConfirm: () => void; + open: boolean; + title: string; +} diff --git a/web/src/components/PlatformsCard.tsx b/web/src/components/PlatformsCard.tsx new file mode 100644 index 0000000000..c0412e4005 --- /dev/null +++ b/web/src/components/PlatformsCard.tsx @@ -0,0 +1,97 @@ +import { AlertTriangle, Radio, Wifi, WifiOff } from "lucide-react"; +import type { PlatformStatus } from "@/lib/api"; +import { isoTimeAgo } from "@/lib/utils"; +import { Badge } from "@/components/ui/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { useI18n } from "@/i18n"; + +export function PlatformsCard({ platforms }: PlatformsCardProps) { + const { t } = useI18n(); + const platformStateBadge: Record< + string, + { variant: "success" | "warning" | "destructive"; label: string } + > = { + connected: { variant: "success", label: t.status.connected }, + disconnected: { variant: "warning", label: 
t.status.disconnected }, + fatal: { variant: "destructive", label: t.status.error }, + }; + + return ( + + +
+ + + {t.status.connectedPlatforms} + +
+
+ + + {platforms.map(([name, info]) => { + const display = platformStateBadge[info.state] ?? { + variant: "outline" as const, + label: info.state, + }; + const IconComponent = + info.state === "connected" + ? Wifi + : info.state === "fatal" + ? AlertTriangle + : WifiOff; + + return ( +
+
+ + +
+ + {name} + + + {info.error_message && ( + + {info.error_message} + + )} + + {info.updated_at && ( + + {t.status.lastUpdate}: {isoTimeAgo(info.updated_at)} + + )} +
+
+ + + {display.variant === "success" && ( + + )} + {display.label} + +
+ ); + })} +
+
+ ); +} + +interface PlatformsCardProps { + platforms: [string, PlatformStatus][]; +} diff --git a/web/src/components/SidebarFooter.tsx b/web/src/components/SidebarFooter.tsx new file mode 100644 index 0000000000..e28623d722 --- /dev/null +++ b/web/src/components/SidebarFooter.tsx @@ -0,0 +1,40 @@ +import { Typography } from "@nous-research/ui"; +import { useSidebarStatus } from "@/hooks/useSidebarStatus"; +import { cn } from "@/lib/utils"; +import { useI18n } from "@/i18n"; + +export function SidebarFooter() { + const status = useSidebarStatus(); + const { t } = useI18n(); + + return ( +
+ + {status?.version != null ? `v${status.version}` : "—"} + + + + {t.app.footer.org} + +
+ ); +} diff --git a/web/src/components/SidebarStatusStrip.tsx b/web/src/components/SidebarStatusStrip.tsx new file mode 100644 index 0000000000..b96603cec4 --- /dev/null +++ b/web/src/components/SidebarStatusStrip.tsx @@ -0,0 +1,70 @@ +import { Link } from "react-router-dom"; +import type { StatusResponse } from "@/lib/api"; +import { useSidebarStatus } from "@/hooks/useSidebarStatus"; +import { cn } from "@/lib/utils"; +import { useI18n } from "@/i18n"; + +/** Gateway + session summary for the System sidebar block (no separate strip chrome). */ +export function SidebarStatusStrip() { + const status = useSidebarStatus(); + const { t } = useI18n(); + + if (status === null) { + return ( +
+
+
+ ); + } + + const gw = gatewayLine(status, t); + const { activeSessionsLabel, gatewayStatusLabel } = t.app; + + return ( + +
+

+ {gatewayStatusLabel}{" "} + {gw.label} +

+ +

+ {activeSessionsLabel}{" "} + + {status.active_sessions} + +

+
+ + ); +} + +function gatewayLine( + status: StatusResponse, + t: ReturnType["t"], +): { label: string; tone: string } { + const g = t.app.gatewayStrip; + const byState: Record = { + running: { label: g.running, tone: "text-success" }, + starting: { label: g.starting, tone: "text-warning" }, + startup_failed: { label: g.failed, tone: "text-destructive" }, + stopped: { label: g.stopped, tone: "text-muted-foreground" }, + }; + if (status.gateway_state && byState[status.gateway_state]) { + return byState[status.gateway_state]; + } + return status.gateway_running + ? { label: g.running, tone: "text-success" } + : { label: g.off, tone: "text-muted-foreground" }; +} diff --git a/web/src/components/ThemeSwitcher.tsx b/web/src/components/ThemeSwitcher.tsx index b3475bf460..778afc21e4 100644 --- a/web/src/components/ThemeSwitcher.tsx +++ b/web/src/components/ThemeSwitcher.tsx @@ -11,8 +11,12 @@ import { cn } from "@/lib/utils"; * glow) so users can preview the palette before committing. User-defined * themes from `~/.hermes/dashboard-themes/*.yaml` that aren't in * `BUILTIN_THEMES` render without swatches and apply the default palette. + * + * When placed at the bottom of a container (e.g. the sidebar rail), pass + * `dropUp` so the menu opens above the trigger instead of clipping below + * the viewport. */ -export function ThemeSwitcher() { +export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { const { themeName, availableThemes, setTheme } = useTheme(); const { t } = useI18n(); const [open, setOpen] = useState(false); @@ -73,7 +77,8 @@ export function ThemeSwitcher() { role="listbox" aria-label={t.theme?.title ?? "Theme"} className={cn( - "absolute right-0 top-full mt-1 z-50 min-w-[240px]", + "absolute z-50 min-w-[240px]", + dropUp ? 
"left-0 bottom-full mb-1" : "right-0 top-full mt-1", "border border-current/20 bg-background-base/95 backdrop-blur-sm", "shadow-[0_12px_32px_-8px_rgba(0,0,0,0.6)]", )} @@ -166,3 +171,7 @@ function PlaceholderSwatch() { /> ); } + +interface ThemeSwitcherProps { + dropUp?: boolean; +} diff --git a/web/src/components/ui/confirm-dialog.tsx b/web/src/components/ui/confirm-dialog.tsx new file mode 100644 index 0000000000..48e58264f8 --- /dev/null +++ b/web/src/components/ui/confirm-dialog.tsx @@ -0,0 +1,138 @@ +import { useEffect, useRef } from "react"; +import { createPortal } from "react-dom"; +import { AlertTriangle } from "lucide-react"; +import { cn } from "@/lib/utils"; +import { Button } from "@/components/ui/button"; + +export function ConfirmDialog({ + cancelLabel = "Cancel", + confirmLabel = "Confirm", + description, + destructive = false, + loading = false, + onCancel, + onConfirm, + open, + title, +}: ConfirmDialogProps) { + const dialogRef = useRef(null); + + // Focus the confirm button when opened; trap ESC to cancel. + useEffect(() => { + if (!open) return; + + const prevActive = document.activeElement as HTMLElement | null; + dialogRef.current + ?.querySelector("[data-confirm]") + ?.focus(); + + const onKey = (e: KeyboardEvent) => { + if (e.key === "Escape") { + e.preventDefault(); + onCancel(); + } + }; + + document.addEventListener("keydown", onKey); + const prevOverflow = document.body.style.overflow; + document.body.style.overflow = "hidden"; + + return () => { + document.removeEventListener("keydown", onKey); + document.body.style.overflow = prevOverflow; + prevActive?.focus?.(); + }; + }, [open, onCancel]); + + if (!open) return null; + + return createPortal( +
{ + if (e.target === e.currentTarget) onCancel(); + }} + className={cn( + "fixed inset-0 z-50 flex items-center justify-center", + "bg-black/60 backdrop-blur-sm", + "animate-[fade-in_150ms_ease-out]", + )} + > +
+
+ {destructive && ( +
+ +
+ )} + +
+

+ {title} +

+ + {description && ( +

+ {description} +

+ )} +
+
+ +
+ + +
+
+
, + document.body, + ); +} + +interface ConfirmDialogProps { + cancelLabel?: string; + confirmLabel?: string; + description?: string; + destructive?: boolean; + loading?: boolean; + onCancel: () => void; + onConfirm: () => void; + open: boolean; + title: string; +} diff --git a/web/src/components/ui/segmented.tsx b/web/src/components/ui/segmented.tsx new file mode 100644 index 0000000000..eb4346e9e8 --- /dev/null +++ b/web/src/components/ui/segmented.tsx @@ -0,0 +1,80 @@ +import { cn } from "@/lib/utils"; + +export function Segmented({ + className, + onChange, + options, + size = "sm", + value, +}: SegmentedProps) { + return ( +
+ {options.map((opt) => { + const active = opt.value === value; + + return ( + + ); + })} +
+ ); +} + +export function FilterGroup({ + children, + className, + label, +}: FilterGroupProps) { + return ( +
+ + {label} + + {children} +
+ ); +} + +interface FilterGroupProps { + children: React.ReactNode; + className?: string; + label: string; +} + +interface SegmentedOption { + label: string; + value: T; +} + +interface SegmentedProps { + className?: string; + onChange: (value: T) => void; + options: SegmentedOption[]; + size?: "sm" | "md"; + value: T; +} diff --git a/web/src/components/ui/switch.tsx b/web/src/components/ui/switch.tsx index fe36c77553..ad2031277f 100644 --- a/web/src/components/ui/switch.tsx +++ b/web/src/components/ui/switch.tsx @@ -5,15 +5,18 @@ export function Switch({ onCheckedChange, className, disabled, + id, }: { checked: boolean; onCheckedChange: (v: boolean) => void; className?: string; disabled?: boolean; + id?: string; }) { return ( + ))} +
+ + , + ); + return () => { + setAfterTitle(null); + setEnd(null); + }; + }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]); + useEffect(() => { load(); }, [load]); return (
- {/* Period selector */} -
- {t.analytics.period} - {PERIODS.map((p) => ( - - ))} -
- {loading && !data && (
diff --git a/web/src/pages/ConfigPage.tsx b/web/src/pages/ConfigPage.tsx index b72f0dcdb6..80cef29e4c 100644 --- a/web/src/pages/ConfigPage.tsx +++ b/web/src/pages/ConfigPage.tsx @@ -1,4 +1,4 @@ -import { useEffect, useRef, useState, useMemo } from "react"; +import { useEffect, useLayoutEffect, useRef, useState, useMemo } from "react"; import { Code, Download, @@ -8,7 +8,6 @@ import { Search, Upload, X, - ChevronRight, Settings2, FileText, Settings, @@ -27,6 +26,7 @@ import { MessageCircle, Wrench, FileQuestion, + Filter, } from "lucide-react"; import { api } from "@/lib/api"; import { getNestedValue, setNestedValue } from "@/lib/nested"; @@ -38,6 +38,7 @@ import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; import { Badge } from "@/components/ui/badge"; import { useI18n } from "@/i18n"; +import { usePageHeader } from "@/contexts/usePageHeader"; /* ------------------------------------------------------------------ */ /* Helpers */ @@ -85,6 +86,35 @@ export default function ConfigPage() { const { toast, showToast } = useToast(); const fileInputRef = useRef(null); const { t } = useI18n(); + const { setEnd } = usePageHeader(); + + useLayoutEffect(() => { + if (!config || !schema) { + setEnd(null); + return; + } + setEnd( +
+ + setSearchQuery(e.target.value)} + /> + {searchQuery && ( + + )} +
, + ); + return () => setEnd(null); + }, [config, schema, searchQuery, setEnd, t.common.search]); function prettyCategoryName(cat: string): string { const key = cat as keyof typeof t.config.categories; @@ -366,62 +396,66 @@ export default function ConfigPage() { ) : ( /* ═══════════════ Form Mode ═══════════════ */ -
- {/* ---- Sidebar — horizontal scroll on mobile, fixed column on sm+ ---- */} -
-
- {/* Search */} -
- - setSearchQuery(e.target.value)} - /> - {searchQuery && ( - - )} -
+
+ {/* ---- Filter panel ---- */} + {/* ---- Content ---- */}
diff --git a/web/src/pages/CronPage.tsx b/web/src/pages/CronPage.tsx index 5db9bac414..10fba6913e 100644 --- a/web/src/pages/CronPage.tsx +++ b/web/src/pages/CronPage.tsx @@ -1,9 +1,11 @@ -import { useEffect, useState } from "react"; +import { useCallback, useEffect, useState } from "react"; import { Clock, Pause, Play, Plus, Trash2, Zap } from "lucide-react"; import { H2 } from "@nous-research/ui"; import { api } from "@/lib/api"; import type { CronJob } from "@/lib/api"; +import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog"; import { useToast } from "@/hooks/useToast"; +import { useConfirmDelete } from "@/hooks/useConfirmDelete"; import { Toast } from "@/components/Toast"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; @@ -40,17 +42,17 @@ export default function CronPage() { const [deliver, setDeliver] = useState("local"); const [creating, setCreating] = useState(false); - const loadJobs = () => { + const loadJobs = useCallback(() => { api .getCronJobs() .then(setJobs) .catch(() => showToast(t.common.loading, "error")) .finally(() => setLoading(false)); - }; + }, [showToast, t.common.loading]); useEffect(() => { loadJobs(); - }, []); + }, [loadJobs]); const handleCreate = async () => { if (!prompt.trim() || !schedule.trim()) { @@ -113,18 +115,25 @@ export default function CronPage() { } }; - const handleDelete = async (job: CronJob) => { - try { - await api.deleteCronJob(job.id); - showToast( - `${t.common.delete}: "${job.name || job.prompt.slice(0, 30)}"`, - "success", - ); - loadJobs(); - } catch (e) { - showToast(`${t.status.error}: ${e}`, "error"); - } - }; + const jobDelete = useConfirmDelete({ + onDelete: useCallback( + async (id: string) => { + const job = jobs.find((j) => j.id === id); + try { + await api.deleteCronJob(id); + showToast( + `${t.common.delete}: "${job?.name || (job?.prompt ?? 
"").slice(0, 30) || id}"`, + "success", + ); + loadJobs(); + } catch (e) { + showToast(`${t.status.error}: ${e}`, "error"); + throw e; + } + }, + [jobs, loadJobs, showToast, t.common.delete, t.status.error], + ), + }); if (loading) { return ( @@ -134,10 +143,27 @@ export default function CronPage() { ); } + const pendingJob = jobDelete.pendingId + ? jobs.find((j) => j.id === jobDelete.pendingId) + : null; + return (
+ + {/* Create new job form */} @@ -311,7 +337,7 @@ export default function CronPage() { size="icon" title={t.common.delete} aria-label={t.common.delete} - onClick={() => handleDelete(job)} + onClick={() => jobDelete.requestDelete(job.id)} > diff --git a/web/src/pages/EnvPage.tsx b/web/src/pages/EnvPage.tsx index 68c4aa6790..ebb528a347 100644 --- a/web/src/pages/EnvPage.tsx +++ b/web/src/pages/EnvPage.tsx @@ -1,4 +1,4 @@ -import { useEffect, useState, useMemo } from "react"; +import { useCallback, useEffect, useMemo, useState } from "react"; import { Eye, EyeOff, @@ -16,8 +16,10 @@ import { } from "lucide-react"; import { api } from "@/lib/api"; import type { EnvVarInfo } from "@/lib/api"; -import { useToast } from "@/hooks/useToast"; +import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog"; import { Toast } from "@/components/Toast"; +import { useConfirmDelete } from "@/hooks/useConfirmDelete"; +import { useToast } from "@/hooks/useToast"; import { OAuthProvidersCard } from "@/components/OAuthProvidersCard"; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; @@ -95,6 +97,7 @@ function EnvVarRow({ onClear, onReveal, onCancelEdit, + clearDialogOpen = false, compact = false, }: { varKey: string; @@ -107,6 +110,7 @@ function EnvVarRow({ onClear: (key: string) => void; onReveal: (key: string) => void; onCancelEdit: (key: string) => void; + clearDialogOpen?: boolean; compact?: boolean; }) { const { t } = useI18n(); @@ -219,7 +223,7 @@ function EnvVarRow({ {info.is_set && ( @@ -261,6 +265,7 @@ function ProviderGroupCard({ onClear, onReveal, onCancelEdit, + clearDialogOpen = false, }: { group: ProviderGroup; edits: Record; @@ -271,6 +276,7 @@ function ProviderGroupCard({ onClear: (key: string) => void; onReveal: (key: string) => void; onCancelEdit: (key: string) => void; + clearDialogOpen?: boolean; }) { const [expanded, setExpanded] = useState(false); const { t } 
= useI18n(); @@ -325,6 +331,7 @@ function ProviderGroupCard({ key={key} varKey={key} info={info} compact edits={edits} setEdits={setEdits} revealed={revealed} saving={saving} onSave={onSave} onClear={onClear} onReveal={onReveal} onCancelEdit={onCancelEdit} + clearDialogOpen={clearDialogOpen} /> ))} {/* Base URLs (secondary) */} @@ -333,6 +340,7 @@ function ProviderGroupCard({ key={key} varKey={key} info={info} compact edits={edits} setEdits={setEdits} revealed={revealed} saving={saving} onSave={onSave} onClear={onClear} onReveal={onReveal} onCancelEdit={onCancelEdit} + clearDialogOpen={clearDialogOpen} /> ))} {/* Anything else */} @@ -341,6 +349,7 @@ function ProviderGroupCard({ key={key} varKey={key} info={info} compact edits={edits} setEdits={setEdits} revealed={revealed} saving={saving} onSave={onSave} onClear={onClear} onReveal={onReveal} onCancelEdit={onCancelEdit} + clearDialogOpen={clearDialogOpen} /> ))}
@@ -390,24 +399,30 @@ export default function EnvPage() { } }; - const handleClear = async (key: string) => { - setSaving(key); - try { - await api.deleteEnvVar(key); - setVars((prev) => - prev - ? { ...prev, [key]: { ...prev[key], is_set: false, redacted_value: null } } - : prev, - ); - setEdits((prev) => { const n = { ...prev }; delete n[key]; return n; }); - setRevealed((prev) => { const n = { ...prev }; delete n[key]; return n; }); - showToast(`${key} ${t.common.removed}`, "success"); - } catch (e) { - showToast(`${t.common.failedToRemove} ${key}: ${e}`, "error"); - } finally { - setSaving(null); - } - }; + const keyClear = useConfirmDelete({ + onDelete: useCallback( + async (key: string) => { + setSaving(key); + try { + await api.deleteEnvVar(key); + setVars((prev) => + prev + ? { ...prev, [key]: { ...prev[key], is_set: false, redacted_value: null } } + : prev, + ); + setEdits((prev) => { const n = { ...prev }; delete n[key]; return n; }); + setRevealed((prev) => { const n = { ...prev }; delete n[key]; return n; }); + showToast(`${key} ${t.common.removed}`, "success"); + } catch (e) { + showToast(`${t.common.failedToRemove} ${key}: ${e}`, "error"); + throw e; + } finally { + setSaving(null); + } + }, + [showToast, t.common.removed, t.common.failedToRemove], + ), + }); const handleReveal = async (key: string) => { if (revealed[key]) { @@ -488,10 +503,29 @@ export default function EnvPage() { const totalProviders = providerGroups.length; const configuredProviders = providerGroups.filter((g) => g.hasAnySet).length; + const pendingClearKey = keyClear.pendingId; + const pendingKeyDescription = + pendingClearKey && vars + ? vars[pendingClearKey]?.description + : undefined; + return (
+ +

@@ -530,7 +564,8 @@ export default function EnvPage() { key={group.name} group={group} edits={edits} setEdits={setEdits} revealed={revealed} saving={saving} - onSave={handleSave} onClear={handleClear} onReveal={handleReveal} onCancelEdit={cancelEdit} + onSave={handleSave} onClear={keyClear.requestDelete} onReveal={handleReveal} onCancelEdit={cancelEdit} + clearDialogOpen={keyClear.isOpen} /> ))} @@ -557,7 +592,8 @@ export default function EnvPage() { ))} @@ -566,7 +602,8 @@ export default function EnvPage() { category={category} unsetEntries={unsetEntries} edits={edits} setEdits={setEdits} revealed={revealed} saving={saving} - onSave={handleSave} onClear={handleClear} onReveal={handleReveal} onCancelEdit={cancelEdit} + onSave={handleSave} onClear={keyClear.requestDelete} onReveal={handleReveal} onCancelEdit={cancelEdit} + clearDialogOpen={keyClear.isOpen} /> )} @@ -592,6 +629,7 @@ function CollapsibleUnset({ onClear, onReveal, onCancelEdit, + clearDialogOpen = false, }: { category: string; unsetEntries: [string, EnvVarInfo][]; @@ -603,6 +641,7 @@ function CollapsibleUnset({ onClear: (key: string) => void; onReveal: (key: string) => void; onCancelEdit: (key: string) => void; + clearDialogOpen?: boolean; }) { const [collapsed, setCollapsed] = useState(true); const { t } = useI18n(); @@ -625,6 +664,7 @@ function CollapsibleUnset({ key={key} varKey={key} info={info} edits={edits} setEdits={setEdits} revealed={revealed} saving={saving} onSave={onSave} onClear={onClear} onReveal={onReveal} onCancelEdit={onCancelEdit} + clearDialogOpen={clearDialogOpen} /> ))} diff --git a/web/src/pages/LogsPage.tsx b/web/src/pages/LogsPage.tsx index ec4d7bc16a..e376ef29de 100644 --- a/web/src/pages/LogsPage.tsx +++ b/web/src/pages/LogsPage.tsx @@ -1,13 +1,14 @@ -import { useEffect, useState, useCallback, useRef } from "react"; -import { FileText, RefreshCw, ChevronRight } from "lucide-react"; -import { H2 } from "@nous-research/ui"; +import { useEffect, useLayoutEffect, useState, 
useCallback, useRef } from "react"; +import { FileText, RefreshCw } from "lucide-react"; import { api } from "@/lib/api"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Button } from "@/components/ui/button"; import { Badge } from "@/components/ui/badge"; import { Switch } from "@/components/ui/switch"; import { Label } from "@/components/ui/label"; +import { FilterGroup, Segmented } from "@/components/ui/segmented"; import { useI18n } from "@/i18n"; +import { usePageHeader } from "@/contexts/usePageHeader"; const FILES = ["agent", "errors", "gateway"] as const; const LEVELS = ["ALL", "DEBUG", "INFO", "WARNING", "ERROR"] as const; @@ -34,38 +35,8 @@ const LINE_COLORS: Record = { debug: "text-muted-foreground/60", }; -function SidebarHeading({ children }: { children: React.ReactNode }) { - return ( - - {children} - - ); -} - -function SidebarItem({ - label, - value, - current, - onChange, -}: SidebarItemProps) { - const isActive = current === value; - return ( - - ); -} +const toOptions = (values: readonly T[]) => + values.map((v) => ({ value: v, label: v })); export default function LogsPage() { const [file, setFile] = useState<(typeof FILES)[number]>("agent"); @@ -79,6 +50,7 @@ export default function LogsPage() { const [error, setError] = useState(null); const scrollRef = useRef(null); const { t } = useI18n(); + const { setAfterTitle, setEnd } = usePageHeader(); const fetchLogs = useCallback(() => { setLoading(true); @@ -97,6 +69,66 @@ export default function LogsPage() { .finally(() => setLoading(false)); }, [file, lineCount, level, component]); + useLayoutEffect(() => { + setAfterTitle( + + {loading && ( +

+ )} + + {file} · {level} · {component} + + , + ); + setEnd( +
+
+ + + {autoRefresh && ( + + + {t.common.live} + + )} +
+ +
, + ); + return () => { + setAfterTitle(null); + setEnd(null); + }; + }, [ + autoRefresh, + component, + file, + level, + loading, + setAfterTitle, + setEnd, + t.common.live, + t.common.refresh, + t.logs.autoRefresh, + fetchLogs, + ]); + useEffect(() => { fetchLogs(); }, [fetchLogs]); @@ -109,145 +141,80 @@ export default function LogsPage() { return (
- {/* ═══════════════ Header ═══════════════ */} -
-
- -

{t.logs.title}

- {loading && ( -
- )} - - {file} · {level} · {component} - -
-
-
- - - {autoRefresh && ( - - - {t.common.live} - - )} -
- -
-
- - {/* ═══════════════ Sidebar + Content ═══════════════ */} + {/* ═══════════════ Filter toolbar ═══════════════ */}
- {/* ---- Sidebar ---- */} -
-
- {t.logs.file} - {FILES.map((f) => ( - - ))} + + + - {t.logs.level} - {LEVELS.map((l) => ( - - ))} + + + - {t.logs.component} - {COMPONENTS.map((c) => ( - - ))} + + + - {t.logs.lines} - {LINE_COUNTS.map((n) => ( - - setLineCount(Number(v) as (typeof LINE_COUNTS)[number]) - } - /> - ))} -
-
- - {/* ---- Content ---- */} -
- - - - - {file}.log - - - - {error && ( -
-

{error}

-
- )} - -
- {lines.length === 0 && !loading && ( -

- {t.logs.noLogLines} -

- )} - {lines.map((line, i) => { - const cls = classifyLine(line); - return ( -
- {line} -
- ); - })} -
-
-
-
+ + + setLineCount(Number(v) as (typeof LINE_COUNTS)[number]) + } + options={LINE_COUNTS.map((n) => ({ + value: String(n), + label: String(n), + }))} + /> +
+ + {/* ═══════════════ Log viewer ═══════════════ */} + + + + + {file}.log + + + + {error && ( +
+

{error}

+
+ )} + +
+ {lines.length === 0 && !loading && ( +

+ {t.logs.noLogLines} +

+ )} + {lines.map((line, i) => { + const cls = classifyLine(line); + return ( +
+ {line} +
+ ); + })} +
+
+
); } - -interface SidebarItemProps { - label: string; - value: T; - current: T; - onChange: (v: T) => void; -} diff --git a/web/src/pages/SessionsPage.tsx b/web/src/pages/SessionsPage.tsx index 370b499a8b..ad6bb74cef 100644 --- a/web/src/pages/SessionsPage.tsx +++ b/web/src/pages/SessionsPage.tsx @@ -1,8 +1,12 @@ -import { useEffect, useState, useCallback, useRef } from "react"; +import { useEffect, useLayoutEffect, useState, useCallback, useRef } from "react"; import { + AlertTriangle, + CheckCircle2, ChevronDown, ChevronLeft, ChevronRight, + Database, + Loader2, MessageSquare, Search, Trash2, @@ -13,19 +17,27 @@ import { Hash, X, } from "lucide-react"; -import { H2 } from "@nous-research/ui"; import { api } from "@/lib/api"; import type { SessionInfo, SessionMessage, SessionSearchResult, + StatusResponse, } from "@/lib/api"; import { timeAgo } from "@/lib/utils"; import { Markdown } from "@/components/Markdown"; +import { PlatformsCard } from "@/components/PlatformsCard"; +import { Toast } from "@/components/Toast"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { DeleteConfirmDialog } from "@/components/DeleteConfirmDialog"; +import { useConfirmDelete } from "@/hooks/useConfirmDelete"; import { Input } from "@/components/ui/input"; +import { useSystemActions } from "@/contexts/useSystemActions"; +import { useToast } from "@/hooks/useToast"; import { useI18n } from "@/i18n"; +import { usePageHeader } from "@/contexts/usePageHeader"; const SOURCE_CONFIG: Record = { @@ -381,7 +393,62 @@ export default function SessionsPage() { >(null); const [searching, setSearching] = useState(false); const debounceRef = useRef>(null); + const logScrollRef = useRef(null); + const [status, setStatus] = useState(null); + const [overviewSessions, setOverviewSessions] = useState([]); + const { toast, showToast } = useToast(); const { t } = 
useI18n(); + const { setAfterTitle, setEnd } = usePageHeader(); + const { activeAction, actionStatus, dismissLog } = useSystemActions(); + + useLayoutEffect(() => { + if (loading) { + setAfterTitle(null); + setEnd(null); + return; + } + setAfterTitle( + + {total} + , + ); + setEnd( +
+ {searching ? ( +
+ ) : ( + + )} + setSearch(e.target.value)} + className="h-8 pr-7 pl-8 text-xs" + /> + {search && ( + + )} +
, + ); + return () => { + setAfterTitle(null); + setEnd(null); + }; + }, [ + loading, + search, + searching, + setAfterTitle, + setEnd, + t.sessions.searchPlaceholder, + total, + ]); const loadSessions = useCallback((p: number) => { setLoading(true); @@ -399,6 +466,24 @@ export default function SessionsPage() { loadSessions(page); }, [loadSessions, page]); + useEffect(() => { + const loadOverview = () => { + api.getStatus().then(setStatus).catch(() => {}); + api + .getSessions(50) + .then((r) => setOverviewSessions(r.sessions)) + .catch(() => {}); + }; + loadOverview(); + const id = setInterval(loadOverview, 5000); + return () => clearInterval(id); + }, []); + + useEffect(() => { + const el = logScrollRef.current; + if (el) el.scrollTop = el.scrollHeight; + }, [actionStatus?.lines]); + // Debounced FTS search useEffect(() => { if (debounceRef.current) clearTimeout(debounceRef.current); @@ -423,16 +508,27 @@ export default function SessionsPage() { }; }, [search]); - const handleDelete = async (id: string) => { - try { - await api.deleteSession(id); - setSessions((prev) => prev.filter((s) => s.id !== id)); - setTotal((prev) => prev - 1); - if (expandedId === id) setExpandedId(null); - } catch { - // ignore - } - }; + const sessionDelete = useConfirmDelete({ + onDelete: useCallback( + async (id: string) => { + try { + await api.deleteSession(id); + setSessions((prev) => prev.filter((s) => s.id !== id)); + setTotal((prev) => prev - 1); + if (expandedId === id) setExpandedId(null); + showToast(t.sessions.sessionDeleted, "success"); + } catch { + showToast(t.sessions.failedToDelete, "error"); + throw new Error("delete failed"); + } + }, + [expandedId, showToast, t.sessions.sessionDeleted, t.sessions.failedToDelete], + ), + }); + + const pendingSession = sessionDelete.pendingId + ? 
sessions.find((s) => s.id === sessionDelete.pendingId) + : null; // Build snippet map from search results (session_id → snippet) const snippetMap = new Map(); @@ -448,6 +544,36 @@ export default function SessionsPage() { ? sessions.filter((s) => snippetMap.has(s.id)) : sessions; + const platformEntries = status + ? Object.entries(status.gateway_platforms ?? {}) + : []; + const recentSessions = overviewSessions + .filter((s) => !s.is_active) + .slice(0, 5); + + const alerts: { message: string; detail?: string }[] = []; + if (status) { + if (status.gateway_state === "startup_failed") { + alerts.push({ + message: t.status.gatewayFailedToStart, + detail: status.gateway_exit_reason ?? undefined, + }); + } + const failedPlatformEntries = platformEntries.filter( + ([, info]) => info.state === "fatal" || info.state === "disconnected", + ); + for (const [name, info] of failedPlatformEntries) { + const stateLabel = + info.state === "fatal" + ? t.status.platformError + : t.status.platformDisconnected; + alerts.push({ + message: `${name.charAt(0).toUpperCase() + name.slice(1)} ${stateLabel}`, + detail: info.error_message ?? undefined, + }); + } + } + if (loading) { return (
@@ -458,38 +584,159 @@ export default function SessionsPage() { return (
- {/* Header outside card for lighter feel */} -
-
- -

{t.sessions.title}

- - {total} - + + + + + {alerts.length > 0 && ( +
+
+ +
+ {alerts.map((alert, i) => ( +
+

+ {alert.message} +

+ {alert.detail && ( +

+ {alert.detail} +

+ )} +
+ ))} +
+
-
- {searching ? ( -
- ) : ( - - )} - setSearch(e.target.value)} - className="pl-8 pr-7 h-8 text-xs" - /> - {search && ( + )} + + {activeAction && ( +
+
+
+ {actionStatus?.running ? ( + + ) : actionStatus?.exit_code === 0 ? ( + + ) : actionStatus !== null ? ( + + ) : ( + + )} + + + {activeAction === "restart" + ? t.status.restartGateway + : t.status.updateHermes} + + + + {actionStatus?.running + ? t.status.running + : actionStatus?.exit_code === 0 + ? t.status.actionFinished + : actionStatus + ? `${t.status.actionFailed} (${actionStatus.exit_code ?? "?"})` + : t.common.loading} + +
+ - )} +
+ +
+            {actionStatus?.lines && actionStatus.lines.length > 0
+              ? actionStatus.lines.join("\n")
+              : t.status.waitingForOutput}
+          
-
+ )} + + {platformEntries.length > 0 && status && ( + + )} + + {recentSessions.length > 0 && ( + + +
+ + + {t.status.recentSessions} + +
+
+ + + {recentSessions.map((s) => ( +
+
+ + {s.title ?? t.common.untitled} + + + + + {(s.model ?? t.common.unknown).split("/").pop()} + {" "} + · {s.message_count} {t.common.msgs} ·{" "} + {timeAgo(s.last_active)} + + + {s.preview && ( + + {s.preview} + + )} +
+ + + + {s.source ?? "local"} + +
+ ))} +
+
+ )} {filtered.length === 0 ? (
@@ -516,7 +763,7 @@ export default function SessionsPage() { onToggle={() => setExpandedId((prev) => (prev === s.id ? null : s.id)) } - onDelete={() => handleDelete(s.id)} + onDelete={() => sessionDelete.requestDelete(s.id)} /> ))}
diff --git a/web/src/pages/SkillsPage.tsx b/web/src/pages/SkillsPage.tsx index a007b3b9e1..c951d249e9 100644 --- a/web/src/pages/SkillsPage.tsx +++ b/web/src/pages/SkillsPage.tsx @@ -1,9 +1,8 @@ -import { useEffect, useState, useMemo } from "react"; +import { useEffect, useLayoutEffect, useState, useMemo } from "react"; import { Package, Search, Wrench, - ChevronRight, X, Cpu, Globe, @@ -14,8 +13,8 @@ import { Blocks, Code, Zap, + Filter, } from "lucide-react"; -import { H2 } from "@nous-research/ui"; import { api } from "@/lib/api"; import type { SkillInfo, ToolsetInfo } from "@/lib/api"; import { useToast } from "@/hooks/useToast"; @@ -25,6 +24,7 @@ import { Badge } from "@/components/ui/badge"; import { Input } from "@/components/ui/input"; import { Switch } from "@/components/ui/switch"; import { useI18n } from "@/i18n"; +import { usePageHeader } from "@/contexts/usePageHeader"; /* ------------------------------------------------------------------ */ /* Types & helpers */ @@ -98,6 +98,7 @@ export default function SkillsPage() { const [togglingSkills, setTogglingSkills] = useState>(new Set()); const { toast, showToast } = useToast(); const { t } = useI18n(); + const { setAfterTitle, setEnd } = usePageHeader(); useEffect(() => { Promise.all([api.getSkills(), api.getToolsets()]) @@ -182,6 +183,53 @@ export default function SkillsPage() { const enabledCount = skills.filter((s) => s.enabled).length; + useLayoutEffect(() => { + if (loading) { + setAfterTitle(null); + setEnd(null); + return; + } + setAfterTitle( + + {t.skills.enabledOf + .replace("{enabled}", String(enabledCount)) + .replace("{total}", String(skills.length))} + , + ); + setEnd( +
+ + setSearch(e.target.value)} + /> + {search && ( + + )} +
, + ); + return () => { + setAfterTitle(null); + setEnd(null); + }; + }, [ + enabledCount, + loading, + search, + setAfterTitle, + setEnd, + skills.length, + t, + ]); + const filteredToolsets = useMemo(() => { return toolsets.filter( (ts) => @@ -205,122 +253,98 @@ export default function SkillsPage() {
- {/* ═══════════════ Header ═══════════════ */} -
-
- -

{t.skills.title}

- - {t.skills.enabledOf - .replace("{enabled}", String(enabledCount)) - .replace("{total}", String(skills.length))} - -
-
+ {/* ═══════════════ Filter panel + Content ═══════════════ */} +
+ {/* ---- Filter panel ---- */} + {/* ---- Content ---- */}
@@ -522,9 +546,39 @@ function SkillRow({ ); } +function PanelItem({ active, icon: Icon, label, onClick }: PanelItemProps) { + return ( + + ); +} + +interface PanelItemProps { + active: boolean; + icon: React.ComponentType<{ className?: string }>; + label: string; + onClick: () => void; +} + interface SkillRowProps { + noDescriptionLabel: string; + onToggle: () => void; skill: SkillInfo; toggling: boolean; - onToggle: () => void; - noDescriptionLabel: string; } diff --git a/web/src/pages/StatusPage.tsx b/web/src/pages/StatusPage.tsx deleted file mode 100644 index 3c213b5cbb..0000000000 --- a/web/src/pages/StatusPage.tsx +++ /dev/null @@ -1,614 +0,0 @@ -import { useEffect, useRef, useState } from "react"; -import { - Activity, - AlertTriangle, - CheckCircle2, - Clock, - Cpu, - Database, - Download, - Loader2, - Radio, - RotateCw, - Wifi, - WifiOff, - Wrench, - X, -} from "lucide-react"; -import { Cell, Grid } from "@nous-research/ui"; -import { api } from "@/lib/api"; -import type { - ActionStatusResponse, - PlatformStatus, - SessionInfo, - StatusResponse, -} from "@/lib/api"; -import { cn, timeAgo, isoTimeAgo } from "@/lib/utils"; -import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; -import { Badge } from "@/components/ui/badge"; -import { Button } from "@/components/ui/button"; -import { Toast } from "@/components/Toast"; -import { useI18n } from "@/i18n"; - -const ACTION_NAMES: Record<"restart" | "update", string> = { - restart: "gateway-restart", - update: "hermes-update", -}; - -export default function StatusPage() { - const [status, setStatus] = useState(null); - const [sessions, setSessions] = useState([]); - const [pendingAction, setPendingAction] = useState< - "restart" | "update" | null - >(null); - const [activeAction, setActiveAction] = useState<"restart" | "update" | null>( - null, - ); - const [actionStatus, setActionStatus] = useState( - null, - ); - const [toast, setToast] = useState(null); - const logScrollRef = 
useRef(null); - const { t } = useI18n(); - - useEffect(() => { - const load = () => { - api - .getStatus() - .then(setStatus) - .catch(() => {}); - api - .getSessions(50) - .then((resp) => setSessions(resp.sessions)) - .catch(() => {}); - }; - load(); - const interval = setInterval(load, 5000); - return () => clearInterval(interval); - }, []); - - useEffect(() => { - if (!toast) return; - const timer = setTimeout(() => setToast(null), 4000); - return () => clearTimeout(timer); - }, [toast]); - - useEffect(() => { - if (!activeAction) return; - const name = ACTION_NAMES[activeAction]; - let cancelled = false; - - const poll = async () => { - try { - const resp = await api.getActionStatus(name); - if (cancelled) return; - setActionStatus(resp); - if (!resp.running) { - const ok = resp.exit_code === 0; - setToast({ - type: ok ? "success" : "error", - message: ok - ? t.status.actionFinished - : `${t.status.actionFailed} (exit ${resp.exit_code ?? "?"})`, - }); - return; - } - } catch { - // transient fetch error; keep polling - } - if (!cancelled) setTimeout(poll, 1500); - }; - - poll(); - return () => { - cancelled = true; - }; - }, [activeAction, t.status.actionFinished, t.status.actionFailed]); - - useEffect(() => { - const el = logScrollRef.current; - if (el) el.scrollTop = el.scrollHeight; - }, [actionStatus?.lines]); - - const runAction = async (action: "restart" | "update") => { - setPendingAction(action); - setActionStatus(null); - try { - if (action === "restart") { - await api.restartGateway(); - } else { - await api.updateHermes(); - } - setActiveAction(action); - } catch (err) { - const detail = err instanceof Error ? err.message : String(err); - setToast({ - type: "error", - message: `${t.status.actionFailed}: ${detail}`, - }); - } finally { - setPendingAction(null); - } - }; - - const dismissLog = () => { - setActiveAction(null); - setActionStatus(null); - }; - - if (!status) { - return ( -
-
-
- ); - } - - const PLATFORM_STATE_BADGE: Record< - string, - { variant: "success" | "warning" | "destructive"; label: string } - > = { - connected: { variant: "success", label: t.status.connected }, - disconnected: { variant: "warning", label: t.status.disconnected }, - fatal: { variant: "destructive", label: t.status.error }, - }; - - const GATEWAY_STATE_DISPLAY: Record< - string, - { badge: "success" | "warning" | "destructive" | "outline"; label: string } - > = { - running: { badge: "success", label: t.status.running }, - starting: { badge: "warning", label: t.status.starting }, - startup_failed: { badge: "destructive", label: t.status.failed }, - stopped: { badge: "outline", label: t.status.stopped }, - }; - - function gatewayValue(): string { - if (status!.gateway_running && status!.gateway_health_url) - return status!.gateway_health_url; - if (status!.gateway_running && status!.gateway_pid) - return `${t.status.pid} ${status!.gateway_pid}`; - if (status!.gateway_running) return t.status.runningRemote; - if (status!.gateway_state === "startup_failed") return t.status.startFailed; - return t.status.notRunning; - } - - function gatewayBadge() { - const info = status!.gateway_state - ? GATEWAY_STATE_DISPLAY[status!.gateway_state] - : null; - if (info) return info; - return status!.gateway_running - ? { badge: "success" as const, label: t.status.running } - : { badge: "outline" as const, label: t.common.off }; - } - - const gwBadge = gatewayBadge(); - - const items = [ - { - icon: Cpu, - label: t.status.agent, - value: `v${status.version}`, - badgeText: t.common.live, - badgeVariant: "success" as const, - }, - { - icon: Radio, - label: t.status.gateway, - value: gatewayValue(), - badgeText: gwBadge.label, - badgeVariant: gwBadge.badge, - }, - { - icon: Activity, - label: t.status.activeSessions, - value: - status.active_sessions > 0 - ? `${status.active_sessions} ${t.status.running.toLowerCase()}` - : t.status.noneRunning, - badgeText: status.active_sessions > 0 ? 
t.common.live : t.common.off, - badgeVariant: (status.active_sessions > 0 ? "success" : "outline") as - | "success" - | "outline", - }, - ]; - - const platforms = Object.entries(status.gateway_platforms ?? {}); - const activeSessions = sessions.filter((s) => s.is_active); - const recentSessions = sessions.filter((s) => !s.is_active).slice(0, 5); - - // Collect alerts that need attention - const alerts: { message: string; detail?: string }[] = []; - if (status.gateway_state === "startup_failed") { - alerts.push({ - message: t.status.gatewayFailedToStart, - detail: status.gateway_exit_reason ?? undefined, - }); - } - const failedPlatforms = platforms.filter( - ([, info]) => info.state === "fatal" || info.state === "disconnected", - ); - for (const [name, info] of failedPlatforms) { - const stateLabel = - info.state === "fatal" - ? t.status.platformError - : t.status.platformDisconnected; - alerts.push({ - message: `${name.charAt(0).toUpperCase() + name.slice(1)} ${stateLabel}`, - detail: info.error_message ?? undefined, - }); - } - - return ( -
- - - {alerts.length > 0 && ( -
-
- -
- {alerts.map((alert, i) => ( -
-

- {alert.message} -

- {alert.detail && ( -

- {alert.detail} -

- )} -
- ))} -
-
-
- )} - - - {items.map(({ icon: Icon, label, value, badgeText, badgeVariant }) => ( - -
- {label} - -
- -
- {value} -
- - {badgeText && ( - - {badgeVariant === "success" && ( - - )} - {badgeText} - - )} -
- ))} - - -
- - {t.status.actions} - - -
- -
- - - -
-
-
- - {activeAction && ( -
-
-
- {actionStatus?.running ? ( - - ) : actionStatus?.exit_code === 0 ? ( - - ) : actionStatus !== null ? ( - - ) : ( - - )} - - - {activeAction === "restart" - ? t.status.restartGateway - : t.status.updateHermes} - - - - {actionStatus?.running - ? t.status.running - : actionStatus?.exit_code === 0 - ? t.status.actionFinished - : actionStatus - ? `${t.status.actionFailed} (${actionStatus.exit_code ?? "?"})` - : t.common.loading} - -
- - -
- -
-            {actionStatus?.lines && actionStatus.lines.length > 0
-              ? actionStatus.lines.join("\n")
-              : t.status.waitingForOutput}
-          
-
- )} - - {platforms.length > 0 && ( - - )} - - {activeSessions.length > 0 && ( - - -
- - - {t.status.activeSessions} - -
-
- - - {activeSessions.map((s) => ( -
-
-
- - {s.title ?? t.common.untitled} - - - - - {t.common.live} - -
- - - - {(s.model ?? t.common.unknown).split("/").pop()} - {" "} - · {s.message_count} {t.common.msgs} ·{" "} - {timeAgo(s.last_active)} - -
-
- ))} -
-
- )} - - {recentSessions.length > 0 && ( - - -
- - - {t.status.recentSessions} - -
-
- - - {recentSessions.map((s) => ( -
-
- - {s.title ?? t.common.untitled} - - - - - {(s.model ?? t.common.unknown).split("/").pop()} - {" "} - · {s.message_count} {t.common.msgs} ·{" "} - {timeAgo(s.last_active)} - - - {s.preview && ( - - {s.preview} - - )} -
- - - - {s.source ?? "local"} - -
- ))} -
-
- )} -
- ); -} - -function PlatformsCard({ platforms, platformStateBadge }: PlatformsCardProps) { - const { t } = useI18n(); - - return ( - - -
- - - {t.status.connectedPlatforms} - -
-
- - - {platforms.map(([name, info]) => { - const display = platformStateBadge[info.state] ?? { - variant: "outline" as const, - label: info.state, - }; - const IconComponent = - info.state === "connected" - ? Wifi - : info.state === "fatal" - ? AlertTriangle - : WifiOff; - - return ( -
-
- - -
- - {name} - - - {info.error_message && ( - - {info.error_message} - - )} - - {info.updated_at && ( - - {t.status.lastUpdate}: {isoTimeAgo(info.updated_at)} - - )} -
-
- - - {display.variant === "success" && ( - - )} - {display.label} - -
- ); - })} -
-
- ); -} - -interface ToastState { - message: string; - type: "success" | "error"; -} - -interface PlatformsCardProps { - platforms: [string, PlatformStatus][]; - platformStateBadge: Record< - string, - { variant: "success" | "warning" | "destructive"; label: string } - >; -} diff --git a/web/src/plugins/PluginPage.tsx b/web/src/plugins/PluginPage.tsx new file mode 100644 index 0000000000..4b8f937d62 --- /dev/null +++ b/web/src/plugins/PluginPage.tsx @@ -0,0 +1,64 @@ +import { useSyncExternalStore } from "react"; +import { Loader2 } from "lucide-react"; +import { + getPluginComponent, + getPluginLoadError, + onPluginRegistered, +} from "./registry"; +import { useI18n } from "@/i18n"; +import { cn } from "@/lib/utils"; +import type { Translations } from "@/i18n/types"; + +/** Renders a plugin tab once its bundle has called `register()`. */ +export function PluginPage({ name }: { name: string }) { + const { t } = useI18n(); + // Subscribe in render (via useSyncExternalStore) so we never miss + // `register()` if the script loads before a useEffect would run. + const Component = useSyncExternalStore( + (onChange) => onPluginRegistered(onChange), + () => getPluginComponent(name) ?? null, + () => null, + ); + const loadError = useSyncExternalStore( + (onChange) => onPluginRegistered(onChange), + () => getPluginLoadError(name) ?? null, + () => null, + ); + + if (Component) { + return ; + } + + if (loadError) { + const message = formatPluginError(loadError, t); + return ( +
+ {message} +
+ ); + } + + return ( +
+ + {t.common.loading} +
+ ); +} + +function formatPluginError(code: string, t: Translations): string { + if (code === "LOAD_FAILED") return t.common.pluginLoadFailed; + if (code === "NO_REGISTER") return t.common.pluginNotRegistered; + return code; +} diff --git a/web/src/plugins/index.ts b/web/src/plugins/index.ts index 27902fc935..da9c1bdef2 100644 --- a/web/src/plugins/index.ts +++ b/web/src/plugins/index.ts @@ -1,4 +1,5 @@ export { exposePluginSDK, getPluginComponent, onPluginRegistered, getRegisteredCount } from "./registry"; +export { PluginPage } from "./PluginPage"; export { usePlugins } from "./usePlugins"; export { PluginSlot, KNOWN_SLOT_NAMES, registerSlot, getSlotEntries, onSlotRegistered, unregisterPluginSlots } from "./slots"; export type { KnownSlotName } from "./slots"; diff --git a/web/src/plugins/registry.ts b/web/src/plugins/registry.ts index fec230c2e7..08a5c99902 100644 --- a/web/src/plugins/registry.ts +++ b/web/src/plugins/registry.ts @@ -37,6 +37,7 @@ import { registerSlot, PluginSlot } from "./slots"; type RegistryListener = () => void; const _registered: Map = new Map(); +const _loadErrors: Map = new Map(); const _listeners: Set = new Set(); function _notify() { @@ -45,8 +46,14 @@ function _notify() { } } +/** Re-run registry subscribers (e.g. after a plugin script onload, or dev HMR re-inject). */ +export function notifyPluginRegistry() { + _notify(); +} + /** Register a plugin component. Called by plugin JS bundles. 
*/ function registerPlugin(name: string, component: React.ComponentType) { + _loadErrors.delete(name); _registered.set(name, component); _notify(); } @@ -56,6 +63,15 @@ export function getPluginComponent(name: string): React.ComponentType | undefine return _registered.get(name); } +export function getPluginLoadError(name: string): string | undefined { + return _loadErrors.get(name); +} + +export function setPluginLoadError(name: string, message: string) { + _loadErrors.set(name, message); + _notify(); +} + /** Subscribe to registry changes (returns unsubscribe fn). */ export function onPluginRegistered(fn: RegistryListener): () => void { _listeners.add(fn); diff --git a/web/src/plugins/types.ts b/web/src/plugins/types.ts index 6b56d3279c..dd11c35c22 100644 --- a/web/src/plugins/types.ts +++ b/web/src/plugins/types.ts @@ -1,5 +1,7 @@ /** Types for the dashboard plugin system. */ +import type { ComponentType } from "react"; + export interface PluginManifest { name: string; label: string; @@ -8,21 +10,14 @@ export interface PluginManifest { version: string; tab: { path: string; - position: string; // "end", "after:", "before:" - /** When set to a built-in route path (e.g. `"/"`, `"/sessions"`), this - * plugin's component replaces the built-in page at that route rather - * than adding a new tab. Useful for themes that want a custom home - * page without losing the rest of the dashboard. */ + /** "end", "after:", "before:" (e.g. "after:skills" → after `/skills`) */ + position?: string; + /** When set to a built-in route path, this plugin replaces that page instead of adding a new tab. */ override?: string; - /** When true, the plugin registers its component and slot contributors - * without adding a tab to the nav. Used by slot-only plugins (e.g. a - * plugin that just injects a header crest). */ + /** When true, the plugin may register without a sidebar tab (slot-only, etc.). */ hidden?: boolean; }; - /** Named shell slots this plugin populates. 
Mirrored by the backend's - * manifest discovery; used purely as a documentation/discovery aid — - * actual slot registration happens when the plugin's JS bundle calls - * `window.__HERMES_PLUGINS__.registerSlot(name, slot, Component)`. */ + /** Declared for discovery; actual slots use registerSlot in the plugin bundle. */ slots?: string[]; entry: string; css?: string | null; @@ -32,5 +27,5 @@ export interface PluginManifest { export interface RegisteredPlugin { manifest: PluginManifest; - component: React.ComponentType; + component: ComponentType; } diff --git a/web/src/plugins/usePlugins.ts b/web/src/plugins/usePlugins.ts index 79b38f3364..147b1f0a84 100644 --- a/web/src/plugins/usePlugins.ts +++ b/web/src/plugins/usePlugins.ts @@ -10,7 +10,12 @@ import { useState, useEffect, useRef } from "react"; import { api } from "@/lib/api"; import type { PluginManifest, RegisteredPlugin } from "./types"; -import { getPluginComponent, onPluginRegistered } from "./registry"; +import { + getPluginComponent, + onPluginRegistered, + notifyPluginRegistry, + setPluginLoadError, +} from "./registry"; export function usePlugins() { const [manifests, setManifests] = useState([]); @@ -33,6 +38,8 @@ export function usePlugins() { useEffect(() => { if (manifests.length === 0) return; + const injectedScripts: HTMLScriptElement[] = []; + for (const manifest of manifests) { // Inject CSS if specified. if (manifest.css) { @@ -45,23 +52,49 @@ export function usePlugins() { } } - // Load JS bundle. - const jsUrl = `/dashboard-plugins/${manifest.name}/${manifest.entry}`; - if (loadedScripts.current.has(jsUrl)) continue; - loadedScripts.current.add(jsUrl); + // Load JS bundle. 
In dev, cache-bust so Vite HMR can clear the + // in-memory registry while the browser would otherwise never + // re-execute a previously cached + + + + + + + + + + +``` + +Key implementation patterns: +- **Seeded randomness**: Always `randomSeed()` + `noiseSeed()` for reproducibility +- **Color mode**: Use `colorMode(HSB, 360, 100, 100, 100)` for intuitive color control +- **State separation**: CONFIG for parameters, PALETTE for colors, globals for mutable state +- **Class-based entities**: Particles, agents, shapes as classes with `update()` + `display()` methods +- **Offscreen buffers**: `createGraphics()` for layered composition, trails, masks + +### Step 4: Preview & Iterate + +- Open HTML file directly in browser — no server needed for basic sketches +- For `loadImage()`/`loadFont()` from local files: use `scripts/serve.sh` or `python3 -m http.server` +- Chrome DevTools Performance tab to verify 60fps +- Test at target export resolution, not just the window size +- Adjust parameters until the visual matches the concept from Step 1 + +### Step 5: Export + +| Format | Method | Command | +|--------|--------|---------| +| **PNG** | `saveCanvas('output', 'png')` in `keyPressed()` | Press 's' to save | +| **High-res PNG** | Puppeteer headless capture | `node scripts/export-frames.js sketch.html --width 3840 --height 2160 --frames 1` | +| **GIF** | `saveGif('output', 5)` — captures N seconds | Press 'g' to save | +| **Frame sequence** | `saveFrames('frame', 'png', 10, 30)` — 10s at 30fps | Then `ffmpeg -i frame-%04d.png -c:v libx264 output.mp4` | +| **MP4** | Puppeteer frame capture + ffmpeg | `bash scripts/render.sh sketch.html output.mp4 --duration 30 --fps 30` | +| **SVG** | `createCanvas(w, h, SVG)` with p5.js-svg | `save('output.svg')` | + +### Step 6: Quality Verification + +- **Does it match the vision?** Compare output to the creative concept. If it looks generic, go back to Step 1 +- **Resolution check**: Is it sharp at the target display size? 
No aliasing artifacts? +- **Performance check**: Does it hold 60fps in browser? (30fps minimum for animations) +- **Color check**: Do the colors work together? Test on both light and dark monitors +- **Edge cases**: What happens at canvas edges? On resize? After running for 10 minutes? + +## Critical Implementation Notes + +### Performance — Disable FES First + +The Friendly Error System (FES) adds up to 10x overhead. Disable it in every production sketch: + +```javascript +p5.disableFriendlyErrors = true; // BEFORE setup() + +function setup() { + pixelDensity(1); // prevent 2x-4x overdraw on retina + createCanvas(1920, 1080); +} +``` + +In hot loops (particles, pixel ops), use `Math.*` instead of p5 wrappers — measurably faster: + +```javascript +// In draw() or update() hot paths: +let a = Math.sin(t); // not sin(t) +let r = Math.sqrt(dx*dx+dy*dy); // not dist() — or better: skip sqrt, compare magSq +let v = Math.random(); // not random() — when seed not needed +let m = Math.min(a, b); // not min(a, b) +``` + +Never `console.log()` inside `draw()`. Never manipulate DOM in `draw()`. See `references/troubleshooting.md` § Performance. + +### Seeded Randomness — Always + +Every generative sketch must be reproducible. Same seed, same output. + +```javascript +function setup() { + randomSeed(CONFIG.seed); + noiseSeed(CONFIG.seed); + // All random() and noise() calls now deterministic +} +``` + +Never use `Math.random()` for generative content — only for performance-critical non-visual code. Always `random()` for visual elements. If you need a random seed: `CONFIG.seed = floor(random(99999))`. 
+ +### Generative Art Platform Support (fxhash / Art Blocks) + +For generative art platforms, replace p5's PRNG with the platform's deterministic random: + +```javascript +// fxhash convention +const SEED = $fx.hash; // unique per mint +const rng = $fx.rand; // deterministic PRNG +$fx.features({ palette: 'warm', complexity: 'high' }); + +// In setup(): +randomSeed(SEED); // for p5's noise() +noiseSeed(SEED); + +// Replace random() with rng() for platform determinism +let x = rng() * width; // instead of random(width) +``` + +See `references/export-pipeline.md` § Platform Export. + +### Color Mode — Use HSB + +HSB (Hue, Saturation, Brightness) is dramatically easier to work with than RGB for generative art: + +```javascript +colorMode(HSB, 360, 100, 100, 100); +// Now: fill(hue, sat, bri, alpha) +// Rotate hue: fill((baseHue + offset) % 360, 80, 90) +// Desaturate: fill(hue, sat * 0.3, bri) +// Darken: fill(hue, sat, bri * 0.5) +``` + +Never hardcode raw RGB values. Define a palette object, derive variations procedurally. See `references/color-systems.md`. + +### Noise — Multi-Octave, Not Raw + +Raw `noise(x, y)` looks like smooth blobs. Layer octaves for natural texture: + +```javascript +function fbm(x, y, octaves = 4) { + let val = 0, amp = 1, freq = 1, sum = 0; + for (let i = 0; i < octaves; i++) { + val += noise(x * freq, y * freq) * amp; + sum += amp; + amp *= 0.5; + freq *= 2; + } + return val / sum; +} +``` + +For flowing organic forms, use **domain warping**: feed noise output back as noise input coordinates. See `references/visual-effects.md`. + +### createGraphics() for Layers — Not Optional + +Flat single-pass rendering looks flat. 
Use offscreen buffers for composition: + +```javascript +let bgLayer, fgLayer, trailLayer; +function setup() { + createCanvas(1920, 1080); + bgLayer = createGraphics(width, height); + fgLayer = createGraphics(width, height); + trailLayer = createGraphics(width, height); +} +function draw() { + renderBackground(bgLayer); + renderTrails(trailLayer); // persistent, fading + renderForeground(fgLayer); // cleared each frame + image(bgLayer, 0, 0); + image(trailLayer, 0, 0); + image(fgLayer, 0, 0); +} +``` + +### Performance — Vectorize Where Possible + +p5.js draw calls are expensive. For thousands of particles: + +```javascript +// SLOW: individual shapes +for (let p of particles) { + ellipse(p.x, p.y, p.size); +} + +// FAST: single shape with beginShape() +beginShape(POINTS); +for (let p of particles) { + vertex(p.x, p.y); +} +endShape(); + +// FASTEST: pixel buffer for massive counts +loadPixels(); +for (let p of particles) { + let idx = 4 * (floor(p.y) * width + floor(p.x)); + pixels[idx] = r; pixels[idx+1] = g; pixels[idx+2] = b; pixels[idx+3] = 255; +} +updatePixels(); +``` + +See `references/troubleshooting.md` § Performance. + +### Instance Mode for Multiple Sketches + +Global mode pollutes `window`. For production, use instance mode: + +```javascript +const sketch = (p) => { + p.setup = function() { + p.createCanvas(800, 800); + }; + p.draw = function() { + p.background(0); + p.ellipse(p.mouseX, p.mouseY, 50); + }; +}; +new p5(sketch, 'canvas-container'); +``` + +Required when embedding multiple sketches on one page or integrating with frameworks. 
+ +### WebGL Mode Gotchas + +- `createCanvas(w, h, WEBGL)` — origin is center, not top-left +- Y-axis is inverted (positive Y goes up in WEBGL, down in P2D) +- `translate(-width/2, -height/2)` to get P2D-like coordinates +- `push()`/`pop()` around every transform — matrix stack overflows silently +- `texture()` before `rect()`/`plane()` — not after +- Custom shaders: `createShader(vert, frag)` — test on multiple browsers + +### Export — Key Bindings Convention + +Every sketch should include these in `keyPressed()`: + +```javascript +function keyPressed() { + if (key === 's' || key === 'S') saveCanvas('output', 'png'); + if (key === 'g' || key === 'G') saveGif('output', 5); + if (key === 'r' || key === 'R') { randomSeed(millis()); noiseSeed(millis()); } + if (key === ' ') CONFIG.paused = !CONFIG.paused; +} +``` + +### Headless Video Export — Use noLoop() + +For headless rendering via Puppeteer, the sketch **must** use `noLoop()` in setup. Without it, p5's draw loop runs freely while screenshots are slow — the sketch races ahead and you get skipped/duplicate frames. + +```javascript +function setup() { + createCanvas(1920, 1080); + pixelDensity(1); + noLoop(); // capture script controls frame advance + window._p5Ready = true; // signal readiness to capture script +} +``` + +The bundled `scripts/export-frames.js` detects `_p5Ready` and calls `redraw()` once per capture for exact 1:1 frame correspondence. See `references/export-pipeline.md` § Deterministic Capture. + +For multi-scene videos, use the per-clip architecture: one HTML per scene, render independently, stitch with `ffmpeg -f concat`. See `references/export-pipeline.md` § Per-Clip Architecture. + +### Agent Workflow + +When building p5.js sketches: + +1. **Write the HTML file** — single self-contained file, all code inline +2. **Open in browser** — `open sketch.html` (macOS) or `xdg-open sketch.html` (Linux) +3. 
**Local assets** (fonts, images) require a server: `python3 -m http.server 8080` in the project directory, then open `http://localhost:8080/sketch.html` +4. **Export PNG/GIF** — add `keyPressed()` shortcuts as shown above, tell the user which key to press +5. **Headless export** — `node scripts/export-frames.js sketch.html --frames 300` for automated frame capture (sketch must use `noLoop()` + `_p5Ready`) +6. **MP4 rendering** — `bash scripts/render.sh sketch.html output.mp4 --duration 30` +7. **Iterative refinement** — edit the HTML file, user refreshes browser to see changes +8. **Load references on demand** — use `skill_view(name="p5js", file_path="references/...")` to load specific reference files as needed during implementation + +## Performance Targets + +| Metric | Target | +|--------|--------| +| Frame rate (interactive) | 60fps sustained | +| Frame rate (animated export) | 30fps minimum | +| Particle count (P2D shapes) | 5,000-10,000 at 60fps | +| Particle count (pixel buffer) | 50,000-100,000 at 60fps | +| Canvas resolution | Up to 3840x2160 (export), 1920x1080 (interactive) | +| File size (HTML) | < 100KB (excluding CDN libraries) | +| Load time | < 2s to first frame | + +## References + +| File | Contents | +|------|----------| +| `references/core-api.md` | Canvas setup, coordinate system, draw loop, `push()`/`pop()`, offscreen buffers, composition patterns, `pixelDensity()`, responsive design | +| `references/shapes-and-geometry.md` | 2D primitives, `beginShape()`/`endShape()`, Bezier/Catmull-Rom curves, `vertex()` systems, custom shapes, `p5.Vector`, signed distance fields, SVG path conversion | +| `references/visual-effects.md` | Noise (Perlin, fractal, domain warp, curl), flow fields, particle systems (physics, flocking, trails), pixel manipulation, texture generation (stipple, hatch, halftone), feedback loops, reaction-diffusion | +| `references/animation.md` | Frame-based animation, easing functions, `lerp()`/`map()`, spring physics, state 
machines, timeline sequencing, `millis()`-based timing, transition patterns | +| `references/typography.md` | `text()`, `loadFont()`, `textToPoints()`, kinetic typography, text masks, font metrics, responsive text sizing | +| `references/color-systems.md` | `colorMode()`, HSB/HSL/RGB, `lerpColor()`, `paletteLerp()`, procedural palettes, color harmony, `blendMode()`, gradient rendering, curated palette library | +| `references/webgl-and-3d.md` | WEBGL renderer, 3D primitives, camera, lighting, materials, custom geometry, GLSL shaders (`createShader()`, `createFilterShader()`), framebuffers, post-processing | +| `references/interaction.md` | Mouse events, keyboard state, touch input, DOM elements, `createSlider()`/`createButton()`, audio input (p5.sound FFT/amplitude), scroll-driven animation, responsive events | +| `references/export-pipeline.md` | `saveCanvas()`, `saveGif()`, `saveFrames()`, deterministic headless capture, ffmpeg frame-to-video, CCapture.js, SVG export, per-clip architecture, platform export (fxhash), video gotchas | +| `references/troubleshooting.md` | Performance profiling, per-pixel budgets, common mistakes, browser compatibility, WebGL debugging, font loading issues, pixel density traps, memory leaks, CORS | +| `templates/viewer.html` | Interactive viewer template: seed navigation (prev/next/random/jump), parameter sliders, download PNG, responsive canvas. Start from this for explorable generative art | + +--- + +## Creative Divergence (use only when user requests experimental/creative/unique output) + +If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code. 
+ +- **Conceptual Blending** — when the user names two things to combine or wants hybrid aesthetics +- **SCAMPER** — when the user wants a twist on a known generative art pattern +- **Distance Association** — when the user gives a single concept and wants exploration ("make something about time") + +### Conceptual Blending +1. Name two distinct visual systems (e.g., particle physics + handwriting) +2. Map correspondences (particles = ink drops, forces = pen pressure, fields = letterforms) +3. Blend selectively — keep mappings that produce interesting emergent visuals +4. Code the blend as a unified system, not two systems side-by-side + +### SCAMPER Transformation +Take a known generative pattern (flow field, particle system, L-system, cellular automata) and systematically transform it: +- **Substitute**: replace circles with text characters, lines with gradients +- **Combine**: merge two patterns (flow field + voronoi) +- **Adapt**: apply a 2D pattern to a 3D projection +- **Modify**: exaggerate scale, warp the coordinate space +- **Purpose**: use a physics sim for typography, a sorting algorithm for color +- **Eliminate**: remove the grid, remove color, remove symmetry +- **Reverse**: run the simulation backward, invert the parameter space + +### Distance Association +1. Anchor on the user's concept (e.g., "loneliness") +2. Generate associations at three distances: + - Close (obvious): empty room, single figure, silence + - Medium (interesting): one fish in a school swimming the wrong way, a phone with no notifications, the gap between subway cars + - Far (abstract): prime numbers, asymptotic curves, the color of 3am +3. 
Develop the medium-distance associations — they're specific enough to visualize but unexpected enough to be interesting diff --git a/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md b/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md new file mode 100644 index 0000000000..beecb38f08 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md @@ -0,0 +1,232 @@ +--- +title: "Pixel Art — Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc" +sidebar_label: "Pixel Art" +description: "Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Pixel Art + +Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.), and animate them into short videos. Presets cover arcade, SNES, and 10+ era-correct looks. Use `clarify` to let the user pick a style before generating. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/creative/pixel-art` | +| Version | `2.0.0` | +| Author | dodo-reach | +| License | MIT | +| Tags | `creative`, `pixel-art`, `arcade`, `snes`, `nes`, `gameboy`, `retro`, `image`, `video` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Pixel Art + +Convert any image into retro pixel art, then optionally animate it into a short +MP4 or GIF with era-appropriate effects (rain, fireflies, snow, embers). 
+ +Two scripts ship with this skill: + +- `scripts/pixel_art.py` — photo → pixel-art PNG (Floyd-Steinberg dithering) +- `scripts/pixel_art_video.py` — pixel-art PNG → animated MP4 (+ optional GIF) + +Each is importable or runnable directly. Presets snap to hardware palettes +when you want era-accurate colors (NES, Game Boy, PICO-8, etc.), or use +adaptive N-color quantization for arcade/SNES-style looks. + +## When to Use + +- User wants retro pixel art from a source image +- User asks for NES / Game Boy / PICO-8 / C64 / arcade / SNES styling +- User wants a short looping animation (rain scene, night sky, snow, etc.) +- Posters, album covers, social posts, sprites, characters, avatars + +## Workflow + +Before generating, confirm the style with the user. Different presets produce +very different outputs and regenerating is costly. + +### Step 1 — Offer a style + +Call `clarify` with 4 representative presets. Pick the set based on what the +user asked for — don't just dump all 14. + +Default menu when the user's intent is unclear: + +```python +clarify( + question="Which pixel-art style do you want?", + choices=[ + "arcade — bold, chunky 80s cabinet feel (16 colors, 8px)", + "nes — Nintendo 8-bit hardware palette (54 colors, 8px)", + "gameboy — 4-shade green Game Boy DMG", + "snes — cleaner 16-bit look (32 colors, 4px)", + ], +) +``` + +When the user already named an era (e.g. "80s arcade", "Gameboy"), skip +`clarify` and use the matching preset directly. + +### Step 2 — Offer animation (optional) + +If the user asked for a video/GIF, or the output might benefit from motion, +ask which scene: + +```python +clarify( + question="Want to animate it? Pick a scene or skip.", + choices=[ + "night — stars + fireflies + leaves", + "urban — rain + neon pulse", + "snow — falling snowflakes", + "skip — just the image", + ], +) +``` + +Do NOT call `clarify` more than twice in a row. One for style, one for scene if +animation is on the table. 
If the user explicitly asked for a specific style +and scene in their message, skip `clarify` entirely. + +### Step 3 — Generate + +Run `pixel_art()` first; if animation was requested, chain into +`pixel_art_video()` on the result. + +## Preset Catalog + +| Preset | Era | Palette | Block | Best for | +|--------|-----|---------|-------|----------| +| `arcade` | 80s arcade | adaptive 16 | 8px | Bold posters, hero art | +| `snes` | 16-bit | adaptive 32 | 4px | Characters, detailed scenes | +| `nes` | 8-bit | NES (54) | 8px | True NES look | +| `gameboy` | DMG handheld | 4 green shades | 8px | Monochrome Game Boy | +| `gameboy_pocket` | Pocket handheld | 4 grey shades | 8px | Mono GB Pocket | +| `pico8` | PICO-8 | 16 fixed | 6px | Fantasy-console look | +| `c64` | Commodore 64 | 16 fixed | 8px | 8-bit home computer | +| `apple2` | Apple II hi-res | 6 fixed | 10px | Extreme retro, 6 colors | +| `teletext` | BBC Teletext | 8 pure | 10px | Chunky primary colors | +| `mspaint` | Windows MS Paint | 24 fixed | 8px | Nostalgic desktop | +| `mono_green` | CRT phosphor | 2 green | 6px | Terminal/CRT aesthetic | +| `mono_amber` | CRT amber | 2 amber | 6px | Amber monitor look | +| `neon` | Cyberpunk | 10 neons | 6px | Vaporwave/cyber | +| `pastel` | Soft pastel | 10 pastels | 6px | Kawaii / gentle | + +Named palettes live in `scripts/palettes.py` (see `references/palettes.md` for +the complete list — 28 named palettes total). 
Any preset can be overridden: + +```python +pixel_art("in.png", "out.png", preset="snes", palette="PICO_8", block=6) +``` + +## Scene Catalog (for video) + +| Scene | Effects | +|-------|---------| +| `night` | Twinkling stars + fireflies + drifting leaves | +| `dusk` | Fireflies + sparkles | +| `tavern` | Dust motes + warm sparkles | +| `indoor` | Dust motes | +| `urban` | Rain + neon pulse | +| `nature` | Leaves + fireflies | +| `magic` | Sparkles + fireflies | +| `storm` | Rain + lightning | +| `underwater` | Bubbles + light sparkles | +| `fire` | Embers + sparkles | +| `snow` | Snowflakes + sparkles | +| `desert` | Heat shimmer + dust | + +## Invocation Patterns + +### Python (import) + +```python +import sys +sys.path.insert(0, "/home/teknium/.hermes/skills/creative/pixel-art/scripts") +from pixel_art import pixel_art +from pixel_art_video import pixel_art_video + +# 1. Convert to pixel art +pixel_art("/path/to/photo.jpg", "/tmp/pixel.png", preset="nes") + +# 2. Animate (optional) +pixel_art_video( + "/tmp/pixel.png", + "/tmp/pixel.mp4", + scene="night", + duration=6, + fps=15, + seed=42, + export_gif=True, +) +``` + +### CLI + +```bash +cd /home/teknium/.hermes/skills/creative/pixel-art/scripts + +python pixel_art.py in.jpg out.png --preset gameboy +python pixel_art.py in.jpg out.png --preset snes --palette PICO_8 --block 6 + +python pixel_art_video.py out.png out.mp4 --scene night --duration 6 --gif +``` + +## Pipeline Rationale + +**Pixel conversion:** +1. Boost contrast/color/sharpness (stronger for smaller palettes) +2. Posterize to simplify tonal regions before quantization +3. Downscale by `block` with `Image.NEAREST` (hard pixels, no interpolation) +4. Quantize with Floyd-Steinberg dithering — against either an adaptive + N-color palette OR a named hardware palette +5. Upscale back with `Image.NEAREST` + +Quantizing AFTER downscale keeps dithering aligned with the final pixel grid. 
+Quantizing before would waste error-diffusion on detail that disappears. + +**Video overlay:** +- Copies the base frame each tick (static background) +- Overlays stateless-per-frame particle draws (one function per effect) +- Encodes via ffmpeg `libx264 -pix_fmt yuv420p -crf 18` +- Optional GIF via `palettegen` + `paletteuse` + +## Dependencies + +- Python 3.9+ +- Pillow (`pip install Pillow`) +- ffmpeg on PATH (only needed for video — Hermes installs this package) + +## Pitfalls + +- Palette keys are case-sensitive (`"NES"`, `"PICO_8"`, `"GAMEBOY_ORIGINAL"`). +- Very small sources (<100px wide) collapse under 8-10px blocks. Upscale the + source first if it's tiny. +- Fractional `block` or `palette` will break quantization — keep them positive ints. +- Animation particle counts are tuned for ~640x480 canvases. On very large + images you may want a second pass with a different seed for density. +- `mono_green` / `mono_amber` force `color=0.0` (desaturate). If you override + and keep chroma, the 2-color palette can produce stripes on smooth regions. +- `clarify` loop: call it at most twice per turn (style, then scene). Don't + pepper the user with more picks. + +## Verification + +- PNG is created at the output path +- Clear square pixel blocks visible at the preset's block size +- Color count matches preset (eyeball the image or run `Image.open(p).getcolors()`) +- Video is a valid MP4 (`ffprobe` can open it) with non-zero size + +## Attribution + +Named hardware palettes and the procedural animation loops in `pixel_art_video.py` +are ported from [pixel-art-studio](https://github.com/Synero/pixel-art-studio) +(MIT). See `ATTRIBUTION.md` in this skill directory for details.
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md b/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md new file mode 100644 index 0000000000..838a1c1799 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md @@ -0,0 +1,212 @@ +--- +title: "Popular Web Designs — 54 production-quality design systems extracted from real websites" +sidebar_label: "Popular Web Designs" +description: "54 production-quality design systems extracted from real websites" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Popular Web Designs + +54 production-quality design systems extracted from real websites. Load a template to generate HTML/CSS that matches the visual identity of sites like Stripe, Linear, Vercel, Notion, Airbnb, and more. Each template includes colors, typography, components, layout rules, and ready-to-use CSS values. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/creative/popular-web-designs` | +| Version | `1.0.0` | +| Author | Hermes Agent + Teknium (design systems sourced from VoltAgent/awesome-design-md) | +| License | MIT | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Popular Web Designs + +54 real-world design systems ready for use when generating HTML/CSS. Each template captures a +site's complete visual language: color palette, typography hierarchy, component styles, spacing +system, shadows, responsive behavior, and practical agent prompts with exact CSS values. + +## How to Use + +1. Pick a design from the catalog below +2. Load it: `skill_view(name="popular-web-designs", file_path="templates/.md")` +3. 
Use the design tokens and component specs when generating HTML +4. Pair with the `generative-widgets` skill to serve the result via cloudflared tunnel + +Each template includes a **Hermes Implementation Notes** block at the top with: +- CDN font substitute and Google Fonts `` tag (ready to paste) +- CSS font-family stacks for primary and monospace +- Reminders to use `write_file` for HTML creation and `browser_vision` for verification + +## HTML Generation Pattern + +```html + + + + + + Page Title + + + + + + + + +``` + +Write the file with `write_file`, serve with the `generative-widgets` workflow (cloudflared tunnel), +and verify the result with `browser_vision` to confirm visual accuracy. + +## Font Substitution Reference + +Most sites use proprietary fonts unavailable via CDN. Each template maps to a Google Fonts +substitute that preserves the design's character. Common mappings: + +| Proprietary Font | CDN Substitute | Character | +|---|---|---| +| Geist / Geist Sans | Geist (on Google Fonts) | Geometric, compressed tracking | +| Geist Mono | Geist Mono (on Google Fonts) | Clean monospace, ligatures | +| sohne-var (Stripe) | Source Sans 3 | Light weight elegance | +| Berkeley Mono | JetBrains Mono | Technical monospace | +| Airbnb Cereal VF | DM Sans | Rounded, friendly geometric | +| Circular (Spotify) | DM Sans | Geometric, warm | +| figmaSans | Inter | Clean humanist | +| Pin Sans (Pinterest) | DM Sans | Friendly, rounded | +| NVIDIA-EMEA | Inter (or Arial system) | Industrial, clean | +| CoinbaseDisplay/Sans | DM Sans | Geometric, trustworthy | +| UberMove | DM Sans | Bold, tight | +| HashiCorp Sans | Inter | Enterprise, neutral | +| waldenburgNormal (Sanity) | Space Grotesk | Geometric, slightly condensed | +| IBM Plex Sans/Mono | IBM Plex Sans/Mono | Available on Google Fonts | +| Rubik (Sentry) | Rubik | Available on Google Fonts | + +When a template's CDN font matches the original (Inter, IBM Plex, Rubik, Geist), no +substitution loss occurs. 
When a substitute is used (DM Sans for Circular, Source Sans 3 +for sohne-var), follow the template's weight, size, and letter-spacing values closely — +those carry more visual identity than the specific font face. + +## Design Catalog + +### AI & Machine Learning + +| Template | Site | Style | +|---|---|---| +| `claude.md` | Anthropic Claude | Warm terracotta accent, clean editorial layout | +| `cohere.md` | Cohere | Vibrant gradients, data-rich dashboard aesthetic | +| `elevenlabs.md` | ElevenLabs | Dark cinematic UI, audio-waveform aesthetics | +| `minimax.md` | Minimax | Bold dark interface with neon accents | +| `mistral.ai.md` | Mistral AI | French-engineered minimalism, purple-toned | +| `ollama.md` | Ollama | Terminal-first, monochrome simplicity | +| `opencode.ai.md` | OpenCode AI | Developer-centric dark theme, full monospace | +| `replicate.md` | Replicate | Clean white canvas, code-forward | +| `runwayml.md` | RunwayML | Cinematic dark UI, media-rich layout | +| `together.ai.md` | Together AI | Technical, blueprint-style design | +| `voltagent.md` | VoltAgent | Void-black canvas, emerald accent, terminal-native | +| `x.ai.md` | xAI | Stark monochrome, futuristic minimalism, full monospace | + +### Developer Tools & Platforms + +| Template | Site | Style | +|---|---|---| +| `cursor.md` | Cursor | Sleek dark interface, gradient accents | +| `expo.md` | Expo | Dark theme, tight letter-spacing, code-centric | +| `linear.app.md` | Linear | Ultra-minimal dark-mode, precise, purple accent | +| `lovable.md` | Lovable | Playful gradients, friendly dev aesthetic | +| `mintlify.md` | Mintlify | Clean, green-accented, reading-optimized | +| `posthog.md` | PostHog | Playful branding, developer-friendly dark UI | +| `raycast.md` | Raycast | Sleek dark chrome, vibrant gradient accents | +| `resend.md` | Resend | Minimal dark theme, monospace accents | +| `sentry.md` | Sentry | Dark dashboard, data-dense, pink-purple accent | +| `supabase.md` | Supabase | Dark emerald 
theme, code-first developer tool | +| `superhuman.md` | Superhuman | Premium dark UI, keyboard-first, purple glow | +| `vercel.md` | Vercel | Black and white precision, Geist font system | +| `warp.md` | Warp | Dark IDE-like interface, block-based command UI | +| `zapier.md` | Zapier | Warm orange, friendly illustration-driven | + +### Infrastructure & Cloud + +| Template | Site | Style | +|---|---|---| +| `clickhouse.md` | ClickHouse | Yellow-accented, technical documentation style | +| `composio.md` | Composio | Modern dark with colorful integration icons | +| `hashicorp.md` | HashiCorp | Enterprise-clean, black and white | +| `mongodb.md` | MongoDB | Green leaf branding, developer documentation focus | +| `sanity.md` | Sanity | Red accent, content-first editorial layout | +| `stripe.md` | Stripe | Signature purple gradients, weight-300 elegance | + +### Design & Productivity + +| Template | Site | Style | +|---|---|---| +| `airtable.md` | Airtable | Colorful, friendly, structured data aesthetic | +| `cal.md` | Cal.com | Clean neutral UI, developer-oriented simplicity | +| `clay.md` | Clay | Organic shapes, soft gradients, art-directed layout | +| `figma.md` | Figma | Vibrant multi-color, playful yet professional | +| `framer.md` | Framer | Bold black and blue, motion-first, design-forward | +| `intercom.md` | Intercom | Friendly blue palette, conversational UI patterns | +| `miro.md` | Miro | Bright yellow accent, infinite canvas aesthetic | +| `notion.md` | Notion | Warm minimalism, serif headings, soft surfaces | +| `pinterest.md` | Pinterest | Red accent, masonry grid, image-first layout | +| `webflow.md` | Webflow | Blue-accented, polished marketing site aesthetic | + +### Fintech & Crypto + +| Template | Site | Style | +|---|---|---| +| `coinbase.md` | Coinbase | Clean blue identity, trust-focused, institutional feel | +| `kraken.md` | Kraken | Purple-accented dark UI, data-dense dashboards | +| `revolut.md` | Revolut | Sleek dark interface, gradient cards, 
fintech precision | +| `wise.md` | Wise | Bright green accent, friendly and clear | + +### Enterprise & Consumer + +| Template | Site | Style | +|---|---|---| +| `airbnb.md` | Airbnb | Warm coral accent, photography-driven, rounded UI | +| `apple.md` | Apple | Premium white space, SF Pro, cinematic imagery | +| `bmw.md` | BMW | Dark premium surfaces, precise engineering aesthetic | +| `ibm.md` | IBM | Carbon design system, structured blue palette | +| `nvidia.md` | NVIDIA | Green-black energy, technical power aesthetic | +| `spacex.md` | SpaceX | Stark black and white, full-bleed imagery, futuristic | +| `spotify.md` | Spotify | Vibrant green on dark, bold type, album-art-driven | +| `uber.md` | Uber | Bold black and white, tight type, urban energy | + +## Choosing a Design + +Match the design to the content: + +- **Developer tools / dashboards:** Linear, Vercel, Supabase, Raycast, Sentry +- **Documentation / content sites:** Mintlify, Notion, Sanity, MongoDB +- **Marketing / landing pages:** Stripe, Framer, Apple, SpaceX +- **Dark mode UIs:** Linear, Cursor, ElevenLabs, Warp, Superhuman +- **Light / clean UIs:** Vercel, Stripe, Notion, Cal.com, Replicate +- **Playful / friendly:** PostHog, Figma, Lovable, Zapier, Miro +- **Premium / luxury:** Apple, BMW, Stripe, Superhuman, Revolut +- **Data-dense / dashboards:** Sentry, Kraken, Cohere, ClickHouse +- **Monospace / terminal aesthetic:** Ollama, OpenCode, x.ai, VoltAgent diff --git a/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md b/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md new file mode 100644 index 0000000000..cd0b7fb148 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md @@ -0,0 +1,297 @@ +--- +title: "Songwriting And Ai Music" +sidebar_label: "Songwriting And Ai Music" +description: "Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic 
tricks, and lessons learned" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Songwriting And Ai Music + +Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic tricks, and lessons learned. These are tools and ideas, not rules. Break any of them when the art calls for it. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/creative/songwriting-and-ai-music` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Songwriting & AI Music Generation + +Everything here is a GUIDELINE, not a rule. Art breaks rules on purpose. +Use what serves the song. Ignore what doesn't. + +--- + +## 1. Song Structure (Pick One or Invent Your Own) + +Common skeletons — mix, modify, or throw out as needed: + +``` +ABABCB Verse/Chorus/Verse/Chorus/Bridge/Chorus (most pop/rock) +AABA Verse/Verse/Bridge/Verse (refrain-based) (jazz standards, ballads) +ABAB Verse/Chorus alternating (simple, direct) +AAA Verse/Verse/Verse (strophic, no chorus) (folk, storytelling) +``` + +The six building blocks: +- Intro — set the mood, pull the listener in +- Verse — the story, the details, the world-building +- Pre-Chorus — optional tension ramp before the payoff +- Chorus — the emotional core, the part people remember +- Bridge — a detour, a shift in perspective or key +- Outro — the farewell, can echo or subvert the rest + +You don't need all of these. Some great songs are just one section +that evolves. Structure serves the emotion, not the other way around. + +--- + +## 2. 
Rhyme, Meter, and Sound + +RHYME TYPES (from tight to loose): +- Perfect: lean/mean +- Family: crate/braid +- Assonance: had/glass (same vowels, different endings) +- Consonance: scene/when (different vowels, similar endings) +- Near/slant: enough to suggest connection without locking it down + +Mix them. All perfect rhymes can sound like a nursery rhyme. +All slant rhymes can sound lazy. The blend is where it lives. + +INTERNAL RHYME: Rhyming within a line, not just at the ends. + "We pruned the lies from bleeding trees / Distilled the storm + from entropy" — "lies/flies," "trees/entropy" create internal echoes. + +METER: The rhythm of stressed vs unstressed syllables. +- Matching syllable counts between parallel lines helps singability +- The STRESSED syllables matter more than total count +- Say it out loud. If you stumble, the meter needs work. +- Intentionally breaking meter can create emphasis or surprise + +--- + +## 3. Emotional Arc and Dynamics + +Think of a song as a journey, not a flat road. + +ENERGY MAPPING (rough idea, not prescription): + Intro: 2-3 | Verse: 5-6 | Pre-Chorus: 7 + Chorus: 8-9 | Bridge: varies | Final Chorus: 9-10 + +The most powerful dynamic trick: CONTRAST. +- Whisper before a scream hits harder than just screaming +- Sparse before dense. Slow before fast. Low before high. +- The drop only works because of the buildup +- Silence is an instrument + +"Whisper to roar to whisper" — start intimate, build to full power, +strip back to vulnerability. Works for ballads, epics, anthems. + +--- + +## 4. 
Writing Lyrics That Work + +SHOW, DON'T TELL (usually): +- "I was sad" = flat +- "Your hoodie's still on the hook by the door" = alive +- But sometimes "I give my life" said plainly IS the power + +THE HOOK: +- The line people remember, hum, repeat +- Usually the title or core phrase +- Works best when melody + lyric + emotion all align +- Place it where it lands hardest (often first/last line of chorus) + +PROSODY — lyrics and music supporting each other: +- Stable feelings (resolution, peace) pair with settled melodies, + perfect rhymes, resolved chords +- Unstable feelings (longing, doubt) pair with wandering melodies, + near-rhymes, unresolved chords +- Verse melody typically sits lower, chorus goes higher +- But flip this if it serves the song + +AVOID (unless you're doing it on purpose): +- Cliches on autopilot ("heart of gold" without earning it) +- Forcing word order to hit a rhyme ("Yoda-speak") +- Same energy in every section (flat dynamics) +- Treating your first draft as sacred — revision is creation + +--- + +## 5. Parody and Adaptation + +When rewriting an existing song with new lyrics: + +THE SKELETON: Map the original's structure first. +- Count syllables per line +- Mark the rhyme scheme (ABAB, AABB, etc.) 
+- Identify which syllables are STRESSED +- Note where held/sustained notes fall + +FITTING NEW WORDS: +- Match stressed syllables to the same beats as the original +- Total syllable count can flex by 1-2 unstressed syllables +- On long held notes, try to match the VOWEL SOUND of the original + (if original holds "LOOOVE" with an "oo" vowel, "FOOOD" fits + better than "LIFE") +- Monosyllabic swaps in key spots keep rhythm intact + (Crime -> Code, Snake -> Noose) +- Sing your new words over the original — if you stumble, revise + +CONCEPT: +- Pick a concept strong enough to sustain the whole song +- Start from the title/hook and build outward +- Generate lots of raw material (puns, phrases, images) FIRST, + then fit the best ones into the structure +- If you need a specific line somewhere, reverse-engineer the + rhyme scheme backward to set it up + +KEEP SOME ORIGINALS: Leaving a few original lines or structures +intact adds recognizability and lets the audience feel the connection. + +--- + +## 6. Suno AI Prompt Engineering + +### Style/Genre Description Field + +FORMULA (adapt as needed): + Genre + Mood + Era + Instruments + Vocal Style + Production + Dynamics + +``` +BAD: "sad rock song" +GOOD: "Cinematic orchestral spy thriller, 1960s Cold War era, smoky + sultry female vocalist, big band jazz, brass section with + trumpets and french horns, sweeping strings, minor key, + vintage analog warmth" +``` + +DESCRIBE THE JOURNEY, not just the genre: +``` +"Begins as a haunting whisper over sparse piano. Gradually layers + in muted brass. Builds through the chorus with full orchestra. + Second verse erupts with raw belting intensity. Outro strips back + to a lone piano and a fragile whisper fading to silence." +``` + +TIPS: +- V4.5+ supports up to 1,000 chars in Style field — use them +- NO artist names or trademarks. Describe the sound instead. 
+ "1960s Cold War spy thriller brass" not "James Bond style" + "90s grunge" not "Nirvana-style" +- Specify BPM and key when you have a preference +- Use Exclude Styles field for what you DON'T want +- Unexpected genre combos can be gold: "bossa nova trap", + "Appalachian gothic", "chiptune jazz" +- Build a vocal PERSONA, not just a gender: + "A weathered torch singer with a smoky alto, slight rasp, + who starts vulnerable and builds to devastating power" + +### Metatags (place in [brackets] inside lyrics field) + +STRUCTURE: + [Intro] [Verse] [Verse 1] [Pre-Chorus] [Chorus] + [Post-Chorus] [Hook] [Bridge] [Interlude] + [Instrumental] [Instrumental Break] [Guitar Solo] + [Breakdown] [Build-up] [Outro] [Silence] [End] + +VOCAL PERFORMANCE: + [Whispered] [Spoken Word] [Belted] [Falsetto] [Powerful] + [Soulful] [Raspy] [Breathy] [Smooth] [Gritty] + [Staccato] [Legato] [Vibrato] [Melismatic] + [Harmonies] [Choir] [Harmonized Chorus] + +DYNAMICS: + [High Energy] [Low Energy] [Building Energy] [Explosive] + [Emotional Climax] [Gradual swell] [Orchestral swell] + [Quiet arrangement] [Falling tension] [Slow Down] + +GENDER: + [Female Vocals] [Male Vocals] + +ATMOSPHERE: + [Melancholic] [Euphoric] [Nostalgic] [Aggressive] + [Dreamy] [Intimate] [Dark Atmosphere] + +SFX: + [Vinyl Crackle] [Rain] [Applause] [Static] [Thunder] + +Put tags in BOTH style field AND lyrics for reinforcement. +Keep to 5-8 tags per section max — too many confuses the AI. +Don't contradict yourself ([Calm] + [Aggressive] in same section). + +### Custom Mode +- Always use Custom Mode for serious work (separate Style + Lyrics) +- Lyrics field limit: ~3,000 chars (~40-60 lines) +- Always add structural tags — without them Suno defaults to + flat verse/chorus/verse with no emotional arc + +--- + +## 7. Phonetic Tricks for AI Singers + +AI vocalists don't read — they pronounce. 
Help them: + +PHONETIC RESPELLING: +- Spell words as they SOUND: "through" -> "thru" +- Proper nouns are highest failure rate — test early +- "Nous" -> "Noose" (forces correct pronunciation) +- Hyphenate to guide syllables: "Re-search", "bio-engineering" + +DELIVERY CONTROL: +- ALL CAPS = louder, more intense +- Vowel extension: "lo-o-o-ove" = sustained/melisma +- Ellipses: "I... need... you" = dramatic pauses +- Hyphenated stretch: "ne-e-ed" = emotional stretch + +ALWAYS: +- Spell out numbers: "24/7" -> "twenty four seven" +- Space acronyms: "AI" -> "A I" or "A-I" +- Test proper nouns/unusual words in a short 30-second clip first +- Once generated, pronunciation is baked in — fix in lyrics BEFORE + +--- + +## 8. Workflow + +1. Write the concept/hook first — what's the emotional core? +2. If adapting, map the original structure (syllables, rhyme, stress) +3. Generate raw material — brainstorm freely before structuring +4. Draft lyrics into the structure +5. Read/sing aloud — catch stumbles, fix meter +6. Build the Suno style description — paint the dynamic journey +7. Add metatags to lyrics for performance direction +8. Generate 3-5 variations minimum — treat them like recording takes +9. Pick the best, use Extend/Continue to build on promising sections +10. If something great happens by accident, keep it + +EXPECT: ~3-5 generations per 1 good result. Revision is normal. +Style can drift in extensions — restate genre/mood when extending. + +--- + +## 9. Lessons Learned + +- Describing the dynamic ARC in the style field matters way more + than just listing genres. "Whisper to roar to whisper" gives + Suno a performance map. +- Keeping some original lines intact in a parody adds recognizability + and emotional weight — the audience feels the ghost of the original. +- The bridge slot in a song is where you can transform imagery. + Swap the original's specific references for your theme's metaphors + while keeping the emotional function (reflection, shift, revelation). 
+- Monosyllabic word swaps in hooks/tags are the cleanest way to + maintain rhythm while changing meaning. +- A strong vocal persona description in the style field makes a + bigger difference than any single metatag. +- Don't be precious about rules. If a line breaks meter but hits + harder, keep it. The feeling is what matters. Craft serves art, + not the other way around. diff --git a/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md b/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md new file mode 100644 index 0000000000..027156ccdd --- /dev/null +++ b/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md @@ -0,0 +1,183 @@ +--- +title: "Jupyter Live Kernel — Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb" +sidebar_label: "Jupyter Live Kernel" +description: "Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Jupyter Live Kernel + +Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results — data science, ML experimentation, API exploration, or building up complex code step-by-step. Uses terminal to run CLI commands against a live Jupyter kernel. No new tools required. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/data-science/jupyter-live-kernel` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `jupyter`, `notebook`, `repl`, `data-science`, `exploration`, `iterative` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. 
This is what the agent sees as instructions when the skill is active. +::: + +# Jupyter Live Kernel (hamelnb) + +Gives you a **stateful Python REPL** via a live Jupyter kernel. Variables persist +across executions. Use this instead of `execute_code` when you need to build up +state incrementally, explore APIs, inspect DataFrames, or iterate on complex code. + +## When to Use This vs Other Tools + +| Tool | Use When | +|------|----------| +| **This skill** | Iterative exploration, state across steps, data science, ML, "let me try this and check" | +| `execute_code` | One-shot scripts needing hermes tool access (web_search, file ops). Stateless. | +| `terminal` | Shell commands, builds, installs, git, process management | + +**Rule of thumb:** If you'd want a Jupyter notebook for the task, use this skill. + +## Prerequisites + +1. **uv** must be installed (check: `which uv`) +2. **JupyterLab** must be installed: `uv tool install jupyterlab` +3. A Jupyter server must be running (see Setup below) + +## Setup + +The hamelnb script location: +``` +SCRIPT="$HOME/.agent-skills/hamelnb/skills/jupyter-live-kernel/scripts/jupyter_live_kernel.py" +``` + +If not cloned yet: +``` +git clone https://github.com/hamelsmu/hamelnb.git ~/.agent-skills/hamelnb +``` + +### Starting JupyterLab + +Check if a server is already running: +``` +uv run "$SCRIPT" servers +``` + +If no servers found, start one: +``` +jupyter-lab --no-browser --port=8888 --notebook-dir=$HOME/notebooks \ + --IdentityProvider.token='' --ServerApp.password='' > /tmp/jupyter.log 2>&1 & +sleep 3 +``` + +Note: Token/password disabled for local agent access. The server runs headless. 
+ +### Creating a Notebook for REPL Use + +If you just need a REPL (no existing notebook), create a minimal notebook file: +``` +mkdir -p ~/notebooks +``` +Write a minimal .ipynb JSON file with one empty code cell, then start a kernel +session via the Jupyter REST API: +``` +curl -s -X POST http://127.0.0.1:8888/api/sessions \ + -H "Content-Type: application/json" \ + -d '{"path":"scratch.ipynb","type":"notebook","name":"scratch.ipynb","kernel":{"name":"python3"}}' +``` + +## Core Workflow + +All commands return structured JSON. Always use `--compact` to save tokens. + +### 1. Discover servers and notebooks + +``` +uv run "$SCRIPT" servers --compact +uv run "$SCRIPT" notebooks --compact +``` + +### 2. Execute code (primary operation) + +``` +uv run "$SCRIPT" execute --path --code '' --compact +``` + +State persists across execute calls. Variables, imports, objects all survive. + +Multi-line code works with $'...' quoting: +``` +uv run "$SCRIPT" execute --path scratch.ipynb --code $'import os\nfiles = os.listdir(".")\nprint(f"Found {len(files)} files")' --compact +``` + +### 3. Inspect live variables + +``` +uv run "$SCRIPT" variables --path list --compact +uv run "$SCRIPT" variables --path preview --name --compact +``` + +### 4. Edit notebook cells + +``` +# View current cells +uv run "$SCRIPT" contents --path --compact + +# Insert a new cell +uv run "$SCRIPT" edit --path insert \ + --at-index --cell-type code --source '' --compact + +# Replace cell source (use cell-id from contents output) +uv run "$SCRIPT" edit --path replace-source \ + --cell-id --source '' --compact + +# Delete a cell +uv run "$SCRIPT" edit --path delete --cell-id --compact +``` + +### 5. Verification (restart + run all) + +Only use when the user asks for a clean verification or you need to confirm +the notebook runs top-to-bottom: + +``` +uv run "$SCRIPT" restart-run-all --path --save-outputs --compact +``` + +## Practical Tips from Experience + +1. 
**First execution after server start may timeout** — the kernel needs a moment + to initialize. If you get a timeout, just retry. + +2. **The kernel Python is JupyterLab's Python** — packages must be installed in + that environment. If you need additional packages, install them into the + JupyterLab tool environment first. + +3. **--compact flag saves significant tokens** — always use it. JSON output can + be very verbose without it. + +4. **For pure REPL use**, create a scratch.ipynb and don't bother with cell editing. + Just use `execute` repeatedly. + +5. **Argument order matters** — subcommand flags like `--path` go BEFORE the + sub-subcommand. E.g.: `variables --path nb.ipynb list` not `variables list --path nb.ipynb`. + +6. **If a session doesn't exist yet**, you need to start one via the REST API + (see Setup section). The tool can't execute without a live kernel session. + +7. **Errors are returned as JSON** with traceback — read the `ename` and `evalue` + fields to understand what went wrong. + +8. **Occasional websocket timeouts** — some operations may timeout on first try, + especially after a kernel restart. Retry once before escalating. + +## Timeout Defaults + +The script has a 30-second default timeout per execution. For long-running +operations, pass `--timeout 120`. Use generous timeouts (60+) for initial +setup or heavy computation. 
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md b/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md new file mode 100644 index 0000000000..8b5b8ade8f --- /dev/null +++ b/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md @@ -0,0 +1,221 @@ +--- +title: "Webhook Subscriptions" +sidebar_label: "Webhook Subscriptions" +description: "Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Webhook Subscriptions + +Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost). Use when the user wants external services to trigger agent runs OR push notifications to chats. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/devops/webhook-subscriptions` | +| Version | `1.1.0` | +| Tags | `webhook`, `events`, `automation`, `integrations`, `notifications`, `push` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Webhook Subscriptions + +Create dynamic webhook subscriptions so external services (GitHub, GitLab, Stripe, CI/CD, IoT sensors, monitoring tools) can trigger Hermes agent runs by POSTing events to a URL. + +## Setup (Required First) + +The webhook platform must be enabled before subscriptions can be created. Check with: +```bash +hermes webhook list +``` + +If it says "Webhook platform is not enabled", set it up: + +### Option 1: Setup wizard +```bash +hermes gateway setup +``` +Follow the prompts to enable webhooks, set the port, and set a global HMAC secret. 
+ +### Option 2: Manual config +Add to `~/.hermes/config.yaml`: +```yaml +platforms: + webhook: + enabled: true + extra: + host: "0.0.0.0" + port: 8644 + secret: "generate-a-strong-secret-here" +``` + +### Option 3: Environment variables +Add to `~/.hermes/.env`: +```bash +WEBHOOK_ENABLED=true +WEBHOOK_PORT=8644 +WEBHOOK_SECRET=generate-a-strong-secret-here +``` + +After configuration, start (or restart) the gateway: +```bash +hermes gateway run +# Or if using systemd: +systemctl --user restart hermes-gateway +``` + +Verify it's running: +```bash +curl http://localhost:8644/health +``` + +## Commands + +All management is via the `hermes webhook` CLI command: + +### Create a subscription +```bash +hermes webhook subscribe \ + --prompt "Prompt template with {payload.fields}" \ + --events "event1,event2" \ + --description "What this does" \ + --skills "skill1,skill2" \ + --deliver telegram \ + --deliver-chat-id "12345" \ + --secret "optional-custom-secret" +``` + +Returns the webhook URL and HMAC secret. The user configures their service to POST to that URL. + +### List subscriptions +```bash +hermes webhook list +``` + +### Remove a subscription +```bash +hermes webhook remove +``` + +### Test a subscription +```bash +hermes webhook test +hermes webhook test --payload '{"key": "value"}' +``` + +## Prompt Templates + +Prompts support `{dot.notation}` for accessing nested payload fields: + +- `{issue.title}` — GitHub issue title +- `{pull_request.user.login}` — PR author +- `{data.object.amount}` — Stripe payment amount +- `{sensor.temperature}` — IoT sensor reading + +If no prompt is specified, the full JSON payload is dumped into the agent prompt. + +## Common Patterns + +### GitHub: new issues +```bash +hermes webhook subscribe github-issues \ + --events "issues" \ + --prompt "New GitHub issue #{issue.number}: {issue.title}\n\nAction: {action}\nAuthor: {issue.user.login}\nBody:\n{issue.body}\n\nPlease triage this issue." 
\ + --deliver telegram \ + --deliver-chat-id "-100123456789" +``` + +Then in GitHub repo Settings → Webhooks → Add webhook: +- Payload URL: the returned webhook_url +- Content type: application/json +- Secret: the returned secret +- Events: "Issues" + +### GitHub: PR reviews +```bash +hermes webhook subscribe github-prs \ + --events "pull_request" \ + --prompt "PR #{pull_request.number} {action}: {pull_request.title}\nBy: {pull_request.user.login}\nBranch: {pull_request.head.ref}\n\n{pull_request.body}" \ + --skills "github-code-review" \ + --deliver github_comment +``` + +### Stripe: payment events +```bash +hermes webhook subscribe stripe-payments \ + --events "payment_intent.succeeded,payment_intent.payment_failed" \ + --prompt "Payment {data.object.status}: {data.object.amount} cents from {data.object.receipt_email}" \ + --deliver telegram \ + --deliver-chat-id "-100123456789" +``` + +### CI/CD: build notifications +```bash +hermes webhook subscribe ci-builds \ + --events "pipeline" \ + --prompt "Build {object_attributes.status} on {project.name} branch {object_attributes.ref}\nCommit: {commit.message}" \ + --deliver discord \ + --deliver-chat-id "1234567890" +``` + +### Generic monitoring alert +```bash +hermes webhook subscribe alerts \ + --prompt "Alert: {alert.name}\nSeverity: {alert.severity}\nMessage: {alert.message}\n\nPlease investigate and suggest remediation." \ + --deliver origin +``` + +### Direct delivery (no agent, zero LLM cost) + +For use cases where you just want to push a notification through to a user's chat — no reasoning, no agent loop — add `--deliver-only`. The rendered `--prompt` template becomes the literal message body and is dispatched directly to the target adapter. 
+ +Use this for: +- External service push notifications (Supabase/Firebase webhooks → Telegram) +- Monitoring alerts that should forward verbatim +- Inter-agent pings where one agent is telling another agent's user something +- Any webhook where an LLM round trip would be wasted effort + +```bash +hermes webhook subscribe antenna-matches \ + --deliver telegram \ + --deliver-chat-id "123456789" \ + --deliver-only \ + --prompt "🎉 New match: {match.user_name} matched with you!" \ + --description "Antenna match notifications" +``` + +The POST returns `200 OK` on successful delivery, `502` on target failure — so upstream services can retry intelligently. HMAC auth, rate limits, and idempotency still apply. + +Requires `--deliver` to be a real target (telegram, discord, slack, github_comment, etc.) — `--deliver log` is rejected because log-only direct delivery is pointless. + +## Security + +- Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`) +- The webhook adapter validates signatures on every incoming POST +- Static routes from config.yaml cannot be overwritten by dynamic subscriptions +- Subscriptions persist to `~/.hermes/webhook_subscriptions.json` + +## How It Works + +1. `hermes webhook subscribe` writes to `~/.hermes/webhook_subscriptions.json` +2. The webhook adapter hot-reloads this file on each incoming request (mtime-gated, negligible overhead) +3. When a POST arrives matching a route, the adapter formats the prompt and triggers an agent run +4. The agent's response is delivered to the configured target (Telegram, Discord, GitHub comment, etc.) + +## Troubleshooting + +If webhooks aren't working: + +1. **Is the gateway running?** Check with `systemctl --user status hermes-gateway` or `ps aux | grep gateway` +2. **Is the webhook server listening?** `curl http://localhost:8644/health` should return `{"status": "ok"}` +3. **Check gateway logs:** `grep webhook ~/.hermes/logs/gateway.log | tail -20` +4. 
**Signature mismatch?** Verify the secret in your service matches the one from `hermes webhook list`. GitHub sends `X-Hub-Signature-256`, GitLab sends `X-Gitlab-Token`. +5. **Firewall/NAT?** The webhook URL must be reachable from the service. For local development, use a tunnel (ngrok, cloudflared). +6. **Wrong event type?** Check `--events` filter matches what the service sends. Use `hermes webhook test <subscription-name>` to verify the route works. diff --git a/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md b/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md new file mode 100644 index 0000000000..0ff7e72d9d --- /dev/null +++ b/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md @@ -0,0 +1,178 @@ +--- +title: "Dogfood" +sidebar_label: "Dogfood" +description: "Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Dogfood + +Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/dogfood` | +| Version | `1.0.0` | +| Tags | `qa`, `testing`, `browser`, `web`, `dogfood` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Dogfood: Systematic Web Application QA Testing + +## Overview + +This skill guides you through systematic exploratory QA testing of web applications using the browser toolset. You will navigate the application, interact with elements, capture evidence of issues, and produce a structured bug report. 
+ +## Prerequisites + +- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`) +- A target URL and testing scope from the user + +## Inputs + +The user provides: +1. **Target URL** — the entry point for testing +2. **Scope** — what areas/features to focus on (or "full site" for comprehensive testing) +3. **Output directory** (optional) — where to save screenshots and the report (default: `./dogfood-output`) + +## Workflow + +Follow this 5-phase systematic workflow: + +### Phase 1: Plan + +1. Create the output directory structure: + ``` + {output_dir}/ + ├── screenshots/ # Evidence screenshots + └── report.md # Final report (generated in Phase 5) + ``` +2. Identify the testing scope based on user input. +3. Build a rough sitemap by planning which pages and features to test: + - Landing/home page + - Navigation links (header, footer, sidebar) + - Key user flows (sign up, login, search, checkout, etc.) + - Forms and interactive elements + - Edge cases (empty states, error pages, 404s) + +### Phase 2: Explore + +For each page or feature in your plan: + +1. **Navigate** to the page: + ``` + browser_navigate(url="https://example.com/page") + ``` + +2. **Take a snapshot** to understand the DOM structure: + ``` + browser_snapshot() + ``` + +3. **Check the console** for JavaScript errors: + ``` + browser_console(clear=true) + ``` + Do this after every navigation and after every significant interaction. Silent JS errors are high-value findings. + +4. **Take an annotated screenshot** to visually assess the page and identify interactive elements: + ``` + browser_vision(question="Describe the page layout, identify any visual issues, broken elements, or accessibility concerns", annotate=true) + ``` + The `annotate=true` flag overlays numbered `[N]` labels on interactive elements. 
Each `[N]` maps to ref `@eN` for subsequent browser commands. + +5. **Test interactive elements** systematically: + - Click buttons and links: `browser_click(ref="@eN")` + - Fill forms: `browser_type(ref="@eN", text="test input")` + - Test keyboard navigation: `browser_press(key="Tab")`, `browser_press(key="Enter")` + - Scroll through content: `browser_scroll(direction="down")` + - Test form validation with invalid inputs + - Test empty submissions + +6. **After each interaction**, check for: + - Console errors: `browser_console()` + - Visual changes: `browser_vision(question="What changed after the interaction?")` + - Expected vs actual behavior + +### Phase 3: Collect Evidence + +For every issue found: + +1. **Take a screenshot** showing the issue: + ``` + browser_vision(question="Capture and describe the issue visible on this page", annotate=false) + ``` + Save the `screenshot_path` from the response — you will reference it in the report. + +2. **Record the details**: + - URL where the issue occurs + - Steps to reproduce + - Expected behavior + - Actual behavior + - Console errors (if any) + - Screenshot path + +3. **Classify the issue** using the issue taxonomy (see `references/issue-taxonomy.md`): + - Severity: Critical / High / Medium / Low + - Category: Functional / Visual / Accessibility / Console / UX / Content + +### Phase 4: Categorize + +1. Review all collected issues. +2. De-duplicate — merge issues that are the same bug manifesting in different places. +3. Assign final severity and category to each issue. +4. Sort by severity (Critical first, then High, Medium, Low). +5. Count issues by severity and category for the executive summary. + +### Phase 5: Report + +Generate the final report using the template at `templates/dogfood-report-template.md`. + +The report must include: +1. **Executive summary** with total issue count, breakdown by severity, and testing scope +2. 
**Per-issue sections** with: + - Issue number and title + - Severity and category badges + - URL where observed + - Description of the issue + - Steps to reproduce + - Expected vs actual behavior + - Screenshot references (use `MEDIA:` for inline images) + - Console errors if relevant +3. **Summary table** of all issues +4. **Testing notes** — what was tested, what was not, any blockers + +Save the report to `{output_dir}/report.md`. + +## Tools Reference + +| Tool | Purpose | +|------|---------| +| `browser_navigate` | Go to a URL | +| `browser_snapshot` | Get DOM text snapshot (accessibility tree) | +| `browser_click` | Click an element by ref (`@eN`) or text | +| `browser_type` | Type into an input field | +| `browser_scroll` | Scroll up/down on the page | +| `browser_back` | Go back in browser history | +| `browser_press` | Press a keyboard key | +| `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels | +| `browser_console` | Get JS console output and errors | + +## Tips + +- **Always check `browser_console()` after navigating and after significant interactions.** Silent JS errors are among the most valuable findings. +- **Use `annotate=true` with `browser_vision`** when you need to reason about interactive element positions or when the snapshot refs are unclear. +- **Test with both valid and invalid inputs** — form validation bugs are common. +- **Scroll through long pages** — content below the fold may have rendering issues. +- **Test navigation flows** — click through multi-step processes end-to-end. +- **Check responsive behavior** by noting any layout issues visible in screenshots. +- **Don't forget edge cases**: empty states, very long text, special characters, rapid clicking. +- When reporting screenshots to the user, include `MEDIA:` so they can see the evidence inline. 
diff --git a/website/docs/user-guide/skills/bundled/email/email-himalaya.md b/website/docs/user-guide/skills/bundled/email/email-himalaya.md new file mode 100644 index 0000000000..55178bdc98 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/email/email-himalaya.md @@ -0,0 +1,293 @@ +--- +title: "Himalaya — CLI to manage emails via IMAP/SMTP" +sidebar_label: "Himalaya" +description: "CLI to manage emails via IMAP/SMTP" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Himalaya + +CLI to manage emails via IMAP/SMTP. Use himalaya to list, read, write, reply, forward, search, and organize emails from the terminal. Supports multiple accounts and message composition with MML (MIME Meta Language). + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/email/himalaya` | +| Version | `1.0.0` | +| Author | community | +| License | MIT | +| Tags | `Email`, `IMAP`, `SMTP`, `CLI`, `Communication` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Himalaya Email CLI + +Himalaya is a CLI email client that lets you manage emails from the terminal using IMAP, SMTP, Notmuch, or Sendmail backends. + +## References + +- `references/configuration.md` (config file setup + IMAP/SMTP authentication) +- `references/message-composition.md` (MML syntax for composing emails) + +## Prerequisites + +1. Himalaya CLI installed (`himalaya --version` to verify) +2. A configuration file at `~/.config/himalaya/config.toml` +3. 
IMAP/SMTP credentials configured (password stored securely) + +### Installation + +```bash +# Pre-built binary (Linux/macOS — recommended) +curl -sSL https://raw.githubusercontent.com/pimalaya/himalaya/master/install.sh | PREFIX=~/.local sh + +# macOS via Homebrew +brew install himalaya + +# Or via cargo (any platform with Rust) +cargo install himalaya --locked +``` + +## Configuration Setup + +Run the interactive wizard to set up an account: + +```bash +himalaya account configure +``` + +Or create `~/.config/himalaya/config.toml` manually: + +```toml +[accounts.personal] +email = "you@example.com" +display-name = "Your Name" +default = true + +backend.type = "imap" +backend.host = "imap.example.com" +backend.port = 993 +backend.encryption.type = "tls" +backend.login = "you@example.com" +backend.auth.type = "password" +backend.auth.cmd = "pass show email/imap" # or use keyring + +message.send.backend.type = "smtp" +message.send.backend.host = "smtp.example.com" +message.send.backend.port = 587 +message.send.backend.encryption.type = "start-tls" +message.send.backend.login = "you@example.com" +message.send.backend.auth.type = "password" +message.send.backend.auth.cmd = "pass show email/smtp" +``` + +## Hermes Integration Notes + +- **Reading, listing, searching, moving, deleting** all work directly through the terminal tool +- **Composing/replying/forwarding** — piped input (`cat << EOF | himalaya template send`) is recommended for reliability. 
Interactive `$EDITOR` mode works with `pty=true` + background + process tool, but requires knowing the editor and its commands +- Use `--output json` for structured output that's easier to parse programmatically +- The `himalaya account configure` wizard requires interactive input — use PTY mode: `terminal(command="himalaya account configure", pty=true)` + +## Common Operations + +### List Folders + +```bash +himalaya folder list +``` + +### List Emails + +List emails in INBOX (default): + +```bash +himalaya envelope list +``` + +List emails in a specific folder: + +```bash +himalaya envelope list --folder "Sent" +``` + +List with pagination: + +```bash +himalaya envelope list --page 1 --page-size 20 +``` + +### Search Emails + +```bash +himalaya envelope list from john@example.com subject meeting +``` + +### Read an Email + +Read email by ID (shows plain text): + +```bash +himalaya message read 42 +``` + +Export raw MIME: + +```bash +himalaya message export 42 --full +``` + +### Reply to an Email + +To reply non-interactively from Hermes, read the original message, compose a reply, and pipe it: + +```bash +# Get the reply template, edit it, and send +himalaya template reply 42 | sed 's/^$/\nYour reply text here\n/' | himalaya template send +``` + +Or build the reply manually: + +```bash +cat << 'EOF' | himalaya template send +From: you@example.com +To: sender@example.com +Subject: Re: Original Subject +In-Reply-To: + +Your reply here. 
+EOF +``` + +Reply-all (interactive — needs $EDITOR, use template approach above instead): + +```bash +himalaya message reply 42 --all +``` + +### Forward an Email + +```bash +# Get forward template and pipe with modifications +himalaya template forward 42 | sed 's/^To:.*/To: newrecipient@example.com/' | himalaya template send +``` + +### Write a New Email + +**Non-interactive (use this from Hermes)** — pipe the message via stdin: + +```bash +cat << 'EOF' | himalaya template send +From: you@example.com +To: recipient@example.com +Subject: Test Message + +Hello from Himalaya! +EOF +``` + +Or with headers flag: + +```bash +himalaya message write -H "To:recipient@example.com" -H "Subject:Test" "Message body here" +``` + +Note: `himalaya message write` without piped input opens `$EDITOR`. This works with `pty=true` + background mode, but piping is simpler and more reliable. + +### Move/Copy Emails + +Move to folder: + +```bash +himalaya message move 42 "Archive" +``` + +Copy to folder: + +```bash +himalaya message copy 42 "Important" +``` + +### Delete an Email + +```bash +himalaya message delete 42 +``` + +### Manage Flags + +Add flag: + +```bash +himalaya flag add 42 --flag seen +``` + +Remove flag: + +```bash +himalaya flag remove 42 --flag seen +``` + +## Multiple Accounts + +List accounts: + +```bash +himalaya account list +``` + +Use a specific account: + +```bash +himalaya --account work envelope list +``` + +## Attachments + +Save attachments from a message: + +```bash +himalaya attachment download 42 +``` + +Save to specific directory: + +```bash +himalaya attachment download 42 --dir ~/Downloads +``` + +## Output Formats + +Most commands support `--output` for structured output: + +```bash +himalaya envelope list --output json +himalaya envelope list --output plain +``` + +## Debugging + +Enable debug logging: + +```bash +RUST_LOG=debug himalaya envelope list +``` + +Full trace with backtrace: + +```bash +RUST_LOG=trace RUST_BACKTRACE=1 himalaya envelope list 
+``` + +## Tips + +- Use `himalaya --help` or `himalaya <command> --help` for detailed usage. +- Message IDs are relative to the current folder; re-list after folder changes. +- For composing rich emails with attachments, use MML syntax (see `references/message-composition.md`). +- Store passwords securely using `pass`, system keyring, or a command that outputs the password. diff --git a/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md b/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md new file mode 100644 index 0000000000..d85495a181 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md @@ -0,0 +1,205 @@ +--- +title: "Minecraft Modpack Server — Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip" +sidebar_label: "Minecraft Modpack Server" +description: "Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Minecraft Modpack Server + +Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip. Covers NeoForge/Forge install, Java version, JVM tuning, firewall, LAN config, backups, and launch scripts. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/gaming/minecraft-modpack-server` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Minecraft Modpack Server Setup + +## When to use +- User wants to set up a modded Minecraft server from a server pack zip +- User needs help with NeoForge/Forge server configuration +- User asks about Minecraft server performance tuning or backups + +## Gather User Preferences First +Before starting setup, ask the user for: +- **Server name / MOTD** — what should it say in the server list? +- **Seed** — specific seed or random? +- **Difficulty** — peaceful / easy / normal / hard? +- **Gamemode** — survival / creative / adventure? +- **Online mode** — true (Mojang auth, legit accounts) or false (LAN/cracked friendly)? +- **Player count** — how many players expected? (affects RAM & view distance tuning) +- **RAM allocation** — or let agent decide based on mod count & available RAM? +- **View distance / simulation distance** — or let agent pick based on player count & hardware? +- **PvP** — on or off? +- **Whitelist** — open server or whitelist only? +- **Backups** — want automated backups? How often? + +Use sensible defaults if the user doesn't care, but always ask before generating the config. + +## Steps + +### 1. Download & Inspect the Pack +```bash +mkdir -p ~/minecraft-server +cd ~/minecraft-server +wget -O serverpack.zip "" +unzip -o serverpack.zip -d server +ls server/ +``` +Look for: `startserver.sh`, installer jar (neoforge/forge), `user_jvm_args.txt`, `mods/` folder. +Check the script to determine: mod loader type, version, and required Java version. + +### 2. Install Java +- Minecraft 1.21+ → Java 21: `sudo apt install openjdk-21-jre-headless` +- Minecraft 1.18-1.20 → Java 17: `sudo apt install openjdk-17-jre-headless` +- Minecraft 1.16 and below → Java 8: `sudo apt install openjdk-8-jre-headless` +- Verify: `java -version` + +### 3. Install the Mod Loader +Most server packs include an install script. 
Use the INSTALL_ONLY env var to install without launching: +```bash +cd ~/minecraft-server/server +ATM10_INSTALL_ONLY=true bash startserver.sh +# Or for generic Forge packs: +# java -jar forge-*-installer.jar --installServer +``` +This downloads libraries, patches the server jar, etc. + +### 4. Accept EULA +```bash +echo "eula=true" > ~/minecraft-server/server/eula.txt +``` + +### 5. Configure server.properties +Key settings for modded/LAN: +```properties +motd=\u00a7b\u00a7lServer Name \u00a7r\u00a78| \u00a7aModpack Name +server-port=25565 +online-mode=true # false for LAN without Mojang auth +enforce-secure-profile=true # match online-mode +difficulty=hard # most modpacks balance around hard +allow-flight=true # REQUIRED for modded (flying mounts/items) +spawn-protection=0 # let everyone build at spawn +max-tick-time=180000 # modded needs longer tick timeout +enable-command-block=true +``` + +Performance settings (scale to hardware): +```properties +# 2 players, beefy machine: +view-distance=16 +simulation-distance=10 + +# 4-6 players, moderate machine: +view-distance=10 +simulation-distance=6 + +# 8+ players or weaker hardware: +view-distance=8 +simulation-distance=4 +``` + +### 6. Tune JVM Args (user_jvm_args.txt) +Scale RAM to player count and mod count. Rule of thumb for modded: +- 100-200 mods: 6-12GB +- 200-350+ mods: 12-24GB +- Leave at least 8GB free for the OS/other tasks + +``` +-Xms12G +-Xmx24G +-XX:+UseG1GC +-XX:+ParallelRefProcEnabled +-XX:MaxGCPauseMillis=200 +-XX:+UnlockExperimentalVMOptions +-XX:+DisableExplicitGC +-XX:+AlwaysPreTouch +-XX:G1NewSizePercent=30 +-XX:G1MaxNewSizePercent=40 +-XX:G1HeapRegionSize=8M +-XX:G1ReservePercent=20 +-XX:G1HeapWastePercent=5 +-XX:G1MixedGCCountTarget=4 +-XX:InitiatingHeapOccupancyPercent=15 +-XX:G1MixedGCLiveThresholdPercent=90 +-XX:G1RSetUpdatingPauseTimePercent=5 +-XX:SurvivorRatio=32 +-XX:+PerfDisableSharedMem +-XX:MaxTenuringThreshold=1 +``` + +### 7. 
Open Firewall +```bash +sudo ufw allow 25565/tcp comment "Minecraft Server" +``` +Check with: `sudo ufw status | grep 25565` + +### 8. Create Launch Script +```bash +cat > ~/start-minecraft.sh << 'EOF' +#!/bin/bash +cd ~/minecraft-server/server +java @user_jvm_args.txt @libraries/net/neoforged/neoforge/<version>/unix_args.txt nogui +EOF +chmod +x ~/start-minecraft.sh +``` +Note: For Forge (not NeoForge), the args file path differs. Check `startserver.sh` for the exact path. + +### 9. Set Up Automated Backups +Create backup script: +```bash +cat > ~/minecraft-server/backup.sh << 'SCRIPT' +#!/bin/bash +SERVER_DIR="$HOME/minecraft-server/server" +BACKUP_DIR="$HOME/minecraft-server/backups" +WORLD_DIR="$SERVER_DIR/world" +MAX_BACKUPS=24 +mkdir -p "$BACKUP_DIR" +[ ! -d "$WORLD_DIR" ] && echo "[BACKUP] No world folder" && exit 0 +TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) +BACKUP_FILE="$BACKUP_DIR/world_${TIMESTAMP}.tar.gz" +echo "[BACKUP] Starting at $(date)" +tar -czf "$BACKUP_FILE" -C "$SERVER_DIR" world +SIZE=$(du -h "$BACKUP_FILE" | cut -f1) +echo "[BACKUP] Saved: $BACKUP_FILE ($SIZE)" +BACKUP_COUNT=$(ls -1t "$BACKUP_DIR"/world_*.tar.gz 2>/dev/null | wc -l) +if [ "$BACKUP_COUNT" -gt "$MAX_BACKUPS" ]; then + REMOVE=$((BACKUP_COUNT - MAX_BACKUPS)) + ls -1t "$BACKUP_DIR"/world_*.tar.gz | tail -n "$REMOVE" | xargs rm -f + echo "[BACKUP] Pruned $REMOVE old backup(s)" +fi +echo "[BACKUP] Done at $(date)" +SCRIPT +chmod +x ~/minecraft-server/backup.sh +``` + +Add hourly cron: +```bash +(crontab -l 2>/dev/null | grep -v "minecraft-server/backup.sh"; echo "0 * * * * $HOME/minecraft-server/backup.sh >> $HOME/minecraft-server/backups/backup.log 2>&1") | crontab - +``` + +## Pitfalls +- ALWAYS set `allow-flight=true` for modded — mods with jetpacks/flight will kick players otherwise +- `max-tick-time=180000` or higher — modded servers often have long ticks during worldgen +- First startup is SLOW (several minutes for big packs) — don't panic +- "Can't keep up!" 
warnings on first launch are normal, settles after initial chunk gen +- If online-mode=false, set enforce-secure-profile=false too or clients get rejected +- The pack's startserver.sh often has an auto-restart loop — make a clean launch script without it +- Delete the world/ folder to regenerate with a new seed +- Some packs have env vars to control behavior (e.g., ATM10 uses ATM10_JAVA, ATM10_RESTART, ATM10_INSTALL_ONLY) + +## Verification +- `pgrep -fa neoforge` or `pgrep -fa minecraft` to check if running +- Check logs: `tail -f ~/minecraft-server/server/logs/latest.log` +- Look for "Done (Xs)!" in the log = server is ready +- Test connection: player adds server IP in Multiplayer diff --git a/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md b/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md new file mode 100644 index 0000000000..ab070f8671 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md @@ -0,0 +1,235 @@ +--- +title: "Pokemon Player — Play Pokemon games autonomously via headless emulation" +sidebar_label: "Pokemon Player" +description: "Play Pokemon games autonomously via headless emulation" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Pokemon Player + +Play Pokemon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/gaming/pokemon-player` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Pokemon Player + +Play Pokemon games via headless emulation using the `pokemon-agent` package. + +## When to Use +- User says "play pokemon", "start pokemon", "pokemon game" +- User asks about Pokemon Red, Blue, Yellow, FireRed, etc. +- User wants to watch an AI play Pokemon +- User references a ROM file (.gb, .gbc, .gba) + +## Startup Procedure + +### 1. First-time setup (clone, venv, install) +The repo is NousResearch/pokemon-agent on GitHub. Clone it, then +set up a Python 3.10+ virtual environment. Use uv (preferred for speed) +to create the venv and install the package in editable mode with the +pyboy extra. If uv is not available, fall back to python3 -m venv + pip. + +On this machine it is already set up at /home/teknium/pokemon-agent +with a venv ready — just cd there and source .venv/bin/activate. + +You also need a ROM file. Ask the user for theirs. On this machine +one exists at roms/pokemon_red.gb inside that directory. +NEVER download or provide ROM files — always ask the user. + +### 2. Start the game server +From inside the pokemon-agent directory with the venv activated, run +pokemon-agent serve with --rom pointing to the ROM and --port 9876. +Run it in the background with &. +To resume from a saved game, add --load-state with the save name. +Wait 4 seconds for startup, then verify with GET /health. + +### 3. Set up live dashboard for user to watch +Use an SSH reverse tunnel via localhost.run so the user can view +the dashboard in their browser. Connect with ssh, forwarding local +port 9876 to remote port 80 on nokey@localhost.run. Redirect output +to a log file, wait 10 seconds, then grep the log for the .lhr.life +URL. Give the user the URL with /dashboard/ appended. +The tunnel URL changes each time — give the user the new one if restarted. 
+ +## Save and Load + +### When to save +- Every 15-20 turns of gameplay +- ALWAYS before gym battles, rival encounters, or risky fights +- Before entering a new town or dungeon +- Before any action you are unsure about + +### How to save +POST /save with a descriptive name. Good examples: +before_brock, route1_start, mt_moon_entrance, got_cut + +### How to load +POST /load with the save name. + +### List available saves +GET /saves returns all saved states. + +### Loading on server startup +Use --load-state flag when starting the server to auto-load a save. +This is faster than loading via the API after startup. + +## The Gameplay Loop + +### Step 1: OBSERVE — check state AND take a screenshot +GET /state for position, HP, battle, dialog. +GET /screenshot and save to /tmp/pokemon.png, then use vision_analyze. +Always do BOTH — RAM state gives numbers, vision gives spatial awareness. + +### Step 2: ORIENT +- Dialog/text on screen → advance it +- In battle → fight or run +- Party hurt → head to Pokemon Center +- Near objective → navigate carefully + +### Step 3: DECIDE +Priority: dialog > battle > heal > story objective > training > explore + +### Step 4: ACT — move 2-4 steps max, then re-check +POST /action with a SHORT action list (2-4 actions, not 10-15). + +### Step 5: VERIFY — screenshot after every move sequence +Take a screenshot and use vision_analyze to confirm you moved where +intended. This is the MOST IMPORTANT step. Without vision you WILL get lost. 
+ +### Step 6: RECORD progress to memory with PKM: prefix + +### Step 7: SAVE periodically + +## Action Reference +- press_a — confirm, talk, select +- press_b — cancel, close menu +- press_start — open game menu +- walk_up/down/left/right — move one tile +- hold_b_N — hold B for N frames (use for speeding through text) +- wait_60 — wait about 1 second (60 frames) +- a_until_dialog_end — press A repeatedly until dialog clears + +## Critical Tips from Experience + +### USE VISION CONSTANTLY +- Take a screenshot every 2-4 movement steps +- The RAM state tells you position and HP but NOT what is around you +- Ledges, fences, signs, building doors, NPCs — only visible via screenshot +- Ask the vision model specific questions: "what is one tile north of me?" +- When stuck, always screenshot before trying random directions + +### Warp Transitions Need Extra Wait Time +When walking through a door or stairs, the screen fades to black during +the map transition. You MUST wait for it to complete. Add 2-3 wait_60 +actions after any door/stair warp. Without waiting, the position reads +as stale and you will think you are still in the old map. + +### Building Exit Trap +When you exit a building, you appear directly IN FRONT of the door. +If you walk north, you go right back inside. ALWAYS sidestep first +by walking left or right 2 tiles, then proceed in your intended direction. + +### Dialog Handling +Gen 1 text scrolls slowly letter-by-letter. To speed through dialog, +hold B for 120 frames then press A. Repeat as needed. Holding B makes +text display at max speed. Then press A to advance to the next line. +The a_until_dialog_end action checks the RAM dialog flag, but this flag +does not catch ALL text states. If dialog seems stuck, use the manual +hold_b + press_a pattern instead and verify via screenshot. + +### Ledges Are One-Way +Ledges (small cliff edges) can only be jumped DOWN (south), never climbed +UP (north). 
If blocked by a ledge going north, you must go left or right +to find the gap around it. Use vision to identify which direction the +gap is. Ask the vision model explicitly. + +### Navigation Strategy +- Move 2-4 steps at a time, then screenshot to check position +- When entering a new area, screenshot immediately to orient +- Ask the vision model "which direction to [destination]?" +- If stuck for 3+ attempts, screenshot and re-evaluate completely +- Do not spam 10-15 movements — you will overshoot or get stuck + +### Running from Wild Battles +On the battle menu, RUN is bottom-right. To reach it from the default +cursor position (FIGHT, top-left): press down then right to move cursor +to RUN, then press A. Wrap with hold_b to speed through text/animations. + +### Battling (FIGHT) +On the battle menu FIGHT is top-left (default cursor position). +Press A to enter move selection, A again to use the first move. +Then hold B to speed through attack animations and text. + +## Battle Strategy + +### Decision Tree +1. Want to catch? → Weaken then throw Poke Ball +2. Wild you don't need? → RUN +3. Type advantage? → Use super-effective move +4. No advantage? → Use strongest STAB move +5. Low HP? → Switch or use Potion + +### Gen 1 Type Chart (key matchups) +- Water beats Fire, Ground, Rock +- Fire beats Grass, Bug, Ice +- Grass beats Water, Ground, Rock +- Electric beats Water, Flying +- Ground beats Fire, Electric, Rock, Poison +- Psychic beats Fighting, Poison (dominant in Gen 1!) 
+ +### Gen 1 Quirks +- Special stat = both offense AND defense for special moves +- Psychic type is overpowered (Ghost moves bugged) +- Critical hits based on Speed stat +- Wrap/Bind prevent opponent from acting +- Focus Energy bug: REDUCES crit rate instead of raising it + +## Memory Conventions +| Prefix | Purpose | Example | +|--------|---------|---------| +| PKM:OBJECTIVE | Current goal | Get Parcel from Viridian Mart | +| PKM:MAP | Navigation knowledge | Viridian: mart is northeast | +| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty | +| PKM:PROGRESS | Milestone tracker | Beat rival, heading to Viridian | +| PKM:STUCK | Stuck situations | Ledge at y=28 go right to bypass | +| PKM:TEAM | Team notes | Squirtle Lv6, Tackle + Tail Whip | + +## Progression Milestones +- Choose starter +- Deliver Parcel from Viridian Mart, receive Pokedex +- Boulder Badge — Brock (Rock) → use Water/Grass +- Cascade Badge — Misty (Water) → use Grass/Electric +- Thunder Badge — Lt. Surge (Electric) → use Ground +- Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying +- Soul Badge — Koga (Poison) → use Ground/Psychic +- Marsh Badge — Sabrina (Psychic) → hardest gym +- Volcano Badge — Blaine (Fire) → use Water/Ground +- Earth Badge — Giovanni (Ground) → use Water/Grass/Ice +- Elite Four → Champion! + +## Stopping Play +1. Save the game with a descriptive name via POST /save +2. Update memory with PKM:PROGRESS +3. Tell user: "Game saved as [name]! Say 'play pokemon' to resume." +4. 
Kill the server and tunnel background processes + +## Pitfalls +- NEVER download or provide ROM files +- Do NOT send more than 4-5 actions without checking vision +- Always sidestep after exiting buildings before going north +- Always add wait_60 x2-3 after door/stair warps +- Dialog detection via RAM is unreliable — verify with screenshots +- Save BEFORE risky encounters +- The tunnel URL changes each time you restart it diff --git a/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md new file mode 100644 index 0000000000..13c3fe4425 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md @@ -0,0 +1,131 @@ +--- +title: "Codebase Inspection" +sidebar_label: "Codebase Inspection" +description: "Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Codebase Inspection + +Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios. Use when asked to check lines of code, repo size, language composition, or codebase stats. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/codebase-inspection` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `LOC`, `Code Analysis`, `pygount`, `Codebase`, `Metrics`, `Repository` | +| Related skills | [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Codebase Inspection with pygount + +Analyze repositories for lines of code, language breakdown, file counts, and code-vs-comment ratios using `pygount`. + +## When to Use + +- User asks for LOC (lines of code) count +- User wants a language breakdown of a repo +- User asks about codebase size or composition +- User wants code-vs-comment ratios +- General "how big is this repo" questions + +## Prerequisites + +```bash +pip install --break-system-packages pygount 2>/dev/null || pip install pygount +``` + +## 1. Basic Summary (Most Common) + +Get a full language breakdown with file counts, code lines, and comment lines: + +```bash +cd /path/to/repo +pygount --format=summary \ + --folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,.next,.tox,.eggs,*.egg-info" \ + . +``` + +**IMPORTANT:** Always use `--folders-to-skip` to exclude dependency/build directories, otherwise pygount will crawl them and take a very long time or hang. + +## 2. Common Folder Exclusions + +Adjust based on the project type: + +```bash +# Python projects +--folders-to-skip=".git,venv,.venv,__pycache__,.cache,dist,build,.tox,.eggs,.mypy_cache" + +# JavaScript/TypeScript projects +--folders-to-skip=".git,node_modules,dist,build,.next,.cache,.turbo,coverage" + +# General catch-all +--folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,.next,.tox,vendor,third_party" +``` + +## 3. Filter by Specific Language + +```bash +# Only count Python files +pygount --suffix=py --format=summary . + +# Only count Python and YAML +pygount --suffix=py,yaml,yml --format=summary . +``` + +## 4. Detailed File-by-File Output + +```bash +# Default format shows per-file breakdown +pygount --folders-to-skip=".git,node_modules,venv" . + +# Sort by code lines (pipe through sort) +pygount --folders-to-skip=".git,node_modules,venv" . | sort -t$'\t' -k1 -nr | head -20 +``` + +## 5. 
Output Formats + +```bash +# Summary table (default recommendation) +pygount --format=summary . + +# JSON output for programmatic use +pygount --format=json . + +# Pipe-friendly: Language, file count, code, docs, empty, string +pygount --format=summary . 2>/dev/null +``` + +## 6. Interpreting Results + +The summary table columns: +- **Language** — detected programming language +- **Files** — number of files of that language +- **Code** — lines of actual code (executable/declarative) +- **Comment** — lines that are comments or documentation +- **%** — percentage of total + +Special pseudo-languages: +- `__empty__` — empty files +- `__binary__` — binary files (images, compiled, etc.) +- `__generated__` — auto-generated files (detected heuristically) +- `__duplicate__` — files with identical content +- `__unknown__` — unrecognized file types + +## Pitfalls + +1. **Always exclude .git, node_modules, venv** — without `--folders-to-skip`, pygount will crawl everything and may take minutes or hang on large dependency trees. +2. **Markdown shows 0 code lines** — pygount classifies all Markdown content as comments, not code. This is expected behavior. +3. **JSON files show low code counts** — pygount may count JSON lines conservatively. For accurate JSON line counts, use `wc -l` directly. +4. **Large monorepos** — for very large repos, consider using `--suffix` to target specific languages rather than scanning everything. 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-auth.md b/website/docs/user-guide/skills/bundled/github/github-github-auth.md new file mode 100644 index 0000000000..4f7360c43e --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-auth.md @@ -0,0 +1,264 @@ +--- +title: "Github Auth — Set up GitHub authentication for the agent using git (universally available) or the gh CLI" +sidebar_label: "Github Auth" +description: "Set up GitHub authentication for the agent using git (universally available) or the gh CLI" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Auth + +Set up GitHub authentication for the agent using git (universally available) or the gh CLI. Covers HTTPS tokens, SSH keys, credential helpers, and gh auth — with a detection flow to pick the right method automatically. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-auth` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Authentication`, `Git`, `gh-cli`, `SSH`, `Setup` | +| Related skills | [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues), [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Authentication Setup + +This skill sets up authentication so the agent can work with GitHub repositories, PRs, issues, and CI. 
It covers two paths: + +- **`git` (always available)** — uses HTTPS personal access tokens or SSH keys +- **`gh` CLI (if installed)** — richer GitHub API access with a simpler auth flow + +## Detection Flow + +When a user asks you to work with GitHub, run this check first: + +```bash +# Check what's available +git --version +gh --version 2>/dev/null || echo "gh not installed" + +# Check if already authenticated +gh auth status 2>/dev/null || echo "gh not authenticated" +git config --global credential.helper 2>/dev/null || echo "no git credential helper" +``` + +**Decision tree:** +1. If `gh auth status` shows authenticated → you're good, use `gh` for everything +2. If `gh` is installed but not authenticated → use "gh auth" method below +3. If `gh` is not installed → use "git-only" method below (no sudo needed) + +--- + +## Method 1: Git-Only Authentication (No gh, No sudo) + +This works on any machine with `git` installed. No root access needed. + +### Option A: HTTPS with Personal Access Token (Recommended) + +This is the most portable method — works everywhere, no SSH config needed. 
+ +**Step 1: Create a personal access token** + +Tell the user to go to: **https://github.com/settings/tokens** + +- Click "Generate new token (classic)" +- Give it a name like "hermes-agent" +- Select scopes: + - `repo` (full repository access — read, write, push, PRs) + - `workflow` (trigger and manage GitHub Actions) + - `read:org` (if working with organization repos) +- Set expiration (90 days is a good default) +- Copy the token — it won't be shown again + +**Step 2: Configure git to store the token** + +```bash +# Set up the credential helper to cache credentials +# "store" saves to ~/.git-credentials in plaintext (simple, persistent) +git config --global credential.helper store + +# Now do a test operation that triggers auth — git will prompt for credentials +# Username: <their-github-username> +# Password: <the-personal-access-token> +git ls-remote https://github.com/<owner>/<repo>.git +``` + +After entering credentials once, they're saved and reused for all future operations. + +**Alternative: cache helper (credentials expire from memory)** + +```bash +# Cache in memory for 8 hours (28800 seconds) instead of saving to disk +git config --global credential.helper 'cache --timeout=28800' +``` + +**Alternative: set the token directly in the remote URL (per-repo)** + +```bash +# Embed token in the remote URL (avoids credential prompts entirely) +git remote set-url origin https://<username>:<token>@github.com/<owner>/<repo>.git +``` + +**Step 3: Configure git identity** + +```bash +# Required for commits — set name and email +git config --global user.name "Their Name" +git config --global user.email "their-email@example.com" +``` + +**Step 4: Verify** + +```bash +# Test push access (this should work without any prompts now) +git ls-remote https://github.com/<owner>/<repo>.git + +# Verify identity +git config --global user.name +git config --global user.email +``` + +### Option B: SSH Key Authentication + +Good for users who prefer SSH or already have keys set up. 
+ +**Step 1: Check for existing SSH keys** + +```bash +ls -la ~/.ssh/id_*.pub 2>/dev/null || echo "No SSH keys found" +``` + +**Step 2: Generate a key if needed** + +```bash +# Generate an ed25519 key (modern, secure, fast) +ssh-keygen -t ed25519 -C "their-email@example.com" -f ~/.ssh/id_ed25519 -N "" + +# Display the public key for them to add to GitHub +cat ~/.ssh/id_ed25519.pub +``` + +Tell the user to add the public key at: **https://github.com/settings/keys** +- Click "New SSH key" +- Paste the public key content +- Give it a title like "hermes-agent-<machine-name>" + +**Step 3: Test the connection** + +```bash +ssh -T git@github.com +# Expected: "Hi <username>! You've successfully authenticated..." +``` + +**Step 4: Configure git to use SSH for GitHub** + +```bash +# Rewrite HTTPS GitHub URLs to SSH automatically +git config --global url."git@github.com:".insteadOf "https://github.com/" +``` + +**Step 5: Configure git identity** + +```bash +git config --global user.name "Their Name" +git config --global user.email "their-email@example.com" +``` + +--- + +## Method 2: gh CLI Authentication + +If `gh` is installed, it handles both API access and git credentials in one step. + +### Interactive Browser Login (Desktop) + +```bash +gh auth login +# Select: GitHub.com +# Select: HTTPS +# Authenticate via browser +``` + +### Token-Based Login (Headless / SSH Servers) + +```bash +echo "<token>" | gh auth login --with-token + +# Set up git credentials through gh +gh auth setup-git +``` + +### Verify + +```bash +gh auth status +``` + +--- + +## Using the GitHub API Without gh + +When `gh` is not available, you can still access the full GitHub API using `curl` with a personal access token. This is how the other GitHub skills implement their fallbacks. 
+ +### Setting the Token for API Calls + +```bash +# Option 1: Export as env var (preferred — keeps it out of commands) +export GITHUB_TOKEN="<token>" + +# Then use in curl calls: +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/user +``` + +### Extracting the Token from Git Credentials + +If git credentials are already configured (via credential.helper store), the token can be extracted: + +```bash +# Read from git credential store +grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|' +``` + +### Helper: Detect Auth Method + +Use this pattern at the start of any GitHub workflow: + +```bash +# Try gh first, fall back to git + curl +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + echo "AUTH_METHOD=gh" +elif [ -n "$GITHUB_TOKEN" ]; then + echo "AUTH_METHOD=curl" +elif [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + export GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + echo "AUTH_METHOD=curl" +elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + export GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + echo "AUTH_METHOD=curl" +else + echo "AUTH_METHOD=none" + echo "Need to set up authentication first" +fi +``` + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| `git push` asks for password | GitHub disabled password auth. 
Use a personal access token as the password, or switch to SSH | +| `remote: Permission to X denied` | Token may lack `repo` scope — regenerate with correct scopes | +| `fatal: Authentication failed` | Cached credentials may be stale — run `git credential reject` then re-authenticate | +| `ssh: connect to host github.com port 22: Connection refused` | Try SSH over HTTPS port: add `Host github.com` with `Port 443` and `Hostname ssh.github.com` to `~/.ssh/config` | +| Credentials not persisting | Check `git config --global credential.helper` — must be `store` or `cache` | +| Multiple GitHub accounts | Use SSH with different keys per host alias in `~/.ssh/config`, or per-repo credential URLs | +| `gh: command not found` + no sudo | Use git-only Method 1 above — no installation needed | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-code-review.md b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md new file mode 100644 index 0000000000..9a18c45e16 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md @@ -0,0 +1,498 @@ +--- +title: "Github Code Review" +sidebar_label: "Github Code Review" +description: "Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Code Review + +Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-code-review` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Code-Review`, `Pull-Requests`, `Git`, `Quality` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Code Review + +Perform code reviews on local changes before pushing, or review open PRs on GitHub. Most of this skill uses plain `git` — the `gh`/`curl` split only matters for PR-level interactions. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repository + +### Setup (for PR interactions) + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. Reviewing Local Changes (Pre-Push) + +This is pure `git` — works everywhere, no API needed. 
+ +### Get the Diff + +```bash +# Staged changes (what would be committed) +git diff --staged + +# All changes vs main (what a PR would contain) +git diff main...HEAD + +# File names only +git diff main...HEAD --name-only + +# Stat summary (insertions/deletions per file) +git diff main...HEAD --stat +``` + +### Review Strategy + +1. **Get the big picture first:** + +```bash +git diff main...HEAD --stat +git log main..HEAD --oneline +``` + +2. **Review file by file** — use `read_file` on changed files for full context, and the diff to see what changed: + +```bash +git diff main...HEAD -- src/auth/login.py +``` + +3. **Check for common issues:** + +```bash +# Debug statements, TODOs, console.logs left behind +git diff main...HEAD | grep -n "print(\|console\.log\|TODO\|FIXME\|HACK\|XXX\|debugger" + +# Large files accidentally staged +git diff main...HEAD --stat | sort -t'|' -k2 -rn | head -10 + +# Secrets or credential patterns +git diff main...HEAD | grep -in "password\|secret\|api_key\|token.*=\|private_key" + +# Merge conflict markers +git diff main...HEAD | grep -n "<<<<<<\|>>>>>>\|=======" +``` + +4. **Present structured feedback** to the user. + +### Review Output Format + +When reviewing local changes, present findings in this structure: + +``` +## Code Review Summary + +### Critical +- **src/auth.py:45** — SQL injection: user input passed directly to query. + Suggestion: Use parameterized queries. + +### Warnings +- **src/models/user.py:23** — Password stored in plaintext. Use bcrypt or argon2. +- **src/api/routes.py:112** — No rate limiting on login endpoint. + +### Suggestions +- **src/utils/helpers.py:8** — Duplicates logic in `src/core/utils.py:34`. Consolidate. +- **tests/test_auth.py** — Missing edge case: expired token test. + +### Looks Good +- Clean separation of concerns in the middleware layer +- Good test coverage for the happy path +``` + +--- + +## 2. 
Reviewing a Pull Request on GitHub + +### View PR Details + +**With gh:** + +```bash +gh pr view 123 +gh pr diff 123 +gh pr diff 123 --name-only +``` + +**With git + curl:** + +```bash +PR_NUMBER=123 + +# Get PR details +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c " +import sys, json +pr = json.load(sys.stdin) +print(f\"Title: {pr['title']}\") +print(f\"Author: {pr['user']['login']}\") +print(f\"Branch: {pr['head']['ref']} -> {pr['base']['ref']}\") +print(f\"State: {pr['state']}\") +print(f\"Body:\n{pr['body']}\")" + +# List changed files +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/files \ + | python3 -c " +import sys, json +for f in json.load(sys.stdin): + print(f\"{f['status']:10} +{f['additions']:-4} -{f['deletions']:-4} {f['filename']}\")" +``` + +### Check Out PR Locally for Full Review + +This works with plain `git` — no `gh` needed: + +```bash +# Fetch the PR branch and check it out +git fetch origin pull/123/head:pr-123 +git checkout pr-123 + +# Now you can use read_file, search_files, run tests, etc. + +# View diff against the base branch +git diff main...pr-123 +``` + +**With gh (shortcut):** + +```bash +gh pr checkout 123 +``` + +### Leave Comments on a PR + +**General PR comment — with gh:** + +```bash +gh pr comment 123 --body "Overall looks good, a few suggestions below." 
+``` + +**General PR comment — with curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/$PR_NUMBER/comments \ + -d '{"body": "Overall looks good, a few suggestions below."}' +``` + +### Leave Inline Review Comments + +**Single inline comment — with gh (via API):** + +```bash +HEAD_SHA=$(gh pr view 123 --json headRefOid --jq '.headRefOid') + +gh api repos/$OWNER/$REPO/pulls/123/comments \ + --method POST \ + -f body="This could be simplified with a list comprehension." \ + -f path="src/auth/login.py" \ + -f commit_id="$HEAD_SHA" \ + -f line=45 \ + -f side="RIGHT" +``` + +**Single inline comment — with curl:** + +```bash +# Get the head commit SHA +HEAD_SHA=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/comments \ + -d "{ + \"body\": \"This could be simplified with a list comprehension.\", + \"path\": \"src/auth/login.py\", + \"commit_id\": \"$HEAD_SHA\", + \"line\": 45, + \"side\": \"RIGHT\" + }" +``` + +### Submit a Formal Review (Approve / Request Changes) + +**With gh:** + +```bash +gh pr review 123 --approve --body "LGTM!" +gh pr review 123 --request-changes --body "See inline comments." +gh pr review 123 --comment --body "Some suggestions, nothing blocking." 
+``` + +**With curl — multi-comment review submitted atomically:** + +```bash +HEAD_SHA=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/reviews \ + -d "{ + \"commit_id\": \"$HEAD_SHA\", + \"event\": \"COMMENT\", + \"body\": \"Code review from Hermes Agent\", + \"comments\": [ + {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"Use parameterized queries to prevent SQL injection.\"}, + {\"path\": \"src/models/user.py\", \"line\": 23, \"body\": \"Hash passwords with bcrypt before storing.\"}, + {\"path\": \"tests/test_auth.py\", \"line\": 1, \"body\": \"Add test for expired token edge case.\"} + ] + }" +``` + +Event values: `"APPROVE"`, `"REQUEST_CHANGES"`, `"COMMENT"` + +The `line` field refers to the line number in the *new* version of the file. For deleted lines, use `"side": "LEFT"`. + +--- + +## 3. Review Checklist + +When performing a code review (local or PR), systematically check: + +### Correctness +- Does the code do what it claims? +- Edge cases handled (empty inputs, nulls, large data, concurrent access)? +- Error paths handled gracefully? + +### Security +- No hardcoded secrets, credentials, or API keys +- Input validation on user-facing inputs +- No SQL injection, XSS, or path traversal +- Auth/authz checks where needed + +### Code Quality +- Clear naming (variables, functions, classes) +- No unnecessary complexity or premature abstraction +- DRY — no duplicated logic that should be extracted +- Functions are focused (single responsibility) + +### Testing +- New code paths tested? +- Happy path and error cases covered? +- Tests readable and maintainable? 
+ +### Performance +- No N+1 queries or unnecessary loops +- Appropriate caching where beneficial +- No blocking operations in async code paths + +### Documentation +- Public APIs documented +- Non-obvious logic has comments explaining "why" +- README updated if behavior changed + +--- + +## 4. Pre-Push Review Workflow + +When the user asks you to "review the code" or "check before pushing": + +1. `git diff main...HEAD --stat` — see scope of changes +2. `git diff main...HEAD` — read the full diff +3. For each changed file, use `read_file` if you need more context +4. Apply the checklist above +5. Present findings in the structured format (Critical / Warnings / Suggestions / Looks Good) +6. If critical issues found, offer to fix them before the user pushes + +--- + +## 5. PR Review Workflow (End-to-End) + +When the user asks you to "review PR #N", "look at this PR", or gives you a PR URL, follow this recipe: + +### Step 1: Set up environment + +```bash +source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh" +# Or run the inline setup block from the top of this skill +``` + +### Step 2: Gather PR context + +Get the PR metadata, description, and list of changed files to understand scope before diving into code. + +**With gh:** +```bash +gh pr view 123 +gh pr diff 123 --name-only +gh pr checks 123 +``` + +**With curl:** +```bash +PR_NUMBER=123 + +# PR details (title, author, description, branch) +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER + +# Changed files with line counts +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/files +``` + +### Step 3: Check out the PR locally + +This gives you full access to `read_file`, `search_files`, and the ability to run tests. 
+ +```bash +git fetch origin pull/$PR_NUMBER/head:pr-$PR_NUMBER +git checkout pr-$PR_NUMBER +``` + +### Step 4: Read the diff and understand changes + +```bash +# Full diff against the base branch +git diff main...HEAD + +# Or file-by-file for large PRs +git diff main...HEAD --name-only +# Then for each file: +git diff main...HEAD -- path/to/file.py +``` + +For each changed file, use `read_file` to see full context around the changes — diffs alone can miss issues visible only with surrounding code. + +### Step 5: Run automated checks locally (if applicable) + +```bash +# Run tests if there's a test suite +python -m pytest 2>&1 | tail -20 +# or: npm test, cargo test, go test ./..., etc. + +# Run linter if configured +ruff check . 2>&1 | head -30 +# or: eslint, clippy, etc. +``` + +### Step 6: Apply the review checklist (Section 3) + +Go through each category: Correctness, Security, Code Quality, Testing, Performance, Documentation. + +### Step 7: Post the review to GitHub + +Collect your findings and submit them as a formal review with inline comments. + +**With gh:** +```bash +# If no issues — approve +gh pr review $PR_NUMBER --approve --body "Reviewed by Hermes Agent. Code looks clean — good test coverage, no security concerns." + +# If issues found — request changes with inline comments +gh pr review $PR_NUMBER --request-changes --body "Found a few issues — see inline comments." 
+``` + +**With curl — atomic review with multiple inline comments:** +```bash +HEAD_SHA=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +# Build the review JSON — event is APPROVE, REQUEST_CHANGES, or COMMENT +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/reviews \ + -d "{ + \"commit_id\": \"$HEAD_SHA\", + \"event\": \"REQUEST_CHANGES\", + \"body\": \"## Hermes Agent Review\n\nFound 2 issues, 1 suggestion. See inline comments.\", + \"comments\": [ + {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"🔴 **Critical:** User input passed directly to SQL query — use parameterized queries.\"}, + {\"path\": \"src/models.py\", \"line\": 23, \"body\": \"⚠️ **Warning:** Password stored without hashing.\"}, + {\"path\": \"src/utils.py\", \"line\": 8, \"body\": \"💡 **Suggestion:** This duplicates logic in core/utils.py:34.\"} + ] + }" +``` + +### Step 8: Also post a summary comment + +In addition to inline comments, leave a top-level summary so the PR author gets the full picture at a glance. Use the review output format from `references/review-output-template.md`. 
+ +**With gh:** +```bash +gh pr comment $PR_NUMBER --body "$(cat <<'EOF' +## Code Review Summary + +**Verdict: Changes Requested** (2 issues, 1 suggestion) + +### 🔴 Critical +- **src/auth.py:45** — SQL injection vulnerability + +### ⚠️ Warnings +- **src/models.py:23** — Plaintext password storage + +### 💡 Suggestions +- **src/utils.py:8** — Duplicated logic, consider consolidating + +### ✅ Looks Good +- Clean API design +- Good error handling in the middleware layer + +--- +*Reviewed by Hermes Agent* +EOF +)" +``` + +### Step 9: Clean up + +```bash +git checkout main +git branch -D pr-$PR_NUMBER +``` + +### Decision: Approve vs Request Changes vs Comment + +- **Approve** — no critical or warning-level issues, only minor suggestions or all clear +- **Request Changes** — any critical or warning-level issue that should be fixed before merge +- **Comment** — observations and suggestions, but nothing blocking (use when you're unsure or the PR is a draft) diff --git a/website/docs/user-guide/skills/bundled/github/github-github-issues.md b/website/docs/user-guide/skills/bundled/github/github-github-issues.md new file mode 100644 index 0000000000..8493663cd5 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-issues.md @@ -0,0 +1,387 @@ +--- +title: "Github Issues — Create, manage, triage, and close GitHub issues" +sidebar_label: "Github Issues" +description: "Create, manage, triage, and close GitHub issues" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Issues + +Create, manage, triage, and close GitHub issues. Search existing issues, add labels, assign people, and link to PRs. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-issues` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Issues`, `Project-Management`, `Bug-Tracking`, `Triage` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Issues Management + +Create, search, triage, and manage GitHub issues. Each section shows `gh` first, then the `curl` fallback. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repo with a GitHub remote, or specify the repo explicitly + +### Setup + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. 
Viewing Issues + +**With gh:** + +```bash +gh issue list +gh issue list --state open --label "bug" +gh issue list --assignee @me +gh issue list --search "authentication error" --state all +gh issue view 42 +``` + +**With curl:** + +```bash +# List open issues +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?state=open&per_page=20" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: # GitHub API returns PRs in /issues too + labels = ', '.join(l['name'] for l in i['labels']) + print(f\"#{i['number']:5} {i['state']:6} {labels:30} {i['title']}\")" + +# Filter by label +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?state=open&labels=bug&per_page=20" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: + print(f\"#{i['number']} {i['title']}\")" + +# View a specific issue +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + | python3 -c " +import sys, json +i = json.load(sys.stdin) +labels = ', '.join(l['name'] for l in i['labels']) +assignees = ', '.join(a['login'] for a in i['assignees']) +print(f\"#{i['number']}: {i['title']}\") +print(f\"State: {i['state']} Labels: {labels} Assignees: {assignees}\") +print(f\"Author: {i['user']['login']} Created: {i['created_at']}\") +print(f\"\n{i['body']}\")" + +# Search issues +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/search/issues?q=authentication+error+repo:$OWNER/$REPO" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin)['items']: + print(f\"#{i['number']} {i['state']:6} {i['title']}\")" +``` + +## 2. Creating Issues + +**With gh:** + +```bash +gh issue create \ + --title "Login redirect ignores ?next= parameter" \ + --body "## Description +After logging in, users always land on /dashboard. 
+ +## Steps to Reproduce +1. Navigate to /settings while logged out +2. Get redirected to /login?next=/settings +3. Log in +4. Actual: redirected to /dashboard (should go to /settings) + +## Expected Behavior +Respect the ?next= query parameter." \ + --label "bug,backend" \ + --assignee "username" +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues \ + -d '{ + "title": "Login redirect ignores ?next= parameter", + "body": "## Description\nAfter logging in, users always land on /dashboard.\n\n## Steps to Reproduce\n1. Navigate to /settings while logged out\n2. Get redirected to /login?next=/settings\n3. Log in\n4. Actual: redirected to /dashboard\n\n## Expected Behavior\nRespect the ?next= query parameter.", + "labels": ["bug", "backend"], + "assignees": ["username"] + }' +``` + +### Bug Report Template + +``` +## Bug Description + + +## Steps to Reproduce +1. +2. + +## Expected Behavior + + +## Actual Behavior + + +## Environment +- OS: +- Version: +``` + +### Feature Request Template + +``` +## Feature Description + + +## Motivation + + +## Proposed Solution + + +## Alternatives Considered + +``` + +## 3. 
Managing Issues + +### Add/Remove Labels + +**With gh:** + +```bash +gh issue edit 42 --add-label "priority:high,bug" +gh issue edit 42 --remove-label "needs-triage" +``` + +**With curl:** + +```bash +# Add labels +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/labels \ + -d '{"labels": ["priority:high", "bug"]}' + +# Remove a label +curl -s -X DELETE \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/labels/needs-triage + +# List available labels in the repo +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/labels \ + | python3 -c " +import sys, json +for l in json.load(sys.stdin): + print(f\" {l['name']:30} {l.get('description', '')}\")" +``` + +### Assignment + +**With gh:** + +```bash +gh issue edit 42 --add-assignee username +gh issue edit 42 --add-assignee @me +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/assignees \ + -d '{"assignees": ["username"]}' +``` + +### Commenting + +**With gh:** + +```bash +gh issue comment 42 --body "Investigated — root cause is in auth middleware. Working on a fix." +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/comments \ + -d '{"body": "Investigated — root cause is in auth middleware. 
Working on a fix."}' +``` + +### Closing and Reopening + +**With gh:** + +```bash +gh issue close 42 +gh issue close 42 --reason "not planned" +gh issue reopen 42 +``` + +**With curl:** + +```bash +# Close +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + -d '{"state": "closed", "state_reason": "completed"}' + +# Reopen +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + -d '{"state": "open"}' +``` + +### Linking Issues to PRs + +Issues are automatically closed when a PR merges with the right keywords in the body: + +``` +Closes #42 +Fixes #42 +Resolves #42 +``` + +To create a branch from an issue: + +**With gh:** + +```bash +gh issue develop 42 --checkout +``` + +**With git (manual equivalent):** + +```bash +git checkout main && git pull origin main +git checkout -b fix/issue-42-login-redirect +``` + +## 4. Issue Triage Workflow + +When asked to triage issues: + +1. **List untriaged issues:** + +```bash +# With gh +gh issue list --label "needs-triage" --state open + +# With curl +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?labels=needs-triage&state=open" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: + print(f\"#{i['number']} {i['title']}\")" +``` + +2. **Read and categorize** each issue (view details, understand the bug/feature) + +3. **Apply labels and priority** (see Managing Issues above) + +4. **Assign** if the owner is clear + +5. **Comment with triage notes** if needed + +## 5. 
Bulk Operations + +For batch operations, combine API calls with shell scripting: + +**With gh:** + +```bash +# Close all issues with a specific label +gh issue list --label "wontfix" --json number --jq '.[].number' | \ + xargs -I {} gh issue close {} --reason "not planned" +``` + +**With curl:** + +```bash +# List issue numbers with a label, then close each +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?labels=wontfix&state=open" \ + | python3 -c "import sys,json; [print(i['number']) for i in json.load(sys.stdin)]" \ + | while read num; do + curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/$num \ + -d '{"state": "closed", "state_reason": "not_planned"}' + echo "Closed #$num" + done +``` + +## Quick Reference Table + +| Action | gh | curl endpoint | +|--------|-----|--------------| +| List issues | `gh issue list` | `GET /repos/{o}/{r}/issues` | +| View issue | `gh issue view N` | `GET /repos/{o}/{r}/issues/N` | +| Create issue | `gh issue create ...` | `POST /repos/{o}/{r}/issues` | +| Add labels | `gh issue edit N --add-label ...` | `POST /repos/{o}/{r}/issues/N/labels` | +| Assign | `gh issue edit N --add-assignee ...` | `POST /repos/{o}/{r}/issues/N/assignees` | +| Comment | `gh issue comment N --body ...` | `POST /repos/{o}/{r}/issues/N/comments` | +| Close | `gh issue close N` | `PATCH /repos/{o}/{r}/issues/N` | +| Search | `gh issue list --search "..."` | `GET /search/issues?q=...` | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md new file mode 100644 index 0000000000..f1a31e1572 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md @@ -0,0 +1,384 @@ +--- +title: "Github Pr Workflow" +sidebar_label: "Github Pr Workflow" +description: "Full pull request lifecycle — create branches, 
commit changes, open PRs, monitor CI status, auto-fix failures, and merge" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Pr Workflow + +Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-pr-workflow` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Pull-Requests`, `CI/CD`, `Git`, `Automation`, `Merge` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Pull Request Workflow + +Complete guide for managing the PR lifecycle. Each section shows the `gh` way first, then the `git` + `curl` fallback for machines without `gh`. 
+ +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repository with a GitHub remote + +### Quick Auth Detection + +```bash +# Determine which method to use throughout this workflow +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + # Ensure we have a token for API calls + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi +echo "Using: $AUTH" +``` + +### Extracting Owner/Repo from the Git Remote + +Many `curl` commands need `owner/repo`. Extract it from the git remote: + +```bash +# Works for both HTTPS and SSH remote URLs +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +echo "Owner: $OWNER, Repo: $REPO" +``` + +--- + +## 1. Branch Creation + +This part is pure `git` — identical either way: + +```bash +# Make sure you're up to date +git fetch origin +git checkout main && git pull origin main + +# Create and switch to a new branch +git checkout -b feat/add-user-authentication +``` + +Branch naming conventions: +- `feat/description` — new features +- `fix/description` — bug fixes +- `refactor/description` — code restructuring +- `docs/description` — documentation +- `ci/description` — CI/CD changes + +## 2. 
Making Commits + +Use the agent's file tools (`write_file`, `patch`) to make changes, then commit: + +```bash +# Stage specific files +git add src/auth.py src/models/user.py tests/test_auth.py + +# Commit with a conventional commit message +git commit -m "feat: add JWT-based user authentication + +- Add login/register endpoints +- Add User model with password hashing +- Add auth middleware for protected routes +- Add unit tests for auth flow" +``` + +Commit message format (Conventional Commits): +``` +type(scope): short description + +Longer explanation if needed. Wrap at 72 characters. +``` + +Types: `feat`, `fix`, `refactor`, `docs`, `test`, `ci`, `chore`, `perf` + +## 3. Pushing and Creating a PR + +### Push the Branch (same either way) + +```bash +git push -u origin HEAD +``` + +### Create the PR + +**With gh:** + +```bash +gh pr create \ + --title "feat: add JWT-based user authentication" \ + --body "## Summary +- Adds login and register API endpoints +- JWT token generation and validation + +## Test Plan +- [ ] Unit tests pass + +Closes #42" +``` + +Options: `--draft`, `--reviewer user1,user2`, `--label "enhancement"`, `--base develop` + +**With git + curl:** + +```bash +BRANCH=$(git branch --show-current) + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/$OWNER/$REPO/pulls \ + -d "{ + \"title\": \"feat: add JWT-based user authentication\", + \"body\": \"## Summary\nAdds login and register API endpoints.\n\nCloses #42\", + \"head\": \"$BRANCH\", + \"base\": \"main\" + }" +``` + +The response JSON includes the PR `number` — save it for later commands. + +To create as a draft, add `"draft": true` to the JSON body. + +## 4. 
Monitoring CI Status + +### Check CI Status + +**With gh:** + +```bash +# One-shot check +gh pr checks + +# Watch until all checks finish (polls every 10s) +gh pr checks --watch +``` + +**With git + curl:** + +```bash +# Get the latest commit SHA on the current branch +SHA=$(git rev-parse HEAD) + +# Query the combined status +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/status \ + | python3 -c " +import sys, json +data = json.load(sys.stdin) +print(f\"Overall: {data['state']}\") +for s in data.get('statuses', []): + print(f\" {s['context']}: {s['state']} - {s.get('description', '')}\")" + +# Also check GitHub Actions check runs (separate endpoint) +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/check-runs \ + | python3 -c " +import sys, json +data = json.load(sys.stdin) +for cr in data.get('check_runs', []): + print(f\" {cr['name']}: {cr['status']} / {cr['conclusion'] or 'pending'}\")" +``` + +### Poll Until Complete (git + curl) + +```bash +# Simple polling loop — check every 30 seconds, up to 10 minutes +SHA=$(git rev-parse HEAD) +for i in $(seq 1 20); do + STATUS=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/status \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])") + echo "Check $i: $STATUS" + if [ "$STATUS" = "success" ] || [ "$STATUS" = "failure" ] || [ "$STATUS" = "error" ]; then + break + fi + sleep 30 +done +``` + +## 5. Auto-Fixing CI Failures + +When CI fails, diagnose and fix. This loop works with either auth method. 
+ +### Step 1: Get Failure Details + +**With gh:** + +```bash +# List recent workflow runs on this branch +gh run list --branch $(git branch --show-current) --limit 5 + +# View failed logs +gh run view --log-failed +``` + +**With git + curl:** + +```bash +BRANCH=$(git branch --show-current) + +# List workflow runs on this branch +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/actions/runs?branch=$BRANCH&per_page=5" \ + | python3 -c " +import sys, json +runs = json.load(sys.stdin)['workflow_runs'] +for r in runs: + print(f\"Run {r['id']}: {r['name']} - {r['conclusion'] or r['status']}\")" + +# Get failed job logs (download as zip, extract, read) +RUN_ID= +curl -s -L \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/logs \ + -o /tmp/ci-logs.zip +cd /tmp && unzip -o ci-logs.zip -d ci-logs && cat ci-logs/*.txt +``` + +### Step 2: Fix and Push + +After identifying the issue, use file tools (`patch`, `write_file`) to fix it: + +```bash +git add +git commit -m "fix: resolve CI failure in " +git push +``` + +### Step 3: Verify + +Re-check CI status using the commands from Section 4 above. + +### Auto-Fix Loop Pattern + +When asked to auto-fix CI, follow this loop: + +1. Check CI status → identify failures +2. Read failure logs → understand the error +3. Use `read_file` + `patch`/`write_file` → fix the code +4. `git add . && git commit -m "fix: ..." && git push` +5. Wait for CI → re-check status +6. Repeat if still failing (up to 3 attempts, then ask the user) + +## 6. 
Merging
+
+**With gh:**
+
+```bash
+# Squash merge + delete branch (cleanest for feature branches)
+gh pr merge --squash --delete-branch
+
+# Enable auto-merge (merges when all checks pass)
+gh pr merge --auto --squash --delete-branch
+```
+
+**With git + curl:**
+
+```bash
+PR_NUMBER=42  # the PR number returned when the PR was created (Section 3)
+
+# Merge the PR via API (squash)
+curl -s -X PUT \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/merge \
+  -d "{
+    \"merge_method\": \"squash\",
+    \"commit_title\": \"feat: add user authentication (#$PR_NUMBER)\"
+  }"
+
+# Delete the remote branch after merge
+BRANCH=$(git branch --show-current)
+git push origin --delete $BRANCH
+
+# Switch back to main locally
+git checkout main && git pull origin main
+git branch -d $BRANCH
+```
+
+Merge methods: `"merge"` (merge commit), `"squash"`, `"rebase"`
+
+### Enable Auto-Merge (curl)
+
+```bash
+# Auto-merge requires the repo to have it enabled in settings.
+# This uses the GraphQL API since REST doesn't support auto-merge.
+PR_NODE_ID=$(curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \
+  | python3 -c "import sys,json; print(json.load(sys.stdin)['node_id'])")
+
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/graphql \
+  -d "{\"query\": \"mutation { enablePullRequestAutoMerge(input: {pullRequestId: \\\"$PR_NODE_ID\\\", mergeMethod: SQUASH}) { clientMutationId } }\"}"
+```
+
+## 7. Complete Workflow Example
+
+```bash
+# 1. Start from clean main
+git checkout main && git pull origin main
+
+# 2. Branch
+git checkout -b fix/login-redirect-bug
+
+# 3. (Agent makes code changes with file tools)
+
+# 4. Commit
+git add src/auth/login.py tests/test_login.py
+git commit -m "fix: correct redirect URL after login
+
+Preserves the ?next= parameter instead of always redirecting to /dashboard."
+
+# 5. Push
+git push -u origin HEAD
+
+# 6. 
Create PR (picks gh or curl based on what's available) +# ... (see Section 3) + +# 7. Monitor CI (see Section 4) + +# 8. Merge when green (see Section 6) +``` + +## Useful PR Commands Reference + +| Action | gh | git + curl | +|--------|-----|-----------| +| List my PRs | `gh pr list --author @me` | `curl -s -H "Authorization: token $GITHUB_TOKEN" "https://api.github.com/repos/$OWNER/$REPO/pulls?state=open"` | +| View PR diff | `gh pr diff` | `git diff main...HEAD` (local) or `curl -H "Accept: application/vnd.github.diff" ...` | +| Add comment | `gh pr comment N --body "..."` | `curl -X POST .../issues/N/comments -d '{"body":"..."}'` | +| Request review | `gh pr edit N --add-reviewer user` | `curl -X POST .../pulls/N/requested_reviewers -d '{"reviewers":["user"]}'` | +| Close PR | `gh pr close N` | `curl -X PATCH .../pulls/N -d '{"state":"closed"}'` | +| Check out someone's PR | `gh pr checkout N` | `git fetch origin pull/N/head:pr-N && git checkout pr-N` | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md new file mode 100644 index 0000000000..8392250346 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md @@ -0,0 +1,533 @@ +--- +title: "Github Repo Management — Clone, create, fork, configure, and manage GitHub repositories" +sidebar_label: "Github Repo Management" +description: "Clone, create, fork, configure, and manage GitHub repositories" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Repo Management + +Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-repo-management` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Repositories`, `Git`, `Releases`, `Secrets`, `Configuration` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Repository Management + +Create, clone, fork, configure, and manage GitHub repositories. Each section shows `gh` first, then the `git` + `curl` fallback. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) + +### Setup + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +# Get your GitHub username (needed for several operations) +if [ "$AUTH" = "gh" ]; then + GH_USER=$(gh api user --jq '.login') +else + GH_USER=$(curl -s -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/user | python3 -c "import sys,json; print(json.load(sys.stdin)['login'])") +fi +``` + +If you're inside a repo already: + +```bash +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; 
s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. Cloning Repositories + +Cloning is pure `git` — works identically either way: + +```bash +# Clone via HTTPS (works with credential helper or token-embedded URL) +git clone https://github.com/owner/repo-name.git + +# Clone into a specific directory +git clone https://github.com/owner/repo-name.git ./my-local-dir + +# Shallow clone (faster for large repos) +git clone --depth 1 https://github.com/owner/repo-name.git + +# Clone a specific branch +git clone --branch develop https://github.com/owner/repo-name.git + +# Clone via SSH (if SSH is configured) +git clone git@github.com:owner/repo-name.git +``` + +**With gh (shorthand):** + +```bash +gh repo clone owner/repo-name +gh repo clone owner/repo-name -- --depth 1 +``` + +## 2. Creating Repositories + +**With gh:** + +```bash +# Create a public repo and clone it +gh repo create my-new-project --public --clone + +# Private, with description and license +gh repo create my-new-project --private --description "A useful tool" --license MIT --clone + +# Under an organization +gh repo create my-org/my-new-project --public --clone + +# From existing local directory +cd /path/to/existing/project +gh repo create my-project --source . --public --push +``` + +**With git + curl:** + +```bash +# Create the remote repo via API +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/user/repos \ + -d '{ + "name": "my-new-project", + "description": "A useful tool", + "private": false, + "auto_init": true, + "license_template": "mit" + }' + +# Clone it +git clone https://github.com/$GH_USER/my-new-project.git +cd my-new-project + +# -- OR -- push an existing local directory to the new repo +cd /path/to/existing/project +git init +git add . 
+git commit -m "Initial commit" +git remote add origin https://github.com/$GH_USER/my-new-project.git +git push -u origin main +``` + +To create under an organization: + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/orgs/my-org/repos \ + -d '{"name": "my-new-project", "private": false}' +``` + +### From a Template + +**With gh:** + +```bash +gh repo create my-new-app --template owner/template-repo --public --clone +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/owner/template-repo/generate \ + -d '{"owner": "'"$GH_USER"'", "name": "my-new-app", "private": false}' +``` + +## 3. Forking Repositories + +**With gh:** + +```bash +gh repo fork owner/repo-name --clone +``` + +**With git + curl:** + +```bash +# Create the fork via API +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/owner/repo-name/forks + +# Wait a moment for GitHub to create it, then clone +sleep 3 +git clone https://github.com/$GH_USER/repo-name.git +cd repo-name + +# Add the original repo as "upstream" remote +git remote add upstream https://github.com/owner/repo-name.git +``` + +### Keeping a Fork in Sync + +```bash +# Pure git — works everywhere +git fetch upstream +git checkout main +git merge upstream/main +git push origin main +``` + +**With gh (shortcut):** + +```bash +gh repo sync $GH_USER/repo-name +``` + +## 4. 
Repository Information + +**With gh:** + +```bash +gh repo view owner/repo-name +gh repo list --limit 20 +gh search repos "machine learning" --language python --sort stars +``` + +**With curl:** + +```bash +# View repo details +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO \ + | python3 -c " +import sys, json +r = json.load(sys.stdin) +print(f\"Name: {r['full_name']}\") +print(f\"Description: {r['description']}\") +print(f\"Stars: {r['stargazers_count']} Forks: {r['forks_count']}\") +print(f\"Default branch: {r['default_branch']}\") +print(f\"Language: {r['language']}\")" + +# List your repos +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/user/repos?per_page=20&sort=updated" \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin): + vis = 'private' if r['private'] else 'public' + print(f\" {r['full_name']:40} {vis:8} {r.get('language', ''):10} ★{r['stargazers_count']}\")" + +# Search repos +curl -s \ + "https://api.github.com/search/repositories?q=machine+learning+language:python&sort=stars&per_page=10" \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin)['items']: + print(f\" {r['full_name']:40} ★{r['stargazers_count']:6} {r['description'][:60] if r['description'] else ''}\")" +``` + +## 5. 
Repository Settings + +**With gh:** + +```bash +gh repo edit --description "Updated description" --visibility public +gh repo edit --enable-wiki=false --enable-issues=true +gh repo edit --default-branch main +gh repo edit --add-topic "machine-learning,python" +gh repo edit --enable-auto-merge +``` + +**With curl:** + +```bash +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO \ + -d '{ + "description": "Updated description", + "has_wiki": false, + "has_issues": true, + "allow_auto_merge": true + }' + +# Update topics +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.mercy-preview+json" \ + https://api.github.com/repos/$OWNER/$REPO/topics \ + -d '{"names": ["machine-learning", "python", "automation"]}' +``` + +## 6. Branch Protection + +```bash +# View current protection +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/branches/main/protection + +# Set up branch protection +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/branches/main/protection \ + -d '{ + "required_status_checks": { + "strict": true, + "contexts": ["ci/test", "ci/lint"] + }, + "enforce_admins": false, + "required_pull_request_reviews": { + "required_approving_review_count": 1 + }, + "restrictions": null + }' +``` + +## 7. 
Secrets Management (GitHub Actions) + +**With gh:** + +```bash +gh secret set API_KEY --body "your-secret-value" +gh secret set SSH_KEY < ~/.ssh/id_rsa +gh secret list +gh secret delete API_KEY +``` + +**With curl:** + +Secrets require encryption with the repo's public key — more involved via API: + +```bash +# Get the repo's public key for encrypting secrets +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets/public-key + +# Encrypt and set (requires Python with PyNaCl) +python3 -c " +from base64 import b64encode +from nacl import encoding, public +import json, sys + +# Get the public key +key_id = '' +public_key = '' + +# Encrypt +sealed = public.SealedBox( + public.PublicKey(public_key.encode('utf-8'), encoding.Base64Encoder) +).encrypt('your-secret-value'.encode('utf-8')) +print(json.dumps({ + 'encrypted_value': b64encode(sealed).decode('utf-8'), + 'key_id': key_id +}))" + +# Then PUT the encrypted secret +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets/API_KEY \ + -d '' + +# List secrets (names only, values hidden) +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets \ + | python3 -c " +import sys, json +for s in json.load(sys.stdin)['secrets']: + print(f\" {s['name']:30} updated: {s['updated_at']}\")" +``` + +Note: For secrets, `gh secret set` is dramatically simpler. If setting secrets is needed and `gh` isn't available, recommend installing it for just that operation. + +## 8. 
Releases
+
+**With gh:**
+
+```bash
+gh release create v1.0.0 --title "v1.0.0" --generate-notes
+gh release create v2.0.0-rc1 --draft --prerelease --generate-notes
+gh release create v1.0.0 ./dist/binary --title "v1.0.0" --notes "Release notes"
+gh release list
+gh release download v1.0.0 --dir ./downloads
+```
+
+**With curl:**
+
+```bash
+# Create a release
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/releases \
+  -d '{
+    "tag_name": "v1.0.0",
+    "name": "v1.0.0",
+    "body": "## Changelog\n- Feature A\n- Bug fix B",
+    "draft": false,
+    "prerelease": false,
+    "generate_release_notes": true
+  }'
+
+# List releases
+curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/releases \
+  | python3 -c "
+import sys, json
+for r in json.load(sys.stdin):
+    tag = r.get('tag_name', 'no tag')
+    print(f\"  {tag:15} {r['name']:30} {'draft' if r['draft'] else 'published'}\")"
+
+# Upload a release asset (binary file)
+RELEASE_ID=123456789  # the numeric 'id' field from the create/list responses above
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  -H "Content-Type: application/octet-stream" \
+  "https://uploads.github.com/repos/$OWNER/$REPO/releases/$RELEASE_ID/assets?name=binary-amd64" \
+  --data-binary @./dist/binary-amd64
+```
+
+## 9. 
GitHub Actions Workflows
+
+**With gh:**
+
+```bash
+gh workflow list
+gh run list --limit 10
+gh run view
+gh run view --log-failed
+gh run rerun
+gh run rerun --failed
+gh workflow run ci.yml --ref main
+gh workflow run deploy.yml -f environment=staging
+```
+
+**With curl:**
+
+```bash
+# List workflows
+curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/actions/workflows \
+  | python3 -c "
+import sys, json
+for w in json.load(sys.stdin)['workflows']:
+    print(f\"  {w['id']:10} {w['name']:30} {w['state']}\")"
+
+# List recent runs
+curl -s \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  "https://api.github.com/repos/$OWNER/$REPO/actions/runs?per_page=10" \
+  | python3 -c "
+import sys, json
+for r in json.load(sys.stdin)['workflow_runs']:
+    print(f\"  Run {r['id']} {r['name']:30} {r['conclusion'] or r['status']}\")"
+
+# Download failed run logs
+RUN_ID=1234567890  # a run id from the list above
+curl -s -L \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/logs \
+  -o /tmp/ci-logs.zip
+cd /tmp && unzip -o ci-logs.zip -d ci-logs
+
+# Re-run a failed workflow
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/rerun
+
+# Re-run only failed jobs
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/rerun-failed-jobs
+
+# Trigger a workflow manually (workflow_dispatch)
+WORKFLOW_ID=ci.yml  # numeric workflow id or workflow file name from the list above
+curl -s -X POST \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/actions/workflows/$WORKFLOW_ID/dispatches \
+  -d '{"ref": "main", "inputs": {"environment": "staging"}}'
+```
+
+## 10. 
Gists + +**With gh:** + +```bash +gh gist create script.py --public --desc "Useful script" +gh gist list +``` + +**With curl:** + +```bash +# Create a gist +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/gists \ + -d '{ + "description": "Useful script", + "public": true, + "files": { + "script.py": {"content": "print(\"hello\")"} + } + }' + +# List your gists +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/gists \ + | python3 -c " +import sys, json +for g in json.load(sys.stdin): + files = ', '.join(g['files'].keys()) + print(f\" {g['id']} {g['description'] or '(no desc)':40} {files}\")" +``` + +## Quick Reference Table + +| Action | gh | git + curl | +|--------|-----|-----------| +| Clone | `gh repo clone o/r` | `git clone https://github.com/o/r.git` | +| Create repo | `gh repo create name --public` | `curl POST /user/repos` | +| Fork | `gh repo fork o/r --clone` | `curl POST /repos/o/r/forks` + `git clone` | +| Repo info | `gh repo view o/r` | `curl GET /repos/o/r` | +| Edit settings | `gh repo edit --...` | `curl PATCH /repos/o/r` | +| Create release | `gh release create v1.0` | `curl POST /repos/o/r/releases` | +| List workflows | `gh workflow list` | `curl GET /repos/o/r/actions/workflows` | +| Rerun CI | `gh run rerun ID` | `curl POST /repos/o/r/actions/runs/ID/rerun` | +| Set secret | `gh secret set KEY` | `curl PUT /repos/o/r/actions/secrets/KEY` (+ encryption) | diff --git a/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md new file mode 100644 index 0000000000..267c8c064c --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md @@ -0,0 +1,374 @@ +--- +title: "Native Mcp" +sidebar_label: "Native Mcp" +description: "Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools" +--- + +{/* This page 
is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Native Mcp + +Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mcp/native-mcp` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `MCP`, `Tools`, `Integrations` | +| Related skills | [`mcporter`](/docs/user-guide/skills/optional/mcp/mcp-mcporter) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Native MCP Client + +Hermes Agent has a built-in MCP client that connects to MCP servers at startup, discovers their tools, and makes them available as first-class tools the agent can call directly. No bridge CLI needed -- tools from MCP servers appear alongside built-in tools like `terminal`, `read_file`, etc. + +## When to Use + +Use this whenever you want to: +- Connect to MCP servers and use their tools from within Hermes Agent +- Add external capabilities (filesystem access, GitHub, databases, APIs) via MCP +- Run local stdio-based MCP servers (npx, uvx, or any command) +- Connect to remote HTTP/StreamableHTTP MCP servers +- Have MCP tools auto-discovered and available in every conversation + +For ad-hoc, one-off MCP tool calls from the terminal without configuring anything, see the `mcporter` skill instead. + +## Prerequisites + +- **mcp Python package** -- optional dependency; install with `pip install mcp`. If not installed, MCP support is silently disabled. 
+- **Node.js** -- required for `npx`-based MCP servers (most community servers) +- **uv** -- required for `uvx`-based MCP servers (Python-based servers) + +Install the MCP SDK: + +```bash +pip install mcp +# or, if using uv: +uv pip install mcp +``` + +## Quick Start + +Add MCP servers to `~/.hermes/config.yaml` under the `mcp_servers` key: + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] +``` + +Restart Hermes Agent. On startup it will: +1. Connect to the server +2. Discover available tools +3. Register them with the prefix `mcp_time_*` +4. Inject them into all platform toolsets + +You can then use the tools naturally -- just ask the agent to get the current time. + +## Configuration Reference + +Each entry under `mcp_servers` is a server name mapped to its config. There are two transport types: **stdio** (command-based) and **HTTP** (url-based). + +### Stdio Transport (command + args) + +```yaml +mcp_servers: + server_name: + command: "npx" # (required) executable to run + args: ["-y", "pkg-name"] # (optional) command arguments, default: [] + env: # (optional) environment variables for the subprocess + SOME_API_KEY: "value" + timeout: 120 # (optional) per-tool-call timeout in seconds, default: 120 + connect_timeout: 60 # (optional) initial connection timeout in seconds, default: 60 +``` + +### HTTP Transport (url) + +```yaml +mcp_servers: + server_name: + url: "https://my-server.example.com/mcp" # (required) server URL + headers: # (optional) HTTP headers + Authorization: "Bearer sk-..." 
+ timeout: 180 # (optional) per-tool-call timeout in seconds, default: 120 + connect_timeout: 60 # (optional) initial connection timeout in seconds, default: 60 +``` + +### All Config Options + +| Option | Type | Default | Description | +|-------------------|--------|---------|---------------------------------------------------| +| `command` | string | -- | Executable to run (stdio transport, required) | +| `args` | list | `[]` | Arguments passed to the command | +| `env` | dict | `{}` | Extra environment variables for the subprocess | +| `url` | string | -- | Server URL (HTTP transport, required) | +| `headers` | dict | `{}` | HTTP headers sent with every request | +| `timeout` | int | `120` | Per-tool-call timeout in seconds | +| `connect_timeout` | int | `60` | Timeout for initial connection and discovery | + +Note: A server config must have either `command` (stdio) or `url` (HTTP), not both. + +## How It Works + +### Startup Discovery + +When Hermes Agent starts, `discover_mcp_tools()` is called during tool initialization: + +1. Reads `mcp_servers` from `~/.hermes/config.yaml` +2. For each server, spawns a connection in a dedicated background event loop +3. Initializes the MCP session and calls `list_tools()` to discover available tools +4. Registers each tool in the Hermes tool registry + +### Tool Naming Convention + +MCP tools are registered with the naming pattern: + +``` +mcp_{server_name}_{tool_name} +``` + +Hyphens and dots in names are replaced with underscores for LLM API compatibility. + +Examples: +- Server `filesystem`, tool `read_file` → `mcp_filesystem_read_file` +- Server `github`, tool `list-issues` → `mcp_github_list_issues` +- Server `my-api`, tool `fetch.data` → `mcp_my_api_fetch_data` + +### Auto-Injection + +After discovery, MCP tools are automatically injected into all `hermes-*` platform toolsets (CLI, Discord, Telegram, etc.). This means MCP tools are available in every conversation without any additional configuration. 
+ +### Connection Lifecycle + +- Each server runs as a long-lived asyncio Task in a background daemon thread +- Connections persist for the lifetime of the agent process +- If a connection drops, automatic reconnection with exponential backoff kicks in (up to 5 retries, max 60s backoff) +- On agent shutdown, all connections are gracefully closed + +### Idempotency + +`discover_mcp_tools()` is idempotent -- calling it multiple times only connects to servers that aren't already connected. Failed servers are retried on subsequent calls. + +## Transport Types + +### Stdio Transport + +The most common transport. Hermes launches the MCP server as a subprocess and communicates over stdin/stdout. + +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/projects"] +``` + +The subprocess inherits a **filtered** environment (see Security section below) plus any variables you specify in `env`. + +### HTTP / StreamableHTTP Transport + +For remote or shared MCP servers. Requires the `mcp` package to include HTTP client support (`mcp.client.streamable_http`). + +```yaml +mcp_servers: + remote_api: + url: "https://mcp.example.com/mcp" + headers: + Authorization: "Bearer sk-..." +``` + +If HTTP support is not available in your installed `mcp` version, the server will fail with an ImportError and other servers will continue normally. + +## Security + +### Environment Variable Filtering + +For stdio servers, Hermes does NOT pass your full shell environment to MCP subprocesses. Only safe baseline variables are inherited: + +- `PATH`, `HOME`, `USER`, `LANG`, `LC_ALL`, `TERM`, `SHELL`, `TMPDIR` +- Any `XDG_*` variables + +All other environment variables (API keys, tokens, secrets) are excluded unless you explicitly add them via the `env` config key. This prevents accidental credential leakage to untrusted MCP servers. 
+ +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + # Only this token is passed to the subprocess + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..." +``` + +### Credential Stripping in Error Messages + +If an MCP tool call fails, any credential-like patterns in the error message are automatically redacted before being shown to the LLM. This covers: + +- GitHub PATs (`ghp_...`) +- OpenAI-style keys (`sk-...`) +- Bearer tokens +- Generic `token=`, `key=`, `API_KEY=`, `password=`, `secret=` patterns + +## Troubleshooting + +### "MCP SDK not available -- skipping MCP tool discovery" + +The `mcp` Python package is not installed. Install it: + +```bash +pip install mcp +``` + +### "No MCP servers configured" + +No `mcp_servers` key in `~/.hermes/config.yaml`, or it's empty. Add at least one server. + +### "Failed to connect to MCP server 'X'" + +Common causes: +- **Command not found**: The `command` binary isn't on PATH. Ensure `npx`, `uvx`, or the relevant command is installed. +- **Package not found**: For npx servers, the npm package may not exist or may need `-y` in args to auto-install. +- **Timeout**: The server took too long to start. Increase `connect_timeout`. +- **Port conflict**: For HTTP servers, the URL may be unreachable. + +### "MCP server 'X' requires HTTP transport but mcp.client.streamable_http is not available" + +Your `mcp` package version doesn't include HTTP client support. Upgrade: + +```bash +pip install --upgrade mcp +``` + +### Tools not appearing + +- Check that the server is listed under `mcp_servers` (not `mcp` or `servers`) +- Ensure the YAML indentation is correct +- Look at Hermes Agent startup logs for connection messages +- Tool names are prefixed with `mcp_{server}_{tool}` -- look for that pattern + +### Connection keeps dropping + +The client retries up to 5 times with exponential backoff (1s, 2s, 4s, 8s, 16s, capped at 60s). 
If the server is fundamentally unreachable, it gives up after 5 attempts. Check the server process and network connectivity. + +## Examples + +### Time Server (uvx) + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] +``` + +Registers tools like `mcp_time_get_current_time`. + +### Filesystem Server (npx) + +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/documents"] + timeout: 30 +``` + +Registers tools like `mcp_filesystem_read_file`, `mcp_filesystem_write_file`, `mcp_filesystem_list_directory`. + +### GitHub Server with Authentication + +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxxxxxxxxxxxxxxxxxxx" + timeout: 60 +``` + +Registers tools like `mcp_github_list_issues`, `mcp_github_create_pull_request`, etc. + +### Remote HTTP Server + +```yaml +mcp_servers: + company_api: + url: "https://mcp.mycompany.com/v1/mcp" + headers: + Authorization: "Bearer sk-xxxxxxxxxxxxxxxxxxxx" + X-Team-Id: "engineering" + timeout: 180 + connect_timeout: 30 +``` + +### Multiple Servers + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] + + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"] + + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxxxxxxxxxxxxxxxxxxx" + + company_api: + url: "https://mcp.internal.company.com/mcp" + headers: + Authorization: "Bearer sk-xxxxxxxxxxxxxxxxxxxx" + timeout: 300 +``` + +All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions. + +## Sampling (Server-Initiated LLM Requests) + +Hermes supports MCP's `sampling/createMessage` capability — MCP servers can request LLM completions through the agent during tool execution. 
This enables agent-in-the-loop workflows (data analysis, content generation, decision-making). + +Sampling is **enabled by default**. Configure per server: + +```yaml +mcp_servers: + my_server: + command: "npx" + args: ["-y", "my-mcp-server"] + sampling: + enabled: true # default: true + model: "gemini-3-flash" # model override (optional) + max_tokens_cap: 4096 # max tokens per request + timeout: 30 # LLM call timeout (seconds) + max_rpm: 10 # max requests per minute + allowed_models: [] # model whitelist (empty = all) + max_tool_rounds: 5 # tool loop limit (0 = disable) + log_level: "info" # audit verbosity +``` + +Servers can also include `tools` in sampling requests for multi-turn tool-augmented workflows. The `max_tool_rounds` config prevents infinite tool loops. Per-server audit metrics (requests, errors, tokens, tool use count) are tracked via `get_mcp_status()`. + +Disable sampling for untrusted servers with `sampling: { enabled: false }`. + +## Notes + +- MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop +- Tool results are returned as JSON with either `{"result": "..."}` or `{"error": "..."}` +- The native MCP client is independent of `mcporter` -- you can use both simultaneously +- Server connections are persistent and shared across all conversations in the same agent process +- Adding or removing servers requires restarting the agent (no hot-reload currently) diff --git a/website/docs/user-guide/skills/bundled/media/media-gif-search.md b/website/docs/user-guide/skills/bundled/media/media-gif-search.md new file mode 100644 index 0000000000..67b56645db --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-gif-search.md @@ -0,0 +1,101 @@ +--- +title: "Gif Search — Search and download GIFs from Tenor using curl" +sidebar_label: "Gif Search" +description: "Search and download GIFs from Tenor using curl" +--- + +{/* This page is auto-generated from the skill's SKILL.md 
by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Gif Search + +Search and download GIFs from Tenor using curl. No dependencies beyond curl and jq. Useful for finding reaction GIFs, creating visual content, and sending GIFs in chat. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/gif-search` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GIF`, `Media`, `Search`, `Tenor`, `API` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GIF Search (Tenor API) + +Search and download GIFs directly via the Tenor API using curl. No extra tools needed. + +## Setup + +Set your Tenor API key in your environment (add to `~/.hermes/.env`): + +```bash +TENOR_API_KEY=your_key_here +``` + +Get a free API key at https://developers.google.com/tenor/guides/quickstart — the Google Cloud Console Tenor API key is free and has generous rate limits. 
+ +## Prerequisites + +- `curl` and `jq` (both standard on macOS/Linux) +- `TENOR_API_KEY` environment variable + +## Search for GIFs + +```bash +# Search and get GIF URLs +curl -s "https://tenor.googleapis.com/v2/search?q=thumbs+up&limit=5&key=${TENOR_API_KEY}" | jq -r '.results[].media_formats.gif.url' + +# Get smaller/preview versions +curl -s "https://tenor.googleapis.com/v2/search?q=nice+work&limit=3&key=${TENOR_API_KEY}" | jq -r '.results[].media_formats.tinygif.url' +``` + +## Download a GIF + +```bash +# Search and download the top result +URL=$(curl -s "https://tenor.googleapis.com/v2/search?q=celebration&limit=1&key=${TENOR_API_KEY}" | jq -r '.results[0].media_formats.gif.url') +curl -sL "$URL" -o celebration.gif +``` + +## Get Full Metadata + +```bash +curl -s "https://tenor.googleapis.com/v2/search?q=cat&limit=3&key=${TENOR_API_KEY}" | jq '.results[] | {title: .title, url: .media_formats.gif.url, preview: .media_formats.tinygif.url, dimensions: .media_formats.gif.dims}' +``` + +## API Parameters + +| Parameter | Description | +|-----------|-------------| +| `q` | Search query (URL-encode spaces as `+`) | +| `limit` | Max results (1-50, default 20) | +| `key` | API key (from `$TENOR_API_KEY` env var) | +| `media_filter` | Filter formats: `gif`, `tinygif`, `mp4`, `tinymp4`, `webm` | +| `contentfilter` | Safety: `off`, `low`, `medium`, `high` | +| `locale` | Language: `en_US`, `es`, `fr`, etc. 
| + +## Available Media Formats + +Each result has multiple formats under `.media_formats`: + +| Format | Use case | +|--------|----------| +| `gif` | Full quality GIF | +| `tinygif` | Small preview GIF | +| `mp4` | Video version (smaller file size) | +| `tinymp4` | Small preview video | +| `webm` | WebM video | +| `nanogif` | Tiny thumbnail | + +## Notes + +- URL-encode the query: spaces as `+`, special chars as `%XX` +- For sending in chat, `tinygif` URLs are lighter weight +- GIF URLs can be used directly in markdown: `![alt](url)` diff --git a/website/docs/user-guide/skills/bundled/media/media-heartmula.md b/website/docs/user-guide/skills/bundled/media/media-heartmula.md new file mode 100644 index 0000000000..85dae5e867 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-heartmula.md @@ -0,0 +1,188 @@ +--- +title: "Heartmula — Set up and run HeartMuLa, the open-source music generation model family (Suno-like)" +sidebar_label: "Heartmula" +description: "Set up and run HeartMuLa, the open-source music generation model family (Suno-like)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Heartmula + +Set up and run HeartMuLa, the open-source music generation model family (Suno-like). Generates full songs from lyrics + tags with multilingual support. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/heartmula` | +| Version | `1.0.0` | +| Tags | `music`, `audio`, `generation`, `ai`, `heartmula`, `heartcodec`, `lyrics`, `songs` | +| Related skills | `audiocraft` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+::: + +# HeartMuLa - Open-Source Music Generation + +## Overview +HeartMuLa is a family of open-source music foundation models (Apache-2.0) that generates music conditioned on lyrics and tags. Comparable to Suno for open-source. Includes: +- **HeartMuLa** - Music language model (3B/7B) for generation from lyrics + tags +- **HeartCodec** - 12.5Hz music codec for high-fidelity audio reconstruction +- **HeartTranscriptor** - Whisper-based lyrics transcription +- **HeartCLAP** - Audio-text alignment model + +## When to Use +- User wants to generate music/songs from text descriptions +- User wants an open-source Suno alternative +- User wants local/offline music generation +- User asks about HeartMuLa, heartlib, or AI music generation + +## Hardware Requirements +- **Minimum**: 8GB VRAM with `--lazy_load true` (loads/unloads models sequentially) +- **Recommended**: 16GB+ VRAM for comfortable single-GPU usage +- **Multi-GPU**: Use `--mula_device cuda:0 --codec_device cuda:1` to split across GPUs +- 3B model with lazy_load peaks at ~6.2GB VRAM + +## Installation Steps + +### 1. Clone Repository +```bash +cd ~/ # or desired directory +git clone https://github.com/HeartMuLa/heartlib.git +cd heartlib +``` + +### 2. Create Virtual Environment (Python 3.10 required) +```bash +uv venv --python 3.10 .venv +. .venv/bin/activate +uv pip install -e . +``` + +### 3. Fix Dependency Compatibility Issues + +**IMPORTANT**: As of Feb 2026, the pinned dependencies have conflicts with newer packages. Apply these fixes: + +```bash +# Upgrade datasets (old version incompatible with current pyarrow) +uv pip install --upgrade datasets + +# Upgrade transformers (needed for huggingface-hub 1.x compatibility) +uv pip install --upgrade transformers +``` + +### 4. 
Patch Source Code (Required for transformers 5.x) + +**Patch 1 - RoPE cache fix** in `src/heartlib/heartmula/modeling_heartmula.py`: + +In the `setup_caches` method of the `HeartMuLa` class, add RoPE reinitialization after the `reset_caches` try/except block and before the `with device:` block: + +```python +# Re-initialize RoPE caches that were skipped during meta-device loading +from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE +for module in self.modules(): + if isinstance(module, Llama3ScaledRoPE) and not module.is_cache_built: + module.rope_init() + module.to(device) +``` + +**Why**: `from_pretrained` creates model on meta device first; `Llama3ScaledRoPE.rope_init()` skips cache building on meta tensors, then never rebuilds after weights are loaded to real device. + +**Patch 2 - HeartCodec loading fix** in `src/heartlib/pipelines/music_generation.py`: + +Add `ignore_mismatched_sizes=True` to ALL `HeartCodec.from_pretrained()` calls (there are 2: the eager load in `__init__` and the lazy load in the `codec` property). + +**Why**: VQ codebook `initted` buffers have shape `[1]` in checkpoint vs `[]` in model. Same data, just scalar vs 0-d tensor. Safe to ignore. + +### 5. Download Model Checkpoints +```bash +cd heartlib # project root +hf download --local-dir './ckpt' 'HeartMuLa/HeartMuLaGen' +hf download --local-dir './ckpt/HeartMuLa-oss-3B' 'HeartMuLa/HeartMuLa-oss-3B-happy-new-year' +hf download --local-dir './ckpt/HeartCodec-oss' 'HeartMuLa/HeartCodec-oss-20260123' +``` + +All 3 can be downloaded in parallel. Total size is several GB. + +## GPU / CUDA + +HeartMuLa uses CUDA by default (`--mula_device cuda --codec_device cuda`). No extra setup needed if the user has an NVIDIA GPU with PyTorch CUDA support installed. 
+ +- The installed `torch==2.4.1` includes CUDA 12.1 support out of the box +- `torchtune` may report version `0.4.0+cpu` — this is just package metadata, it still uses CUDA via PyTorch +- To verify GPU is being used, look for "CUDA memory" lines in the output (e.g. "CUDA memory before unloading: 6.20 GB") +- **No GPU?** You can run on CPU with `--mula_device cpu --codec_device cpu`, but expect generation to be **extremely slow** (potentially 30-60+ minutes for a single song vs ~4 minutes on GPU). CPU mode also requires significant RAM (~12GB+ free). If the user has no NVIDIA GPU, recommend using a cloud GPU service (Google Colab free tier with T4, Lambda Labs, etc.) or the online demo at https://heartmula.github.io/ instead. + +## Usage + +### Basic Generation +```bash +cd heartlib +. .venv/bin/activate +python ./examples/run_music_generation.py \ + --model_path=./ckpt \ + --version="3B" \ + --lyrics="./assets/lyrics.txt" \ + --tags="./assets/tags.txt" \ + --save_path="./assets/output.mp3" \ + --lazy_load true +``` + +### Input Formatting + +**Tags** (comma-separated, no spaces): +``` +piano,happy,wedding,synthesizer,romantic +``` +or +``` +rock,energetic,guitar,drums,male-vocal +``` + +**Lyrics** (use bracketed structural tags): +``` +[Intro] + +[Verse] +Your lyrics here... + +[Chorus] +Chorus lyrics... + +[Bridge] +Bridge lyrics... 
+ +[Outro] +``` + +### Key Parameters +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--max_audio_length_ms` | 240000 | Max length in ms (240s = 4 min) | +| `--topk` | 50 | Top-k sampling | +| `--temperature` | 1.0 | Sampling temperature | +| `--cfg_scale` | 1.5 | Classifier-free guidance scale | +| `--lazy_load` | false | Load/unload models on demand (saves VRAM) | +| `--mula_dtype` | bfloat16 | Dtype for HeartMuLa (bf16 recommended) | +| `--codec_dtype` | float32 | Dtype for HeartCodec (fp32 recommended for quality) | + +### Performance +- RTF (Real-Time Factor) ≈ 1.0 — a 4-minute song takes ~4 minutes to generate +- Output: MP3, 48kHz stereo, 128kbps + +## Pitfalls +1. **Do NOT use bf16 for HeartCodec** — degrades audio quality. Use fp32 (default). +2. **Tags may be ignored** — known issue (#90). Lyrics tend to dominate; experiment with tag ordering. +3. **Triton not available on macOS** — Linux/CUDA only for GPU acceleration. +4. **RTX 5080 incompatibility** reported in upstream issues. +5. The dependency pin conflicts require the manual upgrades and patches described above. + +## Links +- Repo: https://github.com/HeartMuLa/heartlib +- Models: https://huggingface.co/HeartMuLa +- Paper: https://arxiv.org/abs/2601.10547 +- License: Apache-2.0 diff --git a/website/docs/user-guide/skills/bundled/media/media-songsee.md b/website/docs/user-guide/skills/bundled/media/media-songsee.md new file mode 100644 index 0000000000..231b87ea3b --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-songsee.md @@ -0,0 +1,97 @@ +--- +title: "Songsee — Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc" +sidebar_label: "Songsee" +description: "Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. 
*/} + +# Songsee + +Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.) from audio files via CLI. Useful for audio analysis, music production debugging, and visual documentation. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/songsee` | +| Version | `1.0.0` | +| Author | community | +| License | MIT | +| Tags | `Audio`, `Visualization`, `Spectrogram`, `Music`, `Analysis` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# songsee + +Generate spectrograms and multi-panel audio feature visualizations from audio files. + +## Prerequisites + +Requires [Go](https://go.dev/doc/install): +```bash +go install github.com/steipete/songsee/cmd/songsee@latest +``` + +Optional: `ffmpeg` for formats beyond WAV/MP3. + +## Quick Start + +```bash +# Basic spectrogram +songsee track.mp3 + +# Save to specific file +songsee track.mp3 -o spectrogram.png + +# Multi-panel visualization grid +songsee track.mp3 --viz spectrogram,mel,chroma,hpss,selfsim,loudness,tempogram,mfcc,flux + +# Time slice (start at 12.5s, 8s duration) +songsee track.mp3 --start 12.5 --duration 8 -o slice.jpg + +# From stdin +cat track.mp3 | songsee - --format png -o out.png +``` + +## Visualization Types + +Use `--viz` with comma-separated values: + +| Type | Description | +|------|-------------| +| `spectrogram` | Standard frequency spectrogram | +| `mel` | Mel-scaled spectrogram | +| `chroma` | Pitch class distribution | +| `hpss` | Harmonic/percussive separation | +| `selfsim` | Self-similarity matrix | +| `loudness` | Loudness over time | +| `tempogram` | Tempo estimation | +| `mfcc` | Mel-frequency cepstral coefficients | +| `flux` | Spectral flux (onset detection) | + +Multiple `--viz` types render as a grid in a single image. 
+ +## Common Flags + +| Flag | Description | +|------|-------------| +| `--viz` | Visualization types (comma-separated) | +| `--style` | Color palette: `classic`, `magma`, `inferno`, `viridis`, `gray` | +| `--width` / `--height` | Output image dimensions | +| `--window` / `--hop` | FFT window and hop size | +| `--min-freq` / `--max-freq` | Frequency range filter | +| `--start` / `--duration` | Time slice of the audio | +| `--format` | Output format: `jpg` or `png` | +| `-o` | Output file path | + +## Notes + +- WAV and MP3 are decoded natively; other formats require `ffmpeg` +- Output images can be inspected with `vision_analyze` for automated audio analysis +- Useful for comparing audio outputs, debugging synthesis, or documenting audio processing pipelines diff --git a/website/docs/user-guide/skills/bundled/media/media-youtube-content.md b/website/docs/user-guide/skills/bundled/media/media-youtube-content.md new file mode 100644 index 0000000000..e94c755c98 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-youtube-content.md @@ -0,0 +1,88 @@ +--- +title: "Youtube Content" +sidebar_label: "Youtube Content" +description: "Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Youtube Content + +Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). Use when the user shares a YouTube URL or video link, asks to summarize a video, requests a transcript, or wants to extract and reformat content from any YouTube video. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/youtube-content` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# YouTube Content Tool + +Extract transcripts from YouTube videos and convert them into useful formats. + +## Setup + +```bash +pip install youtube-transcript-api +``` + +## Helper Script + +`SKILL_DIR` is the directory containing this SKILL.md file. The script accepts any standard YouTube URL format, short links (youtu.be), shorts, embeds, live links, or a raw 11-character video ID. + +```bash +# JSON output with metadata +python3 SKILL_DIR/scripts/fetch_transcript.py "https://youtube.com/watch?v=VIDEO_ID" + +# Plain text (good for piping into further processing) +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --text-only + +# With timestamps +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --timestamps + +# Specific language with fallback chain +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --language tr,en +``` + +## Output Formats + +After fetching the transcript, format it based on what the user asks for: + +- **Chapters**: Group by topic shifts, output timestamped chapter list +- **Summary**: Concise 5-10 sentence overview of the entire video +- **Chapter summaries**: Chapters with a short paragraph summary for each +- **Thread**: Twitter/X thread format — numbered posts, each under 280 chars +- **Blog post**: Full article with title, sections, and key takeaways +- **Quotes**: Notable quotes with timestamps + +### Example — Chapters Output + +``` +00:00 Introduction — host opens with the problem statement +03:45 Background — prior work and why existing solutions fall short +12:20 Core method — walkthrough of the proposed approach +24:10 Results — benchmark comparisons and key takeaways +31:55 Q&A — 
audience questions on scalability and next steps +``` + +## Workflow + +1. **Fetch** the transcript using the helper script with `--text-only --timestamps`. +2. **Validate**: confirm the output is non-empty and in the expected language. If empty, retry without `--language` to get any available transcript. If still empty, tell the user the video likely has transcripts disabled. +3. **Chunk if needed**: if the transcript exceeds ~50K characters, split into overlapping chunks (~40K with 2K overlap) and summarize each chunk before merging. +4. **Transform** into the requested output format. If the user did not specify a format, default to a summary. +5. **Verify**: re-read the transformed output to check for coherence, correct timestamps, and completeness before presenting. + +## Error Handling + +- **Transcript disabled**: tell the user; suggest they check if subtitles are available on the video page. +- **Private/unavailable video**: relay the error and ask the user to verify the URL. +- **No matching language**: retry without `--language` to fetch any available transcript, then note the actual language to the user. +- **Dependency missing**: run `pip install youtube-transcript-api` and retry. diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md new file mode 100644 index 0000000000..0112f747a3 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md @@ -0,0 +1,507 @@ +--- +title: "Evaluating Llms Harness — Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag)" +sidebar_label: "Evaluating Llms Harness" +description: "Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. 
Edit the source SKILL.md, not this page. */} + +# Evaluating Llms Harness + +Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. Supports HuggingFace, vLLM, APIs. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/evaluation/lm-evaluation-harness` | +| Version | `1.0.0` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `lm-eval`, `transformers`, `vllm` | +| Tags | `Evaluation`, `LM Evaluation Harness`, `Benchmarking`, `MMLU`, `HumanEval`, `GSM8K`, `EleutherAI`, `Model Quality`, `Academic Benchmarks`, `Industry Standard` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# lm-evaluation-harness - LLM Benchmarking + +## Quick start + +lm-evaluation-harness evaluates LLMs across 60+ academic benchmarks using standardized prompts and metrics. + +**Installation**: +```bash +pip install lm-eval +``` + +**Evaluate any HuggingFace model**: +```bash +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu,gsm8k,hellaswag \ + --device cuda:0 \ + --batch_size 8 +``` + +**View available tasks**: +```bash +lm_eval --tasks list +``` + +## Common workflows + +### Workflow 1: Standard benchmark evaluation + +Evaluate model on core benchmarks (MMLU, GSM8K, HumanEval). 
+
+Copy this checklist:
+
+```
+Benchmark Evaluation:
+- [ ] Step 1: Choose benchmark suite
+- [ ] Step 2: Configure model
+- [ ] Step 3: Run evaluation
+- [ ] Step 4: Analyze results
+```
+
+**Step 1: Choose benchmark suite**
+
+**Core reasoning benchmarks**:
+- **MMLU** (Massive Multitask Language Understanding) - 57 subjects, multiple choice
+- **GSM8K** - Grade school math word problems
+- **HellaSwag** - Common sense reasoning
+- **TruthfulQA** - Truthfulness and factuality
+- **ARC** (AI2 Reasoning Challenge) - Science questions
+
+**Code benchmarks**:
+- **HumanEval** - Python code generation (164 problems)
+- **MBPP** (Mostly Basic Python Problems) - Python coding
+
+**Standard suite** (recommended for model releases):
+```bash
+--tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge
+```
+
+**Step 2: Configure model**
+
+**HuggingFace model**:
+```bash
+lm_eval --model hf \
+    --model_args pretrained=meta-llama/Llama-2-7b-hf,dtype=bfloat16 \
+    --tasks mmlu \
+    --device cuda:0 \
+    --batch_size auto  # Auto-detect optimal batch size
+```
+
+**Quantized model (4-bit/8-bit)**:
+```bash
+lm_eval --model hf \
+    --model_args pretrained=meta-llama/Llama-2-7b-hf,load_in_4bit=True \
+    --tasks mmlu \
+    --device cuda:0
+```
+
+**Custom checkpoint**:
+```bash
+lm_eval --model hf \
+    --model_args pretrained=/path/to/my-model,tokenizer=/path/to/tokenizer \
+    --tasks mmlu \
+    --device cuda:0
+```
+
+**Step 3: Run evaluation**
+
+```bash
+# Full MMLU evaluation (57 subjects), 5-shot (the standard setting)
+lm_eval --model hf \
+    --model_args pretrained=meta-llama/Llama-2-7b-hf \
+    --tasks mmlu \
+    --num_fewshot 5 \
+    --batch_size 8 \
+    --output_path results/ \
+    --log_samples  # Save individual predictions
+
+# Multiple benchmarks at once
+lm_eval --model hf \
+    --model_args pretrained=meta-llama/Llama-2-7b-hf \
+    --tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge \
+    --num_fewshot 5 \
+    --batch_size 8 \
+    --output_path results/llama2-7b-eval.json
+```
+
+**Step 4: Analyze 
results**
+
+Results saved to `results/llama2-7b-eval.json`:
+
+```json
+{
+  "results": {
+    "mmlu": {
+      "acc": 0.459,
+      "acc_stderr": 0.004
+    },
+    "gsm8k": {
+      "exact_match": 0.142,
+      "exact_match_stderr": 0.006
+    },
+    "hellaswag": {
+      "acc_norm": 0.765,
+      "acc_norm_stderr": 0.004
+    }
+  },
+  "config": {
+    "model": "hf",
+    "model_args": "pretrained=meta-llama/Llama-2-7b-hf",
+    "num_fewshot": 5
+  }
+}
+```
+
+### Workflow 2: Track training progress
+
+Evaluate checkpoints during training.
+
+```
+Training Progress Tracking:
+- [ ] Step 1: Set up periodic evaluation
+- [ ] Step 2: Choose quick benchmarks
+- [ ] Step 3: Automate evaluation
+- [ ] Step 4: Plot learning curves
+```
+
+**Step 1: Set up periodic evaluation**
+
+Evaluate every N training steps:
+
+```bash
+#!/bin/bash
+# eval_checkpoint.sh
+# Uses 0-shot evaluation for speed during training.
+
+CHECKPOINT_DIR=$1
+STEP=$2
+
+lm_eval --model hf \
+    --model_args pretrained=$CHECKPOINT_DIR/checkpoint-$STEP \
+    --tasks gsm8k,hellaswag \
+    --num_fewshot 0 \
+    --batch_size 16 \
+    --output_path results/step-$STEP.json
+```
+
+**Step 2: Choose quick benchmarks**
+
+Fast benchmarks for frequent evaluation:
+- **HellaSwag**: ~10 minutes on 1 GPU
+- **GSM8K**: ~5 minutes
+- **PIQA**: ~2 minutes
+
+Avoid for frequent eval (too slow):
+- **MMLU**: ~2 hours (57 subjects)
+- **HumanEval**: Requires code execution
+
+**Step 3: Automate evaluation**
+
+Integrate with training script:
+
+```python
+# In training loop
+if step % eval_interval == 0:
+    model.save_pretrained(f"checkpoints/step-{step}")
+
+    # Run evaluation
+    os.system(f"./eval_checkpoint.sh checkpoints step-{step}")
+```
+
+Or use PyTorch Lightning callbacks:
+
+```python
+from pytorch_lightning import Callback
+
+class EvalHarnessCallback(Callback):
+    def on_validation_epoch_end(self, trainer, pl_module):
+        step = trainer.global_step
+        checkpoint_path = f"checkpoints/step-{step}"
+
+        # Save checkpoint
+        trainer.save_checkpoint(checkpoint_path)
+
+        # Run lm-eval
+        os.system(f"lm_eval --model hf 
--model_args pretrained={checkpoint_path} ...")
+```
+
+**Step 4: Plot learning curves**
+
+```python
+import glob
+import json
+
+import matplotlib.pyplot as plt
+
+
+def step_of(path):
+    # "results/step-1200.json" -> 1200
+    return int(path.split("-")[1].split(".")[0])
+
+
+# Load all results; sort numerically by step (plain sorted() would put
+# step-1000 before step-200 lexicographically)
+steps = []
+scores = []
+
+for file in sorted(glob.glob("results/step-*.json"), key=step_of):
+    with open(file) as f:
+        data = json.load(f)
+    steps.append(step_of(file))
+    # eval_checkpoint.sh above ran gsm8k + hellaswag, so plot one of those
+    scores.append(data["results"]["hellaswag"]["acc_norm"])
+
+# Plot
+plt.plot(steps, scores)
+plt.xlabel("Training Step")
+plt.ylabel("HellaSwag Accuracy (acc_norm)")
+plt.title("Training Progress")
+plt.savefig("training_curve.png")
+```
+
+### Workflow 3: Compare multiple models
+
+Benchmark suite for model comparison.
+
+```
+Model Comparison:
+- [ ] Step 1: Define model list
+- [ ] Step 2: Run evaluations
+- [ ] Step 3: Generate comparison table
+```
+
+**Step 1: Define model list**
+
+```bash
+# models.txt
+meta-llama/Llama-2-7b-hf
+meta-llama/Llama-2-13b-hf
+mistralai/Mistral-7B-v0.1
+microsoft/phi-2
+```
+
+**Step 2: Run evaluations**
+
+```bash
+#!/bin/bash
+# eval_all_models.sh
+
+TASKS="mmlu,gsm8k,hellaswag,truthfulqa"
+
+while read model; do
+  echo "Evaluating $model"
+
+  # Extract model name for output file
+  model_name=$(echo $model | sed 's/\//-/g')
+
+  lm_eval --model hf \
+    --model_args pretrained=$model,dtype=bfloat16 \
+    --tasks $TASKS \
+    --num_fewshot 5 \
+    --batch_size auto \
+    --output_path results/$model_name.json
+
+done < models.txt
+```
+
+**Step 3: Generate comparison table**
+
+```python
+import json
+import pandas as pd
+
+# Original repo ids; eval_all_models.sh saved each result under the id
+# with '/' replaced by '-'
+models = [
+    "meta-llama/Llama-2-7b-hf",
+    "meta-llama/Llama-2-13b-hf",
+    "mistralai/Mistral-7B-v0.1",
+    "microsoft/phi-2",
+]
+
+tasks = ["mmlu", "gsm8k", "hellaswag", "truthfulqa"]
+
+results = []
+for model in models:
+    fname = model.replace("/", "-")  # mirror eval_all_models.sh
+    with open(f"results/{fname}.json") as f:
+        data = json.load(f)
+        row = {"Model": model}
+        for task in tasks:
+            # Get primary metric for each task
+            metrics = data["results"][task]
+            if "acc" in metrics:
+                row[task.upper()] = 
f"{metrics['acc']:.3f}" + elif "exact_match" in metrics: + row[task.upper()] = f"{metrics['exact_match']:.3f}" + results.append(row) + +df = pd.DataFrame(results) +print(df.to_markdown(index=False)) +``` + +Output: +``` +| Model | MMLU | GSM8K | HELLASWAG | TRUTHFULQA | +|------------------------|-------|-------|-----------|------------| +| meta-llama/Llama-2-7b | 0.459 | 0.142 | 0.765 | 0.391 | +| meta-llama/Llama-2-13b | 0.549 | 0.287 | 0.801 | 0.430 | +| mistralai/Mistral-7B | 0.626 | 0.395 | 0.812 | 0.428 | +| microsoft/phi-2 | 0.560 | 0.613 | 0.682 | 0.447 | +``` + +### Workflow 4: Evaluate with vLLM (faster inference) + +Use vLLM backend for 5-10x faster evaluation. + +``` +vLLM Evaluation: +- [ ] Step 1: Install vLLM +- [ ] Step 2: Configure vLLM backend +- [ ] Step 3: Run evaluation +``` + +**Step 1: Install vLLM** + +```bash +pip install vllm +``` + +**Step 2: Configure vLLM backend** + +```bash +lm_eval --model vllm \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8 \ + --tasks mmlu \ + --batch_size auto +``` + +**Step 3: Run evaluation** + +vLLM is 5-10× faster than standard HuggingFace: + +```bash +# Standard HF: ~2 hours for MMLU on 7B model +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu \ + --batch_size 8 + +# vLLM: ~15-20 minutes for MMLU on 7B model +lm_eval --model vllm \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=2 \ + --tasks mmlu \ + --batch_size auto +``` + +## When to use vs alternatives + +**Use lm-evaluation-harness when:** +- Benchmarking models for academic papers +- Comparing model quality across standard tasks +- Tracking training progress +- Reporting standardized metrics (everyone uses same prompts) +- Need reproducible evaluation + +**Use alternatives instead:** +- **HELM** (Stanford): Broader evaluation (fairness, efficiency, calibration) +- **AlpacaEval**: Instruction-following evaluation with LLM 
judges +- **MT-Bench**: Conversational multi-turn evaluation +- **Custom scripts**: Domain-specific evaluation + +## Common issues + +**Issue: Evaluation too slow** + +Use vLLM backend: +```bash +lm_eval --model vllm \ + --model_args pretrained=model-name,tensor_parallel_size=2 +``` + +Or reduce fewshot examples: +```bash +--num_fewshot 0 # Instead of 5 +``` + +Or evaluate subset of MMLU: +```bash +--tasks mmlu_stem # Only STEM subjects +``` + +**Issue: Out of memory** + +Reduce batch size: +```bash +--batch_size 1 # Or --batch_size auto +``` + +Use quantization: +```bash +--model_args pretrained=model-name,load_in_8bit=True +``` + +Enable CPU offloading: +```bash +--model_args pretrained=model-name,device_map=auto,offload_folder=offload +``` + +**Issue: Different results than reported** + +Check fewshot count: +```bash +--num_fewshot 5 # Most papers use 5-shot +``` + +Check exact task name: +```bash +--tasks mmlu # Not mmlu_direct or mmlu_fewshot +``` + +Verify model and tokenizer match: +```bash +--model_args pretrained=model-name,tokenizer=same-model-name +``` + +**Issue: HumanEval not executing code** + +Install execution dependencies: +```bash +pip install human-eval +``` + +Enable code execution: +```bash +lm_eval --model hf \ + --model_args pretrained=model-name \ + --tasks humaneval \ + --allow_code_execution # Required for HumanEval +``` + +## Advanced topics + +**Benchmark descriptions**: See [references/benchmark-guide.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/benchmark-guide.md) for detailed description of all 60+ tasks, what they measure, and interpretation. + +**Custom tasks**: See [references/custom-tasks.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/custom-tasks.md) for creating domain-specific evaluation tasks. 
+ +**API evaluation**: See [references/api-evaluation.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/api-evaluation.md) for evaluating OpenAI, Anthropic, and other API models. + +**Multi-GPU strategies**: See [references/distributed-eval.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/distributed-eval.md) for data parallel and tensor parallel evaluation. + +## Hardware requirements + +- **GPU**: NVIDIA (CUDA 11.8+), works on CPU (very slow) +- **VRAM**: + - 7B model: 16GB (bf16) or 8GB (8-bit) + - 13B model: 28GB (bf16) or 14GB (8-bit) + - 70B model: Requires multi-GPU or quantization +- **Time** (7B model, single A100): + - HellaSwag: 10 minutes + - GSM8K: 5 minutes + - MMLU (full): 2 hours + - HumanEval: 20 minutes + +## Resources + +- GitHub: https://github.com/EleutherAI/lm-evaluation-harness +- Docs: https://github.com/EleutherAI/lm-evaluation-harness/tree/main/docs +- Task library: 60+ tasks including MMLU, GSM8K, HumanEval, TruthfulQA, HellaSwag, ARC, WinoGrande, etc. +- Leaderboard: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard (uses this harness) diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md new file mode 100644 index 0000000000..db8c4d4d71 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md @@ -0,0 +1,608 @@ +--- +title: "Weights And Biases" +sidebar_label: "Weights And Biases" +description: "Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - coll..." +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. 
*/} + +# Weights And Biases + +Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - collaborative MLOps platform + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/evaluation/weights-and-biases` | +| Version | `1.0.0` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `wandb` | +| Tags | `MLOps`, `Weights And Biases`, `WandB`, `Experiment Tracking`, `Hyperparameter Tuning`, `Model Registry`, `Collaboration`, `Real-Time Visualization`, `PyTorch`, `TensorFlow`, `HuggingFace` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Weights & Biases: ML Experiment Tracking & MLOps + +## When to Use This Skill + +Use Weights & Biases (W&B) when you need to: +- **Track ML experiments** with automatic metric logging +- **Visualize training** in real-time dashboards +- **Compare runs** across hyperparameters and configurations +- **Optimize hyperparameters** with automated sweeps +- **Manage model registry** with versioning and lineage +- **Collaborate on ML projects** with team workspaces +- **Track artifacts** (datasets, models, code) with lineage + +**Users**: 200,000+ ML practitioners | **GitHub Stars**: 10.5k+ | **Integrations**: 100+ + +## Installation + +```bash +# Install W&B +pip install wandb + +# Login (creates API key) +wandb login + +# Or set API key programmatically +export WANDB_API_KEY=your_api_key_here +``` + +## Quick Start + +### Basic Experiment Tracking + +```python +import wandb + +# Initialize a run +run = wandb.init( + project="my-project", + config={ + "learning_rate": 0.001, + "epochs": 10, + "batch_size": 32, + "architecture": "ResNet50" + } +) + +# Training loop +for epoch in range(run.config.epochs): + # 
Your training code + train_loss = train_epoch() + val_loss = validate() + + # Log metrics + wandb.log({ + "epoch": epoch, + "train/loss": train_loss, + "val/loss": val_loss, + "train/accuracy": train_acc, + "val/accuracy": val_acc + }) + +# Finish the run +wandb.finish() +``` + +### With PyTorch + +```python +import torch +import wandb + +# Initialize +wandb.init(project="pytorch-demo", config={ + "lr": 0.001, + "epochs": 10 +}) + +# Access config +config = wandb.config + +# Training loop +for epoch in range(config.epochs): + for batch_idx, (data, target) in enumerate(train_loader): + # Forward pass + output = model(data) + loss = criterion(output, target) + + # Backward pass + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # Log every 100 batches + if batch_idx % 100 == 0: + wandb.log({ + "loss": loss.item(), + "epoch": epoch, + "batch": batch_idx + }) + +# Save model +torch.save(model.state_dict(), "model.pth") +wandb.save("model.pth") # Upload to W&B + +wandb.finish() +``` + +## Core Concepts + +### 1. Projects and Runs + +**Project**: Collection of related experiments +**Run**: Single execution of your training script + +```python +# Create/use project +run = wandb.init( + project="image-classification", + name="resnet50-experiment-1", # Optional run name + tags=["baseline", "resnet"], # Organize with tags + notes="First baseline run" # Add notes +) + +# Each run has unique ID +print(f"Run ID: {run.id}") +print(f"Run URL: {run.url}") +``` + +### 2. 
Configuration Tracking + +Track hyperparameters automatically: + +```python +config = { + # Model architecture + "model": "ResNet50", + "pretrained": True, + + # Training params + "learning_rate": 0.001, + "batch_size": 32, + "epochs": 50, + "optimizer": "Adam", + + # Data params + "dataset": "ImageNet", + "augmentation": "standard" +} + +wandb.init(project="my-project", config=config) + +# Access config during training +lr = wandb.config.learning_rate +batch_size = wandb.config.batch_size +``` + +### 3. Metric Logging + +```python +# Log scalars +wandb.log({"loss": 0.5, "accuracy": 0.92}) + +# Log multiple metrics +wandb.log({ + "train/loss": train_loss, + "train/accuracy": train_acc, + "val/loss": val_loss, + "val/accuracy": val_acc, + "learning_rate": current_lr, + "epoch": epoch +}) + +# Log with custom x-axis +wandb.log({"loss": loss}, step=global_step) + +# Log media (images, audio, video) +wandb.log({"examples": [wandb.Image(img) for img in images]}) + +# Log histograms +wandb.log({"gradients": wandb.Histogram(gradients)}) + +# Log tables +table = wandb.Table(columns=["id", "prediction", "ground_truth"]) +wandb.log({"predictions": table}) +``` + +### 4. Model Checkpointing + +```python +import torch +import wandb + +# Save model checkpoint +checkpoint = { + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': loss, +} + +torch.save(checkpoint, 'checkpoint.pth') + +# Upload to W&B +wandb.save('checkpoint.pth') + +# Or use Artifacts (recommended) +artifact = wandb.Artifact('model', type='model') +artifact.add_file('checkpoint.pth') +wandb.log_artifact(artifact) +``` + +## Hyperparameter Sweeps + +Automatically search for optimal hyperparameters. 
+
+### Define Sweep Configuration
+
+```python
+sweep_config = {
+    'method': 'bayes',  # or 'grid', 'random'
+    'metric': {
+        'name': 'val/accuracy',
+        'goal': 'maximize'
+    },
+    'parameters': {
+        'learning_rate': {
+            # 'log_uniform_values' takes actual values; plain 'log_uniform'
+            # would interpret min/max as natural-log exponents
+            'distribution': 'log_uniform_values',
+            'min': 1e-5,
+            'max': 1e-1
+        },
+        'batch_size': {
+            'values': [16, 32, 64, 128]
+        },
+        'optimizer': {
+            'values': ['adam', 'sgd', 'rmsprop']
+        },
+        'dropout': {
+            'distribution': 'uniform',
+            'min': 0.1,
+            'max': 0.5
+        }
+    }
+}
+
+# Initialize sweep
+sweep_id = wandb.sweep(sweep_config, project="my-project")
+```
+
+### Define Training Function
+
+```python
+def train():
+    # Initialize run
+    run = wandb.init()
+
+    # Access sweep parameters
+    lr = wandb.config.learning_rate
+    batch_size = wandb.config.batch_size
+    optimizer_name = wandb.config.optimizer
+
+    # Build model with sweep config
+    model = build_model(wandb.config)
+    optimizer = get_optimizer(optimizer_name, lr)
+
+    # Training loop
+    for epoch in range(NUM_EPOCHS):
+        train_loss = train_epoch(model, optimizer, batch_size)
+        val_acc = validate(model)
+
+        # Log metrics
+        wandb.log({
+            "train/loss": train_loss,
+            "val/accuracy": val_acc
+        })
+
+# Run sweep
+wandb.agent(sweep_id, function=train, count=50)  # Run 50 trials
+```
+
+### Sweep Strategies
+
+```python
+# Grid search - exhaustive
+sweep_config = {
+    'method': 'grid',
+    'parameters': {
+        'lr': {'values': [0.001, 0.01, 0.1]},
+        'batch_size': {'values': [16, 32, 64]}
+    }
+}
+
+# Random search
+sweep_config = {
+    'method': 'random',
+    'parameters': {
+        'lr': {'distribution': 'uniform', 'min': 0.0001, 'max': 0.1},
+        'dropout': {'distribution': 'uniform', 'min': 0.1, 'max': 0.5}
+    }
+}
+
+# Bayesian optimization (recommended)
+sweep_config = {
+    'method': 'bayes',
+    'metric': {'name': 'val/loss', 'goal': 'minimize'},
+    'parameters': {
+        'lr': {'distribution': 'log_uniform_values', 'min': 1e-5, 'max': 1e-1}
+    }
+}
+```
+
+## Artifacts
+
+Track datasets, models, and other files with lineage. 
+ +### Log Artifacts + +```python +# Create artifact +artifact = wandb.Artifact( + name='training-dataset', + type='dataset', + description='ImageNet training split', + metadata={'size': '1.2M images', 'split': 'train'} +) + +# Add files +artifact.add_file('data/train.csv') +artifact.add_dir('data/images/') + +# Log artifact +wandb.log_artifact(artifact) +``` + +### Use Artifacts + +```python +# Download and use artifact +run = wandb.init(project="my-project") + +# Download artifact +artifact = run.use_artifact('training-dataset:latest') +artifact_dir = artifact.download() + +# Use the data +data = load_data(f"{artifact_dir}/train.csv") +``` + +### Model Registry + +```python +# Log model as artifact +model_artifact = wandb.Artifact( + name='resnet50-model', + type='model', + metadata={'architecture': 'ResNet50', 'accuracy': 0.95} +) + +model_artifact.add_file('model.pth') +wandb.log_artifact(model_artifact, aliases=['best', 'production']) + +# Link to model registry +run.link_artifact(model_artifact, 'model-registry/production-models') +``` + +## Integration Examples + +### HuggingFace Transformers + +```python +from transformers import Trainer, TrainingArguments +import wandb + +# Initialize W&B +wandb.init(project="hf-transformers") + +# Training arguments with W&B +training_args = TrainingArguments( + output_dir="./results", + report_to="wandb", # Enable W&B logging + run_name="bert-finetuning", + logging_steps=100, + save_steps=500 +) + +# Trainer automatically logs to W&B +trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset +) + +trainer.train() +``` + +### PyTorch Lightning + +```python +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import WandbLogger +import wandb + +# Create W&B logger +wandb_logger = WandbLogger( + project="lightning-demo", + log_model=True # Log model checkpoints +) + +# Use with Trainer +trainer = Trainer( + logger=wandb_logger, + max_epochs=10 +) + 
+trainer.fit(model, datamodule=dm) +``` + +### Keras/TensorFlow + +```python +import wandb +from wandb.keras import WandbCallback + +# Initialize +wandb.init(project="keras-demo") + +# Add callback +model.fit( + x_train, y_train, + validation_data=(x_val, y_val), + epochs=10, + callbacks=[WandbCallback()] # Auto-logs metrics +) +``` + +## Visualization & Analysis + +### Custom Charts + +```python +# Log custom visualizations +import matplotlib.pyplot as plt + +fig, ax = plt.subplots() +ax.plot(x, y) +wandb.log({"custom_plot": wandb.Image(fig)}) + +# Log confusion matrix +wandb.log({"conf_mat": wandb.plot.confusion_matrix( + probs=None, + y_true=ground_truth, + preds=predictions, + class_names=class_names +)}) +``` + +### Reports + +Create shareable reports in W&B UI: +- Combine runs, charts, and text +- Markdown support +- Embeddable visualizations +- Team collaboration + +## Best Practices + +### 1. Organize with Tags and Groups + +```python +wandb.init( + project="my-project", + tags=["baseline", "resnet50", "imagenet"], + group="resnet-experiments", # Group related runs + job_type="train" # Type of job +) +``` + +### 2. Log Everything Relevant + +```python +# Log system metrics +wandb.log({ + "gpu/util": gpu_utilization, + "gpu/memory": gpu_memory_used, + "cpu/util": cpu_utilization +}) + +# Log code version +wandb.log({"git_commit": git_commit_hash}) + +# Log data splits +wandb.log({ + "data/train_size": len(train_dataset), + "data/val_size": len(val_dataset) +}) +``` + +### 3. Use Descriptive Names + +```python +# ✅ Good: Descriptive run names +wandb.init( + project="nlp-classification", + name="bert-base-lr0.001-bs32-epoch10" +) + +# ❌ Bad: Generic names +wandb.init(project="nlp", name="run1") +``` + +### 4. 
Save Important Artifacts + +```python +# Save final model +artifact = wandb.Artifact('final-model', type='model') +artifact.add_file('model.pth') +wandb.log_artifact(artifact) + +# Save predictions for analysis +predictions_table = wandb.Table( + columns=["id", "input", "prediction", "ground_truth"], + data=predictions_data +) +wandb.log({"predictions": predictions_table}) +``` + +### 5. Use Offline Mode for Unstable Connections + +```python +import os + +# Enable offline mode +os.environ["WANDB_MODE"] = "offline" + +wandb.init(project="my-project") +# ... your code ... + +# Sync later +# wandb sync +``` + +## Team Collaboration + +### Share Runs + +```python +# Runs are automatically shareable via URL +run = wandb.init(project="team-project") +print(f"Share this URL: {run.url}") +``` + +### Team Projects + +- Create team account at wandb.ai +- Add team members +- Set project visibility (private/public) +- Use team-level artifacts and model registry + +## Pricing + +- **Free**: Unlimited public projects, 100GB storage +- **Academic**: Free for students/researchers +- **Teams**: $50/seat/month, private projects, unlimited storage +- **Enterprise**: Custom pricing, on-prem options + +## Resources + +- **Documentation**: https://docs.wandb.ai +- **GitHub**: https://github.com/wandb/wandb (10.5k+ stars) +- **Examples**: https://github.com/wandb/examples +- **Community**: https://wandb.ai/community +- **Discord**: https://wandb.me/discord + +## See Also + +- `references/sweeps.md` - Comprehensive hyperparameter optimization guide +- `references/artifacts.md` - Data and model versioning patterns +- `references/integrations.md` - Framework-specific examples diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md b/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md new file mode 100644 index 0000000000..27ab41b5e2 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md @@ -0,0 +1,99 @@ +--- 
+title: "Huggingface Hub" +sidebar_label: "Huggingface Hub" +description: "Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Space..." +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Huggingface Hub + +Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/huggingface-hub` | +| Version | `1.0.0` | +| Author | Hugging Face | +| License | MIT | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Hugging Face CLI (`hf`) Reference Guide + +The `hf` command is the modern command-line interface for interacting with the Hugging Face Hub, providing tools to manage repositories, models, datasets, and Spaces. + +> **IMPORTANT:** The `hf` command replaces the now deprecated `huggingface-cli` command. + +## Quick Start +* **Installation:** `curl -LsSf https://hf.co/cli/install.sh | bash -s` +* **Help:** Use `hf --help` to view all available functions and real-world examples. +* **Authentication:** Recommended via `HF_TOKEN` environment variable or the `--token` flag. + +--- + +## Core Commands + +### General Operations +* `hf download REPO_ID`: Download files from the Hub. +* `hf upload REPO_ID`: Upload files/folders (recommended for single-commit). +* `hf upload-large-folder REPO_ID LOCAL_PATH`: Recommended for resumable uploads of large directories. +* `hf sync`: Sync files between a local directory and a bucket. +* `hf env` / `hf version`: View environment and version details. 
+ +### Authentication (`hf auth`) +* `login` / `logout`: Manage sessions using tokens from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). +* `list` / `switch`: Manage and toggle between multiple stored access tokens. +* `whoami`: Identify the currently logged-in account. + +### Repository Management (`hf repos`) +* `create` / `delete`: Create or permanently remove repositories. +* `duplicate`: Clone a model, dataset, or Space to a new ID. +* `move`: Transfer a repository between namespaces. +* `branch` / `tag`: Manage Git-like references. +* `delete-files`: Remove specific files using patterns. + +--- + +## Specialized Hub Interactions + +### Datasets & Models +* **Datasets:** `hf datasets list`, `info`, and `parquet` (list parquet URLs). +* **SQL Queries:** `hf datasets sql SQL` — Execute raw SQL via DuckDB against dataset parquet URLs. +* **Models:** `hf models list` and `info`. +* **Papers:** `hf papers list` — View daily papers. + +### Discussions & Pull Requests (`hf discussions`) +* Manage the lifecycle of Hub contributions: `list`, `create`, `info`, `comment`, `close`, `reopen`, and `rename`. +* `diff`: View changes in a PR. +* `merge`: Finalize pull requests. + +### Infrastructure & Compute +* **Endpoints:** Deploy and manage Inference Endpoints (`deploy`, `pause`, `resume`, `scale-to-zero`, `catalog`). +* **Jobs:** Run compute tasks on HF infrastructure. Includes `hf jobs uv` for running Python scripts with inline dependencies and `stats` for resource monitoring. +* **Spaces:** Manage interactive apps. Includes `dev-mode` and `hot-reload` for Python files without full restarts. + +### Storage & Automation +* **Buckets:** Full S3-like bucket management (`create`, `cp`, `mv`, `rm`, `sync`). +* **Cache:** Manage local storage with `list`, `prune` (remove detached revisions), and `verify` (checksum checks). +* **Webhooks:** Automate workflows by managing Hub webhooks (`create`, `watch`, `enable`/`disable`). 
+* **Collections:** Organize Hub items into collections (`add-item`, `update`, `list`). + +--- + +## Advanced Usage & Tips + +### Global Flags +* `--format json`: Produces machine-readable output for automation. +* `-q` / `--quiet`: Limits output to IDs only. + +### Extensions & Skills +* **Extensions:** Extend CLI functionality via GitHub repositories using `hf extensions install REPO_ID`. +* **Skills:** Manage AI assistant skills with `hf skills add`. diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md new file mode 100644 index 0000000000..19f08067f8 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md @@ -0,0 +1,266 @@ +--- +title: "Llama Cpp — llama" +sidebar_label: "Llama Cpp" +description: "llama" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Llama Cpp + +llama.cpp local GGUF inference + HF Hub model discovery. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/inference/llama-cpp` | +| Version | `2.1.2` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `llama-cpp-python>=0.2.0` | +| Tags | `llama.cpp`, `GGUF`, `Quantization`, `Hugging Face Hub`, `CPU Inference`, `Apple Silicon`, `Edge Deployment`, `AMD GPUs`, `Intel GPUs`, `NVIDIA`, `URL-first` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# llama.cpp + GGUF + +Use this skill for local GGUF inference, quant selection, or Hugging Face repo discovery for llama.cpp. 
+ +## When to use + +- Run local models on CPU, Apple Silicon, CUDA, ROCm, or Intel GPUs +- Find the right GGUF for a specific Hugging Face repo +- Build a `llama-server` or `llama-cli` command from the Hub +- Search the Hub for models that already support llama.cpp +- Enumerate available `.gguf` files and sizes for a repo +- Decide between Q4/Q5/Q6/IQ variants for the user's RAM or VRAM + +## Model Discovery workflow + +Prefer URL workflows before asking for `hf`, Python, or custom scripts. + +1. Search for candidate repos on the Hub: + - Base: `https://huggingface.co/models?apps=llama.cpp&sort=trending` + - Add `search=` for a model family + - Add `num_parameters=min:0,max:24B` or similar when the user has size constraints +2. Open the repo with the llama.cpp local-app view: + - `https://huggingface.co/?local-app=llama.cpp` +3. Treat the local-app snippet as the source of truth when it is visible: + - copy the exact `llama-server` or `llama-cli` command + - report the recommended quant exactly as HF shows it +4. Read the same `?local-app=llama.cpp` URL as page text or HTML and extract the section under `Hardware compatibility`: + - prefer its exact quant labels and sizes over generic tables + - keep repo-specific labels such as `UD-Q4_K_M` or `IQ4_NL_XL` + - if that section is not visible in the fetched page source, say so and fall back to the tree API plus generic quant guidance +5. Query the tree API to confirm what actually exists: + - `https://huggingface.co/api/models//tree/main?recursive=true` + - keep entries where `type` is `file` and `path` ends with `.gguf` + - use `path` and `size` as the source of truth for filenames and byte sizes + - separate quantized checkpoints from `mmproj-*.gguf` projector files and `BF16/` shard files + - use `https://huggingface.co//tree/main` only as a human fallback +6. 
If the local-app snippet is not text-visible, reconstruct the command from the repo plus the chosen quant: + - shorthand quant selection: `llama-server -hf :` + - exact-file fallback: `llama-server --hf-repo --hf-file ` +7. Only suggest conversion from Transformers weights if the repo does not already expose GGUF files. + +## Quick start + +### Install llama.cpp + +```bash +# macOS / Linux (simplest) +brew install llama.cpp +``` + +```bash +winget install llama.cpp +``` + +```bash +git clone https://github.com/ggml-org/llama.cpp +cd llama.cpp +cmake -B build +cmake --build build --config Release +``` + +### Run directly from the Hugging Face Hub + +```bash +llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 +``` + +```bash +llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 +``` + +### Run an exact GGUF file from the Hub + +Use this when the tree API shows custom file naming or the exact HF snippet is missing. + +```bash +llama-server \ + --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \ + --hf-file Phi-3-mini-4k-instruct-q4.gguf \ + -c 4096 +``` + +### OpenAI-compatible server check + +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + {"role": "user", "content": "Write a limerick about Python exceptions"} + ] + }' +``` + +## Python bindings (llama-cpp-python) + +`pip install llama-cpp-python` (CUDA: `CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir`; Metal: `CMAKE_ARGS="-DGGML_METAL=on" ...`). 
+ +### Basic generation + +```python +from llama_cpp import Llama + +llm = Llama( + model_path="./model-q4_k_m.gguf", + n_ctx=4096, + n_gpu_layers=35, # 0 for CPU, 99 to offload everything + n_threads=8, +) + +out = llm("What is machine learning?", max_tokens=256, temperature=0.7) +print(out["choices"][0]["text"]) +``` + +### Chat + streaming + +```python +llm = Llama( + model_path="./model-q4_k_m.gguf", + n_ctx=4096, + n_gpu_layers=35, + chat_format="llama-3", # or "chatml", "mistral", etc. +) + +resp = llm.create_chat_completion( + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is Python?"}, + ], + max_tokens=256, +) +print(resp["choices"][0]["message"]["content"]) + +# Streaming +for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True): + print(chunk["choices"][0]["text"], end="", flush=True) +``` + +### Embeddings + +```python +llm = Llama(model_path="./model-q4_k_m.gguf", embedding=True, n_gpu_layers=35) +vec = llm.embed("This is a test sentence.") +print(f"Embedding dimension: {len(vec)}") +``` + +You can also load a GGUF straight from the Hub: + +```python +llm = Llama.from_pretrained( + repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF", + filename="*Q4_K_M.gguf", + n_gpu_layers=35, +) +``` + +## Choosing a quant + +Use the Hub page first, generic heuristics second. + +- Prefer the exact quant that HF marks as compatible for the user's hardware profile. +- For general chat, start with `Q4_K_M`. +- For code or technical work, prefer `Q5_K_M` or `Q6_K` if memory allows. +- For very tight RAM budgets, consider `Q3_K_M`, `IQ` variants, or `Q2` variants only if the user explicitly prioritizes fit over quality. +- For multimodal repos, mention `mmproj-*.gguf` separately. The projector is not the main model file. +- Do not normalize repo-native labels. If the page says `UD-Q4_K_M`, report `UD-Q4_K_M`. 
+ +## Extracting available GGUFs from a repo + +When the user asks what GGUFs exist, return: + +- filename +- file size +- quant label +- whether it is a main model or an auxiliary projector + +Ignore unless requested: + +- README +- BF16 shard files +- imatrix blobs or calibration artifacts + +Use the tree API for this step: + +- `https://huggingface.co/api/models//tree/main?recursive=true` + +For a repo like `unsloth/Qwen3.6-35B-A3B-GGUF`, the local-app page can show quant chips such as `UD-Q4_K_M`, `UD-Q5_K_M`, `UD-Q6_K`, and `Q8_0`, while the tree API exposes exact file paths such as `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf` and `Qwen3.6-35B-A3B-Q8_0.gguf` with byte sizes. Use the tree API to turn a quant label into an exact filename. + +## Search patterns + +Use these URL shapes directly: + +```text +https://huggingface.co/models?apps=llama.cpp&sort=trending +https://huggingface.co/models?search=&apps=llama.cpp&sort=trending +https://huggingface.co/models?search=&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending +https://huggingface.co/?local-app=llama.cpp +https://huggingface.co/api/models//tree/main?recursive=true +https://huggingface.co//tree/main +``` + +## Output format + +When answering discovery requests, prefer a compact structured result like: + +```text +Repo: +Recommended quant from HF: