From ce089169d578b96c82641f17186ba63c288b22d8 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 06:20:19 -0700 Subject: [PATCH 001/264] feat(skills-guard): gate agent-created scanner on config.skills.guard_agent_created (default off) Replaces the blanket 'always allow' change from the previous commit with an opt-in config flag so users who want belt-and-suspenders security can still get the keyword scan on skill_manage output. ## Default behavior (flag off) skill_manage(action='create'|'edit'|'patch') no longer runs the keyword scanner. The agent can write skills that mention risky keywords in prose (documenting what reviewers should watch for, describing cache-bust semantics in a PR-review skill, referencing AGENTS.md, etc.) without getting blocked. Rationale: the agent can already execute the same code paths via terminal() with no gate, so the scan adds friction without meaningful security against a compromised or malicious agent. ## Opt-in behavior (flag on) Set skills.guard_agent_created: true in config.yaml to get the original behavior back. Scanner runs on every skill_manage write; dangerous verdicts surface as a tool error the agent can react to (retry without the flagged content). ## External hub installs unaffected trusted/community sources (hermes skills install) always get scanned regardless of this flag. The gate is specifically for skill_manage, which only agents call. 
## Changes - hermes_cli/config.py: add skills.guard_agent_created: False to DEFAULT_CONFIG - tools/skill_manager_tool.py: _guard_agent_created_enabled() reads the flag; _security_scan_skill() short-circuits to None when the flag is off - tools/skills_guard.py: restore INSTALL_POLICY['agent-created'] = ('allow', 'allow', 'ask') so the scan remains strict when it does run - tests/tools/test_skills_guard.py: restore original ask/force tests - tests/tools/test_skill_manager_tool.py: new TestSecurityScanGate class covering both flag states + config error handling ## Validation - tests/tools/test_skills_guard.py + test_skill_manager_tool.py: 115/115 pass - E2E: flagged-keyword skill creates with default config, blocks with flag on --- hermes_cli/config.py | 11 ++++ tests/tools/test_skill_manager_tool.py | 82 ++++++++++++++++++++++++++ tests/tools/test_skills_guard.py | 23 ++++---- tools/skill_manager_tool.py | 30 ++++++++-- tools/skills_guard.py | 10 ++-- 5 files changed, 134 insertions(+), 22 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6d4c49fd4b..fd19adae90 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -760,6 +760,17 @@ DEFAULT_CONFIG = { "inline_shell": False, # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. "inline_shell_timeout": 10, + # Run the keyword/pattern security scanner on skills the agent + # writes via skill_manage (create/edit/patch). Off by default + # because the agent can already execute the same code paths via + # terminal() with no gate, so the scan adds friction (blocks + # skills that mention risky keywords in prose) without meaningful + # security. Turn on if you want the belt-and-suspenders — a + # dangerous verdict will then surface as a tool error to the + # agent, which can retry with the flagged content removed. + # External hub installs (trusted/community sources) are always + # scanned regardless of this setting. 
+ "guard_agent_created": False, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index dd0ae17f8c..9918a826cb 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -484,3 +484,85 @@ class TestSkillManageDispatcher: raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT) result = json.loads(raw) assert result["success"] is True + + +class TestSecurityScanGate: + """_security_scan_skill is gated by skills.guard_agent_created config flag.""" + + def test_scan_noop_when_flag_off(self, tmp_path): + """Default config (flag off) short-circuits before running scan_skill.""" + from tools.skill_manager_tool import _security_scan_skill + + with patch("tools.skill_manager_tool._guard_agent_created_enabled", return_value=False), \ + patch("tools.skill_manager_tool.scan_skill") as mock_scan: + result = _security_scan_skill(tmp_path) + + assert result is None + mock_scan.assert_not_called() # scan never ran + + def test_scan_runs_when_flag_on(self, tmp_path): + """When flag is on, scan_skill is invoked and its verdict is honored.""" + from tools.skill_manager_tool import _security_scan_skill + from tools.skills_guard import ScanResult + + # Fake a safe scan result — caller should return None (allow) + fake_result = ScanResult( + skill_name="test", + source="agent-created", + trust_level="agent-created", + verdict="safe", + findings=[], + summary="ok", + ) + with patch("tools.skill_manager_tool._guard_agent_created_enabled", return_value=True), \ + patch("tools.skill_manager_tool.scan_skill", return_value=fake_result) as mock_scan: + result = _security_scan_skill(tmp_path) + + assert result is None + mock_scan.assert_called_once() + + def test_scan_blocks_dangerous_when_flag_on(self, tmp_path): + """Dangerous verdict + flag on → returns an error string for the agent.""" + from 
tools.skill_manager_tool import _security_scan_skill + from tools.skills_guard import ScanResult, Finding + + finding = Finding( + pattern_id="test", severity="critical", category="exfiltration", + file="SKILL.md", line=1, match="curl $TOKEN", description="test", + ) + fake_result = ScanResult( + skill_name="test", + source="agent-created", + trust_level="agent-created", + verdict="dangerous", + findings=[finding], + summary="dangerous", + ) + with patch("tools.skill_manager_tool._guard_agent_created_enabled", return_value=True), \ + patch("tools.skill_manager_tool.scan_skill", return_value=fake_result): + result = _security_scan_skill(tmp_path) + + assert result is not None + assert "Security scan blocked" in result + + def test_guard_flag_reads_config_default_false(self): + """_guard_agent_created_enabled returns False when config doesn't set it.""" + from tools.skill_manager_tool import _guard_agent_created_enabled + + with patch("hermes_cli.config.load_config", return_value={"skills": {}}): + assert _guard_agent_created_enabled() is False + + def test_guard_flag_reads_config_when_set(self): + """_guard_agent_created_enabled returns True when user explicitly enables.""" + from tools.skill_manager_tool import _guard_agent_created_enabled + + with patch("hermes_cli.config.load_config", + return_value={"skills": {"guard_agent_created": True}}): + assert _guard_agent_created_enabled() is True + + def test_guard_flag_handles_config_error(self): + """If load_config raises, _guard_agent_created_enabled defaults to False (fail-safe off).""" + from tools.skill_manager_tool import _guard_agent_created_enabled + + with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")): + assert _guard_agent_created_enabled() is False diff --git a/tests/tools/test_skills_guard.py b/tests/tools/test_skills_guard.py index 12c527ca78..ccc55da205 100644 --- a/tests/tools/test_skills_guard.py +++ b/tests/tools/test_skills_guard.py @@ -174,27 +174,24 @@ class 
TestShouldAllowInstall: assert allowed is True assert "agent-created" in reason - def test_dangerous_agent_created_allowed(self): - """Agent-created skills bypass verdict gating — agent can already - execute the same code via terminal(), so skill_manage allows all - verdicts. This prevents friction when the agent writes skills that - mention risky keywords in prose (e.g. describing cache-busting or - persistence semantics in a PR-review skill).""" + def test_dangerous_agent_created_asks(self): + """Agent-created skills with dangerous verdict return None (ask for confirmation) + when the scan runs. The caller (_security_scan_skill) surfaces this as an error + to the agent, who can retry without the flagged content. + + This gate only runs when skills.guard_agent_created is enabled (off by default).""" f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")] allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f)) - assert allowed is True - assert "agent-created" in reason + assert allowed is None + assert "Requires confirmation" in reason - def test_force_noop_for_agent_created_dangerous(self): - """With agent-created dangerous mapped to 'allow', force becomes a - no-op — the allow branch returns first. 
Force still works for any - trust level that maps to block (community/trusted).""" + def test_force_overrides_dangerous_for_agent_created(self): f = [Finding("x", "critical", "c", "f", 1, "m", "d")] allowed, reason = should_allow_install( self._result("agent-created", "dangerous", f), force=True ) assert allowed is True - assert "agent-created" in reason + assert "Force-installed" in reason # --------------------------------------------------------------------------- diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 493b434c51..c28f421a7f 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -44,8 +44,8 @@ from typing import Dict, Any, Optional, Tuple logger = logging.getLogger(__name__) -# Import security scanner — agent-created skills get the same scrutiny as -# community hub installs. +# Import security scanner — external hub installs always get scanned; +# agent-created skills only get scanned when skills.guard_agent_created is on. try: from tools.skills_guard import scan_skill, should_allow_install, format_scan_report _GUARD_AVAILABLE = True @@ -53,10 +53,31 @@ except ImportError: _GUARD_AVAILABLE = False +def _guard_agent_created_enabled() -> bool: + """Read skills.guard_agent_created from config (default False). + + Off by default because the agent can already execute the same code + paths via terminal() with no gate, so the scan adds friction without + meaningful security. Users who want belt-and-suspenders can turn it + on via `hermes config set skills.guard_agent_created true`. + """ + try: + from hermes_cli.config import load_config + cfg = load_config() + return bool(cfg.get("skills", {}).get("guard_agent_created", False)) + except Exception: + return False + + def _security_scan_skill(skill_dir: Path) -> Optional[str]: - """Scan a skill directory after write. Returns error string if blocked, else None.""" + """Scan a skill directory after write. Returns error string if blocked, else None. 
+ + No-op when skills.guard_agent_created is disabled (the default). + """ if not _GUARD_AVAILABLE: return None + if not _guard_agent_created_enabled(): + return None try: result = scan_skill(skill_dir, source="agent-created") allowed, reason = should_allow_install(result) @@ -65,7 +86,8 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]: return f"Security scan blocked this skill ({reason}):\n{report}" if allowed is None: # "ask" verdict — for agent-created skills this means dangerous - # findings were detected. Block the skill and include the report. + # findings were detected. Surface as an error so the agent can + # retry with the flagged content removed. report = format_scan_report(result) logger.warning("Agent-created skill blocked (dangerous findings): %s", reason) return f"Security scan blocked this skill ({reason}):\n{report}" diff --git a/tools/skills_guard.py b/tools/skills_guard.py index fadbb8173a..ffb965b521 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -43,11 +43,11 @@ INSTALL_POLICY = { "builtin": ("allow", "allow", "allow"), "trusted": ("allow", "allow", "block"), "community": ("allow", "block", "block"), - # Agent-created skills run in the same process as the agent that - # wrote them — the agent could already execute the same code via - # terminal(), so a dangerous-pattern gate on skill_manage adds - # friction without meaningful security. Allow all verdicts. - "agent-created": ("allow", "allow", "allow"), + # Agent-created: "ask" on dangerous surfaces as an error to the agent, + # which can retry without the flagged content. This gate only runs when + # skills.guard_agent_created is enabled (off by default) — see + # tools/skill_manager_tool.py::_guard_agent_created_enabled. 
+ "agent-created": ("allow", "allow", "ask"), } VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2} From bc9518f660c75244b45d47f0a7a87f6cd067be62 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 11:44:27 -0500 Subject: [PATCH 002/264] fix(ui-tui): force full xterm.js alt-screen repaints - force full alt-screen damage in xterm.js hosts to avoid stale glyph artifacts - skip incremental scroll optimization there and repaint from a cleared screen atomically --- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 1543dc7fce..5f50be1579 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -70,7 +70,7 @@ import { startSelection, updateSelection } from './selection.js' -import { supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' +import { isXtermJs, supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' import { CURSOR_HOME, cursorMove, @@ -728,12 +728,17 @@ export default class Ink { } } + // xterm.js occasionally leaves stale glyphs behind during incremental + // alt-screen updates. Force full repaint there; native terminals keep + // the cheaper diff path unless layout/overlay state says otherwise. + const forceFullAltScreenRepaint = this.altScreenActive && isXtermJs() + // Full-damage backstop: applies on BOTH alt-screen and main-screen. // Layout shifts (spinner appears, status line resizes) can leave stale // cells at sibling boundaries that per-node damage tracking misses. // Selection/highlight overlays write via setCellStyleId which doesn't // track damage. prevFrameContaminated covers the cleanup frame. 
- if (didLayoutShift() || selActive || hlActive || this.prevFrameContaminated) { + if (didLayoutShift() || selActive || hlActive || this.prevFrameContaminated || forceFullAltScreenRepaint) { frame.screen.damage = { x: 0, y: 0, @@ -771,7 +776,7 @@ export default class Ink { // renders the scrolled-but-not-yet-repainted intermediate state. // tmux is the main case (re-emits DECSTBM with its own timing and // doesn't implement DEC 2026, so SYNC_OUTPUT_SUPPORTED is false). - SYNC_OUTPUT_SUPPORTED + SYNC_OUTPUT_SUPPORTED && !forceFullAltScreenRepaint ) const diffMs = performance.now() - tDiff @@ -824,7 +829,9 @@ export default class Ink { // erase+paint lands, then swaps in one go. Writing ERASE_SCREEN // synchronously in handleResize would blank the screen for the ~80ms // render() takes. - if (this.needsEraseBeforePaint) { + const eraseBeforePaint = this.needsEraseBeforePaint || forceFullAltScreenRepaint + + if (eraseBeforePaint) { this.needsEraseBeforePaint = false optimized.unshift(ERASE_THEN_HOME_PATCH) } else { From 071bdb5a3f099be5a7c824906315d483ee5b003d Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 11:55:09 -0500 Subject: [PATCH 003/264] Revert "fix(ui-tui): force full xterm.js alt-screen repaints" This reverts commit bc9518f660c75244b45d47f0a7a87f6cd067be62. 
--- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 5f50be1579..1543dc7fce 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -70,7 +70,7 @@ import { startSelection, updateSelection } from './selection.js' -import { isXtermJs, supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' +import { supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' import { CURSOR_HOME, cursorMove, @@ -728,17 +728,12 @@ export default class Ink { } } - // xterm.js occasionally leaves stale glyphs behind during incremental - // alt-screen updates. Force full repaint there; native terminals keep - // the cheaper diff path unless layout/overlay state says otherwise. - const forceFullAltScreenRepaint = this.altScreenActive && isXtermJs() - // Full-damage backstop: applies on BOTH alt-screen and main-screen. // Layout shifts (spinner appears, status line resizes) can leave stale // cells at sibling boundaries that per-node damage tracking misses. // Selection/highlight overlays write via setCellStyleId which doesn't // track damage. prevFrameContaminated covers the cleanup frame. - if (didLayoutShift() || selActive || hlActive || this.prevFrameContaminated || forceFullAltScreenRepaint) { + if (didLayoutShift() || selActive || hlActive || this.prevFrameContaminated) { frame.screen.damage = { x: 0, y: 0, @@ -776,7 +771,7 @@ export default class Ink { // renders the scrolled-but-not-yet-repainted intermediate state. // tmux is the main case (re-emits DECSTBM with its own timing and // doesn't implement DEC 2026, so SYNC_OUTPUT_SUPPORTED is false). 
- SYNC_OUTPUT_SUPPORTED && !forceFullAltScreenRepaint + SYNC_OUTPUT_SUPPORTED ) const diffMs = performance.now() - tDiff @@ -829,9 +824,7 @@ export default class Ink { // erase+paint lands, then swaps in one go. Writing ERASE_SCREEN // synchronously in handleResize would blank the screen for the ~80ms // render() takes. - const eraseBeforePaint = this.needsEraseBeforePaint || forceFullAltScreenRepaint - - if (eraseBeforePaint) { + if (this.needsEraseBeforePaint) { this.needsEraseBeforePaint = false optimized.unshift(ERASE_THEN_HOME_PATCH) } else { From 82a0ed1afb3fb3840a0bdca94a22fa8b005ac49a Mon Sep 17 00:00:00 2001 From: kshitij <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 23 Apr 2026 10:06:25 -0700 Subject: [PATCH 004/264] feat: add Xiaomi MiMo v2.5-pro and v2.5 model support (#14635) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Merged Adds MiMo v2.5-pro and v2.5 support to Xiaomi native provider, OpenCode Go, and setup wizard. 
### Changes - Context lengths: added v2.5-pro (1M) and v2.5 (1M), corrected existing MiMo entries to exact values (262144) - Provider lists: xiaomi, opencode-go, setup wizard - Vision: upgraded from mimo-v2-omni to mimo-v2.5 (omnimodal) - Config description updated for XIAOMI_API_KEY - Tests updated for new vision model preference ### Verification - 4322 tests passed, 0 new regressions - Live API tested on Xiaomi portal: basic, reasoning, tool calling, multi-tool, file ops, system prompt, vision — all pass - Self-review found and fixed 2 issues (redundant vision check, stale HuggingFace context length) --- agent/auxiliary_client.py | 2 +- agent/model_metadata.py | 12 ++++++------ hermes_cli/config.py | 2 +- hermes_cli/models.py | 6 +++++- hermes_cli/setup.py | 2 +- tests/agent/test_auxiliary_main_first.py | 8 ++++---- tests/hermes_cli/test_xiaomi_provider.py | 4 ++-- 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 1563b866c9..5735648f13 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -151,7 +151,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { # differs from their main chat model, map it here. The vision auto-detect # "exotic provider" branch checks this before falling back to the main model. 
_PROVIDER_VISION_MODELS: Dict[str, str] = { - "xiaomi": "mimo-v2-omni", + "xiaomi": "mimo-v2.5", "zai": "glm-5v-turbo", } diff --git a/agent/model_metadata.py b/agent/model_metadata.py index e3c07684c9..2916a7fa7d 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -183,12 +183,12 @@ DEFAULT_CONTEXT_LENGTHS = { "moonshotai/Kimi-K2.6": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, - "XiaomiMiMo/MiMo-V2-Flash": 256000, - "mimo-v2-pro": 1000000, - "mimo-v2-omni": 256000, - "mimo-v2-flash": 256000, - "mimo-v2.5-pro": 1000000, - "mimo-v2.5": 1000000, + "XiaomiMiMo/MiMo-V2-Flash": 262144, + "mimo-v2-pro": 1048576, + "mimo-v2.5-pro": 1048576, + "mimo-v2.5": 1048576, + "mimo-v2-omni": 262144, + "mimo-v2-flash": 262144, "zai-org/GLM-5": 202752, } diff --git a/hermes_cli/config.py b/hermes_cli/config.py index fd19adae90..c78b01b150 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1291,7 +1291,7 @@ OPTIONAL_ENV_VARS = { "advanced": True, }, "XIAOMI_API_KEY": { - "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)", + "description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)", "prompt": "Xiaomi MiMo API Key", "url": "https://platform.xiaomimimo.com", "password": True, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index bc7f402587..43f4905db3 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -250,6 +250,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "deepseek-reasoner", ], "xiaomi": [ + "mimo-v2.5-pro", + "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "mimo-v2-flash", @@ -301,6 +303,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2.5", "glm-5.1", "glm-5", + "mimo-v2.5-pro", + "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", @@ -692,7 +696,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ 
models, $5 free credit, no markup)"), ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), - ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"), + ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"), ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 3629616895..ebc7de9407 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = { "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", diff --git a/tests/agent/test_auxiliary_main_first.py b/tests/agent/test_auxiliary_main_first.py index d756d6ffb1..ab065bde01 100644 --- a/tests/agent/test_auxiliary_main_first.py +++ 
b/tests/agent/test_auxiliary_main_first.py @@ -245,7 +245,7 @@ class TestResolveVisionMainFirst: assert model == "xiaomi/mimo-v2-omni" def test_exotic_provider_with_vision_override_preserved(self): - """xiaomi → mimo-v2-omni override still wins over main_model.""" + """xiaomi → mimo-v2.5 override still wins over main_model.""" with patch( "agent.auxiliary_client._read_main_provider", return_value="xiaomi", ), patch( @@ -257,15 +257,15 @@ class TestResolveVisionMainFirst: "agent.auxiliary_client._resolve_task_provider_model", return_value=("auto", None, None, None, None), ): - mock_resolve.return_value = (MagicMock(), "mimo-v2-omni") + mock_resolve.return_value = (MagicMock(), "mimo-v2.5") from agent.auxiliary_client import resolve_vision_provider_client provider, client, model = resolve_vision_provider_client() assert provider == "xiaomi" - # Should use mimo-v2-omni (vision override), not mimo-v2-pro (text main) - assert mock_resolve.call_args.args[1] == "mimo-v2-omni" + # Should use mimo-v2.5 (vision override), not mimo-v2-pro (text main) + assert mock_resolve.call_args.args[1] == "mimo-v2.5" def test_main_unavailable_vision_falls_through_to_aggregators(self): """Main provider fails → fall back to OpenRouter/Nous strict backends.""" diff --git a/tests/hermes_cli/test_xiaomi_provider.py b/tests/hermes_cli/test_xiaomi_provider.py index f26740483c..7205cf5a26 100644 --- a/tests/hermes_cli/test_xiaomi_provider.py +++ b/tests/hermes_cli/test_xiaomi_provider.py @@ -287,10 +287,10 @@ class TestXiaomiAuxiliary: assert "xiaomi" not in _API_KEY_PROVIDER_AUX_MODELS def test_vision_model_override(self): - """Xiaomi vision tasks should use mimo-v2-omni (multimodal), not the main model.""" + """Xiaomi vision tasks should use mimo-v2.5 (multimodal), not the main model.""" from agent.auxiliary_client import _PROVIDER_VISION_MODELS assert "xiaomi" in _PROVIDER_VISION_MODELS - assert _PROVIDER_VISION_MODELS["xiaomi"] == "mimo-v2-omni" + assert _PROVIDER_VISION_MODELS["xiaomi"] == 
"mimo-v2.5" # ============================================================================= From 2e7546006697c87ded650573dbbad52d505b63f4 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 12:08:23 -0500 Subject: [PATCH 005/264] test(ui-tui): add log-update diff contract tests - steady-state diff skips unchanged rows - width change emits clearTerminal before repaint - drift repro: prev.screen desync from terminal leaves orphaned cells no code path can reach --- .../hermes-ink/src/ink/log-update.test.ts | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 ui-tui/packages/hermes-ink/src/ink/log-update.test.ts diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts new file mode 100644 index 0000000000..2802faf24f --- /dev/null +++ b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from 'vitest' + +import type { Frame } from './frame.js' +import { LogUpdate } from './log-update.js' +import { + CellWidth, + CharPool, + createScreen, + HyperlinkPool, + type Screen, + setCellAt, + StylePool +} from './screen.js' + +/** + * Contract tests for LogUpdate.render() — the diff-to-ANSI path that owns + * whether the terminal picks up each React commit correctly. + * + * These tests pin down a few load-bearing invariants so that any fix for + * the "scattered letters after rapid resize" artifact in xterm.js hosts + * can be grounded against them. 
+ */ + +const stylePool = new StylePool() +const charPool = new CharPool() +const hyperlinkPool = new HyperlinkPool() + +const mkScreen = (w: number, h: number) => createScreen(w, h, stylePool, charPool, hyperlinkPool) + +const paint = (screen: Screen, y: number, text: string) => { + for (let x = 0; x < text.length; x++) { + setCellAt(screen, x, y, { + char: text[x]!, + styleId: stylePool.none, + width: CellWidth.Narrow, + hyperlink: undefined + }) + } +} + +const mkFrame = (screen: Screen, viewportW: number, viewportH: number): Frame => ({ + screen, + viewport: { width: viewportW, height: viewportH }, + cursor: { x: 0, y: 0, visible: true } +}) + +const stdoutOnly = (diff: ReturnType) => + diff + .filter(p => p.type === 'stdout') + .map(p => (p as { type: 'stdout'; content: string }).content) + .join('') + +describe('LogUpdate.render diff contract', () => { + it('emits only changed cells when most rows match', () => { + const w = 20 + const h = 4 + const prev = mkScreen(w, h) + paint(prev, 0, 'HELLO') + paint(prev, 1, 'WORLD') + paint(prev, 2, 'STAYSHERE') + + const next = mkScreen(w, h) + paint(next, 0, 'HELLO') + paint(next, 1, 'CHANGE') + paint(next, 2, 'STAYSHERE') + next.damage = { x: 0, y: 0, width: w, height: h } + + const log = new LogUpdate({ isTTY: true, stylePool }) + const diff = log.render(mkFrame(prev, w, h), mkFrame(next, w, h), true, false) + + const written = stdoutOnly(diff) + expect(written).toContain('CHANGE') + expect(written).not.toContain('HELLO') + expect(written).not.toContain('STAYSHERE') + }) + + it('width change emits a clearTerminal patch before repainting', () => { + const prevW = 20 + const nextW = 15 + const h = 3 + + const prev = mkScreen(prevW, h) + paint(prev, 0, 'thiswaswiderrow') + + const next = mkScreen(nextW, h) + paint(next, 0, 'shorterrownow') + next.damage = { x: 0, y: 0, width: nextW, height: h } + + const log = new LogUpdate({ isTTY: true, stylePool }) + const diff = log.render(mkFrame(prev, prevW, h), mkFrame(next, 
nextW, h), true, false) + + expect(diff.some(p => p.type === 'clearTerminal')).toBe(true) + expect(stdoutOnly(diff)).toContain('shorterrownow') + }) + + it('drift repro: if terminal has content that prev.screen does not know about, diff leaves it orphaned', () => { + // Simulates prev/terminal desync: the physical terminal has STALE + // content at row 2 from a prior frame that was never reconciled into + // prev.screen. next.screen is blank at row 2. Diff finds prev==next + // (both blank at row 2), emits nothing → the stale content survives + // on the terminal as an artifact. + // + // This is the load-bearing theory for the rapid-resize scattered-letter + // bug: whenever the ink renderer believes prev.screen is authoritative + // but the physical terminal was mutated out-of-band (resize-induced + // reflow writing past the prev-frame's tracked cells), those cells + // drift and artifacts appear at that row on subsequent frames. + const w = 20 + const h = 3 + const prevAsInk = mkScreen(w, h) + paint(prevAsInk, 0, 'same') + // row 2 in prevAsInk is blank — but pretend the terminal has stale + // characters there. ink has no way to know. + const terminalReally = mkScreen(w, h) + paint(terminalReally, 0, 'same') + paint(terminalReally, 2, 'orphaned') + + const next = mkScreen(w, h) + paint(next, 0, 'same') + next.damage = { x: 0, y: 0, width: w, height: h } + + const log = new LogUpdate({ isTTY: true, stylePool }) + const diff = log.render(mkFrame(prevAsInk, w, h), mkFrame(next, w, h), true, false) + + const written = stdoutOnly(diff) + expect(written).not.toContain('orphaned') + expect(diff.some(p => p.type === 'clearTerminal')).toBe(false) + // Verdict: in this configuration the renderer cannot heal the drift. + // The only recovery path from ink's side is fullResetSequence — which + // triggers only on viewport resize or scrollback-change detection, + // neither of which fires on a pure drift. 
A fix has to either (a) + // defensively emit a full repaint on every xterm.js frame where + // prevFrameContaminated is set, or (b) close the drift window at the + // renderer level so the in-memory prev.screen cannot diverge. + }) +}) From f7e86577bc258985ddf9cc328f7e7343585ff382 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 12:21:09 -0500 Subject: [PATCH 006/264] fix(ui-tui): heal xterm.js resize-burst render drift --- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 1543dc7fce..4e0c54ec9e 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -70,7 +70,7 @@ import { startSelection, updateSelection } from './selection.js' -import { supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' +import { isXtermJs, supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' import { CURSOR_HOME, cursorMove, @@ -245,6 +245,7 @@ export default class Ink { // microtask. Dims are captured sync in handleResize; only the // expensive tree rebuild defers. private pendingResizeRender = false + private resizeSettleTimer: ReturnType | null = null // Fold synchronous re-entry (selection fanout, onFrame callback) // into one follow-up microtask instead of stacking renders. 
@@ -439,6 +440,11 @@ export default class Ink { this.drainTimer = null } + if (this.resizeSettleTimer !== null) { + clearTimeout(this.resizeSettleTimer) + this.resizeSettleTimer = null + } + // Alt screen: reset frame buffers so the next render repaints from // scratch (prevFrameContaminated → every cell written, wrapped in // BSU/ESU — old content stays visible until the new frame swaps @@ -456,6 +462,20 @@ export default class Ink { this.resetFramesForAltScreen() this.needsEraseBeforePaint = true + + if (isXtermJs()) { + this.resizeSettleTimer = setTimeout(() => { + this.resizeSettleTimer = null + + if (this.isUnmounted || this.isPaused || !this.altScreenActive || !this.options.stdout.isTTY) { + return + } + + this.resetFramesForAltScreen() + this.needsEraseBeforePaint = true + this.scheduleRender() + }, 160) + } } // Already queued: later events in this burst updated dims/alt-screen From 3e01de0b092c7b14842c5165d09146b45c140066 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 12:40:39 -0500 Subject: [PATCH 007/264] fix(ui-tui): preserve composer after resize-burst healing - run the xterm.js settle-heal pass through a full render commit instead of diff-only scheduleRender - guard against overlapping resize renders and clear settle timers on unmount --- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 28 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 4e0c54ec9e..036954a770 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -467,13 +467,29 @@ export default class Ink { this.resizeSettleTimer = setTimeout(() => { this.resizeSettleTimer = null - if (this.isUnmounted || this.isPaused || !this.altScreenActive || !this.options.stdout.isTTY) { + if ( + this.isUnmounted || + this.isPaused || + !this.altScreenActive || + !this.options.stdout.isTTY || + 
this.currentNode === null || + this.pendingResizeRender + ) { return } - this.resetFramesForAltScreen() - this.needsEraseBeforePaint = true - this.scheduleRender() + this.pendingResizeRender = true + queueMicrotask(() => { + this.pendingResizeRender = false + + if (this.isUnmounted || this.isPaused || !this.altScreenActive || !this.options.stdout.isTTY || this.currentNode === null) { + return + } + + this.resetFramesForAltScreen() + this.needsEraseBeforePaint = true + this.render(this.currentNode) + }) }, 160) } } @@ -1954,6 +1970,10 @@ export default class Ink { clearTimeout(this.drainTimer) this.drainTimer = null } + if (this.resizeSettleTimer !== null) { + clearTimeout(this.resizeSettleTimer) + this.resizeSettleTimer = null + } reconciler.updateContainerSync(null, this.container, null, noop) reconciler.flushSyncWork() From 60d1edc38a0e1773193a4c7738781ffa1b724bbc Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 12:44:56 -0500 Subject: [PATCH 008/264] fix(ui-tui): keep bottom statusbar in composer layout Render the bottom status bar inside the composer pane so aggressive resize + streaming churn cannot cull the input row via sibling overlap. 
--- ui-tui/src/components/appLayout.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx index cdac992d30..164ef5dd4a 100644 --- a/ui-tui/src/components/appLayout.tsx +++ b/ui-tui/src/components/appLayout.tsx @@ -237,6 +237,8 @@ const ComposerPane = memo(function ComposerPane({ )} {!composer.empty && !ui.sid && ⚕ {ui.status}} + + ) }) @@ -320,8 +322,6 @@ export const AppLayout = memo(function AppLayout({ /> - - )} From e91be4d7dcc26d1155520bc17cb3f61616ad87e1 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:18:33 +0530 Subject: [PATCH 009/264] fix: resolve_alias prefers highest version + merges static catalog Three bugs fixed in model alias resolution: 1. resolve_alias() returned the FIRST catalog match with no version preference. '/model mimo' picked mimo-v2-omni (index 0 in dict) instead of mimo-v2.5-pro. Now collects all prefix matches, sorts by version descending with pro/max ranked above bare names, and returns the highest. 2. models.dev registry missing newly added models (e.g. v2.5 for native xiaomi). resolve_alias() now merges static _PROVIDER_MODELS entries into the catalog so models resolve immediately without waiting for models.dev to sync. 3. hermes model picker showed only models.dev results (3 xiaomi models), hiding curated entries (5 total). The picker now merges curated models into the models.dev list so all models appear. Also fixes a trailing-dot float parsing edge case in _model_sort_key where '5.4.' failed float() and multi-dot versions like '5.4.1' weren't parsed correctly. 
--- hermes_cli/main.py | 13 ++- hermes_cli/model_switch.py | 161 ++++++++++++++++++++++++++++++++----- 2 files changed, 154 insertions(+), 20 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index ec0441f8b2..cb70261b46 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -3984,7 +3984,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""): pass if mdev_models: - model_list = mdev_models + # Merge models.dev with curated list so newly added models + # (not yet in models.dev) still appear in the picker. + if curated: + seen = {m.lower() for m in mdev_models} + merged = list(mdev_models) + for m in curated: + if m.lower() not in seen: + merged.append(m) + seen.add(m.lower()) + model_list = merged + else: + model_list = mdev_models print(f" Found {len(model_list)} model(s) from models.dev registry") elif curated and len(curated) >= 8: # Curated list is substantial — use it directly, skip live probe diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index a7c98d9c88..41fbe36deb 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]: # Alias resolution # --------------------------------------------------------------------------- +def _model_sort_key(model_id: str, prefix: str) -> tuple: + """Sort key for model version preference. + + Extracts version numbers after the family prefix and returns a sort key + that prefers higher versions. Suffix tokens (``pro``, ``omni``, etc.) + are used as tiebreakers, with common quality indicators ranked. 
+ + Examples (with prefix ``"mimo"``):: + + mimo-v2.5-pro → (-2.5, 0, 'pro') # highest version wins + mimo-v2.5 → (-2.5, 1, '') # no suffix = lower than pro + mimo-v2-pro → (-2.0, 0, 'pro') + mimo-v2-omni → (-2.0, 1, 'omni') + mimo-v2-flash → (-2.0, 1, 'flash') + """ + # Strip the prefix (and optional "/" separator for aggregator slugs) + rest = model_id[len(prefix):] + if rest.startswith("/"): + rest = rest[1:] + rest = rest.lstrip("-").strip() + + # Parse version and suffix from the remainder. + # "v2.5-pro" → version [2.5], suffix "pro" + # "-omni" → version [], suffix "omni" + # State machine: start → in_version → between → in_suffix + nums: list[float] = [] + suffix_buf = "" + state = "start" + num_buf = "" + + for ch in rest: + if state == "start": + if ch in "vV": + state = "in_version" + elif ch.isdigit(): + state = "in_version" + num_buf += ch + elif ch in "-_.": + pass # skip separators before any content + else: + state = "in_suffix" + suffix_buf += ch + elif state == "in_version": + if ch.isdigit(): + num_buf += ch + elif ch == ".": + if "." in num_buf: + # Second dot — flush current number, start new component + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + num_buf = "" + else: + num_buf += ch + elif ch in "-_.": + if num_buf: + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + num_buf = "" + state = "between" + else: + if num_buf: + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + num_buf = "" + state = "in_suffix" + suffix_buf += ch + elif state == "between": + if ch.isdigit(): + state = "in_version" + num_buf = ch + elif ch in "vV": + state = "in_version" + elif ch in "-_.": + pass + else: + state = "in_suffix" + suffix_buf += ch + elif state == "in_suffix": + suffix_buf += ch + + # Flush remaining buffer (strip trailing dots — "5.4." 
→ "5.4") + if num_buf and state == "in_version": + try: + nums.append(float(num_buf.rstrip("."))) + except ValueError: + pass + + suffix = suffix_buf.lower().strip("-_.") + suffix = suffix.strip() + + # Negate versions so higher → sorts first + version_key = tuple(-n for n in nums) + + # Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite + # Lower number = preferred + _SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0} + suffix_rank = _SUFFIX_RANK.get(suffix, 1) + + return version_key + (suffix_rank, suffix) + + def resolve_alias( raw_input: str, current_provider: str, @@ -311,9 +418,9 @@ def resolve_alias( """Resolve a short alias against the current provider's catalog. Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the - current provider's models.dev catalog for the first model whose ID - starts with ``vendor/family`` (or just ``family`` for non-aggregator - providers). + current provider's models.dev catalog for the model whose ID starts + with ``vendor/family`` (or just ``family`` for non-aggregator + providers) and has the **highest version**. Returns: ``(provider, resolved_model_id, alias_name)`` if a match is @@ -341,28 +448,44 @@ def resolve_alias( vendor, family = identity - # Search the provider's catalog from models.dev + # Build catalog from models.dev, then merge in static _PROVIDER_MODELS + # entries that models.dev may be missing (e.g. newly added models not + # yet synced to the registry). 
catalog = list_provider_models(current_provider) - if not catalog: - return None + try: + from hermes_cli.models import _PROVIDER_MODELS + static = _PROVIDER_MODELS.get(current_provider, []) + if static: + seen = {m.lower() for m in catalog} + for m in static: + if m.lower() not in seen: + catalog.append(m) + except Exception: + pass # For aggregators, models are vendor/model-name format aggregator = is_aggregator(current_provider) - for model_id in catalog: - mid_lower = model_id.lower() - if aggregator: - # Match vendor/family prefix -- e.g. "anthropic/claude-sonnet" - prefix = f"{vendor}/{family}".lower() - if mid_lower.startswith(prefix): - return (current_provider, model_id, key) - else: - # Non-aggregator: bare names -- e.g. "claude-sonnet-4-6" - family_lower = family.lower() - if mid_lower.startswith(family_lower): - return (current_provider, model_id, key) + if aggregator: + prefix = f"{vendor}/{family}".lower() + matches = [ + mid for mid in catalog + if mid.lower().startswith(prefix) + ] + else: + family_lower = family.lower() + matches = [ + mid for mid in catalog + if mid.lower().startswith(family_lower) + ] - return None + if not matches: + return None + + # Sort by version descending — prefer the latest/highest version + prefix_for_sort = f"{vendor}/{family}" if aggregator else family + matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort)) + return (current_provider, matches[0], key) def get_authenticated_provider_slugs( From 7c4dd7d660f3ea3872c7a9fea873ecb388738e5e Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 12:49:49 -0500 Subject: [PATCH 010/264] refactor(ui-tui): collapse xterm.js resize settle dance Replace 28-line guard + nested queueMicrotask + pendingResizeRender flag-reuse with a named canAltScreenRepaint predicate and a single flat paint. setTimeout already drained the burst coalescer; the nested defer and flag dance were paranoia. 
--- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 40 +++++++++++----------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 036954a770..5a32ceeffd 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -463,33 +463,21 @@ export default class Ink { this.resetFramesForAltScreen() this.needsEraseBeforePaint = true + // xterm.js burst-drift healer: 160ms after the last resize, force one + // full reconcile so Yoga/React catch up to the final viewport. No flag + // dance — setTimeout already drained the burst coalescer; a concurrent + // render would be idempotent. if (isXtermJs()) { this.resizeSettleTimer = setTimeout(() => { this.resizeSettleTimer = null - if ( - this.isUnmounted || - this.isPaused || - !this.altScreenActive || - !this.options.stdout.isTTY || - this.currentNode === null || - this.pendingResizeRender - ) { + if (!this.canAltScreenRepaint()) { return } - this.pendingResizeRender = true - queueMicrotask(() => { - this.pendingResizeRender = false - - if (this.isUnmounted || this.isPaused || !this.altScreenActive || !this.options.stdout.isTTY || this.currentNode === null) { - return - } - - this.resetFramesForAltScreen() - this.needsEraseBeforePaint = true - this.render(this.currentNode) - }) + this.resetFramesForAltScreen() + this.needsEraseBeforePaint = true + this.render(this.currentNode!) 
}, 160) } } @@ -513,6 +501,17 @@ export default class Ink { this.render(this.currentNode) }) } + + private canAltScreenRepaint(): boolean { + return ( + !this.isUnmounted && + !this.isPaused && + this.altScreenActive && + !!this.options.stdout.isTTY && + this.currentNode !== null + ) + } + resolveExitPromise: () => void = () => {} rejectExitPromise: (reason?: Error) => void = () => {} unsubscribeExit: () => void = () => {} @@ -1970,6 +1969,7 @@ export default class Ink { clearTimeout(this.drainTimer) this.drainTimer = null } + if (this.resizeSettleTimer !== null) { clearTimeout(this.resizeSettleTimer) this.resizeSettleTimer = null From f28f07e98eda5533abbaebbe9b0640f465bb581a Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 13:03:06 -0500 Subject: [PATCH 011/264] test(ui-tui): drop dead terminalReally from drift repro Copilot flagged the variable as unused. LogUpdate.render only sees prev/next, so a simulated "physical terminal" has no hook in the public API. Kept the narrative in the comment and tightened the assertion to demonstrate the test's actual invariant: identical prev/next emits no heal patches. --- .../hermes-ink/src/ink/log-update.test.ts | 43 +++++++------------ 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts index 2802faf24f..55a1362f77 100644 --- a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts +++ b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts @@ -93,44 +93,31 @@ describe('LogUpdate.render diff contract', () => { expect(stdoutOnly(diff)).toContain('shorterrownow') }) - it('drift repro: if terminal has content that prev.screen does not know about, diff leaves it orphaned', () => { - // Simulates prev/terminal desync: the physical terminal has STALE - // content at row 2 from a prior frame that was never reconciled into - // prev.screen. next.screen is blank at row 2. 
Diff finds prev==next - // (both blank at row 2), emits nothing → the stale content survives - // on the terminal as an artifact. + it('drift repro: identical prev/next emits no heal, even when the physical terminal is stale', () => { + // Load-bearing theory for the rapid-resize scattered-letter bug: if the + // physical terminal has stale cells that prev.screen doesn't know about + // (e.g. resize-induced reflow wrote past ink's tracked range), the + // renderer has no signal to heal them. LogUpdate.render only sees + // prev/next — no view of the physical terminal — so when prev==next, + // it emits nothing and any orphaned glyphs survive. // - // This is the load-bearing theory for the rapid-resize scattered-letter - // bug: whenever the ink renderer believes prev.screen is authoritative - // but the physical terminal was mutated out-of-band (resize-induced - // reflow writing past the prev-frame's tracked cells), those cells - // drift and artifacts appear at that row on subsequent frames. + // The fix path is upstream of this diff: either (a) defensively + // full-repaint on xterm.js frames where prevFrameContaminated is set, + // or (b) close the drift window so prev.screen cannot diverge. const w = 20 const h = 3 - const prevAsInk = mkScreen(w, h) - paint(prevAsInk, 0, 'same') - // row 2 in prevAsInk is blank — but pretend the terminal has stale - // characters there. ink has no way to know. 
- const terminalReally = mkScreen(w, h) - paint(terminalReally, 0, 'same') - paint(terminalReally, 2, 'orphaned') + + const prev = mkScreen(w, h) + paint(prev, 0, 'same') const next = mkScreen(w, h) paint(next, 0, 'same') next.damage = { x: 0, y: 0, width: w, height: h } const log = new LogUpdate({ isTTY: true, stylePool }) - const diff = log.render(mkFrame(prevAsInk, w, h), mkFrame(next, w, h), true, false) + const diff = log.render(mkFrame(prev, w, h), mkFrame(next, w, h), true, false) - const written = stdoutOnly(diff) - expect(written).not.toContain('orphaned') + expect(stdoutOnly(diff)).toBe('') expect(diff.some(p => p.type === 'clearTerminal')).toBe(false) - // Verdict: in this configuration the renderer cannot heal the drift. - // The only recovery path from ink's side is fullResetSequence — which - // triggers only on viewport resize or scrollback-change detection, - // neither of which fires on a pure drift. A fix has to either (a) - // defensively emit a full repaint on every xterm.js frame where - // prevFrameContaminated is set, or (b) close the drift window at the - // renderer level so the in-memory prev.screen cannot diverge. }) }) From 1e445b2547c5f83a4632358f44ba4e51497eb050 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 13:10:52 -0500 Subject: [PATCH 012/264] fix(ui-tui): heal post-resize alt-screen drift Broaden the settle repaint from xterm.js-only to all alt-screen terminals. Ink upstream and ConPTY/xterm reports point to resize/reflow desync as a general stale-cell class, not a host-specific quirk. 
--- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 31 +++++++++++----------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 5a32ceeffd..4cfb435c7c 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -70,7 +70,7 @@ import { startSelection, updateSelection } from './selection.js' -import { isXtermJs, supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' +import { supportsExtendedKeys, SYNC_OUTPUT_SUPPORTED, type Terminal, writeDiffToTerminal } from './terminal.js' import { CURSOR_HOME, cursorMove, @@ -463,23 +463,22 @@ export default class Ink { this.resetFramesForAltScreen() this.needsEraseBeforePaint = true - // xterm.js burst-drift healer: 160ms after the last resize, force one - // full reconcile so Yoga/React catch up to the final viewport. No flag - // dance — setTimeout already drained the burst coalescer; a concurrent - // render would be idempotent. - if (isXtermJs()) { - this.resizeSettleTimer = setTimeout(() => { - this.resizeSettleTimer = null + // Post-resize drift healer: 160ms after the last resize, force one full + // reconcile so Yoga/React catch up to the final viewport and any stale + // terminal cells from host-side reflow get repainted away. Ink upstream + // and ConPTY/xterm reports point to this as a general resize/reflow + // desync class, not an xterm.js-only quirk. + this.resizeSettleTimer = setTimeout(() => { + this.resizeSettleTimer = null - if (!this.canAltScreenRepaint()) { - return - } + if (!this.canAltScreenRepaint()) { + return + } - this.resetFramesForAltScreen() - this.needsEraseBeforePaint = true - this.render(this.currentNode!) - }, 160) - } + this.resetFramesForAltScreen() + this.needsEraseBeforePaint = true + this.render(this.currentNode!) 
+ }, 160) } // Already queued: later events in this burst updated dims/alt-screen From f5af6520d0bfac5b17c9ce460a5a06bf3249972c Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:45:07 +0530 Subject: [PATCH 013/264] fix: add extra_content property to ToolCall for Gemini thought_signature (#14488) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 43de1ca8 removed the _nr_to_assistant_message shim in favor of duck-typed properties on the ToolCall dataclass. However, the extra_content property (which carries the Gemini thought_signature) was omitted from the ToolCall definition. This caused _build_assistant_message to silently drop the signature via getattr(tc, 'extra_content', None) returning None, leading to HTTP 400 errors on subsequent turns for all Gemini 3 thinking models. Add the extra_content property to ToolCall (matching the existing call_id and response_item_id pattern) so the thought_signature round-trips correctly through the transport → agent loop → API replay path. Credit to @celttechie for identifying the root cause and providing the fix. Closes #14488 --- agent/transports/types.py | 14 ++++++++++++++ tests/agent/transports/test_types.py | 29 ++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/agent/transports/types.py b/agent/transports/types.py index 5199a5db1d..74481f85cd 100644 --- a/agent/transports/types.py +++ b/agent/transports/types.py @@ -61,6 +61,20 @@ class ToolCall: """Codex response_item_id from provider_data.""" return (self.provider_data or {}).get("response_item_id") + @property + def extra_content(self) -> Optional[Dict[str, Any]]: + """Gemini extra_content (thought_signature) from provider_data. + + Gemini 3 thinking models attach ``extra_content`` with a + ``thought_signature`` to each tool call. 
This signature must be + replayed on subsequent API calls — without it the API rejects the + request with HTTP 400. The chat_completions transport stores this + in ``provider_data["extra_content"]``; this property exposes it so + ``_build_assistant_message`` can ``getattr(tc, "extra_content")`` + uniformly. + """ + return (self.provider_data or {}).get("extra_content") + @dataclass class Usage: diff --git a/tests/agent/transports/test_types.py b/tests/agent/transports/test_types.py index 8391342496..dd3aadf1e1 100644 --- a/tests/agent/transports/test_types.py +++ b/tests/agent/transports/test_types.py @@ -200,6 +200,35 @@ class TestToolCallBackwardCompat: tc_no_pd = ToolCall(id="1", name="fn", arguments="{}") assert getattr(tc_no_pd, "call_id", None) is None + def test_extra_content_from_provider_data(self): + """Gemini thought_signature stored in provider_data is exposed via property.""" + ec = {"google": {"thought_signature": "SIG_ABC123"}} + tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"extra_content": ec}) + assert tc.extra_content == ec + + def test_extra_content_none_when_no_provider_data(self): + tc = ToolCall(id="1", name="fn", arguments="{}", provider_data=None) + assert tc.extra_content is None + + def test_extra_content_none_when_key_absent(self): + tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"}) + assert tc.extra_content is None + + def test_extra_content_getattr_pattern(self): + """_build_assistant_message uses getattr(tc, 'extra_content', None). + + This is the exact pattern that was broken before the extra_content + property was added — ToolCall lacked the property so getattr always + returned None, silently dropping the Gemini thought_signature and + causing HTTP 400 on subsequent turns (issue #14488). 
+ """ + ec = {"google": {"thought_signature": "SIG_ABC123"}} + tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"extra_content": ec}) + assert getattr(tc, "extra_content", None) == ec + + tc_no_extra = ToolCall(id="1", name="fn", arguments="{}") + assert getattr(tc_no_extra, "extra_content", None) is None + class TestNormalizedResponseBackwardCompat: """Test properties that replaced _nr_to_assistant_message() shim.""" From c8ff70fe03f5c0fb5726392bed9586544b1d8b15 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 13:16:18 -0500 Subject: [PATCH 014/264] perf(ui-tui): freeze offscreen live tail during scroll When the viewport is away from the bottom, keep the last visible progress snapshot instead of rebuilding the streaming/thinking subtree on every turn-store update. This cuts scroll-time churn while preserving live updates near the tail and on turn completion. --- ui-tui/src/app/useMainApp.ts | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 39c4b534c8..fdfdd8d54c 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -22,7 +22,7 @@ import type { Msg, PanelSection, SlashCatalog } from '../types.js' import { createGatewayEventHandler } from './createGatewayEventHandler.js' import { createSlashHandler } from './createSlashHandler.js' -import { type GatewayRpc, type TranscriptRow } from './interfaces.js' +import { type AppLayoutProgressProps, type GatewayRpc, type TranscriptRow } from './interfaces.js' import { $overlayState, patchOverlayState } from './overlayStore.js' import { turnController } from './turnController.js' import { $turnState, patchTurnState } from './turnStore.js' @@ -658,11 +658,36 @@ export function useMainApp(gw: GatewayClient) { [cols, composerActions, composerState, empty, pagerPageSize, submit] ) - const appProgress = useMemo( + const liveTailVisible = (() => { + const s = 
scrollRef.current + + if (!s) { + return true + } + + const top = Math.max(0, s.getScrollTop() + s.getPendingDelta()) + const vp = Math.max(0, s.getViewportHeight()) + const total = Math.max(vp, s.getScrollHeight()) + + return top + vp >= total - 3 + })() + + const liveProgress = useMemo( () => ({ ...turn, showProgressArea, showStreamingArea: Boolean(turn.streaming) }), [turn, showProgressArea] ) + const frozenProgressRef = useRef(liveProgress) + + // When the live tail is offscreen, freeze its snapshot so scroll work doesn't + // keep rebuilding the streaming/thinking subtree the user can't see. Thaw as + // soon as the viewport comes back near the bottom or the turn finishes. + if (liveTailVisible || !ui.busy) { + frozenProgressRef.current = liveProgress + } + + const appProgress = liveTailVisible || !ui.busy ? liveProgress : frozenProgressRef.current + const cwd = ui.info?.cwd || process.env.HERMES_CWD || process.cwd() const gitBranch = useGitBranch(cwd) From aa47812edfb9cd945822a2a69a260467e8136926 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 14:19:32 -0500 Subject: [PATCH 015/264] fix(ui-tui): clear sticky prompt when follow snaps to bottom Renderer-driven follow-to-bottom was restoring the viewport to the tail without notifying ScrollBox subscribers, so StickyPromptTracker could stay stale-visible. Notify on render-time scroll/sticky changes and treat near-bottom as bottom for prompt hiding. 
--- .../hermes-ink/src/ink/components/ScrollBox.tsx | 1 + ui-tui/packages/hermes-ink/src/ink/dom.ts | 1 + .../hermes-ink/src/ink/render-node-to-output.ts | 5 +++++ ui-tui/src/components/appChrome.tsx | 10 ++++++++-- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/components/ScrollBox.tsx b/ui-tui/packages/hermes-ink/src/ink/components/ScrollBox.tsx index aac8f2b334..ed4239cef0 100644 --- a/ui-tui/packages/hermes-ink/src/ink/components/ScrollBox.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/components/ScrollBox.tsx @@ -257,6 +257,7 @@ function ScrollBox({ children, ref, stickyScroll, ...style }: PropsWithChildren< if (el) { el.scrollTop ??= 0 + el.notifyScrollChange = notify } }} style={{ diff --git a/ui-tui/packages/hermes-ink/src/ink/dom.ts b/ui-tui/packages/hermes-ink/src/ink/dom.ts index 6c4b198304..735ab0b0c5 100644 --- a/ui-tui/packages/hermes-ink/src/ink/dom.ts +++ b/ui-tui/packages/hermes-ink/src/ink/dom.ts @@ -72,6 +72,7 @@ export type DOMElement = { scrollViewportHeight?: number scrollViewportTop?: number stickyScroll?: boolean + notifyScrollChange?: () => void // Set by ScrollBox.scrollToElement; render-node-to-output reads // el.yogaNode.getComputedTop() (FRESH — same Yoga pass as scrollHeight) // and sets scrollTop = top + offset, then clears this. Unlike an diff --git a/ui-tui/packages/hermes-ink/src/ink/render-node-to-output.ts b/ui-tui/packages/hermes-ink/src/ink/render-node-to-output.ts index dd7372a092..12d689c166 100644 --- a/ui-tui/packages/hermes-ink/src/ink/render-node-to-output.ts +++ b/ui-tui/packages/hermes-ink/src/ink/render-node-to-output.ts @@ -761,6 +761,7 @@ function renderNodeToOutput( // active text selection by the same delta (native terminal behavior: // view keeps scrolling, highlight walks up with the text). const scrollTopBeforeFollow = node.scrollTop ?? 0 + const stickyBeforeFollow = node.stickyScroll const sticky = node.stickyScroll ?? 
Boolean(node.attributes['stickyScroll']) @@ -863,6 +864,10 @@ function renderNodeToOutput( scrollDrainNode = node } + if ((node.scrollTop ?? 0) !== scrollTopBeforeFollow || node.stickyScroll !== stickyBeforeFollow) { + node.notifyScrollChange?.() + } + scrollTop = clamped if (content && contentYoga) { diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx index d12a4debff..8b1f816ce4 100644 --- a/ui-tui/src/components/appChrome.tsx +++ b/ui-tui/src/components/appChrome.tsx @@ -256,15 +256,21 @@ export function StickyPromptTracker({ messages, offsets, scrollRef, onChange }: } const top = Math.max(0, s.getScrollTop() + s.getPendingDelta()) + const vp = Math.max(0, s.getViewportHeight()) + const total = Math.max(vp, s.getScrollHeight()) + const atBottom = s.isSticky() || top + vp >= total - 2 - return s.isSticky() ? -1 - top : top + return atBottom ? -1 - top : top }, () => NaN ) const s = scrollRef.current const top = Math.max(0, (s?.getScrollTop() ?? 0) + (s?.getPendingDelta() ?? 0)) - const text = stickyPromptFromViewport(messages, offsets, top, s?.isSticky() ?? true) + const vp = Math.max(0, s?.getViewportHeight() ?? 0) + const total = Math.max(vp, s?.getScrollHeight() ?? vp) + const atBottom = (s?.isSticky() ?? true) || top + vp >= total - 2 + const text = stickyPromptFromViewport(messages, offsets, top, atBottom) useEffect(() => onChange(text), [onChange, text]) From 9a885fba31e5ae8a8a24b7f5dcf8ea19dedddf5c Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 14:32:29 -0500 Subject: [PATCH 016/264] fix(ui-tui): hide stale sticky prompt when newer prompt is visible Sticky prompt selection only considered the top edge of the viewport, so it could keep showing an older user prompt even when a newer one was already visible lower down. Suppress sticky output whenever a user message is visible in the viewport and cover it with a regression test. 
--- ui-tui/src/__tests__/viewport.test.ts | 31 +++++++++++++++++++++++++++ ui-tui/src/components/appChrome.tsx | 2 +- ui-tui/src/domain/viewport.ts | 10 ++++++++- 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 ui-tui/src/__tests__/viewport.test.ts diff --git a/ui-tui/src/__tests__/viewport.test.ts b/ui-tui/src/__tests__/viewport.test.ts new file mode 100644 index 0000000000..0a949e44c5 --- /dev/null +++ b/ui-tui/src/__tests__/viewport.test.ts @@ -0,0 +1,31 @@ +import { describe, expect, it } from 'vitest' + +import { stickyPromptFromViewport } from '../domain/viewport.js' + +describe('stickyPromptFromViewport', () => { + it('hides the sticky prompt when a newer user message is already visible', () => { + const messages = [ + { role: 'user' as const, text: 'older prompt' }, + { role: 'assistant' as const, text: 'older answer' }, + { role: 'user' as const, text: 'current prompt' }, + { role: 'assistant' as const, text: 'current answer' } + ] + + const offsets = [0, 2, 10, 12, 20] + + expect(stickyPromptFromViewport(messages, offsets, 16, 8, false)).toBe('') + }) + + it('shows the latest user message above the viewport when no user message is visible', () => { + const messages = [ + { role: 'user' as const, text: 'older prompt' }, + { role: 'assistant' as const, text: 'older answer' }, + { role: 'user' as const, text: 'current prompt' }, + { role: 'assistant' as const, text: 'current answer' } + ] + + const offsets = [0, 2, 10, 12, 20] + + expect(stickyPromptFromViewport(messages, offsets, 20, 16, false)).toBe('current prompt') + }) +}) diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx index 8b1f816ce4..d7974d5332 100644 --- a/ui-tui/src/components/appChrome.tsx +++ b/ui-tui/src/components/appChrome.tsx @@ -270,7 +270,7 @@ export function StickyPromptTracker({ messages, offsets, scrollRef, onChange }: const vp = Math.max(0, s?.getViewportHeight() ?? 0) const total = Math.max(vp, s?.getScrollHeight() ?? 
vp) const atBottom = (s?.isSticky() ?? true) || top + vp >= total - 2 - const text = stickyPromptFromViewport(messages, offsets, top, atBottom) + const text = stickyPromptFromViewport(messages, offsets, top + vp, top, atBottom) useEffect(() => onChange(text), [onChange, text]) diff --git a/ui-tui/src/domain/viewport.ts b/ui-tui/src/domain/viewport.ts index 788f94269e..3a358eb6f2 100644 --- a/ui-tui/src/domain/viewport.ts +++ b/ui-tui/src/domain/viewport.ts @@ -18,6 +18,7 @@ const upperBound = (offsets: ArrayLike, target: number) => { export const stickyPromptFromViewport = ( messages: readonly Msg[], offsets: ArrayLike, + bottom: number, top: number, sticky: boolean ) => { @@ -26,8 +27,15 @@ export const stickyPromptFromViewport = ( } const first = Math.max(0, Math.min(messages.length - 1, upperBound(offsets, top) - 1)) + const last = Math.max(first, Math.min(messages.length - 1, upperBound(offsets, bottom) - 1)) - for (let i = first; i >= 0; i--) { + for (let i = first; i <= last; i++) { + if (messages[i]?.role === 'user') { + return '' + } + } + + for (let i = first - 1; i >= 0; i--) { if (messages[i]?.role !== 'user') { continue } From 9bf6e1cd6eeecf83ddd4fe97b7c756d6bf2f34cb Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 14:37:00 -0500 Subject: [PATCH 017/264] refactor(ui-tui): clean touched resize and sticky prompt paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trim comment noise, remove redundant typing, normalize sticky prompt viewport args to top→bottom order, and reuse one sticky viewport helper instead of duplicating the math. 
--- ui-tui/packages/hermes-ink/src/ink/ink.tsx | 7 ++--- ui-tui/src/__tests__/viewport.test.ts | 4 +-- ui-tui/src/app/useMainApp.ts | 11 +++----- ui-tui/src/components/appChrome.tsx | 31 +++++++++++----------- ui-tui/src/domain/viewport.ts | 2 +- 5 files changed, 23 insertions(+), 32 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index 4cfb435c7c..8e43f60ea6 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -463,11 +463,8 @@ export default class Ink { this.resetFramesForAltScreen() this.needsEraseBeforePaint = true - // Post-resize drift healer: 160ms after the last resize, force one full - // reconcile so Yoga/React catch up to the final viewport and any stale - // terminal cells from host-side reflow get repainted away. Ink upstream - // and ConPTY/xterm reports point to this as a general resize/reflow - // desync class, not an xterm.js-only quirk. + // One last repaint after the resize burst settles closes any host-side + // reflow drift the normal diff path can't see. 
this.resizeSettleTimer = setTimeout(() => { this.resizeSettleTimer = null diff --git a/ui-tui/src/__tests__/viewport.test.ts b/ui-tui/src/__tests__/viewport.test.ts index 0a949e44c5..d8500c8d20 100644 --- a/ui-tui/src/__tests__/viewport.test.ts +++ b/ui-tui/src/__tests__/viewport.test.ts @@ -13,7 +13,7 @@ describe('stickyPromptFromViewport', () => { const offsets = [0, 2, 10, 12, 20] - expect(stickyPromptFromViewport(messages, offsets, 16, 8, false)).toBe('') + expect(stickyPromptFromViewport(messages, offsets, 8, 16, false)).toBe('') }) it('shows the latest user message above the viewport when no user message is visible', () => { @@ -26,6 +26,6 @@ describe('stickyPromptFromViewport', () => { const offsets = [0, 2, 10, 12, 20] - expect(stickyPromptFromViewport(messages, offsets, 20, 16, false)).toBe('current prompt') + expect(stickyPromptFromViewport(messages, offsets, 16, 20, false)).toBe('current prompt') }) }) diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index fdfdd8d54c..75fe73c86d 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -22,7 +22,7 @@ import type { Msg, PanelSection, SlashCatalog } from '../types.js' import { createGatewayEventHandler } from './createGatewayEventHandler.js' import { createSlashHandler } from './createSlashHandler.js' -import { type AppLayoutProgressProps, type GatewayRpc, type TranscriptRow } from './interfaces.js' +import { type GatewayRpc, type TranscriptRow } from './interfaces.js' import { $overlayState, patchOverlayState } from './overlayStore.js' import { turnController } from './turnController.js' import { $turnState, patchTurnState } from './turnStore.js' @@ -672,16 +672,11 @@ export function useMainApp(gw: GatewayClient) { return top + vp >= total - 3 })() - const liveProgress = useMemo( - () => ({ ...turn, showProgressArea, showStreamingArea: Boolean(turn.streaming) }), - [turn, showProgressArea] - ) + const liveProgress = useMemo(() => ({ ...turn, showProgressArea, 
showStreamingArea: Boolean(turn.streaming) }), [turn, showProgressArea]) const frozenProgressRef = useRef(liveProgress) - // When the live tail is offscreen, freeze its snapshot so scroll work doesn't - // keep rebuilding the streaming/thinking subtree the user can't see. Thaw as - // soon as the viewport comes back near the bottom or the turn finishes. + // Freeze the offscreen live tail so scroll doesn't rebuild unseen streaming UI. if (liveTailVisible || !ui.busy) { frozenProgressRef.current = liveProgress } diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx index d7974d5332..8de2a63019 100644 --- a/ui-tui/src/components/appChrome.tsx +++ b/ui-tui/src/components/appChrome.tsx @@ -249,28 +249,15 @@ export function StickyPromptTracker({ messages, offsets, scrollRef, onChange }: useSyncExternalStore( useCallback((cb: () => void) => scrollRef.current?.subscribe(cb) ?? (() => {}), [scrollRef]), () => { - const s = scrollRef.current - - if (!s) { - return NaN - } - - const top = Math.max(0, s.getScrollTop() + s.getPendingDelta()) - const vp = Math.max(0, s.getViewportHeight()) - const total = Math.max(vp, s.getScrollHeight()) - const atBottom = s.isSticky() || top + vp >= total - 2 + const { atBottom, top } = getStickyViewport(scrollRef.current) return atBottom ? -1 - top : top }, () => NaN ) - const s = scrollRef.current - const top = Math.max(0, (s?.getScrollTop() ?? 0) + (s?.getPendingDelta() ?? 0)) - const vp = Math.max(0, s?.getViewportHeight() ?? 0) - const total = Math.max(vp, s?.getScrollHeight() ?? vp) - const atBottom = (s?.isSticky() ?? 
true) || top + vp >= total - 2 - const text = stickyPromptFromViewport(messages, offsets, top + vp, top, atBottom) + const { atBottom, bottom, top } = getStickyViewport(scrollRef.current) + const text = stickyPromptFromViewport(messages, offsets, top, bottom, atBottom) useEffect(() => onChange(text), [onChange, text]) @@ -395,3 +382,15 @@ interface TranscriptScrollbarProps { scrollRef: RefObject t: Theme } + +function getStickyViewport(s?: ScrollBoxHandle | null) { + const top = Math.max(0, (s?.getScrollTop() ?? 0) + (s?.getPendingDelta() ?? 0)) + const vp = Math.max(0, s?.getViewportHeight() ?? 0) + const total = Math.max(vp, s?.getScrollHeight() ?? vp) + + return { + atBottom: (s?.isSticky() ?? true) || top + vp >= total - 2, + bottom: top + vp, + top + } +} diff --git a/ui-tui/src/domain/viewport.ts b/ui-tui/src/domain/viewport.ts index 3a358eb6f2..48d7427fd1 100644 --- a/ui-tui/src/domain/viewport.ts +++ b/ui-tui/src/domain/viewport.ts @@ -18,8 +18,8 @@ const upperBound = (offsets: ArrayLike, target: number) => { export const stickyPromptFromViewport = ( messages: readonly Msg[], offsets: ArrayLike, - bottom: number, top: number, + bottom: number, sticky: boolean ) => { if (sticky || !messages.length) { From 882278520ba9de4e1219a0575313a19e5e8b67de Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Thu, 23 Apr 2026 14:37:27 -0500 Subject: [PATCH 018/264] chore: uptick --- ui-tui/packages/hermes-ink/src/ink/log-update.test.ts | 10 +--------- ui-tui/src/app/useMainApp.ts | 5 ++++- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts index 55a1362f77..be2b711ecc 100644 --- a/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts +++ b/ui-tui/packages/hermes-ink/src/ink/log-update.test.ts @@ -2,15 +2,7 @@ import { describe, expect, it } from 'vitest' import type { Frame } from './frame.js' import { LogUpdate } from './log-update.js' -import { 
- CellWidth, - CharPool, - createScreen, - HyperlinkPool, - type Screen, - setCellAt, - StylePool -} from './screen.js' +import { CellWidth, CharPool, createScreen, HyperlinkPool, type Screen, setCellAt, StylePool } from './screen.js' /** * Contract tests for LogUpdate.render() — the diff-to-ANSI path that owns diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 75fe73c86d..41edcc8282 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -672,7 +672,10 @@ export function useMainApp(gw: GatewayClient) { return top + vp >= total - 3 })() - const liveProgress = useMemo(() => ({ ...turn, showProgressArea, showStreamingArea: Boolean(turn.streaming) }), [turn, showProgressArea]) + const liveProgress = useMemo( + () => ({ ...turn, showProgressArea, showStreamingArea: Boolean(turn.streaming) }), + [turn, showProgressArea] + ) const frozenProgressRef = useRef(liveProgress) From 8f5fee3e3e4e86124acd1677fbd151e93ee46a9b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:32:43 -0700 Subject: [PATCH 019/264] feat(codex): add gpt-5.5 and wire live model discovery into picker (#14720) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenAI launched GPT-5.5 on Codex today (Apr 23 2026). Adds it to the static catalog and pipes the user's OAuth access token into the openai-codex path of provider_model_ids() so /model mid-session and the gateway picker hit the live ChatGPT codex/models endpoint — new models appear for each user according to what ChatGPT actually lists for their account, without a Hermes release. Verified live: 'gpt-5.5' returns priority 0 (featured) from the endpoint, 400k context per OpenAI's launch article. 'hermes chat --provider openai-codex --model gpt-5.5' completes end-to-end. 
Changes: - hermes_cli/codex_models.py: add gpt-5.5 to DEFAULT_CODEX_MODELS + forward-compat - agent/model_metadata.py: 400k context length entry - hermes_cli/models.py: resolve codex OAuth token before calling get_codex_model_ids() in provider_model_ids('openai-codex') --- agent/model_metadata.py | 3 +++ hermes_cli/codex_models.py | 2 ++ hermes_cli/models.py | 14 +++++++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 2916a7fa7d..8ce70da331 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -123,6 +123,9 @@ DEFAULT_CONTEXT_LENGTHS = { "claude": 200000, # OpenAI — GPT-5 family (most have 400k; specific overrides first) # Source: https://developers.openai.com/api/docs/models + # GPT-5.5 (launched Apr 23 2026). Verified via live ChatGPT codex/models + # endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex. + "gpt-5.5": 400000, "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index 9e2181b501..e39b2c5943 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -12,6 +12,7 @@ import os logger = logging.getLogger(__name__) DEFAULT_CODEX_MODELS: List[str] = [ + "gpt-5.5", "gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex", @@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [ ] _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [ + ("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")), ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")), ("gpt-5.3-codex", ("gpt-5.2-codex",)), diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 43f4905db3..a1f2cbec61 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1678,7 +1678,19 @@ def provider_model_ids(provider: 
Optional[str], *, force_refresh: bool = False) if normalized == "openai-codex": from hermes_cli.codex_models import get_codex_model_ids - return get_codex_model_ids() + # Pass the live OAuth access token so the picker matches whatever + # ChatGPT lists for this account right now (new models appear without + # a Hermes release). Falls back to the hardcoded catalog if no token + # or the endpoint is unreachable. + access_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + + creds = resolve_codex_runtime_credentials(refresh_if_expiring=True) + access_token = creds.get("api_key") + except Exception: + access_token = None + return get_codex_model_ids(access_token=access_token) if normalized in {"copilot", "copilot-acp"}: try: live = _fetch_github_models(_resolve_copilot_catalog_api_key()) From 255ba5bf26a10911925c5cd01d14b0cd9adb639c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:49:51 -0700 Subject: [PATCH 020/264] feat(dashboard): expand themes to fonts, layout, density (#14725) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dashboard themes now control typography and layout, not just colors. Each built-in theme picks its own fonts, base size, radius, and density so switching produces visible changes beyond hue. Schema additions (per theme): - typography — fontSans, fontMono, fontDisplay, fontUrl, baseSize, lineHeight, letterSpacing. fontUrl is injected as on switch so Google/Bunny/self-hosted stylesheets all work. - layout — radius (any CSS length) and density (compact | comfortable | spacious, multiplies Tailwind spacing). - colorOverrides (optional) — pin individual shadcn tokens that would otherwise derive from the palette. 
Built-in themes are now distinct beyond palette: - default — system stack, 15px, 0.5rem radius, comfortable - midnight — Inter + JetBrains Mono, 14px, 0.75rem, comfortable - ember — Spectral (serif) + IBM Plex Mono, 15px, 0.25rem - mono — IBM Plex Sans + Mono, 13px, 0 radius, compact - cyberpunk— Share Tech Mono everywhere, 14px, 0 radius, compact - rose — Fraunces (serif) + DM Mono, 16px, 1rem, spacious Also fixes two bugs: 1. Custom user themes silently fell back to default. ThemeProvider only applied BUILTIN_THEMES[name], so YAML files in ~/.hermes/dashboard-themes/ showed in the picker but did nothing. Server now ships the full normalised definition; client applies it. 2. Docs documented a 21-token flat colors schema that never matched the code (applyPalette reads a 3-layer palette). Rewrote the Themes section against the actual shape. Implementation: - web/src/themes/types.ts: extend DashboardTheme with typography, layout, colorOverrides; ThemeListEntry carries optional definition. - web/src/themes/presets.ts: 6 built-ins with distinct typography+layout. - web/src/themes/context.tsx: applyTheme() writes palette+typography+ layout+overrides as CSS vars, injects fontUrl stylesheet, fixes the fallback-to-default bug via resolveTheme(name). - web/src/index.css: html/body/code read the new theme-font vars; --radius-sm/md/lg/xl derive from --theme-radius; --spacing scales with --theme-spacing-mul so Tailwind utilities shift with density. - hermes_cli/web_server.py: _normalise_theme_definition() parses loose YAML (bare hex strings, partial blocks) into the canonical wire shape; /api/dashboard/themes ships full definitions for user themes. - tests/hermes_cli/test_web_server.py: 16 new tests covering the normaliser and discovery (rejection cases, clamping, defaults). - website/docs/user-guide/features/web-dashboard.md: rewrite Themes section with real schema, per-model tables, full YAML example. 
--- hermes_cli/web_server.py | 159 +++++++++++- tests/hermes_cli/test_web_server.py | 184 ++++++++++++++ web/src/index.css | 53 +++- web/src/lib/api.ts | 5 + web/src/themes/context.tsx | 228 ++++++++++++++++-- web/src/themes/presets.ts | 112 ++++++++- web/src/themes/types.ts | 99 +++++++- .../docs/user-guide/features/web-dashboard.md | 150 ++++++++---- 8 files changed, 898 insertions(+), 92 deletions(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 9cdfdb37df..10b92f69a9 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -2304,8 +2304,134 @@ _BUILTIN_DASHBOARD_THEMES = [ ] +def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]: + """Normalise a theme layer spec from YAML into `{hex, alpha}` form. + + Accepts shorthand (a bare hex string) or full dict form. Returns + ``None`` on garbage input so the caller can fall back to a built-in + default rather than blowing up. + """ + if value is None: + return {"hex": default_hex, "alpha": default_alpha} + if isinstance(value, str): + return {"hex": value, "alpha": default_alpha} + if isinstance(value, dict): + hex_val = value.get("hex", default_hex) + alpha_val = value.get("alpha", default_alpha) + if not isinstance(hex_val, str): + return None + try: + alpha_f = float(alpha_val) + except (TypeError, ValueError): + alpha_f = default_alpha + return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))} + return None + + +_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = { + "fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif', + "fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace', + "baseSize": "15px", + "lineHeight": "1.55", + "letterSpacing": "0", +} + +_THEME_DEFAULT_LAYOUT: Dict[str, str] = { + "radius": "0.5rem", + "density": "comfortable", +} + +_THEME_OVERRIDE_KEYS = { + "card", "cardForeground", "popover", "popoverForeground", + "primary", 
"primaryForeground", "secondary", "secondaryForeground", + "muted", "mutedForeground", "accent", "accentForeground", + "destructive", "destructiveForeground", "success", "warning", + "border", "input", "ring", +} + + +def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]: + """Normalise a user theme YAML into the wire format `ThemeProvider` + expects. Returns ``None`` if the theme is unusable. + + Accepts both the full schema (palette/typography/layout) and a loose + form with bare hex strings, so hand-written YAMLs stay friendly. + """ + if not isinstance(data, dict): + return None + name = data.get("name") + if not isinstance(name, str) or not name.strip(): + return None + + # Palette + palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {} + # Allow top-level `colors.background` as a shorthand too. + colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {} + + def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]: + spec = palette_src.get(key, colors_src.get(key)) + parsed = _parse_theme_layer(spec, default_hex, default_alpha) + return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha} + + palette = { + "background": _layer("background", "#041c1c", 1.0), + "midground": _layer("midground", "#ffe6cb", 1.0), + "foreground": _layer("foreground", "#ffffff", 0.0), + "warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)", + "noiseOpacity": 1.0, + } + raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity")) + try: + palette["noiseOpacity"] = float(raw_noise) if raw_noise is not None else 1.0 + except (TypeError, ValueError): + palette["noiseOpacity"] = 1.0 + + # Typography + typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {} + typography = dict(_THEME_DEFAULT_TYPOGRAPHY) + for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", 
"baseSize", "lineHeight", "letterSpacing"): + val = typo_src.get(key) + if isinstance(val, str) and val.strip(): + typography[key] = val + + # Layout + layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {} + layout = dict(_THEME_DEFAULT_LAYOUT) + radius = layout_src.get("radius") + if isinstance(radius, str) and radius.strip(): + layout["radius"] = radius + density = layout_src.get("density") + if isinstance(density, str) and density in ("compact", "comfortable", "spacious"): + layout["density"] = density + + # Color overrides — keep only valid keys with string values. + overrides_src = data.get("colorOverrides", {}) + color_overrides: Dict[str, str] = {} + if isinstance(overrides_src, dict): + for key, val in overrides_src.items(): + if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip(): + color_overrides[key] = val + + result: Dict[str, Any] = { + "name": name, + "label": data.get("label") or name, + "description": data.get("description", ""), + "palette": palette, + "typography": typography, + "layout": layout, + } + if color_overrides: + result["colorOverrides"] = color_overrides + return result + + def _discover_user_themes() -> list: - """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.""" + """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes. + + Returns a list of fully-normalised theme definitions ready to ship + to the frontend, so the client can apply them without a secondary + round-trip or a built-in stub. 
+ """ themes_dir = get_hermes_home() / "dashboard-themes" if not themes_dir.is_dir(): return [] @@ -2313,33 +2439,42 @@ def _discover_user_themes() -> list: for f in sorted(themes_dir.glob("*.yaml")): try: data = yaml.safe_load(f.read_text(encoding="utf-8")) - if isinstance(data, dict) and data.get("name"): - result.append({ - "name": data["name"], - "label": data.get("label", data["name"]), - "description": data.get("description", ""), - }) except Exception: continue + normalised = _normalise_theme_definition(data) + if normalised is not None: + result.append(normalised) return result @app.get("/api/dashboard/themes") async def get_dashboard_themes(): - """Return available themes and the currently active one.""" + """Return available themes and the currently active one. + + Built-in entries ship name/label/description only (the frontend owns + their full definitions in `web/src/themes/presets.ts`). User themes + from `~/.hermes/dashboard-themes/*.yaml` ship with their full + normalised definition under `definition`, so the client can apply + them without a stub. + """ config = load_config() active = config.get("dashboard", {}).get("theme", "default") user_themes = _discover_user_themes() - # Merge built-in + user, user themes override built-in by name. 
seen = set() themes = [] for t in _BUILTIN_DASHBOARD_THEMES: seen.add(t["name"]) themes.append(t) for t in user_themes: - if t["name"] not in seen: - themes.append(t) - seen.add(t["name"]) + if t["name"] in seen: + continue + themes.append({ + "name": t["name"], + "label": t["label"], + "description": t["description"], + "definition": t, + }) + seen.add(t["name"]) return {"themes": themes, "active": active} diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index f990ed56ae..572549bd40 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -1256,3 +1256,187 @@ class TestStatusRemoteGateway: assert data["gateway_running"] is True assert data["gateway_pid"] is None assert data["gateway_state"] == "running" + + +# --------------------------------------------------------------------------- +# Dashboard theme normaliser tests +# --------------------------------------------------------------------------- + + +class TestNormaliseThemeDefinition: + """Tests for _normalise_theme_definition() — parses YAML theme files.""" + + def test_rejects_missing_name(self): + from hermes_cli.web_server import _normalise_theme_definition + assert _normalise_theme_definition({}) is None + assert _normalise_theme_definition({"name": ""}) is None + assert _normalise_theme_definition({"name": " "}) is None + + def test_rejects_non_dict(self): + from hermes_cli.web_server import _normalise_theme_definition + assert _normalise_theme_definition("string") is None + assert _normalise_theme_definition(None) is None + assert _normalise_theme_definition([1, 2, 3]) is None + + def test_loose_colors_shorthand(self): + """Bare hex strings under `colors` parse as {hex, alpha=1.0}.""" + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({ + "name": "loose", + "colors": {"background": "#000000", "midground": "#ffffff"}, + }) + assert result is not None + assert 
result["palette"]["background"] == {"hex": "#000000", "alpha": 1.0} + assert result["palette"]["midground"] == {"hex": "#ffffff", "alpha": 1.0} + # foreground falls back to default (transparent white) + assert result["palette"]["foreground"]["hex"] == "#ffffff" + assert result["palette"]["foreground"]["alpha"] == 0.0 + + def test_full_palette_form(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({ + "name": "full", + "palette": { + "background": {"hex": "#0a1628", "alpha": 1.0}, + "midground": {"hex": "#a8d0ff", "alpha": 0.9}, + "warmGlow": "rgba(255, 0, 0, 0.5)", + "noiseOpacity": 0.5, + }, + }) + assert result["palette"]["background"]["hex"] == "#0a1628" + assert result["palette"]["midground"]["alpha"] == 0.9 + assert result["palette"]["warmGlow"] == "rgba(255, 0, 0, 0.5)" + assert result["palette"]["noiseOpacity"] == 0.5 + + def test_default_typography_applied_when_missing(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({"name": "minimal"}) + typo = result["typography"] + assert "fontSans" in typo + assert "fontMono" in typo + assert typo["baseSize"] == "15px" + assert typo["lineHeight"] == "1.55" + assert typo["letterSpacing"] == "0" + + def test_partial_typography_merges_with_defaults(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({ + "name": "partial", + "typography": { + "fontSans": "MyFont, sans-serif", + "baseSize": "12px", + }, + }) + assert result["typography"]["fontSans"] == "MyFont, sans-serif" + assert result["typography"]["baseSize"] == "12px" + # fontMono defaulted + assert "monospace" in result["typography"]["fontMono"] + + def test_layout_defaults(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({"name": "minimal"}) + assert result["layout"]["radius"] == "0.5rem" + assert result["layout"]["density"] == 
"comfortable" + + def test_invalid_density_falls_back(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({ + "name": "bad", + "layout": {"density": "ultra-spacious"}, + }) + assert result["layout"]["density"] == "comfortable" + + def test_valid_densities_accepted(self): + from hermes_cli.web_server import _normalise_theme_definition + for d in ("compact", "comfortable", "spacious"): + r = _normalise_theme_definition({"name": "x", "layout": {"density": d}}) + assert r["layout"]["density"] == d + + def test_color_overrides_filter_unknown_keys(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({ + "name": "o", + "colorOverrides": { + "card": "#123456", + "fakeToken": "#abcdef", + "primary": 42, # non-string rejected + "destructive": "#ff0000", + }, + }) + assert result["colorOverrides"] == { + "card": "#123456", + "destructive": "#ff0000", + } + + def test_color_overrides_omitted_when_empty(self): + from hermes_cli.web_server import _normalise_theme_definition + result = _normalise_theme_definition({"name": "x"}) + assert "colorOverrides" not in result + + def test_alpha_clamped_to_unit_range(self): + from hermes_cli.web_server import _normalise_theme_definition + r = _normalise_theme_definition({ + "name": "c", + "palette": {"background": {"hex": "#000", "alpha": 99.5}}, + }) + assert r["palette"]["background"]["alpha"] == 1.0 + r2 = _normalise_theme_definition({ + "name": "c", + "palette": {"background": {"hex": "#000", "alpha": -5}}, + }) + assert r2["palette"]["background"]["alpha"] == 0.0 + + def test_invalid_alpha_uses_default(self): + from hermes_cli.web_server import _normalise_theme_definition + r = _normalise_theme_definition({ + "name": "c", + "palette": {"background": {"hex": "#000", "alpha": "not a number"}}, + }) + assert r["palette"]["background"]["alpha"] == 1.0 + + +class TestDiscoverUserThemes: + """Tests for _discover_user_themes() — 
scans ~/.hermes/dashboard-themes/.""" + + def test_returns_empty_when_dir_missing(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + from hermes_cli import web_server + assert web_server._discover_user_themes() == [] + + def test_loads_and_normalises_yaml(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + themes_dir = tmp_path / "dashboard-themes" + themes_dir.mkdir() + (themes_dir / "ocean.yaml").write_text( + "name: ocean\n" + "label: Ocean\n" + "palette:\n" + " background:\n" + " hex: \"#0a1628\"\n" + " alpha: 1.0\n" + "layout:\n" + " density: spacious\n" + ) + from hermes_cli import web_server + results = web_server._discover_user_themes() + assert len(results) == 1 + assert results[0]["name"] == "ocean" + assert results[0]["label"] == "Ocean" + assert results[0]["palette"]["background"]["hex"] == "#0a1628" + assert results[0]["layout"]["density"] == "spacious" + # defaults filled in + assert "fontSans" in results[0]["typography"] + + def test_malformed_yaml_skipped(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + themes_dir = tmp_path / "dashboard-themes" + themes_dir.mkdir() + (themes_dir / "bad.yaml").write_text("::: not valid yaml :::\n\tindent wrong") + (themes_dir / "nameless.yaml").write_text("label: No Name Here\n") + (themes_dir / "ok.yaml").write_text("name: ok\n") + from hermes_cli import web_server + results = web_server._discover_user_themes() + names = [r["name"] for r in results] + assert "ok" in names + assert "bad" not in names # malformed YAML + assert len(results) == 1 # only the valid one diff --git a/web/src/index.css b/web/src/index.css index b602361e2e..24260c6b40 100644 --- a/web/src/index.css +++ b/web/src/index.css @@ -29,6 +29,48 @@ /* Consumed by ; also theme-switchable. */ --warm-glow: rgba(255, 189, 56, 0.35); --noise-opacity-mul: 1; + + /* Typography tokens — rewritten by ThemeProvider. 
Defaults match the + system stack so themes that don't override look native. */ + --theme-font-sans: system-ui, -apple-system, "Segoe UI", Roboto, + "Helvetica Neue", Arial, sans-serif; + --theme-font-mono: ui-monospace, "SF Mono", "Cascadia Mono", Menlo, + Consolas, monospace; + --theme-font-display: var(--theme-font-sans); + --theme-base-size: 15px; + --theme-line-height: 1.55; + --theme-letter-spacing: 0; + + /* Layout tokens. */ + --radius: 0.5rem; + --theme-radius: 0.5rem; + --theme-spacing-mul: 1; + --theme-density: comfortable; +} + +/* Theme tokens cascade into the document root so every descendant inherits + the font stack, base size, and letter spacing without explicit calls. */ +html { + font-family: var(--theme-font-sans); + font-size: var(--theme-base-size); + line-height: var(--theme-line-height); + letter-spacing: var(--theme-letter-spacing); +} + +body { + font-family: var(--theme-font-sans); +} + +code, kbd, pre, samp, .font-mono, .font-mono-ui { + font-family: var(--theme-font-mono); +} + +/* Density: scale the shadcn spacing utilities via a multiplier. The DS + components use `p-N` / `gap-N` / `space-*` classes which resolve against + Tailwind's spacing scale; multiplying `--spacing` at :root scales them + all proportionally in Tailwind v4. 
*/ +@theme inline { + --spacing: calc(0.25rem * var(--theme-spacing-mul, 1)); } /* Nousnet's hermes-agent layout bumps `small` and `code` to readable @@ -65,6 +107,11 @@ code { font-size: 0.875rem; } --color-ring: var(--midground); --color-popover: color-mix(in srgb, var(--midground-base) 4%, var(--background-base)); --color-popover-foreground: var(--midground); + + --radius-sm: calc(var(--theme-radius) - 4px); + --radius-md: calc(var(--theme-radius) - 2px); + --radius-lg: var(--theme-radius); + --radius-xl: calc(var(--theme-radius) + 4px); } @@ -94,9 +141,11 @@ code { font-size: 0.875rem; } /* System UI-monospace stack — distinct from `font-courier` (Courier Prime), used for dense data readouts where the display font would - break the grid. */ + break the grid. Routes through the theme's mono stack so themes + with a different monospace (JetBrains Mono, IBM Plex Mono, etc.) + still apply here. */ .font-mono-ui { - font-family: ui-monospace, 'SF Mono', 'Cascadia Mono', Menlo, monospace; + font-family: var(--theme-font-mono); } /* Subtle grain overlay for badges. */ diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 04951c02b7..45c0618a5f 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -1,5 +1,7 @@ const BASE = ""; +import type { DashboardTheme } from "@/themes/types"; + // Ephemeral session token for protected endpoints. // Injected into index.html by the server — never fetched via API. declare global { @@ -486,6 +488,9 @@ export interface DashboardThemeSummary { description: string; label: string; name: string; + /** Full theme definition for user themes; undefined for built-ins + * (which the frontend already has locally). 
*/ + definition?: DashboardTheme; } export interface DashboardThemesResponse { diff --git a/web/src/themes/context.tsx b/web/src/themes/context.tsx index 4bc50f9b33..1fa1c16324 100644 --- a/web/src/themes/context.tsx +++ b/web/src/themes/context.tsx @@ -8,16 +8,35 @@ import { type ReactNode, } from "react"; import { BUILTIN_THEMES, defaultTheme } from "./presets"; -import type { DashboardTheme, ThemeLayer, ThemePalette } from "./types"; +import type { + DashboardTheme, + ThemeColorOverrides, + ThemeDensity, + ThemeLayer, + ThemeLayout, + ThemePalette, + ThemeTypography, +} from "./types"; import { api } from "@/lib/api"; /** LocalStorage key — pre-applied before the React tree mounts to avoid * a visible flash of the default palette on theme-overridden installs. */ const STORAGE_KEY = "hermes-dashboard-theme"; +/** Tracks fontUrls we've already injected so multiple theme switches don't + * pile up tags. Keyed by URL. */ +const INJECTED_FONT_URLS = new Set(); + +// --------------------------------------------------------------------------- +// CSS variable builders +// --------------------------------------------------------------------------- + /** Turn a ThemeLayer into the two CSS expressions the DS consumes: * `--` (color-mix'd with alpha) and `---base` (opaque hex). */ -function layerVars(name: "background" | "midground" | "foreground", layer: ThemeLayer) { +function layerVars( + name: "background" | "midground" | "foreground", + layer: ThemeLayer, +): Record { const pct = Math.round(layer.alpha * 100); return { [`--${name}`]: `color-mix(in srgb, ${layer.hex} ${pct}%, transparent)`, @@ -26,28 +45,145 @@ function layerVars(name: "background" | "midground" | "foreground", layer: Theme }; } -/** Write a theme's palette to `document.documentElement` as inline styles. - * Inline styles beat the `:root { }` rule in index.css, so this cascades - * into every shadcn-compat token defined over the DS triplet. 
*/ -function applyPalette(palette: ThemePalette) { - const root = document.documentElement; - const vars = { +function paletteVars(palette: ThemePalette): Record { + return { ...layerVars("background", palette.background), ...layerVars("midground", palette.midground), ...layerVars("foreground", palette.foreground), "--warm-glow": palette.warmGlow, "--noise-opacity-mul": String(palette.noiseOpacity), }; +} + +const DENSITY_MULTIPLIERS: Record = { + compact: "0.85", + comfortable: "1", + spacious: "1.2", +}; + +function typographyVars(typo: ThemeTypography): Record { + return { + "--theme-font-sans": typo.fontSans, + "--theme-font-mono": typo.fontMono, + "--theme-font-display": typo.fontDisplay ?? typo.fontSans, + "--theme-base-size": typo.baseSize, + "--theme-line-height": typo.lineHeight, + "--theme-letter-spacing": typo.letterSpacing, + }; +} + +function layoutVars(layout: ThemeLayout): Record { + return { + "--radius": layout.radius, + "--theme-radius": layout.radius, + "--theme-spacing-mul": DENSITY_MULTIPLIERS[layout.density] ?? "1", + "--theme-density": layout.density, + }; +} + +/** Map a color-overrides key (camelCase) to its `--color-*` CSS var. 
*/ +const OVERRIDE_KEY_TO_VAR: Record = { + card: "--color-card", + cardForeground: "--color-card-foreground", + popover: "--color-popover", + popoverForeground: "--color-popover-foreground", + primary: "--color-primary", + primaryForeground: "--color-primary-foreground", + secondary: "--color-secondary", + secondaryForeground: "--color-secondary-foreground", + muted: "--color-muted", + mutedForeground: "--color-muted-foreground", + accent: "--color-accent", + accentForeground: "--color-accent-foreground", + destructive: "--color-destructive", + destructiveForeground: "--color-destructive-foreground", + success: "--color-success", + warning: "--color-warning", + border: "--color-border", + input: "--color-input", + ring: "--color-ring", +}; + +/** Keys we might have written on a previous theme — needed to know which + * properties to clear when a theme with fewer overrides replaces one + * with more. */ +const ALL_OVERRIDE_VARS = Object.values(OVERRIDE_KEY_TO_VAR); + +function overrideVars( + overrides: ThemeColorOverrides | undefined, +): Record { + if (!overrides) return {}; + const out: Record = {}; + for (const [key, value] of Object.entries(overrides)) { + if (!value) continue; + const cssVar = OVERRIDE_KEY_TO_VAR[key as keyof ThemeColorOverrides]; + if (cssVar) out[cssVar] = value; + } + return out; +} + +// --------------------------------------------------------------------------- +// Font stylesheet injection +// --------------------------------------------------------------------------- + +function injectFontStylesheet(url: string | undefined) { + if (!url || typeof document === "undefined") return; + if (INJECTED_FONT_URLS.has(url)) return; + // Also skip if the page already has this href (e.g. SSR'd or persisted). 
+ const existing = document.querySelector( + `link[rel="stylesheet"][href="${CSS.escape(url)}"]`, + ); + if (existing) { + INJECTED_FONT_URLS.add(url); + return; + } + const link = document.createElement("link"); + link.rel = "stylesheet"; + link.href = url; + link.setAttribute("data-hermes-theme-font", "true"); + document.head.appendChild(link); + INJECTED_FONT_URLS.add(url); +} + +// --------------------------------------------------------------------------- +// Apply a full theme to :root +// --------------------------------------------------------------------------- + +function applyTheme(theme: DashboardTheme) { + if (typeof document === "undefined") return; + const root = document.documentElement; + + // Clear any overrides from a previous theme before applying the new set. + for (const cssVar of ALL_OVERRIDE_VARS) { + root.style.removeProperty(cssVar); + } + + const vars = { + ...paletteVars(theme.palette), + ...typographyVars(theme.typography), + ...layoutVars(theme.layout), + ...overrideVars(theme.colorOverrides), + }; for (const [k, v] of Object.entries(vars)) { root.style.setProperty(k, v); } + + injectFontStylesheet(theme.typography.fontUrl); } +// --------------------------------------------------------------------------- +// Provider +// --------------------------------------------------------------------------- + export function ThemeProvider({ children }: { children: ReactNode }) { + /** Name of the currently active theme (built-in id or user YAML name). */ const [themeName, setThemeName] = useState(() => { if (typeof window === "undefined") return "default"; return window.localStorage.getItem(STORAGE_KEY) ?? "default"; }); + + /** All selectable themes (shown in the picker). Starts with just the + * built-ins; the API call below merges in user themes. 
*/ const [availableThemes, setAvailableThemes] = useState< Array<{ description: string; label: string; name: string }> >(() => @@ -58,18 +194,56 @@ export function ThemeProvider({ children }: { children: ReactNode }) { })), ); - useEffect(() => { - const t = BUILTIN_THEMES[themeName] ?? defaultTheme; - applyPalette(t.palette); - }, [themeName]); + /** Full definitions for user themes keyed by name — the API provides + * these so custom YAMLs apply without a client-side stub. */ + const [userThemeDefs, setUserThemeDefs] = useState< + Record + >({}); + // Resolve a theme name to a full DashboardTheme, falling back to default + // only when neither a built-in nor a user theme is found. + const resolveTheme = useCallback( + (name: string): DashboardTheme => { + return ( + BUILTIN_THEMES[name] ?? + userThemeDefs[name] ?? + defaultTheme + ); + }, + [userThemeDefs], + ); + + // Re-apply on every themeName change, or when user themes arrive from + // the API (since the active theme might be a user theme whose definition + // hadn't loaded yet on first render). + useEffect(() => { + applyTheme(resolveTheme(themeName)); + }, [themeName, resolveTheme]); + + // Load server-side themes (built-ins + user YAMLs) once on mount. useEffect(() => { let cancelled = false; api .getThemes() .then((resp) => { if (cancelled) return; - if (resp.themes?.length) setAvailableThemes(resp.themes); + if (resp.themes?.length) { + setAvailableThemes( + resp.themes.map((t) => ({ + name: t.name, + label: t.label, + description: t.description, + })), + ); + // Index any definitions the server shipped (user themes). 
+ const defs: Record = {}; + for (const entry of resp.themes) { + if (entry.definition) { + defs[entry.name] = entry.definition; + } + } + if (Object.keys(defs).length > 0) setUserThemeDefs(defs); + } if (resp.active && resp.active !== themeName) { setThemeName(resp.active); window.localStorage.setItem(STORAGE_KEY, resp.active); @@ -79,23 +253,35 @@ export function ThemeProvider({ children }: { children: ReactNode }) { return () => { cancelled = true; }; + // eslint-disable-next-line react-hooks/exhaustive-deps }, []); - const setTheme = useCallback((name: string) => { - const next = BUILTIN_THEMES[name] ? name : "default"; - setThemeName(next); - window.localStorage.setItem(STORAGE_KEY, next); - api.setTheme(next).catch(() => {}); - }, []); + const setTheme = useCallback( + (name: string) => { + // Accept any name the server told us exists OR any built-in. + const knownNames = new Set([ + ...Object.keys(BUILTIN_THEMES), + ...availableThemes.map((t) => t.name), + ...Object.keys(userThemeDefs), + ]); + const next = knownNames.has(name) ? name : "default"; + setThemeName(next); + if (typeof window !== "undefined") { + window.localStorage.setItem(STORAGE_KEY, next); + } + api.setTheme(next).catch(() => {}); + }, + [availableThemes, userThemeDefs], + ); const value = useMemo( () => ({ - theme: BUILTIN_THEMES[themeName] ?? defaultTheme, + theme: resolveTheme(themeName), themeName, availableThemes, setTheme, }), - [themeName, availableThemes, setTheme], + [themeName, availableThemes, setTheme, resolveTheme], ); return {children}; diff --git a/web/src/themes/presets.ts b/web/src/themes/presets.ts index 20a7b47c22..d8ae293cd0 100644 --- a/web/src/themes/presets.ts +++ b/web/src/themes/presets.ts @@ -1,17 +1,43 @@ -import type { DashboardTheme } from "./types"; +import type { DashboardTheme, ThemeTypography, ThemeLayout } from "./types"; /** * Built-in dashboard themes. 
* - * The `default` theme mirrors LENS_0 (canonical Hermes teal) exactly — the - * same triplet `src/index.css` declares on `:root`. Applying it should be a - * visual no-op; other themes override the triplet + warm-glow and let the DS - * cascade handle every derived surface. + * Each theme defines its own palette, typography, and layout so switching + * themes produces visible changes beyond just color — fonts, density, and + * corner-radius all shift to match the theme's personality. * * Theme names must stay in sync with the backend's * `_BUILTIN_DASHBOARD_THEMES` list in `hermes_cli/web_server.py`. */ +// --------------------------------------------------------------------------- +// Shared typography / layout presets +// --------------------------------------------------------------------------- + +/** Default system stack — neutral, safe fallback for every platform. */ +const SYSTEM_SANS = + 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif'; +const SYSTEM_MONO = + 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace'; + +const DEFAULT_TYPOGRAPHY: ThemeTypography = { + fontSans: SYSTEM_SANS, + fontMono: SYSTEM_MONO, + baseSize: "15px", + lineHeight: "1.55", + letterSpacing: "0", +}; + +const DEFAULT_LAYOUT: ThemeLayout = { + radius: "0.5rem", + density: "comfortable", +}; + +// --------------------------------------------------------------------------- +// Themes +// --------------------------------------------------------------------------- + export const defaultTheme: DashboardTheme = { name: "default", label: "Hermes Teal", @@ -23,6 +49,8 @@ export const defaultTheme: DashboardTheme = { warmGlow: "rgba(255, 189, 56, 0.35)", noiseOpacity: 1, }, + typography: DEFAULT_TYPOGRAPHY, + layout: DEFAULT_LAYOUT, }; export const midnightTheme: DashboardTheme = { @@ -36,6 +64,19 @@ export const midnightTheme: DashboardTheme = { warmGlow: "rgba(167, 139, 250, 0.32)", noiseOpacity: 0.8, }, + typography: { + fontSans: 
`"Inter", ${SYSTEM_SANS}`, + fontMono: `"JetBrains Mono", ${SYSTEM_MONO}`, + fontUrl: + "https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;700&display=swap", + baseSize: "14px", + lineHeight: "1.6", + letterSpacing: "-0.005em", + }, + layout: { + radius: "0.75rem", + density: "comfortable", + }, }; export const emberTheme: DashboardTheme = { @@ -49,6 +90,23 @@ export const emberTheme: DashboardTheme = { warmGlow: "rgba(249, 115, 22, 0.38)", noiseOpacity: 1, }, + typography: { + fontSans: `"Spectral", Georgia, "Times New Roman", serif`, + fontMono: `"IBM Plex Mono", ${SYSTEM_MONO}`, + fontUrl: + "https://fonts.googleapis.com/css2?family=Spectral:wght@400;500;600;700&family=IBM+Plex+Mono:wght@400;500;700&display=swap", + baseSize: "15px", + lineHeight: "1.6", + letterSpacing: "0", + }, + layout: { + radius: "0.25rem", + density: "comfortable", + }, + colorOverrides: { + destructive: "#c92d0f", + warning: "#f97316", + }, }; export const monoTheme: DashboardTheme = { @@ -62,6 +120,19 @@ export const monoTheme: DashboardTheme = { warmGlow: "rgba(255, 255, 255, 0.1)", noiseOpacity: 0.6, }, + typography: { + fontSans: `"IBM Plex Sans", ${SYSTEM_SANS}`, + fontMono: `"IBM Plex Mono", ${SYSTEM_MONO}`, + fontUrl: + "https://fonts.googleapis.com/css2?family=IBM+Plex+Sans:wght@400;500;600&family=IBM+Plex+Mono:wght@400;500&display=swap", + baseSize: "13px", + lineHeight: "1.5", + letterSpacing: "0", + }, + layout: { + radius: "0", + density: "compact", + }, }; export const cyberpunkTheme: DashboardTheme = { @@ -75,6 +146,24 @@ export const cyberpunkTheme: DashboardTheme = { warmGlow: "rgba(0, 255, 136, 0.22)", noiseOpacity: 1.2, }, + typography: { + fontSans: `"Share Tech Mono", "JetBrains Mono", ${SYSTEM_MONO}`, + fontMono: `"Share Tech Mono", "JetBrains Mono", ${SYSTEM_MONO}`, + fontUrl: + "https://fonts.googleapis.com/css2?family=Share+Tech+Mono&family=JetBrains+Mono:wght@400;700&display=swap", + baseSize: "14px", + 
lineHeight: "1.5", + letterSpacing: "0.02em", + }, + layout: { + radius: "0", + density: "compact", + }, + colorOverrides: { + success: "#00ff88", + warning: "#ffd700", + destructive: "#ff0055", + }, }; export const roseTheme: DashboardTheme = { @@ -88,6 +177,19 @@ export const roseTheme: DashboardTheme = { warmGlow: "rgba(249, 168, 212, 0.3)", noiseOpacity: 0.9, }, + typography: { + fontSans: `"Fraunces", Georgia, serif`, + fontMono: `"DM Mono", ${SYSTEM_MONO}`, + fontUrl: + "https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,400;9..144,500;9..144,600&family=DM+Mono:wght@400;500&display=swap", + baseSize: "16px", + lineHeight: "1.7", + letterSpacing: "0", + }, + layout: { + radius: "1rem", + density: "spacious", + }, }; export const BUILTIN_THEMES: Record = { diff --git a/web/src/themes/types.ts b/web/src/themes/types.ts index 4a423aeeee..c83c6464d9 100644 --- a/web/src/themes/types.ts +++ b/web/src/themes/types.ts @@ -1,13 +1,22 @@ /** * Dashboard theme model. * - * Unlike the pre-DS implementation (which overrode 21 shadcn tokens directly), - * themes are now expressed in the Nous DS's own 3-triplet vocabulary — - * `background`, `midground`, `foreground` — plus a warm-glow tint for the - * vignette in . All downstream shadcn-compat tokens - * (`--color-card`, `--color-muted-foreground`, `--color-border`, etc.) are - * defined in `src/index.css` as `color-mix()` expressions over the triplets, - * so overriding the triplets at runtime cascades to every surface. + * Themes customise three orthogonal layers: + * + * 1. `palette` — the 3-layer color triplet (background/midground/ + * foreground) + warm-glow + noise opacity. The + * design-system cascade in `src/index.css` derives + * every shadcn-compat token (card, muted, border, + * primary, etc.) from this triplet via `color-mix()`. + * 2. `typography` — font families, base font size, line height, + * letter spacing. 
An optional `fontUrl` is injected + * as `` so self-hosted and + * Google/Bunny/etc-hosted fonts both work. + * 3. `layout` — corner radius and density (spacing multiplier). + * + * Plus an optional `colorOverrides` escape hatch for themes that want to + * pin specific shadcn tokens to exact values (e.g. a pastel theme that + * needs a softer `destructive` red than the derived default). */ /** A color layer: hex base + alpha (0–1). */ @@ -31,14 +40,88 @@ export interface ThemePalette { noiseOpacity: number; } +export interface ThemeTypography { + /** CSS font-family stack for sans-serif body copy. */ + fontSans: string; + /** CSS font-family stack for monospace / code blocks. */ + fontMono: string; + /** Optional display/heading font stack. Falls back to `fontSans`. */ + fontDisplay?: string; + /** Optional external stylesheet URL (e.g. Google Fonts, Bunny Fonts, + * self-hosted .woff2 @font-face sheet). Injected as a in + * on theme switch. Same URL is never injected twice. */ + fontUrl?: string; + /** Root font size (controls rem scale). Example: `"14px"`, `"16px"`. */ + baseSize: string; + /** Default line-height. Example: `"1.5"`, `"1.65"`. */ + lineHeight: string; + /** Default letter-spacing. Example: `"0"`, `"0.01em"`, `"-0.01em"`. */ + letterSpacing: string; +} + +export type ThemeDensity = "compact" | "comfortable" | "spacious"; + +export interface ThemeLayout { + /** Corner-radius token. Example: `"0"`, `"0.25rem"`, `"0.5rem"`, + * `"1rem"`. Maps to `--radius` and cascades into every component. */ + radius: string; + /** Spacing multiplier. `compact` = 0.85, `comfortable` = 1.0 (default), + * `spacious` = 1.2. Applied via the `--spacing-mul` CSS var. */ + density: ThemeDensity; +} + +/** Optional hex overrides keyed by shadcn-compat token name (without the + * `--color-` prefix). Any key set here wins over the DS cascade. 
*/ +export interface ThemeColorOverrides { + card?: string; + cardForeground?: string; + popover?: string; + popoverForeground?: string; + primary?: string; + primaryForeground?: string; + secondary?: string; + secondaryForeground?: string; + muted?: string; + mutedForeground?: string; + accent?: string; + accentForeground?: string; + destructive?: string; + destructiveForeground?: string; + success?: string; + warning?: string; + border?: string; + input?: string; + ring?: string; +} + export interface DashboardTheme { description: string; label: string; name: string; palette: ThemePalette; + typography: ThemeTypography; + layout: ThemeLayout; + colorOverrides?: ThemeColorOverrides; +} + +/** + * Wire response shape for `GET /api/dashboard/themes`. + * + * The `themes` list is intentionally partial — built-in themes are fully + * defined in `presets.ts`; user themes carry their full definition so the + * client can apply them without a second round-trip. + */ +export interface ThemeListEntry { + description: string; + label: string; + name: string; + /** Full theme definition. Present for user-defined themes loaded from + * `~/.hermes/dashboard-themes/*.yaml`; undefined for built-ins (the + * client already has those in `BUILTIN_THEMES`). */ + definition?: DashboardTheme; } export interface ThemeListResponse { active: string; - themes: Array<{ description: string; label: string; name: string }>; + themes: ThemeListEntry[]; } diff --git a/website/docs/user-guide/features/web-dashboard.md b/website/docs/user-guide/features/web-dashboard.md index 2ef04297dc..ebcfe3698b 100644 --- a/website/docs/user-guide/features/web-dashboard.md +++ b/website/docs/user-guide/features/web-dashboard.md @@ -301,68 +301,130 @@ When you run `hermes update`, the web frontend is automatically rebuilt if `npm` ## Themes -The dashboard supports visual themes that change colors, overlay effects, and overall feel. 
Switch themes live from the header bar — click the palette icon next to the language switcher. +Themes control the dashboard's visual presentation across three layers: -### Built-in Themes +- **Palette** — colors (background, text, accents, warm glow, noise) +- **Typography** — font families, base size, line height, letter spacing +- **Layout** — corner radius and density (spacing multiplier) -| Theme | Description | -|-------|-------------| -| **Hermes Teal** | Classic dark teal (default) | -| **Midnight** | Deep blue-violet with cool accents | -| **Ember** | Warm crimson and bronze | -| **Mono** | Clean grayscale, minimal | -| **Cyberpunk** | Neon green on black | -| **Rosé** | Soft pink and warm ivory | +Switch themes live from the header bar — click the palette icon next to the language switcher. Selection persists to `config.yaml` under `dashboard.theme` and is restored on page load. -Theme selection is persisted to `config.yaml` under `dashboard.theme` and restored on page load. +### Built-in themes -### Custom Themes +Each built-in ships its own palette, typography, and layout — switching produces visible changes beyond color alone. 
-Create a YAML file in `~/.hermes/dashboard-themes/`: +| Theme | Palette | Typography | Layout | +|-------|---------|------------|--------| +| **Hermes Teal** (`default`) | Dark teal + cream | System stack, 15px | 0.5rem radius, comfortable | +| **Midnight** (`midnight`) | Deep blue-violet | Inter + JetBrains Mono, 14px | 0.75rem radius, comfortable | +| **Ember** (`ember`) | Warm crimson / bronze | Spectral (serif) + IBM Plex Mono, 15px | 0.25rem radius, comfortable | +| **Mono** (`mono`) | Grayscale | IBM Plex Sans + IBM Plex Mono, 13px | 0 radius, compact | +| **Cyberpunk** (`cyberpunk`) | Neon green on black | Share Tech Mono everywhere, 14px | 0 radius, compact | +| **Rosé** (`rose`) | Pink and ivory | Fraunces (serif) + DM Mono, 16px | 1rem radius, spacious | + +Themes that reference Google Fonts (everything except Hermes Teal) load the stylesheet on demand — the first time you switch to them, a `` tag is injected into ``. + +### Custom themes + +Drop a YAML file in `~/.hermes/dashboard-themes/` and it appears in the picker automatically. The file can be as minimal as a name plus the fields you want to override — every missing field inherits a sane default. 
+ +Minimal example (colors only, bare hex shorthand): + +```yaml +# ~/.hermes/dashboard-themes/neon.yaml +name: neon +label: Neon +description: Pure magenta on black +colors: + background: "#000000" + midground: "#ff00ff" +``` + +Full example (every knob): ```yaml # ~/.hermes/dashboard-themes/ocean.yaml name: ocean -label: Ocean +label: Ocean Deep description: Deep sea blues with coral accents -colors: - background: "#0a1628" - foreground: "#e0f0ff" - card: "#0f1f35" - card-foreground: "#e0f0ff" - primary: "#ff6b6b" - primary-foreground: "#0a1628" - secondary: "#152540" - secondary-foreground: "#e0f0ff" - muted: "#1a2d4a" - muted-foreground: "#7899bb" - accent: "#1f3555" - accent-foreground: "#e0f0ff" - destructive: "#fb2c36" - destructive-foreground: "#fff" - success: "#4ade80" - warning: "#fbbf24" - border: "color-mix(in srgb, #ff6b6b 15%, transparent)" - input: "color-mix(in srgb, #ff6b6b 15%, transparent)" - ring: "#ff6b6b" - popover: "#0f1f35" - popover-foreground: "#e0f0ff" +palette: + background: + hex: "#0a1628" + alpha: 1.0 + midground: + hex: "#a8d0ff" + alpha: 1.0 + foreground: + hex: "#ffffff" + alpha: 0.0 + warmGlow: "rgba(255, 107, 107, 0.35)" + noiseOpacity: 0.7 -overlay: - noiseOpacity: 0.08 - noiseBlendMode: color-dodge - warmGlowOpacity: 0.15 - warmGlowColor: "rgba(255,107,107,0.2)" +typography: + fontSans: "Poppins, system-ui, sans-serif" + fontMono: "Fira Code, ui-monospace, monospace" + fontDisplay: "Poppins, system-ui, sans-serif" # optional, falls back to fontSans + fontUrl: "https://fonts.googleapis.com/css2?family=Poppins:wght@400;500;600&family=Fira+Code:wght@400;500&display=swap" + baseSize: "15px" + lineHeight: "1.6" + letterSpacing: "-0.003em" + +layout: + radius: "0.75rem" # 0 | 0.25rem | 0.5rem | 0.75rem | 1rem | any length + density: comfortable # compact | comfortable | spacious + +# Optional — pin individual shadcn tokens that would otherwise derive from +# the palette. Any key listed here wins over the palette cascade. 
+colorOverrides: + destructive: "#ff6b6b" + ring: "#ff6b6b" ``` -The 21 color tokens map directly to the CSS custom properties used throughout the dashboard. All fields are required for custom themes. The `overlay` section is optional — it controls the grain texture and ambient glow effects. +Refresh the dashboard after creating the file. -Refresh the dashboard after creating the file. Custom themes appear in the theme picker alongside built-ins. +### Palette model + +The palette is a 3-layer triplet — **background**, **midground**, **foreground** — plus a warm-glow rgba() string and a noise-opacity multiplier. Every shadcn token (card, muted, border, primary, popover, etc.) is derived from this triplet via CSS `color-mix()` in the dashboard's stylesheet, so overriding three colors cascades into the whole UI. + +- `background` — deepest canvas color (typically near-black). The page background and card fill come from this. +- `midground` — primary text and accent. Most UI chrome reads this. +- `foreground` — top-layer highlight. In the default theme this is white at alpha 0 (invisible); themes that want a bright accent on top can raise its alpha. +- `warmGlow` — rgba() vignette color used by the ambient backdrop. +- `noiseOpacity` — 0–1.2 multiplier on the grain overlay. Lower = softer, higher = grittier. + +Each layer accepts `{hex, alpha}` or a bare hex string (alpha defaults to 1.0). + +### Typography model + +| Key | Type | Description | +|-----|------|-------------| +| `fontSans` | string | CSS font-family stack for body copy (applied to `html`, `body`) | +| `fontMono` | string | CSS font-family stack for code blocks, ``, `.font-mono` utilities, dense readouts | +| `fontDisplay` | string | Optional heading/display font stack. Falls back to `fontSans` | +| `fontUrl` | string | Optional external stylesheet URL. Injected as `` in `` on theme switch. Same URL is never injected twice. 
Works with Google Fonts, Bunny Fonts, self-hosted `@font-face` sheets, anything you can link | +| `baseSize` | string | Root font size — controls the rem scale for the whole dashboard. Example: `"14px"`, `"16px"` | +| `lineHeight` | string | Default line-height, e.g. `"1.5"`, `"1.65"` | +| `letterSpacing` | string | Default letter-spacing, e.g. `"0"`, `"0.01em"`, `"-0.01em"` | + +### Layout model + +| Key | Values | Description | +|-----|--------|-------------| +| `radius` | any CSS length | Corner-radius token. Cascades into `--radius-sm/md/lg/xl` so every rounded element shifts together. | +| `density` | `compact` \| `comfortable` \| `spacious` | Spacing multiplier. Compact = 0.85×, comfortable = 1.0× (default), spacious = 1.2×. Scales Tailwind's base spacing, so padding, gap, and space-between utilities all shift proportionally. | + +### Color overrides (optional) + +Most themes won't need this — the 3-layer palette derives every shadcn token. But if you want a specific accent that the derivation won't produce (a softer destructive red for a pastel theme, a specific success green for a brand), pin individual tokens here. + +Supported keys: `card`, `cardForeground`, `popover`, `popoverForeground`, `primary`, `primaryForeground`, `secondary`, `secondaryForeground`, `muted`, `mutedForeground`, `accent`, `accentForeground`, `destructive`, `destructiveForeground`, `success`, `warning`, `border`, `input`, `ring`. + +Any key set here overrides the derived value for the active theme only — switching to another theme clears the overrides. ### Theme API | Endpoint | Method | Description | |----------|--------|-------------| -| `/api/dashboard/themes` | GET | List available themes + active name | +| `/api/dashboard/themes` | GET | List available themes + active name. Built-ins return `{name, label, description}`; user themes also include a `definition` field with the full normalised theme object. | | `/api/dashboard/theme` | PUT | Set active theme. 
Body: `{"name": "midnight"}` | From b5333abc3025b59312788b29093ec6bb88052895 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:50:21 -0700 Subject: [PATCH 021/264] fix(auth): refuse to touch real auth.json during pytest; delete sandbox-escaping test (#14729) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A test in tests/agent/test_credential_pool.py (test_try_refresh_current_updates_only_current_entry) monkeypatched refresh_codex_oauth_pure() to return the literal fixture strings 'access-new'/'refresh-new', then executed the real production code path in agent/credential_pool.py::try_refresh_current which calls _sync_device_code_entry_to_auth_store → _save_provider_state → writes to `providers.openai-codex.tokens`. That writer resolves the target via get_hermes_home()/auth.json. If the test ran with HERMES_HOME unset (direct pytest invocation, IDE runner bypassing conftest discovery, or any other sandbox escape), it would overwrite the real user's auth store with the fixture strings. Observed in the wild: Teknium's ~/.hermes/auth.json providers.openai-codex.tokens held 'access-new'/'refresh-new' for five days. His CLI kept working because the credential_pool entries still held real JWTs, but `hermes model`'s live discovery path (which reads via resolve_codex_runtime_credentials → _read_codex_tokens → providers.tokens) was silently 401-ing. Fixes: - Delete test_try_refresh_current_updates_only_current_entry. It was the only test that exercised a writer hitting providers.openai-codex.tokens with literal stub tokens. The entry-level rotation behavior it asserted is still covered by test_mark_exhausted_and_rotate_persists_status above. - Add a seat belt in hermes_cli.auth._auth_file_path(): if PYTEST_CURRENT_TEST is set AND the resolved path equals the real ~/.hermes/auth.json, raise with a clear message. In production (no PYTEST_CURRENT_TEST), a single dict lookup. 
Any future test that forgets to monkeypatch HERMES_HOME fails loudly instead of corrupting the user's credentials. Validation: - production (no PYTEST_CURRENT_TEST): returns real path, unchanged behavior - pytest + HERMES_HOME unset (points at real home): raises with message - pytest + HERMES_HOME=/tmp/...: returns tmp path, tests pass normally --- hermes_cli/auth.py | 20 +++++++++- tests/agent/test_credential_pool.py | 60 ----------------------------- 2 files changed, 19 insertions(+), 61 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 98ac4edb31..28c5bd9a61 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -619,7 +619,25 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any # ============================================================================= def _auth_file_path() -> Path: - return get_hermes_home() / "auth.json" + path = get_hermes_home() / "auth.json" + # Seat belt: if pytest is running and HERMES_HOME resolves to the real + # user's auth store, refuse rather than silently corrupt it. This catches + # tests that forgot to monkeypatch HERMES_HOME, tests invoked without the + # hermetic conftest, or sandbox escapes via threads/subprocesses. In + # production (no PYTEST_CURRENT_TEST) this is a single dict lookup. + if os.environ.get("PYTEST_CURRENT_TEST"): + real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False) + try: + resolved = path.resolve(strict=False) + except Exception: + resolved = path + if resolved == real_home_auth: + raise RuntimeError( + f"Refusing to touch real user auth store during test run: {path}. " + "Set HERMES_HOME to a tmp_path in your test fixture, or run " + "via scripts/run_tests.sh for hermetic CI-parity env." 
+ ) + return path def _auth_lock_path() -> Path: diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py index 7ec0385b60..76e1412bf4 100644 --- a/tests/agent/test_credential_pool.py +++ b/tests/agent/test_credential_pool.py @@ -333,66 +333,6 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch): assert persisted["last_error_code"] == 402 -def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch): - monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) - _write_auth_store( - tmp_path, - { - "version": 1, - "credential_pool": { - "openai-codex": [ - { - "id": "cred-1", - "label": "primary", - "auth_type": "oauth", - "priority": 0, - "source": "device_code", - "access_token": "access-old", - "refresh_token": "refresh-old", - "base_url": "https://chatgpt.com/backend-api/codex", - }, - { - "id": "cred-2", - "label": "secondary", - "auth_type": "oauth", - "priority": 1, - "source": "device_code", - "access_token": "access-other", - "refresh_token": "refresh-other", - "base_url": "https://chatgpt.com/backend-api/codex", - }, - ] - }, - }, - ) - - from agent.credential_pool import load_pool - - monkeypatch.setattr( - "hermes_cli.auth.refresh_codex_oauth_pure", - lambda access_token, refresh_token, timeout_seconds=20.0: { - "access_token": "access-new", - "refresh_token": "refresh-new", - }, - ) - - pool = load_pool("openai-codex") - current = pool.select() - assert current.id == "cred-1" - - refreshed = pool.try_refresh_current() - - assert refreshed is not None - assert refreshed.access_token == "access-new" - - auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text()) - primary, secondary = auth_payload["credential_pool"]["openai-codex"] - assert primary["access_token"] == "access-new" - assert primary["refresh_token"] == "refresh-new" - assert secondary["access_token"] == "access-other" - assert secondary["refresh_token"] == "refresh-other" - - def 
test_load_pool_seeds_env_api_key(tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded") From 64e61656862b24776d5d7f5b87699097a16b9ec3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:56:26 -0700 Subject: [PATCH 022/264] fix(delegate): remove model-facing max_iterations override; config is authoritative (#14732) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously delegate_task exposed 'max_iterations' in its JSON schema and used `max_iterations or default_max_iter` — so a model guessing conservatively (or copy-pasting a docstring hint like 'Only set lower for simple tasks') could silently shrink a subagent's budget below the user's configured delegation.max_iterations. One such call this session capped a deep forensic audit at 40 iterations while the user's config was set to 250. Changes: - Drop 'max_iterations' from DELEGATE_TASK_SCHEMA['parameters']['properties']. Models can no longer emit it. - In delegate_task(): ignore any caller-supplied max_iterations, always use delegation.max_iterations from config. Log at debug if a stale schema or internal caller still passes one through. - Keep the Python kwarg on the function signature for internal callers (_build_child_agent tests pass it through the plumbing layer). - Update test_schema_valid to assert the param is now absent (intentional contract change, not a change-detector). 
--- tests/tools/test_delegate.py | 5 ++++- tools/delegate_tool.py | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 356f72d889..9c93f05c7a 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -69,7 +69,10 @@ class TestDelegateRequirements(unittest.TestCase): self.assertIn("tasks", props) self.assertIn("context", props) self.assertIn("toolsets", props) - self.assertIn("max_iterations", props) + # max_iterations is intentionally NOT exposed to the model — it's + # config-authoritative via delegation.max_iterations so users get + # predictable budgets. + self.assertNotIn("max_iterations", props) self.assertNotIn("maxItems", props["tasks"]) # removed — limit is now runtime-configurable diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 7b0179c4c0..2c35c7c7e7 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -1558,7 +1558,18 @@ def delegate_task( # Load config cfg = _load_config() default_max_iter = cfg.get("max_iterations", DEFAULT_MAX_ITERATIONS) - effective_max_iter = max_iterations or default_max_iter + # Model-supplied max_iterations is ignored — the config value is authoritative + # so users get predictable budgets. The kwarg is retained for internal callers + # and tests; a model-emitted value here would only shrink the budget and + # surprise the user mid-run. Log and drop it if one slips through from a + # cached tool schema or a stale provider. + if max_iterations is not None and max_iterations != default_max_iter: + logger.debug( + "delegate_task: ignoring caller-supplied max_iterations=%s; " + "using delegation.max_iterations=%s from config", + max_iterations, default_max_iter, + ) + effective_max_iter = default_max_iter # Resolve delegation credentials (provider:model pair). 
# When delegation.provider is configured, this resolves the full credential @@ -2098,13 +2109,6 @@ DELEGATE_TASK_SCHEMA = { "When provided, top-level goal/context/toolsets are ignored." ), }, - "max_iterations": { - "type": "integer", - "description": ( - "Max tool-calling turns per subagent (default: 50). " - "Only set lower for simple tasks." - ), - }, "role": { "type": "string", "enum": ["leaf", "orchestrator"], From 327b57da91e5699564dd423bbb112ea95671b6e5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:59:29 -0700 Subject: [PATCH 023/264] fix(gateway): kill tool subprocesses before adapter disconnect on drain timeout (#14728) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #8202. Root cause: stop() reclaimed tool-call bash/sleep children only at the very end of the shutdown sequence — after a 60s drain, 5s interrupt grace, and per-adapter disconnect. Under systemd (TimeoutStopSec bounded by drain_timeout), that meant the cgroup SIGKILL escalation fired first, and systemd reaped the bash/sleep children instead of us. Fix: - Extract tool-subprocess cleanup into a local helper _kill_tool_subprocesses() in _stop_impl(). - Invoke it eagerly right after _interrupt_running_agents() on the drain-timeout path, before adapter disconnect. - Keep the existing catch-all call at the end for the graceful path and defense in depth against mid-teardown respawns. - Bump generated systemd unit TimeoutStopSec to drain_timeout + 30s so cleanup + disconnect + DB close has headroom above the drain budget, matching the 'subprocess timeout > TimeoutStopSec + margin' rule from the skill. Tests: - New: test_gateway_stop_kills_tool_subprocesses_before_adapter_disconnect_on_timeout asserts kill_all() runs before disconnect() when drain times out. 
- New: test_gateway_stop_kills_tool_subprocesses_on_graceful_path guards that the final catch-all still fires when drain succeeds (regression guard against accidental removal during refactor). - Updated: existing systemd unit generator tests expect TimeoutStopSec=90 (= 60s drain + 30s headroom) with explanatory comment. --- gateway/run.py | 67 ++++++++++++++----- hermes_cli/gateway.py | 9 ++- tests/gateway/test_gateway_shutdown.py | 83 ++++++++++++++++++++++++ tests/hermes_cli/test_gateway_service.py | 10 ++- 4 files changed, 150 insertions(+), 19 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index dcee18e518..2c377980cd 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2560,6 +2560,40 @@ class GatewayRunner: return async def _stop_impl() -> None: + def _kill_tool_subprocesses(phase: str) -> None: + """Kill tool subprocesses + tear down terminal envs + browsers. + + Called twice in the shutdown path: once eagerly after a + drain timeout forces agent interrupt (so we reclaim bash/ + sleep children before systemd TimeoutStopSec escalates to + SIGKILL on the cgroup — #8202), and once as a final + catch-all at the end of _stop_impl() for the graceful + path or anything respawned mid-teardown. + + All steps are best-effort; exceptions are swallowed so + one subsystem's failure doesn't block the rest. 
+ """ + try: + from tools.process_registry import process_registry + _killed = process_registry.kill_all() + if _killed: + logger.info( + "Shutdown (%s): killed %d tool subprocess(es)", + phase, _killed, + ) + except Exception as _e: + logger.debug("process_registry.kill_all (%s) error: %s", phase, _e) + try: + from tools.terminal_tool import cleanup_all_environments + cleanup_all_environments() + except Exception as _e: + logger.debug("cleanup_all_environments (%s) error: %s", phase, _e) + try: + from tools.browser_tool import cleanup_all_browsers + cleanup_all_browsers() + except Exception as _e: + logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e) + logger.info( "Stopping gateway%s...", " for restart" if self._restart_requested else "", @@ -2621,6 +2655,16 @@ class GatewayRunner: self._update_runtime_status("draining") await asyncio.sleep(0.1) + # Kill lingering tool subprocesses NOW, before we spend more + # budget on adapter disconnect / session DB close. Under + # systemd (TimeoutStopSec bounded by drain_timeout+headroom), + # deferring this to the end of stop() risks systemd escalating + # to SIGKILL on the cgroup first — at which point bash/sleep + # children left behind by an interrupted terminal tool get + # killed by systemd instead of us (issue #8202). The final + # catch-all cleanup below still runs for the graceful path. + _kill_tool_subprocesses("post-interrupt") + if self._restart_requested and self._restart_detached: try: await self._launch_detached_restart_command() @@ -2656,22 +2700,13 @@ class GatewayRunner: self._shutdown_event.set() # Global cleanup: kill any remaining tool subprocesses not tied - # to a specific agent (catch-all for zombie prevention). 
- try: - from tools.process_registry import process_registry - process_registry.kill_all() - except Exception: - pass - try: - from tools.terminal_tool import cleanup_all_environments - cleanup_all_environments() - except Exception: - pass - try: - from tools.browser_tool import cleanup_all_browsers - cleanup_all_browsers() - except Exception: - pass + # to a specific agent (catch-all for zombie prevention). On the + # drain-timeout path we already did this earlier after agent + # interrupt — this second call catches (a) the graceful path + # where drain succeeded without interrupt, and (b) anything + # that got respawned between the earlier call and adapter + # disconnect (defense in depth; safe to call repeatedly). + _kill_tool_subprocesses("final-cleanup") # Close SQLite session DBs so the WAL write lock is released. # Without this, --replace and similar restart flows leave the diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 7796cc5759..9773299d5c 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -1469,7 +1469,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) path_entries.append(resolved_node_dir) common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"] - restart_timeout = max(60, int(_get_restart_drain_timeout() or 0)) + # systemd's TimeoutStopSec must exceed the gateway's drain_timeout so + # there's budget left for post-interrupt cleanup (tool subprocess kill, + # adapter disconnect, session DB close) before systemd escalates to + # SIGKILL on the cgroup — otherwise bash/sleep tool-call children left + # by a force-interrupted agent get reaped by systemd instead of us + # (#8202). 30s of headroom covers the worst case we've observed. 
+ _drain_timeout = int(_get_restart_drain_timeout() or 0) + restart_timeout = max(60, _drain_timeout) + 30 if system: username, group_name, home_dir = _system_service_identity(run_as_user) diff --git a/tests/gateway/test_gateway_shutdown.py b/tests/gateway/test_gateway_shutdown.py index 4dc9919bc7..137ddfd036 100644 --- a/tests/gateway/test_gateway_shutdown.py +++ b/tests/gateway/test_gateway_shutdown.py @@ -145,3 +145,86 @@ async def test_drain_active_agents_throttles_status_updates(): # Start, one count-change update, and final update. Allow one extra update # if the loop observes the zero-agent state before exiting. assert 3 <= runner._update_runtime_status.call_count <= 4 + + +@pytest.mark.asyncio +async def test_gateway_stop_kills_tool_subprocesses_before_adapter_disconnect_on_timeout(monkeypatch): + """On drain timeout, tool subprocesses must be killed BEFORE adapter + disconnect so systemd's TimeoutStopSec doesn't SIGKILL the cgroup with + bash/sleep children still attached (#8202).""" + runner, adapter = make_restart_runner() + runner._restart_drain_timeout = 0.01 # force timeout path + + call_order: list[str] = [] + + def _fake_kill_all(task_id=None): + call_order.append("kill_all") + return 2 + + def _fake_cleanup_envs(): + call_order.append("cleanup_environments") + + def _fake_cleanup_browsers(): + call_order.append("cleanup_browsers") + + async def _disconnect(): + call_order.append("disconnect") + + # Patch the module-level names the stop() helper imports lazily. 
+ import tools.process_registry as _pr + import tools.terminal_tool as _tt + import tools.browser_tool as _bt + monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all) + monkeypatch.setattr(_tt, "cleanup_all_environments", _fake_cleanup_envs) + monkeypatch.setattr(_bt, "cleanup_all_browsers", _fake_cleanup_browsers) + + adapter.disconnect = _disconnect + + runner._running_agents = {"session": MagicMock()} + + with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"): + await runner.stop() + + # First kill_all must precede the first disconnect. (Both the eager + # post-interrupt cleanup and the final catch-all call _kill_tool_ + # subprocesses, so we expect kill_all to appear twice total.) + assert "kill_all" in call_order + assert "disconnect" in call_order + first_kill = call_order.index("kill_all") + first_disconnect = call_order.index("disconnect") + assert first_kill < first_disconnect, ( + f"Tool subprocesses must be killed before adapter disconnect on " + f"drain timeout, got order: {call_order}" + ) + # Defense-in-depth final cleanup still runs. 
+ assert call_order.count("kill_all") >= 2 + + +@pytest.mark.asyncio +async def test_gateway_stop_kills_tool_subprocesses_on_graceful_path(monkeypatch): + """Graceful shutdown (no drain timeout) must still kill tool subprocesses + exactly once via the final catch-all — regression guard against + accidentally removing that call when refactoring.""" + runner, adapter = make_restart_runner() + adapter.disconnect = AsyncMock() + + kill_count = 0 + + def _fake_kill_all(task_id=None): + nonlocal kill_count + kill_count += 1 + return 0 + + import tools.process_registry as _pr + import tools.terminal_tool as _tt + import tools.browser_tool as _bt + monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all) + monkeypatch.setattr(_tt, "cleanup_all_environments", lambda: None) + monkeypatch.setattr(_bt, "cleanup_all_browsers", lambda: None) + + # No running agents → drain returns immediately, no timeout, no eager cleanup. + with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"): + await runner.stop() + + # Only the final catch-all fires on the graceful path. + assert kill_count == 1 diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index 68554a4967..bd429bff2b 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -95,7 +95,10 @@ class TestGeneratedSystemdUnits: assert "ExecStop=" not in unit assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit - assert "TimeoutStopSec=60" in unit + # TimeoutStopSec must exceed the default drain_timeout (60s) so + # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup + # (tool subprocess kill, adapter disconnect) runs — issue #8202. 
+ assert "TimeoutStopSec=90" in unit def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch): monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None) @@ -111,7 +114,10 @@ class TestGeneratedSystemdUnits: assert "ExecStop=" not in unit assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit - assert "TimeoutStopSec=60" in unit + # TimeoutStopSec must exceed the default drain_timeout (60s) so + # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup + # (tool subprocess kill, adapter disconnect) runs — issue #8202. + assert "TimeoutStopSec=90" in unit assert "WantedBy=multi-user.target" in unit From 165b2e481afa1cd7385c5c9f9ebf33eb4a524131 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:59:32 -0700 Subject: [PATCH 024/264] feat(agent): make API retry count configurable via agent.api_max_retries (#14730) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #11616. The agent's API retry loop hardcoded max_retries = 3, so users with fallback providers on flaky primaries burned through ~3 × provider timeout (e.g. 3 × 180s = 9 minutes) before their fallback chain got a chance to kick in. Expose a new config key: agent: api_max_retries: 3 # default unchanged Set it to 1 for fast failover when you have fallback providers, or raise it if you prefer longer tolerance on a single provider. Values < 1 are clamped to 1 (single attempt, no retry); non-integer values fall back to the default. This wraps the Hermes-level retry loop only — the OpenAI SDK's own low-level retries (max_retries=2 default) still run beneath this for transient network errors. Changes: - hermes_cli/config.py: add agent.api_max_retries default 3 with comment. 
- run_agent.py: read self._api_max_retries in AIAgent.__init__; replace hardcoded max_retries = 3 in the retry loop with self._api_max_retries. - cli-config.yaml.example: documented example entry. - hermes_cli/tips.py: discoverable tip line. - tests/run_agent/test_api_max_retries_config.py: 4 tests covering default, override, clamp-to-one, and invalid-value fallback. --- cli-config.yaml.example | 7 ++ hermes_cli/config.py | 9 +++ hermes_cli/tips.py | 1 + run_agent.py | 13 +++- .../run_agent/test_api_max_retries_config.py | 65 +++++++++++++++++++ 5 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 tests/run_agent/test_api_max_retries_config.py diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 64927c2b68..bd63901e12 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -507,6 +507,13 @@ agent: # finish, then interrupts anything still running after this timeout. # 0 = no drain, interrupt immediately. # restart_drain_timeout: 60 + + # Max app-level retry attempts for API errors (connection drops, provider + # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this + # to 1 if you use fallback providers and want fast failover on flaky + # primaries (default 3). The OpenAI SDK does its own low-level retries + # underneath this wrapper — this is the Hermes-level loop. + # api_max_retries: 3 # Enable verbose logging verbose: false diff --git a/hermes_cli/config.py b/hermes_cli/config.py index c78b01b150..36e478a70a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -361,6 +361,15 @@ DEFAULT_CONFIG = { # to finish, then interrupts any remaining runs after the timeout. # 0 = no drain, interrupt immediately. "restart_drain_timeout": 60, + # Max app-level retry attempts for API errors (connection drops, + # provider timeouts, 5xx, etc.) before the agent surfaces the + # failure. 
The OpenAI SDK already does its own low-level retries + # (max_retries=2 default) for transient network errors; this is + # the Hermes-level retry loop that wraps the whole call. Lower + # this to 1 if you use fallback providers and want fast failover + # on flaky primaries; raise it if you prefer to tolerate longer + # provider hiccups on a single provider. + "api_max_retries": 3, "service_tier": "", # Tool-use enforcement: injects system prompt guidance that tells the # model to actually call tools instead of describing intended actions. diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 24acc15f53..0c1bebe67e 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -289,6 +289,7 @@ TIPS = [ "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.", "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.", "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.", + "agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.", "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.", "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.", "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.", diff --git a/run_agent.py b/run_agent.py index 855b67a847..63b0adb429 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1548,6 +1548,17 @@ class AIAgent: _agent_section = {} self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto") + # App-level API retry count (wraps each model API call). Default 3, + # overridable via agent.api_max_retries in config.yaml. See #11616. 
+ try: + _raw_api_retries = _agent_section.get("api_max_retries", 3) + _api_retries = int(_raw_api_retries) + if _api_retries < 1: + _api_retries = 1 # 1 = no retry (single attempt) + except (TypeError, ValueError): + _api_retries = 3 + self._api_max_retries = _api_retries + # Initialize context compressor for automatic context management # Compresses conversation when approaching model's context limit # Configuration via config.yaml (compression section) @@ -9259,7 +9270,7 @@ class AIAgent: api_start_time = time.time() retry_count = 0 - max_retries = 3 + max_retries = self._api_max_retries primary_recovery_attempted = False max_compression_attempts = 3 codex_auth_retry_attempted=False diff --git a/tests/run_agent/test_api_max_retries_config.py b/tests/run_agent/test_api_max_retries_config.py new file mode 100644 index 0000000000..44e859986b --- /dev/null +++ b/tests/run_agent/test_api_max_retries_config.py @@ -0,0 +1,65 @@ +"""Tests for agent.api_max_retries config surface. + +Closes #11616 — make the hardcoded ``max_retries = 3`` in the agent's API +retry loop user-configurable so fallback-provider setups can fail over +faster on flaky primaries instead of burning ~3x180s on the same stall. 
+""" +from unittest.mock import MagicMock, patch + +from run_agent import AIAgent + + +def _make_agent(api_max_retries=None): + """Build an AIAgent with a mocked config.load_config that returns a + config tree containing the given agent.api_max_retries (or default).""" + cfg = {"agent": {}} + if api_max_retries is not None: + cfg["agent"]["api_max_retries"] = api_max_retries + + with patch("run_agent.OpenAI"), \ + patch("hermes_cli.config.load_config", return_value=cfg): + return AIAgent( + api_key="test-key", + base_url="https://openrouter.ai/api/v1", + model="test/model", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + +def test_default_api_max_retries_is_three(): + """No config override → legacy default of 3 retries preserved.""" + agent = _make_agent() + assert agent._api_max_retries == 3 + + +def test_api_max_retries_honors_config_override(): + """Setting agent.api_max_retries in config propagates to the agent.""" + agent = _make_agent(api_max_retries=1) + assert agent._api_max_retries == 1 + + agent2 = _make_agent(api_max_retries=5) + assert agent2._api_max_retries == 5 + + +def test_api_max_retries_clamps_below_one_to_one(): + """0 or negative values would disable the retry loop entirely + (the ``while retry_count < max_retries`` guard would never execute), + so clamp to 1 = single attempt, no retry.""" + agent = _make_agent(api_max_retries=0) + assert agent._api_max_retries == 1 + + agent2 = _make_agent(api_max_retries=-3) + assert agent2._api_max_retries == 1 + + +def test_api_max_retries_falls_back_on_invalid_value(): + """Garbage values in config don't crash agent init — fall back to 3.""" + agent = _make_agent(api_max_retries="not-a-number") + assert agent._api_max_retries == 3 + + agent2 = _make_agent(api_max_retries=None) + # None with dict.get default fires → default(3), then int(None) raises + # TypeError → except branch sets to 3. 
+ assert agent2._api_max_retries == 3 From 97b9b3d6a6848579c5c93cd4b5d8d26f6dde34b8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:01:57 -0700 Subject: [PATCH 025/264] fix(gateway): drain-aware hermes update + faster still-working pings (#14736) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cmd_update no longer SIGKILLs in-flight agent runs, and users get 'still working' status every 3 min instead of 10. Two long-standing sources of '@user — agent gives up mid-task' reports on Telegram and other gateways. Drain-aware update: - New helper hermes_cli.gateway._graceful_restart_via_sigusr1(pid, drain_timeout) sends SIGUSR1 to the gateway and polls os.kill(pid, 0) until the process exits or the budget expires. - cmd_update's systemd loop now reads MainPID via 'systemctl show --property=MainPID --value' and tries the graceful path first. The gateway's existing SIGUSR1 handler -> request_restart(via_service= True) -> drain -> exit(75) is wired in gateway/run.py and is respawned by systemd's Restart=on-failure (and the explicit RestartForceExitStatus=75 on newer units). - Falls back to 'systemctl restart' when MainPID is unknown, the drain budget elapses, or the unit doesn't respawn after exit (older units missing Restart=on-failure). Old install behavior preserved. - Drain budget = max(restart_drain_timeout, 30s) + 15s margin so the drain loop in run_agent + final exit have room before fallback fires. Composes with #14728's tool-subprocess reaping. Notification interval: - agent.gateway_notify_interval default 600 -> 180. - HERMES_AGENT_NOTIFY_INTERVAL env-var fallback in gateway/run.py matched. - 9-minute weak-model spinning runs now ping at 3 min and 6 min instead of 27 seconds before completion, removing the 'is the bot dead?' reflex that drives gateway-restart cycles. 
Tests: - Two new tests in tests/hermes_cli/test_update_gateway_restart.py: one asserts SIGUSR1 is sent and 'systemctl restart' is NOT called when MainPID is known and the helper succeeds; one asserts the fallback fires when the helper returns False. - E2E: spawned detached bash processes confirm the helper returns True on SIGUSR1-handling exit (~0.5s) and False on SIGUSR1-ignoring processes (timeout). Verified non-existent PID and pid=0 edge cases. - 41/41 in test_update_gateway_restart.py (was 39, +2 new). - 154/154 in shutdown-related suites including #14728's new tests. Reported by @GeoffWellman and @ANT_1515 on X. --- gateway/run.py | 4 +- hermes_cli/config.py | 6 +- hermes_cli/gateway.py | 54 ++++++ hermes_cli/main.py | 178 +++++++++++++----- .../hermes_cli/test_update_gateway_restart.py | 146 ++++++++++++++ 5 files changed, 339 insertions(+), 49 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 2c377980cd..3eb932cc24 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -10373,9 +10373,9 @@ class GatewayRunner: # Periodic "still working" notifications for long-running tasks. # Fires every N seconds so the user knows the agent hasn't died. # Config: agent.gateway_notify_interval in config.yaml, or - # HERMES_AGENT_NOTIFY_INTERVAL env var. Default 600s (10 min). + # HERMES_AGENT_NOTIFY_INTERVAL env var. Default 180s (3 min). # 0 = disable notifications. - _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600)) + _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180)) _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None _notify_start = time.time() diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 36e478a70a..cfcc7ff28f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -384,7 +384,11 @@ DEFAULT_CONFIG = { # Periodic "still working" notification interval (seconds). 
# Sends a status message every N seconds so the user knows the # agent hasn't died during long tasks. 0 = disable notifications. - "gateway_notify_interval": 600, + # Lower values mean faster feedback on slow tasks but more chat + # noise; 180s is a compromise that catches spinning weak-model runs + # (60+ tool iterations with tiny output) before users assume the + # bot is dead and /restart. + "gateway_notify_interval": 180, }, "terminal": { diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 9773299d5c..3b828fecf5 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool: return True +def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool: + """Send SIGUSR1 to a gateway PID and wait for it to exit gracefully. + + SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)`` + which drains in-flight agent runs (up to ``agent.restart_drain_timeout`` + seconds), then exits with code 75. Both systemd (``Restart=on-failure`` + + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit + = false``) relaunch the process after the graceful exit. + + This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``, + which SIGKILL in-flight agents after a short timeout. + + Args: + pid: Gateway process PID (systemd MainPID, launchd PID, or bare + process PID). + drain_timeout: Seconds to wait for the process to exit after sending + SIGUSR1. Should be slightly larger than the gateway's + ``agent.restart_drain_timeout`` to allow the drain loop to + finish cleanly. + + Returns: + True if the PID was signalled and exited within the timeout. + False if SIGUSR1 couldn't be sent or the process didn't exit in + time (caller should fall back to a harder restart path). 
+ """ + if not hasattr(signal, "SIGUSR1"): + return False + if pid <= 0: + return False + try: + os.kill(pid, signal.SIGUSR1) + except ProcessLookupError: + # Already gone — nothing to drain. + return True + except (PermissionError, OSError): + return False + + import time as _time + + deadline = _time.monotonic() + max(drain_timeout, 1.0) + while _time.monotonic() < deadline: + try: + os.kill(pid, 0) # signal 0 — probe liveness + except ProcessLookupError: + return True + except PermissionError: + # Process still exists but we can't signal it. Treat as alive + # so the caller falls back. + pass + _time.sleep(0.5) + # Drain didn't finish in time. + return False + + def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None: if pid is None or pid <= 0: return diff --git a/hermes_cli/main.py b/hermes_cli/main.py index cb70261b46..d7de309607 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5864,12 +5864,15 @@ def _cmd_update_impl(args, gateway_mode: bool): # Write exit code *before* the gateway restart attempt. # When running as ``hermes update --gateway`` (spawned by the gateway's # /update command), this process lives inside the gateway's systemd - # cgroup. ``systemctl restart hermes-gateway`` kills everything in the - # cgroup (KillMode=mixed → SIGKILL to remaining processes), including - # us and the wrapping bash shell. The shell never reaches its - # ``printf $status > .update_exit_code`` epilogue, so the exit-code - # marker file is never created. The new gateway's update watcher then - # polls for 30 minutes and sends a spurious timeout message. + # cgroup. A graceful SIGUSR1 restart keeps the drain loop alive long + # enough for the exit-code marker to be written below, but the + # fallback ``systemctl restart`` path (see below) kills everything in + # the cgroup (KillMode=mixed → SIGKILL to remaining processes), + # including us and the wrapping bash shell. 
The shell never reaches + # its ``printf $status > .update_exit_code`` epilogue, so the + # exit-code marker file would never be created. The new gateway's + # update watcher would then poll for 30 minutes and send a spurious + # timeout message. # # Writing the marker here — after git pull + pip install succeed but # before we attempt the restart — ensures the new gateway sees it @@ -5891,9 +5894,37 @@ def _cmd_update_impl(args, gateway_mode: bool): _ensure_user_systemd_env, find_gateway_pids, _get_service_pids, + _graceful_restart_via_sigusr1, ) import signal as _signal + # Drain budget for graceful SIGUSR1 restarts. The gateway drains + # for up to ``agent.restart_drain_timeout`` (default 60s) before + # exiting with code 75; we wait slightly longer so the drain + # completes before we fall back to a hard restart. On older + # systemd units without SIGUSR1 wiring this wait just times out + # and we fall back to ``systemctl restart`` (the old behaviour). + try: + from hermes_constants import ( + DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN, + ) + except Exception: + _DEFAULT_DRAIN = 60.0 + _cfg_drain = None + try: + from hermes_cli.config import load_config + _cfg_agent = (load_config().get("agent") or {}) + _cfg_drain = _cfg_agent.get("restart_drain_timeout") + except Exception: + pass + try: + _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN) + except (TypeError, ValueError): + _drain_budget = float(_DEFAULT_DRAIN) + # Add a 15s margin so the drain loop + final exit finish before + # we escalate to ``systemctl restart`` / SIGTERM. 
+ _drain_budget = max(_drain_budget, 30.0) + 15.0 + restarted_services = [] killed_pids = set() @@ -5940,59 +5971,114 @@ def _cmd_update_impl(args, gateway_mode: bool): text=True, timeout=5, ) - if check.stdout.strip() == "active": - restart = subprocess.run( - scope_cmd + ["restart", svc_name], + if check.stdout.strip() != "active": + continue + + # Prefer a graceful SIGUSR1 restart so in-flight + # agent runs drain instead of being SIGKILLed. + # The gateway's SIGUSR1 handler calls + # request_restart(via_service=True) → drain → + # exit(75); systemd's Restart=on-failure (and + # RestartForceExitStatus=75) respawns the unit. + _main_pid = 0 + try: + _show = subprocess.run( + scope_cmd + [ + "show", svc_name, + "--property=MainPID", "--value", + ], + capture_output=True, text=True, timeout=5, + ) + _main_pid = int((_show.stdout or "").strip() or 0) + except (ValueError, subprocess.TimeoutExpired, FileNotFoundError): + _main_pid = 0 + + _graceful_ok = False + if _main_pid > 0: + print( + f" → {svc_name}: draining (up to {int(_drain_budget)}s)..." + ) + _graceful_ok = _graceful_restart_via_sigusr1( + _main_pid, drain_timeout=_drain_budget, + ) + + if _graceful_ok: + # Gateway exited 75; systemd should relaunch + # via Restart=on-failure. Verify the new + # process came up. + _time.sleep(3) + verify = subprocess.run( + scope_cmd + ["is-active", svc_name], + capture_output=True, text=True, timeout=5, + ) + if verify.stdout.strip() == "active": + restarted_services.append(svc_name) + continue + # Process exited but wasn't respawned (older + # unit without Restart=on-failure or + # RestartForceExitStatus=75). Fall through + # to systemctl start/restart. + print( + f" ⚠ {svc_name} drained but didn't relaunch — forcing restart" + ) + + # Fallback: blunt systemctl restart. This is + # what the old code always did; we get here only + # when the graceful path failed (unit missing + # SIGUSR1 wiring, drain exceeded the budget, + # restart-policy mismatch). 
+ restart = subprocess.run( + scope_cmd + ["restart", svc_name], + capture_output=True, + text=True, + timeout=15, + ) + if restart.returncode == 0: + # Verify the service actually survived the + # restart. systemctl restart returns 0 even + # if the new process crashes immediately. + _time.sleep(3) + verify = subprocess.run( + scope_cmd + ["is-active", svc_name], capture_output=True, text=True, - timeout=15, + timeout=5, ) - if restart.returncode == 0: - # Verify the service actually survived the - # restart. systemctl restart returns 0 even - # if the new process crashes immediately. + if verify.stdout.strip() == "active": + restarted_services.append(svc_name) + else: + # Retry once — transient startup failures + # (stale module cache, import race) often + # resolve on the second attempt. + print( + f" ⚠ {svc_name} died after restart, retrying..." + ) + retry = subprocess.run( + scope_cmd + ["restart", svc_name], + capture_output=True, + text=True, + timeout=15, + ) _time.sleep(3) - verify = subprocess.run( + verify2 = subprocess.run( scope_cmd + ["is-active", svc_name], capture_output=True, text=True, timeout=5, ) - if verify.stdout.strip() == "active": + if verify2.stdout.strip() == "active": restarted_services.append(svc_name) + print(f" ✓ {svc_name} recovered on retry") else: - # Retry once — transient startup failures - # (stale module cache, import race) often - # resolve on the second attempt. print( - f" ⚠ {svc_name} died after restart, retrying..." 
+ f" ✗ {svc_name} failed to stay running after restart.\n" + f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n" + f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}" ) - retry = subprocess.run( - scope_cmd + ["restart", svc_name], - capture_output=True, - text=True, - timeout=15, - ) - _time.sleep(3) - verify2 = subprocess.run( - scope_cmd + ["is-active", svc_name], - capture_output=True, - text=True, - timeout=5, - ) - if verify2.stdout.strip() == "active": - restarted_services.append(svc_name) - print(f" ✓ {svc_name} recovered on retry") - else: - print( - f" ✗ {svc_name} failed to stay running after restart.\n" - f" Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n" - f" Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}" - ) - else: - print( - f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}" - ) + else: + print( + f" ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}" + ) except (FileNotFoundError, subprocess.TimeoutExpired): pass diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index 2a2bc962d8..1c7e1b96c9 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -422,6 +422,152 @@ class TestCmdUpdateLaunchdRestart: ] assert len(restart_calls) == 1 + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_prefers_sigusr1_over_systemctl_restart_when_mainpid_known( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """Drain-aware update: when systemctl show reports a MainPID, the + update path sends SIGUSR1 and waits for graceful exit + respawn, + instead of ``systemctl restart`` (which SIGKILLs in-flight agents). 
+ """ + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True) + monkeypatch.setattr(gateway_cli, "is_termux", lambda: False) + + # Track state: before kill → "active" (old PID), + # after kill + exit → briefly inactive, then "active" again (new PID). + state = {"killed": False} + + def side_effect(cmd, **kwargs): + joined = " ".join(str(c) for c in cmd) + + if "rev-parse" in joined and "--abbrev-ref" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="") + if "rev-parse" in joined and "--verify" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + if "rev-list" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="") + + # Only expose a user-scope service. + if "systemctl" in joined and "list-units" in joined: + if "--user" in joined: + return subprocess.CompletedProcess( + cmd, 0, + stdout="hermes-gateway.service loaded active running\n", + stderr="", + ) + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + if "systemctl" in joined and "is-active" in joined: + # Pre-kill: active. Post-kill: active again (respawned by + # Restart=on-failure). The drain loop verifies liveness + # separately via os.kill(pid, 0). + return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="") + + # The new code path. + if "systemctl" in joined and "show" in joined and "MainPID" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="") + + # If systemctl restart is called, this test fails its intent — + # but still let it succeed so we can assert it was NOT called. + if "systemctl" in joined and "restart" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="") + + mock_run.side_effect = side_effect + + # Track SIGUSR1 delivery and simulate the gateway draining + exiting. 
+ sigusr1_sent = {"value": False} + + def fake_kill(pid, sig): + import signal as _s + if pid == 4242 and sig == _s.SIGUSR1: + sigusr1_sent["value"] = True + state["killed"] = True + return + if pid == 4242 and sig == 0: + # Liveness probe — report dead once SIGUSR1 has been sent. + if state["killed"]: + raise ProcessLookupError() + return + # For any other PID/sig combination, succeed silently. + return + + monkeypatch.setattr("os.kill", fake_kill) + + with patch.object(gateway_cli, "find_gateway_pids", return_value=[]): + cmd_update(mock_args) + + # SIGUSR1 must have been delivered to the gateway MainPID. + assert sigusr1_sent["value"], "Expected SIGUSR1 to be sent to MainPID" + + # And `systemctl restart` must NOT have been used (that's the + # non-draining kill-everything path we're moving away from). + restart_calls = [ + c for c in mock_run.call_args_list + if "systemctl" in " ".join(str(a) for a in c.args[0]) + and "restart" in " ".join(str(a) for a in c.args[0]) + ] + assert restart_calls == [], ( + "Graceful SIGUSR1 succeeded; `systemctl restart` should not " + f"have been called. Got: {restart_calls}" + ) + + captured = capsys.readouterr().out + assert "draining" in captured.lower() + assert "Restarted hermes-gateway" in captured + + @patch("shutil.which", return_value=None) + @patch("subprocess.run") + def test_update_falls_back_to_systemctl_restart_when_sigusr1_times_out( + self, mock_run, _mock_which, mock_args, capsys, monkeypatch, + ): + """If the gateway doesn't exit within the drain budget (e.g. old unit + missing ``Restart=on-failure`` or an agent ignoring SIGUSR1), the + update path falls back to ``systemctl restart``. 
+ """ + monkeypatch.setattr(gateway_cli, "is_macos", lambda: False) + monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True) + monkeypatch.setattr(gateway_cli, "is_termux", lambda: False) + + mock_run.side_effect = _make_run_side_effect( + commit_count="3", + systemd_active=True, + ) + + # Patch systemctl show to report MainPID=4242 so cmd_update attempts + # the graceful path. + orig = mock_run.side_effect + def wrapped(cmd, **kwargs): + joined = " ".join(str(c) for c in cmd) + if "systemctl" in joined and "show" in joined and "MainPID" in joined: + return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="") + return orig(cmd, **kwargs) + mock_run.side_effect = wrapped + + # Simulate the drain helper failing to confirm a clean exit — either + # because the gateway ignored SIGUSR1 or the drain budget was + # exceeded. cmd_update() should detect this and escalate. + monkeypatch.setattr( + "hermes_cli.gateway._graceful_restart_via_sigusr1", + lambda pid, drain_timeout: False, + ) + + with patch.object(gateway_cli, "find_gateway_pids", return_value=[]): + cmd_update(mock_args) + + # Fallback kicked in → systemctl restart was called. + restart_calls = [ + c for c in mock_run.call_args_list + if "systemctl" in " ".join(str(a) for a in c.args[0]) + and "restart" in " ".join(str(a) for a in c.args[0]) + ] + assert len(restart_calls) >= 1, ( + "Drain path failed; expected fallback `systemctl restart`." 
+ ) + @patch("shutil.which", return_value=None) @patch("subprocess.run") def test_update_no_gateway_running_skips_restart( From 07046096d96bcf4db61fa0c12f79504f92f2a21c Mon Sep 17 00:00:00 2001 From: sgaofen <135070653+sgaofen@users.noreply.github.com> Date: Thu, 23 Apr 2026 09:54:12 -0700 Subject: [PATCH 026/264] fix(agent): clarify exhausted OpenRouter auxiliary credentials --- agent/auxiliary_client.py | 19 +++++++++++++++++-- tests/agent/test_auxiliary_client.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5735648f13..e812a337f5 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -916,6 +916,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: default_headers=_OR_HEADERS), _OPENROUTER_MODEL +def _describe_openrouter_unavailable() -> str: + """Return a more precise OpenRouter auth failure reason for logs.""" + pool_present, entry = _select_pool_entry("openrouter") + if pool_present: + if entry is None: + return "OpenRouter credential pool has no usable entries (credentials may be exhausted)" + if not _pool_runtime_api_key(entry): + return "OpenRouter credential pool entry is missing a runtime API key" + if not str(os.getenv("OPENROUTER_API_KEY") or "").strip(): + return "OPENROUTER_API_KEY not set" + return "no usable OpenRouter credentials found" + + def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: # Check cross-session rate limit guard before attempting Nous — # if another session already recorded a 429, skip Nous entirely @@ -1627,8 +1640,10 @@ def resolve_provider_client( if provider == "openrouter": client, default = _try_openrouter() if client is None: - logger.warning("resolve_provider_client: openrouter requested " - "but OPENROUTER_API_KEY not set") + logger.warning( + "resolve_provider_client: openrouter requested but %s", + _describe_openrouter_unavailable(), + ) return 
None, None final_model = _normalize_resolved_model(model or default, provider) return (_to_async_client(client, final_model) if async_mode diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 4c775b8a6c..b5b74bd309 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -447,6 +447,34 @@ class TestExplicitProviderRouting: adapter = client.chat.completions assert adapter._is_oauth is False + def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)): + with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): + client, model = resolve_provider_client("openrouter") + assert client is None + assert model is None + assert any( + "credential pool has no usable entries" in record.message + for record in caplog.records + ) + assert not any( + "OPENROUTER_API_KEY not set" in record.message + for record in caplog.records + ) + + def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"): + client, model = resolve_provider_client("openrouter") + assert client is None + assert model is None + assert any( + "OPENROUTER_API_KEY not set" in record.message + for record in caplog.records + ) + class TestGetTextAuxiliaryClient: """Test the full resolution chain for get_text_auxiliary_client.""" From 1cc0bdd5f306effd91ec23f0c8d7044acf22473c Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:59:02 -0600 Subject: [PATCH 027/264] fix(dashboard): avoid auth header collision with reverse proxies --- 
hermes_cli/web_server.py | 28 +++++++++---- tests/hermes_cli/test_web_server.py | 63 ++++++++++++++++++++++------- web/src/lib/api.ts | 21 ++++++---- 3 files changed, 83 insertions(+), 29 deletions(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 10b92f69a9..ca473b0a58 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -71,6 +71,7 @@ app = FastAPI(title="Hermes Agent", version=__version__) # Injected into the SPA HTML so only the legitimate web UI can use it. # --------------------------------------------------------------------------- _SESSION_TOKEN = secrets.token_urlsafe(32) +_SESSION_HEADER_NAME = "X-Hermes-Session-Token" # Simple rate limiter for the reveal endpoint _reveal_timestamps: List[float] = [] @@ -104,14 +105,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({ }) -def _require_token(request: Request) -> None: - """Validate the ephemeral session token. Raises 401 on mismatch. +def _has_valid_session_token(request: Request) -> bool: + """True if the request carries a valid dashboard session token. - Uses ``hmac.compare_digest`` to prevent timing side-channels. + The dedicated session header avoids collisions with reverse proxies that + already use ``Authorization`` (for example Caddy ``basic_auth``). We still + accept the legacy Bearer path for backward compatibility with older + dashboard bundles. """ + session_header = request.headers.get(_SESSION_HEADER_NAME, "") + if session_header and hmac.compare_digest( + session_header.encode(), + _SESSION_TOKEN.encode(), + ): + return True + auth = request.headers.get("authorization", "") expected = f"Bearer {_SESSION_TOKEN}" - if not hmac.compare_digest(auth.encode(), expected.encode()): + return hmac.compare_digest(auth.encode(), expected.encode()) + + +def _require_token(request: Request) -> None: + """Validate the ephemeral session token. 
Raises 401 on mismatch.""" + if not _has_valid_session_token(request): raise HTTPException(status_code=401, detail="Unauthorized") @@ -205,9 +221,7 @@ async def auth_middleware(request: Request, call_next): """Require the session token on all /api/ routes except the public list.""" path = request.url.path if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"): - auth = request.headers.get("authorization", "") - expected = f"Bearer {_SESSION_TOKEN}" - if not hmac.compare_digest(auth.encode(), expected.encode()): + if not _has_valid_session_token(request): return JSONResponse( status_code=401, content={"detail": "Unauthorized"}, diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index 572549bd40..1f3a78b991 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -110,12 +110,12 @@ class TestWebServerEndpoints: import hermes_state from hermes_constants import get_hermes_home - from hermes_cli.web_server import app, _SESSION_TOKEN + from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db") self.client = TestClient(app) - self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}" + self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN def test_get_status(self): resp = self.client.get("/api/status") @@ -221,12 +221,12 @@ class TestWebServerEndpoints: def test_reveal_env_var(self, tmp_path): """POST /api/env/reveal should return the real unredacted value.""" from hermes_cli.config import save_env_value - from hermes_cli.web_server import _SESSION_TOKEN + from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN save_env_value("TEST_REVEAL_KEY", "super-secret-value-12345") resp = self.client.post( "/api/env/reveal", json={"key": "TEST_REVEAL_KEY"}, - headers={"Authorization": f"Bearer {_SESSION_TOKEN}"}, + 
headers={_SESSION_HEADER_NAME: _SESSION_TOKEN}, ) assert resp.status_code == 200 data = resp.json() @@ -235,11 +235,11 @@ class TestWebServerEndpoints: def test_reveal_env_var_not_found(self): """POST /api/env/reveal should 404 for unknown keys.""" - from hermes_cli.web_server import _SESSION_TOKEN + from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN resp = self.client.post( "/api/env/reveal", json={"key": "NONEXISTENT_KEY_XYZ"}, - headers={"Authorization": f"Bearer {_SESSION_TOKEN}"}, + headers={_SESSION_HEADER_NAME: _SESSION_TOKEN}, ) assert resp.status_code == 404 @@ -249,7 +249,7 @@ class TestWebServerEndpoints: from hermes_cli.web_server import app from hermes_cli.config import save_env_value save_env_value("TEST_REVEAL_NOAUTH", "secret-value") - # Use a fresh client WITHOUT the Authorization header + # Use a fresh client WITHOUT the dashboard session header unauth_client = TestClient(app) resp = unauth_client.post( "/api/env/reveal", @@ -260,14 +260,47 @@ class TestWebServerEndpoints: def test_reveal_env_var_bad_token(self, tmp_path): """POST /api/env/reveal with wrong token should return 401.""" from hermes_cli.config import save_env_value + from hermes_cli.web_server import _SESSION_HEADER_NAME save_env_value("TEST_REVEAL_BADAUTH", "secret-value") resp = self.client.post( "/api/env/reveal", json={"key": "TEST_REVEAL_BADAUTH"}, - headers={"Authorization": "Bearer wrong-token-here"}, + headers={_SESSION_HEADER_NAME: "wrong-token-here"}, ) assert resp.status_code == 401 + def test_reveal_env_var_custom_session_header_ignores_proxy_authorization(self, tmp_path): + """A valid dashboard session header should coexist with proxy auth.""" + from hermes_cli.config import save_env_value + from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN + + save_env_value("TEST_REVEAL_PROXY_AUTH", "secret-value") + resp = self.client.post( + "/api/env/reveal", + json={"key": "TEST_REVEAL_PROXY_AUTH"}, + headers={ + _SESSION_HEADER_NAME: 
_SESSION_TOKEN, + "Authorization": "Basic dXNlcjpwYXNz", + }, + ) + + assert resp.status_code == 200 + assert resp.json()["value"] == "secret-value" + + def test_reveal_env_var_legacy_authorization_header_still_works(self, tmp_path): + """Keep old dashboard bundles working while the new header rolls out.""" + from hermes_cli.config import save_env_value + from hermes_cli.web_server import _SESSION_TOKEN + + save_env_value("TEST_REVEAL_LEGACY_AUTH", "secret-value") + resp = self.client.post( + "/api/env/reveal", + json={"key": "TEST_REVEAL_LEGACY_AUTH"}, + headers={"Authorization": f"Bearer {_SESSION_TOKEN}"}, + ) + + assert resp.status_code == 200 + def test_session_token_endpoint_removed(self): """GET /api/auth/session-token should no longer exist (token injected via HTML).""" resp = self.client.get("/api/auth/session-token") @@ -285,7 +318,7 @@ class TestWebServerEndpoints: """API requests without the session token should be rejected.""" from starlette.testclient import TestClient from hermes_cli.web_server import app - # Create a client WITHOUT the Authorization header + # Create a client WITHOUT the dashboard session header unauth_client = TestClient(app) resp = unauth_client.get("/api/env") assert resp.status_code == 401 @@ -388,9 +421,9 @@ class TestConfigRoundTrip: from starlette.testclient import TestClient except ImportError: pytest.skip("fastapi/starlette not installed") - from hermes_cli.web_server import app, _SESSION_TOKEN + from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN self.client = TestClient(app) - self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}" + self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN def test_get_config_no_internal_keys(self): """GET /api/config should not expose _config_version or _model_meta.""" @@ -524,12 +557,12 @@ class TestNewEndpoints: import hermes_state from hermes_constants import get_hermes_home - from hermes_cli.web_server import app, _SESSION_TOKEN + from 
hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db") self.client = TestClient(app) - self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}" + self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN def test_get_logs_default(self): resp = self.client.get("/api/logs") @@ -1176,9 +1209,9 @@ class TestStatusRemoteGateway: except ImportError: pytest.skip("fastapi/starlette not installed") - from hermes_cli.web_server import app, _SESSION_TOKEN + from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN self.client = TestClient(app) - self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}" + self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN def test_status_falls_back_to_remote_probe(self, monkeypatch): """When local PID check fails and remote probe succeeds, gateway shows running.""" diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index 45c0618a5f..a44c2f5e28 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -10,13 +10,20 @@ declare global { } } let _sessionToken: string | null = null; +const SESSION_HEADER = "X-Hermes-Session-Token"; + +function setSessionHeader(headers: Headers, token: string): void { + if (!headers.has(SESSION_HEADER)) { + headers.set(SESSION_HEADER, token); + } +} export async function fetchJSON(url: string, init?: RequestInit): Promise { // Inject the session token into all /api/ requests. 
const headers = new Headers(init?.headers); const token = window.__HERMES_SESSION_TOKEN__; - if (token && !headers.has("Authorization")) { - headers.set("Authorization", `Bearer ${token}`); + if (token) { + setSessionHeader(headers, token); } const res = await fetch(`${BASE}${url}`, { ...init, headers }); if (!res.ok) { @@ -92,7 +99,7 @@ export const api = { method: "POST", headers: { "Content-Type": "application/json", - Authorization: `Bearer ${token}`, + [SESSION_HEADER]: token, }, body: JSON.stringify({ key }), }); @@ -138,7 +145,7 @@ export const api = { `/api/providers/oauth/${encodeURIComponent(providerId)}`, { method: "DELETE", - headers: { Authorization: `Bearer ${token}` }, + headers: { [SESSION_HEADER]: token }, }, ); }, @@ -150,7 +157,7 @@ export const api = { method: "POST", headers: { "Content-Type": "application/json", - Authorization: `Bearer ${token}`, + [SESSION_HEADER]: token, }, body: "{}", }, @@ -164,7 +171,7 @@ export const api = { method: "POST", headers: { "Content-Type": "application/json", - Authorization: `Bearer ${token}`, + [SESSION_HEADER]: token, }, body: JSON.stringify({ session_id: sessionId, code }), }, @@ -180,7 +187,7 @@ export const api = { `/api/providers/oauth/sessions/${encodeURIComponent(sessionId)}`, { method: "DELETE", - headers: { Authorization: `Bearer ${token}` }, + headers: { [SESSION_HEADER]: token }, }, ); }, From 1dfcda4e3c195a03e51a3c69a88f18ebf525eeca Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:35:16 -0600 Subject: [PATCH 028/264] fix(approval): guard env and config overwrites --- run_agent.py | 1 + tests/run_agent/test_run_agent.py | 8 +++++ tests/tools/test_approval.py | 53 ++++++++++++++++++++++++++++++- tools/approval.py | 7 ++++ 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 63b0adb429..3a26fdeadf 100644 --- a/run_agent.py +++ b/run_agent.py @@ -262,6 +262,7 @@ _MAX_TOOL_WORKERS = 8 
_DESTRUCTIVE_PATTERNS = re.compile( r"""(?:^|\s|&&|\|\||;|`)(?: rm\s|rmdir\s| + cp\s|install\s| mv\s| sed\s+-i| truncate\s| diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 991ca07d24..8d5e21f11f 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -44,6 +44,14 @@ def _make_tool_defs(*names: str) -> list: ] +def test_is_destructive_command_treats_cp_as_mutating(): + assert run_agent._is_destructive_command("cp .env.local .env") is True + + +def test_is_destructive_command_treats_install_as_mutating(): + assert run_agent._is_destructive_command("install template.env .env") is True + + @pytest.fixture() def agent(): """Minimal AIAgent with mocked OpenAI client and tool loading.""" diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py index 2d7bfe6b0a..a752b68d7b 100644 --- a/tests/tools/test_approval.py +++ b/tests/tools/test_approval.py @@ -434,6 +434,58 @@ class TestSensitiveRedirectPattern: assert dangerous is False assert key is None + def test_redirect_to_local_dotenv_requires_approval(self): + dangerous, key, desc = detect_dangerous_command("echo TOKEN=x > .env") + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + + def test_redirect_to_nested_config_yaml_requires_approval(self): + dangerous, key, desc = detect_dangerous_command("echo mode: prod > deploy/config.yaml") + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + + def test_redirect_from_local_dotenv_source_is_safe(self): + dangerous, key, desc = detect_dangerous_command("cat .env > backup.txt") + assert dangerous is False + assert key is None + assert desc is None + + +class TestProjectSensitiveCopyPattern: + def test_cp_to_local_dotenv_requires_approval(self): + dangerous, key, desc = detect_dangerous_command("cp .env.local .env") + assert dangerous is True + assert key is not None + assert "project env/config" in 
desc.lower() + + def test_mv_to_nested_config_yaml_requires_approval(self): + dangerous, key, desc = detect_dangerous_command("mv tmp/generated.yaml config/config.yaml") + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + + def test_install_to_dotenv_requires_approval(self): + dangerous, key, desc = detect_dangerous_command("install -m 600 template.env .env.production") + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + + def test_cp_from_config_yaml_source_is_safe(self): + dangerous, key, desc = detect_dangerous_command("cp config.yaml backup.yaml") + assert dangerous is False + assert key is None + assert desc is None + + +class TestProjectSensitiveTeePattern: + def test_tee_to_local_dotenv_requires_approval(self): + dangerous, key, desc = detect_dangerous_command("printenv | tee .env.local") + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + class TestPatternKeyUniqueness: """Bug: pattern_key is derived by splitting on \\b and taking [1], so @@ -836,4 +888,3 @@ class TestChmodExecuteCombo: cmd = "chmod +x script.sh" dangerous, _, _ = detect_dangerous_command(cmd) assert dangerous is False - diff --git a/tools/approval.py b/tools/approval.py index fc344bd77b..d88b3e8770 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -63,11 +63,15 @@ _HERMES_ENV_PATH = ( r'(?:\$hermes_home|\$\{hermes_home\})/)' r'\.env\b' ) +_PROJECT_ENV_PATH = r'(?:(?:\.{1,2}/)?(?:[^\s/"\'`]+/)*\.env(?:\.[^/\s"\'`]+)*)' +_PROJECT_CONFIG_PATH = r'(?:(?:\.{1,2}/)?(?:[^\s/"\'`]+/)*config\.yaml)' _SENSITIVE_WRITE_TARGET = ( r'(?:/etc/|/dev/sd|' rf'{_SSH_SENSITIVE_PATH}|' rf'{_HERMES_ENV_PATH})' ) +_PROJECT_SENSITIVE_WRITE_TARGET = rf'(?:{_PROJECT_ENV_PATH}|{_PROJECT_CONFIG_PATH})' +_COMMAND_TAIL = r'(?:\s*(?:&&|\|\||;).*)?$' # ========================================================================= # Dangerous command patterns @@ -99,6 +103,8 @@ 
DANGEROUS_PATTERNS = [ (r'\b(bash|sh|zsh|ksh)\s+<\s*>?\s*["\']?{_SENSITIVE_WRITE_TARGET}', "overwrite system file via redirection"), + (rf'\btee\b.*["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config via tee"), + (rf'>>?\s*["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config via redirection"), (r'\bxargs\s+.*\brm\b', "xargs with rm"), (r'\bfind\b.*-exec\s+(/\S*/)?rm\b', "find -exec rm"), (r'\bfind\b.*-delete\b', "find -delete"), @@ -120,6 +126,7 @@ DANGEROUS_PATTERNS = [ (r'\bkill\b.*`\s*pgrep\b', "kill process via backtick pgrep expansion (self-termination)"), # File copy/move/edit into sensitive system paths (r'\b(cp|mv|install)\b.*\s/etc/', "copy/move file into /etc/"), + (rf'\b(cp|mv|install)\b.*\s["\']?{_PROJECT_SENSITIVE_WRITE_TARGET}["\']?{_COMMAND_TAIL}', "overwrite project env/config file"), (r'\bsed\s+-[^\s]*i.*\s/etc/', "in-place edit of system config"), (r'\bsed\s+--in-place\b.*\s/etc/', "in-place edit of system config (long flag)"), # Script execution via heredoc — bypasses the -e/-c flag patterns above. From b848ce2c79bfff4c0d470113085cf1ec45aa8877 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 14:05:15 -0700 Subject: [PATCH 029/264] test: cover absolute paths in project env/config approval regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original regex only matched relative paths (./foo/.env or bare .env), so the exact command from the bug report — `cp /opt/data/.env.local /opt/data/.env` — did not trigger approval. Broaden the leading-path prefix to accept an absolute leading slash alongside ./ and ../, and add regressions for the bug-report command and its redirection variant. 
--- tests/tools/test_approval.py | 18 ++++++++++++++++++ tools/approval.py | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py index a752b68d7b..476fd0d32d 100644 --- a/tests/tools/test_approval.py +++ b/tests/tools/test_approval.py @@ -460,6 +460,24 @@ class TestProjectSensitiveCopyPattern: assert key is not None assert "project env/config" in desc.lower() + def test_cp_absolute_path_to_dotenv_requires_approval(self): + # Regression: the real-world bug report was `cp /opt/data/.env.local /opt/data/.env`. + # The regex must cover absolute paths, not just `./` / bare relative paths. + dangerous, key, desc = detect_dangerous_command( + "cp /opt/data/.env.local /opt/data/.env" + ) + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + + def test_redirect_absolute_path_to_dotenv_requires_approval(self): + dangerous, key, desc = detect_dangerous_command( + "cat /opt/data/.env.local > /opt/data/.env" + ) + assert dangerous is True + assert key is not None + assert "project env/config" in desc.lower() + def test_mv_to_nested_config_yaml_requires_approval(self): dangerous, key, desc = detect_dangerous_command("mv tmp/generated.yaml config/config.yaml") assert dangerous is True diff --git a/tools/approval.py b/tools/approval.py index d88b3e8770..258f66b6e9 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -63,8 +63,8 @@ _HERMES_ENV_PATH = ( r'(?:\$hermes_home|\$\{hermes_home\})/)' r'\.env\b' ) -_PROJECT_ENV_PATH = r'(?:(?:\.{1,2}/)?(?:[^\s/"\'`]+/)*\.env(?:\.[^/\s"\'`]+)*)' -_PROJECT_CONFIG_PATH = r'(?:(?:\.{1,2}/)?(?:[^\s/"\'`]+/)*config\.yaml)' +_PROJECT_ENV_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*\.env(?:\.[^/\s"\'`]+)*)' +_PROJECT_CONFIG_PATH = r'(?:(?:/|\.{1,2}/)?(?:[^\s/"\'`]+/)*config\.yaml)' _SENSITIVE_WRITE_TARGET = ( r'(?:/etc/|/dev/sd|' rf'{_SSH_SENSITIVE_PATH}|' From a884f6d5d8cc3e461b611788a7daa027fe0a24ef Mon Sep 17 00:00:00 
2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Thu, 23 Apr 2026 11:39:04 -0600 Subject: [PATCH 030/264] fix(skills): follow symlinked category dirs consistently --- agent/skill_commands.py | 4 +- tests/agent/test_skill_commands.py | 26 +++++++++++ tests/tools/test_skills_tool.py | 72 ++++++++++++++++++++++++++++++ tools/skills_tool.py | 17 ++++--- 4 files changed, 112 insertions(+), 7 deletions(-) diff --git a/agent/skill_commands.py b/agent/skill_commands.py index a4345ca8c4..9c130ab84a 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -345,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]: _skill_commands = {} try: from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names - from agent.skill_utils import get_external_skills_dirs + from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files disabled = _get_disabled_skill_names() seen_names: set = set() @@ -356,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]: dirs_to_scan.extend(get_external_skills_dirs()) for scan_dir in dirs_to_scan: - for skill_md in scan_dir.rglob("SKILL.md"): + for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"): if any(part in ('.git', '.github', '.hub') for part in skill_md.parts): continue try: diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index e399db619e..bf8742690c 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -38,6 +38,18 @@ description: Description for {name}. 
return skill_dir +def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path: + """Create a category symlink under skills_dir pointing outside the tree.""" + external_category = linked_root / category + external_category.mkdir(parents=True, exist_ok=True) + symlink_path = skills_dir / category + try: + symlink_path.symlink_to(external_category, target_is_directory=True) + except (OSError, NotImplementedError) as exc: + pytest.skip(f"symlinks unavailable in test environment: {exc}") + return external_category + + class TestScanSkillCommands: def test_finds_skills(self, tmp_path): with patch("tools.skills_tool.SKILLS_DIR", tmp_path): @@ -101,6 +113,20 @@ class TestScanSkillCommands: assert "/enabled-skill" in result assert "/disabled-skill" not in result + def test_finds_skills_in_symlinked_category_dir(self, tmp_path): + external_root = tmp_path / "repo" + skills_root = tmp_path / "skills" + skills_root.mkdir() + + external_category = _symlink_category(skills_root, external_root, "linked") + _make_skill(external_category.parent, "knowledge-brain", category="linked") + + with patch("tools.skills_tool.SKILLS_DIR", skills_root): + result = scan_skill_commands() + + assert "/knowledge-brain" in result + assert result["/knowledge-brain"]["name"] == "knowledge-brain" + def test_special_chars_stripped_from_cmd_key(self, tmp_path): """Skill names with +, /, or other special chars produce clean cmd keys.""" diff --git a/tests/tools/test_skills_tool.py b/tests/tools/test_skills_tool.py index 2a21f06b5f..3cdfa98a90 100644 --- a/tests/tools/test_skills_tool.py +++ b/tests/tools/test_skills_tool.py @@ -44,6 +44,18 @@ description: Description for {name}. 
return skill_dir +def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path: + """Create a category symlink under skills_dir pointing outside the tree.""" + external_category = linked_root / category + external_category.mkdir(parents=True, exist_ok=True) + symlink_path = skills_dir / category + try: + symlink_path.symlink_to(external_category, target_is_directory=True) + except (OSError, NotImplementedError) as exc: + pytest.skip(f"symlinks unavailable in test environment: {exc}") + return external_category + + # --------------------------------------------------------------------------- # _parse_frontmatter # --------------------------------------------------------------------------- @@ -255,6 +267,20 @@ class TestFindAllSkills: assert len(skills) == 1 assert skills[0]["name"] == "real-skill" + def test_finds_skills_in_symlinked_category_dir(self, tmp_path): + external_root = tmp_path / "repo" + skills_root = tmp_path / "skills" + skills_root.mkdir() + + external_category = _symlink_category(skills_root, external_root, "linked") + _make_skill(external_category.parent, "knowledge-brain", category="linked") + + with patch("tools.skills_tool.SKILLS_DIR", skills_root): + skills = _find_all_skills() + + assert [s["name"] for s in skills] == ["knowledge-brain"] + assert skills[0]["category"] == "linked" + # --------------------------------------------------------------------------- # skills_list @@ -288,6 +314,23 @@ class TestSkillsList: assert result["count"] == 1 assert result["skills"][0]["name"] == "skill-a" + def test_category_filter_finds_symlinked_category(self, tmp_path): + external_root = tmp_path / "repo" + skills_root = tmp_path / "skills" + skills_root.mkdir() + + external_category = _symlink_category(skills_root, external_root, "linked") + _make_skill(external_category.parent, "knowledge-brain", category="linked") + + with patch("tools.skills_tool.SKILLS_DIR", skills_root): + raw = skills_list(category="linked") + + result = 
json.loads(raw) + assert result["success"] is True + assert result["count"] == 1 + assert result["categories"] == ["linked"] + assert result["skills"][0]["name"] == "knowledge-brain" + # --------------------------------------------------------------------------- # skill_view @@ -389,6 +432,35 @@ class TestSkillView: result = json.loads(raw) assert result["success"] is True + def test_view_finds_skill_in_symlinked_category_dir(self, tmp_path): + external_root = tmp_path / "repo" + skills_root = tmp_path / "skills" + skills_root.mkdir() + + external_category = _symlink_category(skills_root, external_root, "linked") + _make_skill(external_category.parent, "knowledge-brain", category="linked") + + with patch("tools.skills_tool.SKILLS_DIR", skills_root): + raw = skill_view("knowledge-brain") + + result = json.loads(raw) + assert result["success"] is True + assert result["name"] == "knowledge-brain" + + def test_not_found_hint_uses_same_order_as_skills_list(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill(tmp_path, "zeta", category="z-cat") + _make_skill(tmp_path, "alpha", category="a-cat") + _make_skill(tmp_path, "beta", category="a-cat") + + list_result = json.loads(skills_list()) + view_result = json.loads(skill_view("missing-skill")) + + assert view_result["success"] is False + assert view_result["available_skills"] == [ + skill["name"] for skill in list_result["skills"] + ] + class TestSkillViewSecureSetupOnLoad: def test_requests_missing_required_env_and_continues(self, tmp_path, monkeypatch): diff --git a/tools/skills_tool.py b/tools/skills_tool.py index 40a6990ea1..8bf92ef08d 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -554,7 +554,7 @@ def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]: Returns: List of skill metadata dicts (name, description, category). 
""" - from agent.skill_utils import get_external_skills_dirs + from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files skills = [] seen_names: set = set() @@ -569,7 +569,7 @@ def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]: dirs_to_scan.extend(get_external_skills_dirs()) for scan_dir in dirs_to_scan: - for skill_md in scan_dir.rglob("SKILL.md"): + for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"): if any(part in _EXCLUDED_SKILL_DIRS for part in skill_md.parts): continue @@ -620,6 +620,11 @@ def _find_all_skills(*, skip_disabled: bool = False) -> List[Dict[str, Any]]: return skills +def _sort_skills(skills: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Keep every skill listing path ordered the same way.""" + return sorted(skills, key=lambda s: (s.get("category") or "", s["name"])) + + def _load_category_description(category_dir: Path) -> Optional[str]: """ Load category description from DESCRIPTION.md if it exists. @@ -709,7 +714,7 @@ def skills_list(category: str = None, task_id: str = None) -> str: all_skills = [s for s in all_skills if s.get("category") == category] # Sort by category then name - all_skills.sort(key=lambda s: (s.get("category") or "", s["name"])) + all_skills = _sort_skills(all_skills) # Extract unique categories categories = sorted( @@ -926,7 +931,9 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: # Search by directory name across all dirs if not skill_md: for search_dir in all_dirs: - for found_skill_md in search_dir.rglob("SKILL.md"): + from agent.skill_utils import iter_skill_index_files + + for found_skill_md in iter_skill_index_files(search_dir, "SKILL.md"): if found_skill_md.parent.name == name: skill_dir = found_skill_md.parent skill_md = found_skill_md @@ -945,7 +952,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: break if not skill_md or not skill_md.exists(): - available = [s["name"] for s in 
_find_all_skills()[:20]] + available = [s["name"] for s in _sort_skills(_find_all_skills())[:20]] return json.dumps( { "success": False, From e020f46beccad23f80796d5f7d3a3dc7d4bbdb6f Mon Sep 17 00:00:00 2001 From: maelrx Date: Mon, 13 Apr 2026 15:12:55 -0300 Subject: [PATCH 031/264] fix(agent): preserve MiniMax context length on delta-only overflow --- run_agent.py | 23 ++++++++- tests/agent/test_model_metadata.py | 4 ++ tests/run_agent/test_run_agent.py | 83 ++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 3a26fdeadf..affcbbd721 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10575,9 +10575,30 @@ class AIAgent: # Error is about the INPUT being too large — reduce context_length. # Try to parse the actual limit from the error message parsed_limit = parse_context_limit_from_error(error_msg) + _provider_lower = (getattr(self, "provider", "") or "").lower() + _base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower() + is_minimax_provider = ( + _provider_lower in {"minimax", "minimax-cn"} + or _base_lower.startswith(( + "https://api.minimax.io/anthropic", + "https://api.minimaxi.com/anthropic", + )) + ) + minimax_delta_only_overflow = ( + is_minimax_provider + and parsed_limit is None + and "context window exceeds limit (" in error_msg + ) if parsed_limit and parsed_limit < old_ctx: new_ctx = parsed_limit - self._vprint(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True) + self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True) + elif minimax_delta_only_overflow: + new_ctx = old_ctx + self._vprint( + f"{self.log_prefix}Provider reported overflow amount only; " + f"keeping context_length at {old_ctx:,} tokens and compressing.", + force=True, + ) else: # Step down to the next probe tier new_ctx = get_next_probe_tier(old_ctx) diff --git 
a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 45e7160226..8c5261f48e 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -621,6 +621,10 @@ class TestParseContextLimitFromError: msg = "Error: context window of 4096 tokens exceeded" assert parse_context_limit_from_error(msg) == 4096 + def test_minimax_delta_only_message_returns_none(self): + msg = "invalid params, context window exceeds limit (2013)" + assert parse_context_limit_from_error(msg) is None + def test_completely_unrelated_error(self): assert parse_context_limit_from_error("Invalid API key") is None diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 8d5e21f11f..d8f33f67c3 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2575,6 +2575,89 @@ class TestRunConversation: assert result["final_response"] == "Recovered after compression" assert result["completed"] is True + def test_minimax_delta_overflow_keeps_known_context_length(self, agent): + """MiniMax reports overflow deltas like 'limit (2013)' without the real window. + + Keep the known 204,800-token window and compress instead of probing down + to the generic 128K fallback tier. 
+ """ + self._setup_agent(agent) + agent.provider = "minimax" + agent.model = "MiniMax-M2.7-highspeed" + agent.base_url = "https://api.minimax.io/anthropic" + agent.context_compressor.context_length = 204_800 + agent.context_compressor.threshold_tokens = int( + agent.context_compressor.context_length * agent.context_compressor.threshold_percent + ) + + err_400 = Exception( + "HTTP 400: invalid params, context window exceeds limit (2013)" + ) + err_400.status_code = 400 + ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [err_400, ok_resp] + prefill = [ + {"role": "user", "content": "previous question"}, + {"role": "assistant", "content": "previous answer"}, + ] + + with ( + patch.object(agent, "_compress_context") as mock_compress, + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + mock_compress.return_value = ( + [{"role": "user", "content": "hello"}], + "compressed system prompt", + ) + result = agent.run_conversation("hello", conversation_history=prefill) + + mock_compress.assert_called_once() + assert agent.context_compressor.context_length == 204_800 + assert agent.context_compressor._context_probed is False + assert result["final_response"] == "Recovered after compression" + assert result["completed"] is True + + def test_non_minimax_delta_overflow_still_probes_down(self, agent): + """Non-MiniMax providers should keep the generic probe-down behavior.""" + self._setup_agent(agent) + agent.provider = "openrouter" + agent.model = "some/unknown-model" + agent.base_url = "https://openrouter.ai/api/v1" + agent.context_compressor.context_length = 200_000 + agent.context_compressor.threshold_tokens = int( + agent.context_compressor.context_length * agent.context_compressor.threshold_percent + ) + + err_400 = Exception( + "HTTP 400: invalid params, context window exceeds limit (2013)" + ) + 
err_400.status_code = 400 + ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [err_400, ok_resp] + prefill = [ + {"role": "user", "content": "previous question"}, + {"role": "assistant", "content": "previous answer"}, + ] + + with ( + patch.object(agent, "_compress_context") as mock_compress, + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + mock_compress.return_value = ( + [{"role": "user", "content": "hello"}], + "compressed system prompt", + ) + result = agent.run_conversation("hello", conversation_history=prefill) + + mock_compress.assert_called_once() + assert agent.context_compressor.context_length == 128_000 + assert result["final_response"] == "Recovered after compression" + assert result["completed"] is True + def test_length_finish_reason_requests_continuation(self, agent): """Normal truncation (partial real content) triggers continuation.""" self._setup_agent(agent) From a0d8dd7ba30c193390c71360e94991f61f4c4ef3 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 14:05:45 -0700 Subject: [PATCH 032/264] chore(release): map eumael.mkt@gmail.com -> maelrx For release-notes attribution of PR #9170 (MiniMax context preservation). 
--- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index a168c921ba..02de85110b 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -409,6 +409,7 @@ AUTHOR_MAP = { "caliberoviv@gmail.com": "vivganes", "michaelfackerell@gmail.com": "MikeFac", "18024642@qq.com": "GuyCui", + "eumael.mkt@gmail.com": "maelrx", } From d0821b0573151fe795622972492a458cd1b55a3d Mon Sep 17 00:00:00 2001 From: hharry11 Date: Fri, 24 Apr 2026 00:27:41 +0300 Subject: [PATCH 033/264] fix(gateway): only clear locks belonging to the replaced process --- gateway/run.py | 7 ++- gateway/status.py | 37 +++++++++++++-- tests/gateway/test_runner_startup_failures.py | 10 +++- tests/gateway/test_status.py | 47 +++++++++++++++++++ 4 files changed, 95 insertions(+), 6 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 3eb932cc24..881f77cb71 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -10954,6 +10954,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = from gateway.status import ( acquire_gateway_runtime_lock, get_running_pid, + get_process_start_time, release_gateway_runtime_lock, remove_pid_file, terminate_pid, @@ -10961,6 +10962,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): if replace: + existing_start_time = get_process_start_time(existing_pid) logger.info( "Replacing existing gateway instance (PID %d) with --replace.", existing_pid, @@ -11029,7 +11031,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # leaving stale lock files that block the new gateway from starting. 
try: from gateway.status import release_all_scoped_locks - _released = release_all_scoped_locks() + _released = release_all_scoped_locks( + owner_pid=existing_pid, + owner_start_time=existing_start_time, + ) if _released: logger.info("Released %d stale scoped lock(s) from old gateway.", _released) except Exception: diff --git a/gateway/status.py b/gateway/status.py index 9e373564d4..7f7df182f5 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -113,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]: return None +def get_process_start_time(pid: int) -> Optional[int]: + """Public wrapper for retrieving a process start time when available.""" + return _get_process_start_time(pid) + + def _read_process_cmdline(pid: int) -> Optional[str]: """Return the process command line as a space-separated string.""" cmdline_path = Path(f"/proc/{pid}/cmdline") @@ -562,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None: pass -def release_all_scoped_locks() -> int: - """Remove all scoped lock files in the lock directory. +def release_all_scoped_locks( + *, + owner_pid: Optional[int] = None, + owner_start_time: Optional[int] = None, +) -> int: + """Remove scoped lock files in the lock directory. Called during --replace to clean up stale locks left by stopped/killed - gateway processes that did not release their locks gracefully. + gateway processes that did not release their locks gracefully. When an + ``owner_pid`` is provided, only lock records belonging to that gateway + process are removed. ``owner_start_time`` further narrows the match to + protect against PID reuse. + + When no owner is provided, preserves the legacy behavior and removes every + scoped lock file in the directory. + Returns the number of lock files removed. 
""" lock_dir = _get_lock_dir() removed = 0 if lock_dir.exists(): for lock_file in lock_dir.glob("*.lock"): + if owner_pid is not None: + record = _read_json_file(lock_file) + if not isinstance(record, dict): + continue + try: + record_pid = int(record.get("pid")) + except (TypeError, ValueError): + continue + if record_pid != owner_pid: + continue + if ( + owner_start_time is not None + and record.get("start_time") != owner_start_time + ): + continue try: lock_file.unlink(missing_ok=True) removed += 1 diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index 83ffc0d4d0..d94e466ec3 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -193,7 +193,10 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p _pid_state["alive"] = False monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid) monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file) - monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0) + monkeypatch.setattr( + "gateway.status.release_all_scoped_locks", + lambda **kwargs: 0, + ) monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force))) monkeypatch.setattr("gateway.run.os.getpid", lambda: 100) monkeypatch.setattr("gateway.run.os.kill", lambda pid, sig: None) @@ -267,7 +270,10 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm( _pid_state["alive"] = False monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid) monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file) - monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0) + monkeypatch.setattr( + "gateway.status.release_all_scoped_locks", + lambda **kwargs: 0, + ) monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker) 
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate) monkeypatch.setattr("gateway.run.os.getpid", lambda: 100) diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py index f2b6b1b1f3..e91bb6e419 100644 --- a/tests/gateway/test_status.py +++ b/tests/gateway/test_status.py @@ -404,6 +404,53 @@ class TestScopedLocks: status.release_scoped_lock("telegram-bot-token", "secret") assert not lock_path.exists() + def test_release_all_scoped_locks_can_target_single_owner(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks")) + lock_dir = tmp_path / "locks" + lock_dir.mkdir(parents=True, exist_ok=True) + + target_lock = lock_dir / "telegram-bot-token-target.lock" + other_lock = lock_dir / "slack-app-token-other.lock" + target_lock.write_text(json.dumps({ + "pid": 111, + "start_time": 222, + "kind": "hermes-gateway", + })) + other_lock.write_text(json.dumps({ + "pid": 999, + "start_time": 333, + "kind": "hermes-gateway", + })) + + removed = status.release_all_scoped_locks( + owner_pid=111, + owner_start_time=222, + ) + + assert removed == 1 + assert not target_lock.exists() + assert other_lock.exists() + + def test_release_all_scoped_locks_skips_pid_reuse_mismatch(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks")) + lock_dir = tmp_path / "locks" + lock_dir.mkdir(parents=True, exist_ok=True) + + reused_pid_lock = lock_dir / "telegram-bot-token-reused.lock" + reused_pid_lock.write_text(json.dumps({ + "pid": 111, + "start_time": 999, + "kind": "hermes-gateway", + })) + + removed = status.release_all_scoped_locks( + owner_pid=111, + owner_start_time=222, + ) + + assert removed == 0 + assert reused_pid_lock.exists() + class TestTakeoverMarker: """Tests for the --replace takeover marker. 
From 78d1e252faae2aecd9e65e4c52c5ca01e38a0abd Mon Sep 17 00:00:00 2001 From: sprmn24 Date: Fri, 24 Apr 2026 00:32:40 +0300 Subject: [PATCH 034/264] fix(web_server): guard GATEWAY_HEALTH_TIMEOUT against invalid env values float(os.getenv(...)) at module level raises ValueError on any non-numeric value, crashing the web server at import before it starts. Wrap in try/except with a warning log and fallback to 3.0s. --- hermes_cli/web_server.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index ca473b0a58..0ea5132f11 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -431,7 +431,14 @@ class EnvVarReveal(BaseModel): _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL") -_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3")) +try: + _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3")) +except (ValueError, TypeError): + _log.warning( + "Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s", + os.getenv("GATEWAY_HEALTH_TIMEOUT"), + ) + _GATEWAY_HEALTH_TIMEOUT = 3.0 def _probe_gateway_health() -> tuple[bool, dict | None]: From c7d023937c53a5df688cb486fd2c7b68fd879f92 Mon Sep 17 00:00:00 2001 From: MaxsolcuCrypto <162235745+MaxsolcuCrypto@users.noreply.github.com> Date: Thu, 23 Apr 2026 20:45:19 +0300 Subject: [PATCH 035/264] Update CONTRIBUTING.md --- CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0e00c3f2cb..146cb1161b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee We value contributions in this order: 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority. -2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere. +2. 
**Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere. 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations). 4. **Performance and robustness** — retry logic, error handling, graceful degradation. 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool) @@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl ## Cross-Platform Compatibility -Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS: +Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS: ### Critical rules @@ -597,7 +597,7 @@ refactor/description # Code restructuring 1. **Run tests**: `pytest tests/ -v` 2. **Test manually**: Run `hermes` and exercise the code path you changed -3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS +3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature. 
### PR description From 67c8f837fc8b93bc17bf9b1d674138ceab348d6e Mon Sep 17 00:00:00 2001 From: Jefferson Date: Thu, 23 Apr 2026 20:01:17 +0200 Subject: [PATCH 036/264] fix(mcp): per-process PID isolation prevents cross-session crash on restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _stdio_pids: set → Dict[int,str] tracks pid→server_name - SIGTERM-first with 2s grace before SIGKILL escalation - hasattr guard for SIGKILL on platforms without it - Updated tests for dict-based tracking and 3-phase kill sequence --- tests/tools/test_mcp_stability.py | 29 +++++++++++++------- tools/mcp_tool.py | 45 ++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/tests/tools/test_mcp_stability.py b/tests/tools/test_mcp_stability.py index e3827f0a58..7a500dad51 100644 --- a/tests/tools/test_mcp_stability.py +++ b/tests/tools/test_mcp_stability.py @@ -77,7 +77,7 @@ class TestStdioPidTracking: from tools.mcp_tool import _stdio_pids, _lock with _lock: # Might have residual state from other tests, just check type - assert isinstance(_stdio_pids, set) + assert isinstance(_stdio_pids, dict) def test_kill_orphaned_noop_when_empty(self): """_kill_orphaned_mcp_children does nothing when no PIDs tracked.""" @@ -96,7 +96,7 @@ class TestStdioPidTracking: # Use a PID that definitely doesn't exist fake_pid = 999999999 with _lock: - _stdio_pids.add(fake_pid) + _stdio_pids[fake_pid] = "test" # Should not raise (ProcessLookupError is caught) _kill_orphaned_mcp_children() @@ -105,40 +105,49 @@ class TestStdioPidTracking: assert fake_pid not in _stdio_pids def test_kill_orphaned_uses_sigkill_when_available(self, monkeypatch): - """Unix-like platforms should keep using SIGKILL for orphan cleanup.""" + """SIGTERM-first then SIGKILL after 2s for orphan cleanup.""" from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock fake_pid = 424242 with _lock: _stdio_pids.clear() - 
_stdio_pids.add(fake_pid) + _stdio_pids[fake_pid] = "test" fake_sigkill = 9 monkeypatch.setattr(signal, "SIGKILL", fake_sigkill, raising=False) - with patch("tools.mcp_tool.os.kill") as mock_kill: + with patch("tools.mcp_tool.os.kill") as mock_kill, \ + patch("time.sleep") as mock_sleep: _kill_orphaned_mcp_children() - mock_kill.assert_called_once_with(fake_pid, fake_sigkill) + # SIGTERM, then alive-check (signal 0), then SIGKILL + mock_kill.assert_any_call(fake_pid, signal.SIGTERM) + mock_kill.assert_any_call(fake_pid, 0) # alive check + mock_kill.assert_any_call(fake_pid, fake_sigkill) + assert mock_kill.call_count == 3 + mock_sleep.assert_called_once_with(2) with _lock: assert fake_pid not in _stdio_pids def test_kill_orphaned_falls_back_without_sigkill(self, monkeypatch): - """Windows-like signal modules without SIGKILL should fall back to SIGTERM.""" + """Without SIGKILL, SIGTERM is used for both phases.""" from tools.mcp_tool import _kill_orphaned_mcp_children, _stdio_pids, _lock fake_pid = 434343 with _lock: _stdio_pids.clear() - _stdio_pids.add(fake_pid) + _stdio_pids[fake_pid] = "test" monkeypatch.delattr(signal, "SIGKILL", raising=False) - with patch("tools.mcp_tool.os.kill") as mock_kill: + with patch("tools.mcp_tool.os.kill") as mock_kill, \ + patch("time.sleep") as mock_sleep: _kill_orphaned_mcp_children() - mock_kill.assert_called_once_with(fake_pid, signal.SIGTERM) + # SIGTERM phase, alive check raises (process gone), no escalation + mock_kill.assert_any_call(fake_pid, signal.SIGTERM) + assert mock_sleep.called with _lock: assert fake_pid not in _stdio_pids diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 2de4793387..efef5ea91a 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -967,7 +967,8 @@ class MCPServerTask: new_pids = _snapshot_child_pids() - pids_before if new_pids: with _lock: - _stdio_pids.update(new_pids) + for _pid in new_pids: + _stdio_pids[_pid] = self.name async with ClientSession(read_stream, write_stream, 
**sampling_kwargs) as session: await session.initialize() self.session = session @@ -980,7 +981,8 @@ class MCPServerTask: # Context exited cleanly — subprocess was terminated by the SDK. if new_pids: with _lock: - _stdio_pids.difference_update(new_pids) + for _pid in new_pids: + _stdio_pids.pop(_pid, None) async def _run_http(self, config: dict): """Run the server using HTTP/StreamableHTTP transport.""" @@ -1484,7 +1486,7 @@ _lock = threading.Lock() # them on shutdown if the graceful cleanup (SDK context-manager teardown) # fails or times out. PIDs are added after connection and removed on # normal server shutdown. -_stdio_pids: set = set() +_stdio_pids: Dict[int, str] = {} # pid -> server_name def _snapshot_child_pids() -> set: @@ -2618,27 +2620,44 @@ def shutdown_mcp_servers(): def _kill_orphaned_mcp_children() -> None: - """Best-effort kill of MCP stdio subprocesses that survived loop shutdown. + """Graceful shutdown of MCP stdio subprocesses that survived loop cleanup. - After the MCP event loop is stopped, stdio server subprocesses *should* - have been terminated by the SDK's context-manager cleanup. If the loop - was stuck or the shutdown timed out, orphaned children may remain. + Sends SIGTERM first, waits 2 seconds, then escalates to SIGKILL. + This prevents shared-resource collisions when multiple hermes processes + run on the same host (each has its own _stdio_pids dict). Only kills PIDs tracked in ``_stdio_pids`` — never arbitrary children. 
""" import signal as _signal - kill_signal = getattr(_signal, "SIGKILL", _signal.SIGTERM) + import time as _time with _lock: - pids = list(_stdio_pids) + pids = dict(_stdio_pids) _stdio_pids.clear() - for pid in pids: + # Phase 1: SIGTERM (graceful) + for pid, server_name in pids.items(): try: - os.kill(pid, kill_signal) - logger.debug("Force-killed orphaned MCP stdio process %d", pid) + os.kill(pid, _signal.SIGTERM) + logger.debug("Sent SIGTERM to orphaned MCP process %d (%s)", pid, server_name) except (ProcessLookupError, PermissionError, OSError): - pass # Already exited or inaccessible + pass + + # Phase 2: Wait for graceful exit + _time.sleep(2) + + # Phase 3: SIGKILL any survivors + _sigkill = getattr(_signal, "SIGKILL", _signal.SIGTERM) + for pid, server_name in pids.items(): + try: + os.kill(pid, 0) # Check if still alive + os.kill(pid, _sigkill) + logger.warning( + "Force-killed MCP process %d (%s) after SIGTERM timeout", + pid, server_name, + ) + except (ProcessLookupError, PermissionError, OSError): + pass # Good — exited after SIGTERM def _stop_mcp_loop(): From 83859b4da081139a9f458de33a69a9644088fa81 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 15:07:16 -0700 Subject: [PATCH 037/264] chore(release): map jefferson@heimdallstrategy.com -> Mind-Dragon --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 02de85110b..c5cf669d0e 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -44,6 +44,7 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", "343873859@qq.com": "DrStrangerUJN", + "jefferson@heimdallstrategy.com": "Mind-Dragon", # contributors (from noreply pattern) "david.vv@icloud.com": "davidvv", "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", From 4a0c02b7dcb0b513512e3104ac0f07b0b2f2e312 Mon Sep 17 00:00:00 2001 From: Yukipukii1 Date: Thu, 23 Apr 2026 22:36:07 +0300 Subject: [PATCH 038/264] 
fix(file_tools): resolve bookkeeping paths against live terminal cwd --- tests/tools/test_file_staleness.py | 55 +++++++++++++++++++++++++ tests/tools/test_resolve_path.py | 37 ++++++++++++++++- tools/file_tools.py | 64 +++++++++++++++++++++++------- 3 files changed, 139 insertions(+), 17 deletions(-) diff --git a/tests/tools/test_file_staleness.py b/tests/tools/test_file_staleness.py index 4d9136125f..dc5a1e7bd7 100644 --- a/tests/tools/test_file_staleness.py +++ b/tests/tools/test_file_staleness.py @@ -13,8 +13,10 @@ import os import tempfile import time import unittest +from types import SimpleNamespace from unittest.mock import patch, MagicMock +from tools import file_state from tools.file_tools import ( read_file_tool, write_file_tool, @@ -76,6 +78,7 @@ class TestStalenessCheck(unittest.TestCase): def setUp(self): _read_tracker.clear() + file_state.get_registry().clear() self._tmpdir = tempfile.mkdtemp() self._tmpfile = os.path.join(self._tmpdir, "stale_test.txt") with open(self._tmpfile, "w") as f: @@ -83,6 +86,7 @@ class TestStalenessCheck(unittest.TestCase): def tearDown(self): _read_tracker.clear() + file_state.get_registry().clear() try: os.unlink(self._tmpfile) os.rmdir(self._tmpdir) @@ -145,6 +149,53 @@ class TestStalenessCheck(unittest.TestCase): result = json.loads(write_file_tool(self._tmpfile, "new", task_id="task_b")) self.assertNotIn("_warning", result) + @patch("tools.file_tools._get_file_ops") + def test_relative_path_uses_live_cwd_for_staleness_tracking(self, mock_ops): + """Relative-path stale tracking must follow the live terminal cwd.""" + start_dir = os.path.join(self._tmpdir, "start") + live_dir = os.path.join(self._tmpdir, "worktree") + os.makedirs(start_dir, exist_ok=True) + os.makedirs(live_dir, exist_ok=True) + + start_file = os.path.join(start_dir, "shared.txt") + live_file = os.path.join(live_dir, "shared.txt") + with open(start_file, "w") as f: + f.write("start copy\n") + with open(live_file, "w") as f: + f.write("live copy\n") + + 
fake_ops = _make_fake_ops("live copy\n", 10) + fake_ops.env = SimpleNamespace(cwd=live_dir) + fake_ops.cwd = start_dir + mock_ops.return_value = fake_ops + + from tools import file_tools + + with file_tools._file_ops_lock: + previous = file_tools._file_ops_cache.get("live_task") + file_tools._file_ops_cache["live_task"] = fake_ops + + try: + with patch.dict(os.environ, {"TERMINAL_CWD": start_dir}, clear=False): + read_file_tool("shared.txt", task_id="live_task") + + time.sleep(0.05) + with open(live_file, "w") as f: + f.write("live copy modified elsewhere\n") + + result = json.loads( + write_file_tool("shared.txt", "replacement", task_id="live_task") + ) + finally: + with file_tools._file_ops_lock: + if previous is None: + file_tools._file_ops_cache.pop("live_task", None) + else: + file_tools._file_ops_cache["live_task"] = previous + + self.assertIn("_warning", result) + self.assertIn("modified since you last read", result["_warning"]) + # --------------------------------------------------------------------------- # Staleness in patch @@ -154,6 +205,7 @@ class TestPatchStaleness(unittest.TestCase): def setUp(self): _read_tracker.clear() + file_state.get_registry().clear() self._tmpdir = tempfile.mkdtemp() self._tmpfile = os.path.join(self._tmpdir, "patch_test.txt") with open(self._tmpfile, "w") as f: @@ -161,6 +213,7 @@ class TestPatchStaleness(unittest.TestCase): def tearDown(self): _read_tracker.clear() + file_state.get_registry().clear() try: os.unlink(self._tmpfile) os.rmdir(self._tmpdir) @@ -207,9 +260,11 @@ class TestCheckFileStalenessHelper(unittest.TestCase): def setUp(self): _read_tracker.clear() + file_state.get_registry().clear() def tearDown(self): _read_tracker.clear() + file_state.get_registry().clear() def test_returns_none_for_unknown_task(self): self.assertIsNone(_check_file_staleness("/tmp/x.py", "nonexistent")) diff --git a/tests/tools/test_resolve_path.py b/tests/tools/test_resolve_path.py index beea3cc40f..cd4d868961 100644 --- 
a/tests/tools/test_resolve_path.py +++ b/tests/tools/test_resolve_path.py @@ -2,6 +2,7 @@ import os from pathlib import Path +from types import SimpleNamespace import pytest @@ -22,8 +23,9 @@ class TestResolvePath: monkeypatch.setenv("TERMINAL_CWD", str(tmp_path)) from tools.file_tools import _resolve_path - result = _resolve_path("/etc/hosts") - assert result == Path("/etc/hosts") + absolute = (tmp_path / "already-absolute.txt").resolve() + result = _resolve_path(str(absolute)) + assert result == absolute def test_falls_back_to_cwd_without_terminal_cwd(self, monkeypatch): """Without TERMINAL_CWD, falls back to os.getcwd().""" @@ -50,3 +52,34 @@ class TestResolvePath: result = _resolve_path("a/../b/file.txt") assert ".." not in str(result) assert result == (tmp_path / "b" / "file.txt") + + def test_relative_path_prefers_live_file_ops_cwd(self, monkeypatch, tmp_path): + """Live env.cwd must win after the terminal session changes directory.""" + start_dir = tmp_path / "start" + live_dir = tmp_path / "worktree" + start_dir.mkdir() + live_dir.mkdir() + monkeypatch.setenv("TERMINAL_CWD", str(start_dir)) + + from tools import file_tools + + task_id = "live-cwd" + fake_ops = SimpleNamespace( + env=SimpleNamespace(cwd=str(live_dir)), + cwd=str(start_dir), + ) + + with file_tools._file_ops_lock: + previous = file_tools._file_ops_cache.get(task_id) + file_tools._file_ops_cache[task_id] = fake_ops + + try: + result = file_tools._resolve_path("nested/file.txt", task_id=task_id) + finally: + with file_tools._file_ops_lock: + if previous is None: + file_tools._file_ops_cache.pop(task_id, None) + else: + file_tools._file_ops_cache[task_id] = previous + + assert result == live_dir / "nested" / "file.txt" diff --git a/tools/file_tools.py b/tools/file_tools.py index 3b6f459422..609506c05e 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -79,13 +79,45 @@ _BLOCKED_DEVICE_PATHS = frozenset({ }) -def _resolve_path(filepath: str) -> Path: +def _resolve_path(filepath: str, 
task_id: str = "default") -> Path: """Resolve a path relative to TERMINAL_CWD (the worktree base directory) instead of the main repository root. """ + return _resolve_path_for_task(filepath, task_id) + + +def _get_live_tracking_cwd(task_id: str = "default") -> str | None: + """Return the task's live terminal cwd for bookkeeping when available.""" + with _file_ops_lock: + cached = _file_ops_cache.get(task_id) + if cached is not None: + live_cwd = getattr(getattr(cached, "env", None), "cwd", None) or getattr( + cached, "cwd", None + ) + if live_cwd: + return live_cwd + + try: + from tools.terminal_tool import _active_environments, _env_lock + + with _env_lock: + env = _active_environments.get(task_id) + live_cwd = getattr(env, "cwd", None) if env is not None else None + if live_cwd: + return live_cwd + except Exception: + pass + + return None + + +def _resolve_path_for_task(filepath: str, task_id: str = "default") -> Path: + """Resolve *filepath* against the task's live terminal cwd when possible.""" p = Path(filepath).expanduser() if not p.is_absolute(): - base = os.environ.get("TERMINAL_CWD", os.getcwd()) + base = _get_live_tracking_cwd(task_id) or os.environ.get( + "TERMINAL_CWD", os.getcwd() + ) p = Path(base) / p return p.resolve() @@ -118,10 +150,10 @@ _SENSITIVE_PATH_PREFIXES = ( _SENSITIVE_EXACT_PATHS = {"/var/run/docker.sock", "/run/docker.sock"} -def _check_sensitive_path(filepath: str) -> str | None: +def _check_sensitive_path(filepath: str, task_id: str = "default") -> str | None: """Return an error message if the path targets a sensitive system location.""" try: - resolved = str(_resolve_path(filepath)) + resolved = str(_resolve_path_for_task(filepath, task_id)) except (OSError, ValueError): resolved = filepath normalized = os.path.normpath(os.path.expanduser(filepath)) @@ -368,7 +400,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = ), }) - _resolved = _resolve_path(path) + _resolved = _resolve_path_for_task(path, 
task_id) # ── Binary file guard ───────────────────────────────────────── # Block binary files by extension (no I/O). @@ -574,7 +606,7 @@ def _update_read_timestamp(filepath: str, task_id: str) -> None: refreshes the stored timestamp to match the file's new state. """ try: - resolved = str(_resolve_path(filepath)) + resolved = str(_resolve_path_for_task(filepath, task_id)) current_mtime = os.path.getmtime(resolved) except (OSError, ValueError): return @@ -593,7 +625,7 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None: or was never read. Does not block — the write still proceeds. """ try: - resolved = str(_resolve_path(filepath)) + resolved = str(_resolve_path_for_task(filepath, task_id)) except (OSError, ValueError): return None with _read_tracker_lock: @@ -618,7 +650,7 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None: def write_file_tool(path: str, content: str, task_id: str = "default") -> str: """Write content to a file.""" - sensitive_err = _check_sensitive_path(path) + sensitive_err = _check_sensitive_path(path, task_id) if sensitive_err: return tool_error(sensitive_err) try: @@ -626,7 +658,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str: # fall back to the legacy path — write proceeds, per-task staleness # check below still runs. 
try: - _resolved = str(_resolve_path(path)) + _resolved = str(_resolve_path_for_task(path, task_id)) except Exception: _resolved = None @@ -681,7 +713,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, for _m in _re.finditer(r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$', patch, _re.MULTILINE): _paths_to_check.append(_m.group(1).strip()) for _p in _paths_to_check: - sensitive_err = _check_sensitive_path(_p) + sensitive_err = _check_sensitive_path(_p, task_id) if sensitive_err: return tool_error(sensitive_err) try: @@ -692,7 +724,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, _seen: set[str] = set() for _p in _paths_to_check: try: - _r = str(_resolve_path(_p)) + _r = str(_resolve_path_for_task(_p, task_id)) except Exception: _r = None if _r and _r not in _seen: @@ -714,7 +746,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, _path_to_resolved: dict[str, str] = {} for _p in _paths_to_check: try: - _r = str(_resolve_path(_p)) + _r = str(_resolve_path_for_task(_p, task_id)) except Exception: _r = None _path_to_resolved[_p] = _r @@ -749,15 +781,17 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, _r = _path_to_resolved.get(_p) if _r: file_state.note_write(task_id, _r) - result_json = json.dumps(result_dict, ensure_ascii=False) # Hint when old_string not found — saves iterations where the agent # retries with stale content instead of re-reading the file. # Suppressed when patch_replace already attached a rich "Did you mean?" # snippet (which is strictly more useful than the generic hint). if result_dict.get("error") and "Could not find" in str(result_dict["error"]): if "Did you mean one of these sections?" not in str(result_dict["error"]): - result_json += "\n\n[Hint: old_string not found. 
Use read_file to verify the current content, or search_files to locate the text.]" - return result_json + result_dict["_hint"] = ( + "old_string not found. Use read_file to verify the current " + "content, or search_files to locate the text." + ) + return json.dumps(result_dict, ensure_ascii=False) except Exception as e: return tool_error(str(e)) From a1ff6b45eaf7f4876315e8f8da61b2ec26b49674 Mon Sep 17 00:00:00 2001 From: Magaav <73175452+Magaav@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:09:04 -0700 Subject: [PATCH 039/264] fix(gateway/discord): add safe startup slash sync policy Replaces blind tree.sync() on every Discord reconnect with a diff-based reconcile. In safe mode (default), fetch existing global commands, compare desired vs existing payloads, skip unchanged, PATCH changed, recreate when non-patchable metadata differs, POST missing, and delete stale commands one-by-one. Keeps 'bulk' for legacy behavior and 'off' to skip startup sync entirely. Fixes restart-heavy workflows that burn Discord's command write budget and can surface 429s when iterating on native slash commands. Env var: DISCORD_COMMAND_SYNC_POLICY (safe|bulk|off), default 'safe'. 
Co-authored-by: Codex --- gateway/platforms/discord.py | 161 +++++++++++- tests/gateway/test_discord_connect.py | 245 ++++++++++++++++++ .../docs/reference/environment-variables.md | 1 + website/docs/user-guide/messaging/discord.md | 2 +- 4 files changed, 406 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index a148c5f4b9..3587f661de 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any logger = logging.getLogger(__name__) VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080} +_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"} try: import discord @@ -802,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter): if not self._client: return try: - synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30) - logger.info("[%s] Synced %d slash command(s)", self.name, len(synced)) + sync_policy = self._get_discord_command_sync_policy() + if sync_policy == "off": + logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name) + return + + if sync_policy == "bulk": + synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30) + logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced)) + return + + summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30) + logger.info( + "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d", + self.name, + summary["total"], + summary["unchanged"], + summary["updated"], + summary["recreated"], + summary["created"], + summary["deleted"], + ) except asyncio.TimeoutError: logger.warning("[%s] Slash command sync timed out after 30s", self.name) except asyncio.CancelledError: @@ -811,6 +831,143 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: # pragma: no cover - defensive logging logger.warning("[%s] Slash command sync failed: %s", 
self.name, e, exc_info=True) + def _get_discord_command_sync_policy(self) -> str: + raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower() + if raw in _DISCORD_COMMAND_SYNC_POLICIES: + return raw + if raw: + logger.warning( + "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'", + self.name, + raw, + ) + return "safe" + + def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]: + """Reduce command payloads to the semantic fields Hermes manages.""" + return { + "type": int(payload.get("type", 1) or 1), + "name": str(payload.get("name", "") or ""), + "description": str(payload.get("description", "") or ""), + "default_member_permissions": payload.get("default_member_permissions"), + "dm_permission": payload.get("dm_permission", True), + "nsfw": bool(payload.get("nsfw", False)), + "options": [ + self._canonicalize_app_command_option(item) + for item in payload.get("options", []) or [] + if isinstance(item, dict) + ], + } + + def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]: + return { + "type": int(payload.get("type", 0) or 0), + "name": str(payload.get("name", "") or ""), + "description": str(payload.get("description", "") or ""), + "required": bool(payload.get("required", False)), + "autocomplete": bool(payload.get("autocomplete", False)), + "choices": [ + { + "name": str(choice.get("name", "") or ""), + "value": choice.get("value"), + } + for choice in payload.get("choices", []) or [] + if isinstance(choice, dict) + ], + "channel_types": list(payload.get("channel_types", []) or []), + "min_value": payload.get("min_value"), + "max_value": payload.get("max_value"), + "min_length": payload.get("min_length"), + "max_length": payload.get("max_length"), + "options": [ + self._canonicalize_app_command_option(item) + for item in payload.get("options", []) or [] + if isinstance(item, dict) + ], + } + + def _patchable_app_command_payload(self, payload: Dict[str, 
Any]) -> Dict[str, Any]: + """Fields supported by discord.py's edit_global_command route.""" + canonical = self._canonicalize_app_command_payload(payload) + return { + "name": canonical["name"], + "description": canonical["description"], + "options": canonical["options"], + } + + async def _safe_sync_slash_commands(self) -> Dict[str, int]: + """Diff existing global commands and only mutate the commands that changed.""" + if not self._client: + return { + "total": 0, + "unchanged": 0, + "updated": 0, + "recreated": 0, + "created": 0, + "deleted": 0, + } + + tree = self._client.tree + app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None) + if not app_id: + raise RuntimeError("Discord application ID is unavailable for slash command sync") + + desired_payloads = [command.to_dict(tree) for command in tree.get_commands()] + desired_by_key = { + (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload + for payload in desired_payloads + } + existing_commands = await tree.fetch_commands() + existing_by_key = { + ( + int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1), + str(command.name or "").lower(), + ): command + for command in existing_commands + } + + unchanged = 0 + updated = 0 + recreated = 0 + created = 0 + deleted = 0 + http = self._client.http + + for key, desired in desired_by_key.items(): + current = existing_by_key.pop(key, None) + if current is None: + await http.upsert_global_command(app_id, desired) + created += 1 + continue + + current_payload = self._canonicalize_app_command_payload(current.to_dict()) + desired_payload = self._canonicalize_app_command_payload(desired) + if current_payload == desired_payload: + unchanged += 1 + continue + + if self._patchable_app_command_payload(current.to_dict()) == self._patchable_app_command_payload(desired): + await http.delete_global_command(app_id, current.id) + await 
http.upsert_global_command(app_id, desired) + recreated += 1 + continue + + await http.edit_global_command(app_id, current.id, desired) + updated += 1 + + for current in existing_by_key.values(): + await http.delete_global_command(app_id, current.id) + deleted += 1 + + return { + "total": len(desired_payloads), + "unchanged": unchanged, + "updated": updated, + "recreated": recreated, + "created": created, + "deleted": deleted, + } + async def _add_reaction(self, message: Any, emoji: str) -> bool: """Add an emoji reaction to a Discord message.""" if not message or not hasattr(message, "add_reaction"): diff --git a/tests/gateway/test_discord_connect.py b/tests/gateway/test_discord_connect.py index 0ac1c9ba39..35a57f2ac5 100644 --- a/tests/gateway/test_discord_connect.py +++ b/tests/gateway/test_discord_connect.py @@ -73,18 +73,29 @@ from gateway.platforms.discord import DiscordAdapter # noqa: E402 class FakeTree: def __init__(self): self.sync = AsyncMock(return_value=[]) + self.fetch_commands = AsyncMock(return_value=[]) + self._commands = [] def command(self, *args, **kwargs): return lambda fn: fn + def get_commands(self, *args, **kwargs): + return list(self._commands) + class FakeBot: def __init__(self, *, intents, proxy=None, allowed_mentions=None, **_): self.intents = intents self.allowed_mentions = allowed_mentions + self.application_id = 999 self.user = SimpleNamespace(id=999, name="Hermes") self._events = {} self.tree = FakeTree() + self.http = SimpleNamespace( + upsert_global_command=AsyncMock(), + edit_global_command=AsyncMock(), + delete_global_command=AsyncMock(), + ) def event(self, fn): self._events[fn.__name__] = fn @@ -199,6 +210,7 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch): async def test_connect_does_not_wait_for_slash_sync(monkeypatch): adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "bulk") 
monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None)) monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None) @@ -226,3 +238,236 @@ async def test_connect_does_not_wait_for_slash_sync(monkeypatch): created["bot"].tree.allow_finish.set() await asyncio.sleep(0) await adapter.disconnect() + + +@pytest.mark.asyncio +async def test_connect_respects_slash_commands_opt_out(monkeypatch): + adapter = DiscordAdapter( + PlatformConfig(enabled=True, token="test-token", extra={"slash_commands": False}) + ) + + monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off") + monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None)) + monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None) + + intents = SimpleNamespace(message_content=False, dm_messages=False, guild_messages=False, members=False, voice_states=False) + monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents) + monkeypatch.setattr( + discord_platform.commands, + "Bot", + lambda **kwargs: FakeBot( + intents=kwargs["intents"], + proxy=kwargs.get("proxy"), + allowed_mentions=kwargs.get("allowed_mentions"), + ), + ) + register_mock = MagicMock() + monkeypatch.setattr(adapter, "_register_slash_commands", register_mock) + monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock()) + + ok = await adapter.connect() + + assert ok is True + register_mock.assert_not_called() + + await adapter.disconnect() + + +@pytest.mark.asyncio +async def test_safe_sync_slash_commands_only_mutates_diffs(): + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + + class _DesiredCommand: + def __init__(self, payload): + self._payload = payload + + def to_dict(self, tree): + assert tree is not None + return dict(self._payload) + + class _ExistingCommand: + def __init__(self, command_id, payload): + self.id = command_id + 
self.name = payload["name"] + self.type = SimpleNamespace(value=payload["type"]) + self._payload = payload + + def to_dict(self): + return { + "id": self.id, + "application_id": 999, + **self._payload, + "name_localizations": {}, + "description_localizations": {}, + } + + desired_same = { + "name": "status", + "description": "Show Hermes session status", + "type": 1, + "options": [], + "nsfw": False, + "dm_permission": True, + "default_member_permissions": None, + } + desired_updated = { + "name": "help", + "description": "Show available commands", + "type": 1, + "options": [], + "nsfw": False, + "dm_permission": True, + "default_member_permissions": None, + } + desired_created = { + "name": "metricas", + "description": "Show Colmeio metrics dashboard", + "type": 1, + "options": [], + "nsfw": False, + "dm_permission": True, + "default_member_permissions": None, + } + existing_same = _ExistingCommand(11, desired_same) + existing_updated = _ExistingCommand( + 12, + { + **desired_updated, + "description": "Old help text", + }, + ) + existing_deleted = _ExistingCommand( + 13, + { + "name": "old-command", + "description": "To be deleted", + "type": 1, + "options": [], + "nsfw": False, + "dm_permission": True, + "default_member_permissions": None, + }, + ) + + fake_tree = SimpleNamespace( + get_commands=lambda: [ + _DesiredCommand(desired_same), + _DesiredCommand(desired_updated), + _DesiredCommand(desired_created), + ], + fetch_commands=AsyncMock(return_value=[existing_same, existing_updated, existing_deleted]), + ) + fake_http = SimpleNamespace( + upsert_global_command=AsyncMock(), + edit_global_command=AsyncMock(), + delete_global_command=AsyncMock(), + ) + adapter._client = SimpleNamespace( + tree=fake_tree, + http=fake_http, + application_id=999, + user=SimpleNamespace(id=999), + ) + + summary = await adapter._safe_sync_slash_commands() + + assert summary == { + "total": 3, + "unchanged": 1, + "updated": 1, + "recreated": 0, + "created": 1, + "deleted": 1, + } + 
fake_http.edit_global_command.assert_awaited_once_with(999, 12, desired_updated) + fake_http.upsert_global_command.assert_awaited_once_with(999, desired_created) + fake_http.delete_global_command.assert_awaited_once_with(999, 13) + + +@pytest.mark.asyncio +async def test_safe_sync_slash_commands_recreates_metadata_only_diffs(): + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + + class _DesiredCommand: + def __init__(self, payload): + self._payload = payload + + def to_dict(self, tree): + assert tree is not None + return dict(self._payload) + + class _ExistingCommand: + def __init__(self, command_id, payload): + self.id = command_id + self.name = payload["name"] + self.type = SimpleNamespace(value=payload["type"]) + self._payload = payload + + def to_dict(self): + return { + "id": self.id, + "application_id": 999, + **self._payload, + "name_localizations": {}, + "description_localizations": {}, + } + + desired = { + "name": "help", + "description": "Show available commands", + "type": 1, + "options": [], + "nsfw": False, + "dm_permission": True, + "default_member_permissions": "8", + } + existing = _ExistingCommand( + 12, + { + **desired, + "default_member_permissions": None, + }, + ) + + fake_tree = SimpleNamespace( + get_commands=lambda: [_DesiredCommand(desired)], + fetch_commands=AsyncMock(return_value=[existing]), + ) + fake_http = SimpleNamespace( + upsert_global_command=AsyncMock(), + edit_global_command=AsyncMock(), + delete_global_command=AsyncMock(), + ) + adapter._client = SimpleNamespace( + tree=fake_tree, + http=fake_http, + application_id=999, + user=SimpleNamespace(id=999), + ) + + summary = await adapter._safe_sync_slash_commands() + + assert summary == { + "total": 1, + "unchanged": 0, + "updated": 0, + "recreated": 1, + "created": 0, + "deleted": 0, + } + fake_http.edit_global_command.assert_not_awaited() + fake_http.delete_global_command.assert_awaited_once_with(999, 12) + 
fake_http.upsert_global_command.assert_awaited_once_with(999, desired) + + +@pytest.mark.asyncio +async def test_post_connect_initialization_skips_sync_when_policy_off(monkeypatch): + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off") + + fake_tree = SimpleNamespace(sync=AsyncMock()) + adapter._client = SimpleNamespace(tree=fake_tree) + + await adapter._run_post_connect_initialization() + + fake_tree.sync.assert_not_called() diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 886db482c4..42280b4df2 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -206,6 +206,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI | `DISCORD_PROXY` | Proxy URL for Discord connections — overrides `HTTPS_PROXY`. Supports `http://`, `https://`, `socks5://` | | `DISCORD_HOME_CHANNEL` | Default Discord channel for cron delivery | | `DISCORD_HOME_CHANNEL_NAME` | Display name for the Discord home channel | +| `DISCORD_COMMAND_SYNC_POLICY` | Discord slash-command startup sync policy: `safe` (diff and reconcile), `bulk` (legacy `tree.sync()`), or `off` | | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels | | `DISCORD_FREE_RESPONSE_CHANNELS` | Comma-separated channel IDs where mention is not required | | `DISCORD_AUTO_THREAD` | Auto-thread long replies when supported | diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 2a38b9798c..d2b06f0237 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -275,6 +275,7 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede | `DISCORD_ALLOWED_ROLES` | No | — | Comma-separated Discord role IDs. 
Any member with one of these roles is authorized — OR semantics with `DISCORD_ALLOWED_USERS`. Auto-enables the **Server Members Intent** on connect. Useful when moderation teams churn: new mods get access as soon as the role is granted, no config push needed. | | `DISCORD_HOME_CHANNEL` | No | — | Channel ID where the bot sends proactive messages (cron output, reminders, notifications). | | `DISCORD_HOME_CHANNEL_NAME` | No | `"Home"` | Display name for the home channel in logs and status output. | +| `DISCORD_COMMAND_SYNC_POLICY` | No | `"safe"` | Controls native slash-command startup sync. `"safe"` diffs existing global commands and only updates what changed, recreating commands when Discord metadata changes cannot be applied via patch. `"bulk"` preserves the old `tree.sync()` behavior. `"off"` skips startup sync entirely. | | `DISCORD_REQUIRE_MENTION` | No | `true` | When `true`, the bot only responds in server channels when `@mentioned`. Set to `false` to respond to all messages in every channel. | | `DISCORD_FREE_RESPONSE_CHANNELS` | No | — | Comma-separated channel IDs where the bot responds without requiring an `@mention`, even when `DISCORD_REQUIRE_MENTION` is `true`. | | `DISCORD_IGNORE_NO_MENTION` | No | `true` | When `true`, the bot stays silent if a message `@mentions` other users but does **not** mention the bot. Prevents the bot from jumping into conversations directed at other people. Only applies in server channels, not DMs. | @@ -628,4 +629,3 @@ Leave `everyone` and `roles` at `false` unless you know exactly why you need the For more information on securing your Hermes Agent deployment, see the [Security Guide](../security.md). 
- From b61ac8964b8889840a4841cf617e1a8b4de7763c Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 15:11:12 -0700 Subject: [PATCH 040/264] fix(gateway/discord): read permission attrs from AppCommand, canonicalize contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to Magaav's safe sync policy. Two gaps in the canonicalizer caused false diffs or silent drift: 1. discord.py's AppCommand.to_dict() omits nsfw, dm_permission, and default_member_permissions — those live only on attributes. The canonicalizer was reading them via payload.get() and getting defaults (False/True/None), while the desired side from Command.to_dict(tree) had the real values. Any command using non-default permissions false-diffed on every startup. Pull them from the AppCommand attributes via _existing_command_to_payload(). 2. contexts and integration_types weren't canonicalized at all, so drift in either was silently ignored. Added both to _canonicalize_app_command_payload (sorted for stable compare). Also normalized default_member_permissions to str-or-None since the server emits strings but discord.py stores ints locally. Added regression tests for both gaps. 
--- gateway/platforms/discord.py | 48 ++++++- tests/gateway/test_discord_connect.py | 184 ++++++++++++++++++++++++++ 2 files changed, 228 insertions(+), 4 deletions(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 3587f661de..f741d45b5f 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -845,13 +845,21 @@ class DiscordAdapter(BasePlatformAdapter): def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]: """Reduce command payloads to the semantic fields Hermes manages.""" + contexts = payload.get("contexts") + integration_types = payload.get("integration_types") return { "type": int(payload.get("type", 1) or 1), "name": str(payload.get("name", "") or ""), "description": str(payload.get("description", "") or ""), - "default_member_permissions": payload.get("default_member_permissions"), - "dm_permission": payload.get("dm_permission", True), + "default_member_permissions": self._normalize_permissions( + payload.get("default_member_permissions") + ), + "dm_permission": bool(payload.get("dm_permission", True)), "nsfw": bool(payload.get("nsfw", False)), + "contexts": sorted(int(c) for c in contexts) if contexts else None, + "integration_types": ( + sorted(int(i) for i in integration_types) if integration_types else None + ), "options": [ self._canonicalize_app_command_option(item) for item in payload.get("options", []) or [] @@ -859,6 +867,37 @@ class DiscordAdapter(BasePlatformAdapter): ], } + @staticmethod + def _normalize_permissions(value: Any) -> Optional[str]: + """Discord emits default_member_permissions as str server-side but discord.py + sets it as int locally. Normalize to str-or-None so the comparison is stable.""" + if value is None: + return None + return str(value) + + def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]: + """Build a canonical-ready dict from an AppCommand. 
+ + discord.py's AppCommand.to_dict() does NOT include nsfw, + dm_permission, or default_member_permissions (they live only on the + attributes). Pull them from the attributes so the canonicalizer sees + the real server-side values instead of defaults — otherwise any + command using non-default permissions would diff on every startup. + """ + payload = dict(command.to_dict()) + nsfw = getattr(command, "nsfw", None) + if nsfw is not None: + payload["nsfw"] = bool(nsfw) + guild_only = getattr(command, "guild_only", None) + if guild_only is not None: + payload["dm_permission"] = not bool(guild_only) + default_permissions = getattr(command, "default_member_permissions", None) + if default_permissions is not None: + payload["default_member_permissions"] = getattr( + default_permissions, "value", default_permissions + ) + return payload + def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]: return { "type": int(payload.get("type", 0) or 0), @@ -940,13 +979,14 @@ class DiscordAdapter(BasePlatformAdapter): created += 1 continue - current_payload = self._canonicalize_app_command_payload(current.to_dict()) + current_existing_payload = self._existing_command_to_payload(current) + current_payload = self._canonicalize_app_command_payload(current_existing_payload) desired_payload = self._canonicalize_app_command_payload(desired) if current_payload == desired_payload: unchanged += 1 continue - if self._patchable_app_command_payload(current.to_dict()) == self._patchable_app_command_payload(desired): + if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired): await http.delete_global_command(app_id, current.id) await http.upsert_global_command(app_id, desired) recreated += 1 diff --git a/tests/gateway/test_discord_connect.py b/tests/gateway/test_discord_connect.py index 35a57f2ac5..d769d3f445 100644 --- a/tests/gateway/test_discord_connect.py +++ b/tests/gateway/test_discord_connect.py @@ 
-471,3 +471,187 @@ async def test_post_connect_initialization_skips_sync_when_policy_off(monkeypatc await adapter._run_post_connect_initialization() fake_tree.sync.assert_not_called() + + +@pytest.mark.asyncio +async def test_safe_sync_reads_permission_attrs_from_existing_command(): + """Regression: AppCommand.to_dict() in discord.py does NOT include + nsfw, dm_permission, or default_member_permissions — they live only + on the attributes. Without reading those attrs, any command with + non-default permissions false-diffs on every startup. + """ + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + + class _DesiredCommand: + def __init__(self, payload): + self._payload = payload + + def to_dict(self, tree): + return dict(self._payload) + + class _ExistingCommand: + """Mirrors discord.py's AppCommand — to_dict() omits nsfw/dm/perms.""" + + def __init__(self, command_id, name, description, *, nsfw, guild_only, default_permissions): + self.id = command_id + self.name = name + self.description = description + self.type = SimpleNamespace(value=1) + self.nsfw = nsfw + self.guild_only = guild_only + self.default_member_permissions = ( + SimpleNamespace(value=default_permissions) + if default_permissions is not None + else None + ) + + def to_dict(self): + # Match real AppCommand.to_dict() — no nsfw/dm_permission/default_member_permissions + return { + "id": self.id, + "type": 1, + "application_id": 999, + "name": self.name, + "description": self.description, + "name_localizations": {}, + "description_localizations": {}, + "options": [], + } + + desired = { + "name": "admin", + "description": "Admin-only command", + "type": 1, + "options": [], + "nsfw": True, + "dm_permission": False, + "default_member_permissions": "8", + } + # Existing command has matching attrs — should report unchanged, NOT falsely diff. 
+ existing = _ExistingCommand( + 42, + "admin", + "Admin-only command", + nsfw=True, + guild_only=True, + default_permissions=8, + ) + + fake_tree = SimpleNamespace( + get_commands=lambda: [_DesiredCommand(desired)], + fetch_commands=AsyncMock(return_value=[existing]), + ) + fake_http = SimpleNamespace( + upsert_global_command=AsyncMock(), + edit_global_command=AsyncMock(), + delete_global_command=AsyncMock(), + ) + adapter._client = SimpleNamespace( + tree=fake_tree, + http=fake_http, + application_id=999, + user=SimpleNamespace(id=999), + ) + + summary = await adapter._safe_sync_slash_commands() + + # Without the fix, this would be unchanged=0, recreated=1 (false diff). + assert summary == { + "total": 1, + "unchanged": 1, + "updated": 0, + "recreated": 0, + "created": 0, + "deleted": 0, + } + fake_http.edit_global_command.assert_not_awaited() + fake_http.delete_global_command.assert_not_awaited() + fake_http.upsert_global_command.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_safe_sync_detects_contexts_drift(): + """Regression: contexts and integration_types must be canonicalized + so drift in those fields triggers reconciliation. Without this, the + diff silently reports 'unchanged' and never reconciles. 
+ """ + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + + class _DesiredCommand: + def __init__(self, payload): + self._payload = payload + + def to_dict(self, tree): + return dict(self._payload) + + class _ExistingCommand: + def __init__(self, command_id, payload): + self.id = command_id + self.name = payload["name"] + self.description = payload["description"] + self.type = SimpleNamespace(value=1) + self.nsfw = payload.get("nsfw", False) + self.guild_only = not payload.get("dm_permission", True) + self.default_member_permissions = None + self._payload = payload + + def to_dict(self): + return { + "id": self.id, + "type": 1, + "application_id": 999, + "name": self.name, + "description": self.description, + "name_localizations": {}, + "description_localizations": {}, + "options": [], + "contexts": self._payload.get("contexts"), + "integration_types": self._payload.get("integration_types"), + } + + desired = { + "name": "help", + "description": "Show available commands", + "type": 1, + "options": [], + "nsfw": False, + "dm_permission": True, + "default_member_permissions": None, + "contexts": [0, 1, 2], + "integration_types": [0, 1], + } + existing = _ExistingCommand( + 77, + { + **desired, + "contexts": [0], # server-side only + "integration_types": [0], + }, + ) + + fake_tree = SimpleNamespace( + get_commands=lambda: [_DesiredCommand(desired)], + fetch_commands=AsyncMock(return_value=[existing]), + ) + fake_http = SimpleNamespace( + upsert_global_command=AsyncMock(), + edit_global_command=AsyncMock(), + delete_global_command=AsyncMock(), + ) + adapter._client = SimpleNamespace( + tree=fake_tree, + http=fake_http, + application_id=999, + user=SimpleNamespace(id=999), + ) + + summary = await adapter._safe_sync_slash_commands() + + # contexts and integration_types are not patchable by + # edit_global_command, so the command must be recreated. 
+ assert summary["unchanged"] == 0 + assert summary["recreated"] == 1 + assert summary["updated"] == 0 + fake_http.edit_global_command.assert_not_awaited() + fake_http.delete_global_command.assert_awaited_once_with(999, 77) + fake_http.upsert_global_command.assert_awaited_once_with(999, desired) From 692ae6dd073b4e9fd92f3ef7bc935cef496e8fc8 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:12:04 -0700 Subject: [PATCH 041/264] docs(readme): fix stale RL submodule instructions, skills table row, test runner (#14758) - Drop broken tinker-atropos submodule instructions: no .gitmodules exists, tinker-atropos/ is empty, and atroposlib + tinker are regular pip deps in pyproject.toml pulled in by .[all,dev]. Replace with a one-line note. - CLI vs Messaging table: /skills is cli_only=True in COMMAND_REGISTRY, so remove it from the messaging column. / still works there. - Point contributors at scripts/run_tests.sh (the canonical runner enforcing CI-parity env) instead of bare pytest. 
--- README.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 70b65debd7..11390fb2b2 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat | Set a personality | `/personality [name]` | `/personality [name]` | | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` | | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` | -| Browse skills | `/skills` or `/` | `/skills` or `/` | +| Browse skills | `/skills` or `/` | `/` | | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message | | Platform-specific status | `/platforms` | `/status`, `/sethome` | @@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh uv venv venv --python 3.11 source venv/bin/activate uv pip install -e ".[all,dev]" -python -m pytest tests/ -q +scripts/run_tests.sh ``` -> **RL Training (optional):** To work on the RL/Tinker-Atropos integration: -> ```bash -> git submodule update --init tinker-atropos -> uv pip install -e "./tinker-atropos" -> ``` +> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required. 
--- From 9d147f7fdefb598fe49f0bf09053c53940c6793d Mon Sep 17 00:00:00 2001 From: whitehatjr1001 Date: Wed, 22 Apr 2026 12:23:32 +0530 Subject: [PATCH 042/264] fix(gateway): enhance message handling during agent tasks with queue mode support --- gateway/run.py | 36 +++++++++++++----------- tests/gateway/test_busy_session_ack.py | 39 +++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 881f77cb71..db3f8b00d5 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1551,27 +1551,23 @@ class GatewayRunner: ) return True - # --- Normal busy case (agent actively running a task) --- - # The user sent a message while the agent is working. Interrupt the - # agent immediately so it stops the current tool-calling loop and - # processes the new message. The pending message is stored in the - # adapter so the base adapter picks it up once the interrupted run - # returns. A brief ack tells the user what's happening (debounced - # to avoid spam when they fire multiple messages quickly). - + # Normal busy case (agent actively running a task) adapter = self.adapters.get(event.source.platform) if not adapter: return False # let default path handle it # Store the message so it's processed as the next turn after the - # interrupt causes the current run to exit. + # current run finishes (or is interrupted). from gateway.platforms.base import merge_pending_message_event merge_pending_message_event(adapter._pending_messages, session_key, event) - # Interrupt the running agent — this aborts in-flight tool calls and - # causes the agent loop to exit at the next check point. + is_queue_mode = self._busy_input_mode == "queue" + + # If not in queue mode, interrupt the running agent immediately. + # This aborts in-flight tool calls and causes the agent loop to exit + # at the next check point. 
running_agent = self._running_agents.get(session_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL: try: running_agent.interrupt(event.text) except Exception: @@ -1583,7 +1579,7 @@ class GatewayRunner: now = time.time() last_ack = self._busy_ack_ts.get(session_key, 0) if now - last_ack < _BUSY_ACK_COOLDOWN: - return True # interrupt sent, ack already delivered recently + return True # interrupt sent (if not queue), ack already delivered recently self._busy_ack_ts[session_key] = now @@ -1608,10 +1604,16 @@ class GatewayRunner: pass status_detail = f" ({', '.join(status_parts)})" if status_parts else "" - message = ( - f"⚡ Interrupting current task{status_detail}. " - f"I'll respond to your message shortly." - ) + if is_queue_mode: + message = ( + f"⏳ Queued for the next turn{status_detail}. " + f"I'll respond once the current task finishes." + ) + else: + message = ( + f"⚡ Interrupting current task{status_detail}. " + f"I'll respond to your message shortly." 
+ ) thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None try: diff --git a/tests/gateway/test_busy_session_ack.py b/tests/gateway/test_busy_session_ack.py index 07fe5fa279..52d4c23df4 100644 --- a/tests/gateway/test_busy_session_ack.py +++ b/tests/gateway/test_busy_session_ack.py @@ -95,6 +95,7 @@ class TestBusySessionAck: async def test_sends_ack_when_agent_running(self): """First message during busy session should get a status ack.""" runner, sentinel = _make_runner() + runner._busy_input_mode = "interrupt" adapter = _make_adapter() event = _make_event(text="Are you working?") @@ -127,16 +128,42 @@ class TestBusySessionAck: assert "Interrupting" in content or "respond" in content assert "/stop" not in content # no need — we ARE interrupting - # Verify message was queued in adapter pending - assert sk in adapter._pending_messages - # Verify agent interrupt was called agent.interrupt.assert_called_once_with("Are you working?") + @pytest.mark.asyncio + async def test_queue_mode_suppresses_interrupt_and_updates_ack(self): + """When busy_input_mode is 'queue', message is queued WITHOUT interrupt.""" + runner, sentinel = _make_runner() + runner._busy_input_mode = "queue" + adapter = _make_adapter() + + event = _make_event(text="Add this to queue") + sk = build_session_key(event.source) + runner.adapters[event.source.platform] = adapter + + agent = MagicMock() + runner._running_agents[sk] = agent + + with patch("gateway.run.merge_pending_message_event"): + await runner._handle_active_session_busy_message(event, sk) + + # VERIFY: Agent was NOT interrupted + agent.interrupt.assert_not_called() + + # VERIFY: Ack sent with queue-specific wording + adapter._send_with_retry.assert_called_once() + call_kwargs = adapter._send_with_retry.call_args + content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "") + assert "Queued for the next turn" in content + assert "respond once the current task finishes" in content + assert 
"Interrupting" not in content + @pytest.mark.asyncio async def test_debounce_suppresses_rapid_acks(self): """Second message within 30s should NOT send another ack.""" runner, sentinel = _make_runner() + runner._busy_input_mode = "interrupt" adapter = _make_adapter() event1 = _make_event(text="hello?") @@ -172,13 +199,14 @@ class TestBusySessionAck: assert result2 is True assert adapter._send_with_retry.call_count == 1 # still 1, no new ack - # But interrupt should still be called for both + # But interrupt should still be called for both (since we are in interrupt mode) assert agent.interrupt.call_count == 2 @pytest.mark.asyncio async def test_ack_after_cooldown_expires(self): """After 30s cooldown, a new message should send a fresh ack.""" runner, sentinel = _make_runner() + runner._busy_input_mode = "interrupt" adapter = _make_adapter() event = _make_event(text="hello?") @@ -212,6 +240,7 @@ class TestBusySessionAck: async def test_includes_status_detail(self): """Ack message should include iteration and tool info when available.""" runner, sentinel = _make_runner() + runner._busy_input_mode = "interrupt" adapter = _make_adapter() event = _make_event(text="yo") @@ -243,6 +272,7 @@ class TestBusySessionAck: """Draining case should still produce the drain-specific message.""" runner, sentinel = _make_runner() runner._draining = True + runner._busy_input_mode = "interrupt" adapter = _make_adapter() event = _make_event(text="hello") @@ -264,6 +294,7 @@ class TestBusySessionAck: async def test_pending_sentinel_no_interrupt(self): """When agent is PENDING_SENTINEL, don't call interrupt (it has no method).""" runner, sentinel = _make_runner() + runner._busy_input_mode = "interrupt" adapter = _make_adapter() event = _make_event(text="hey") From d001814e3f20c545fd2866f7458f45ac5014308c Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 15:11:52 -0700 Subject: [PATCH 043/264] chore(release): map rohithsaimidigudla@gmail.com -> whitehatjr1001 --- 
scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index c5cf669d0e..4d00fd4f1e 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -166,6 +166,7 @@ AUTHOR_MAP = { "seanalt555@gmail.com": "Salt-555", "satelerd@gmail.com": "satelerd", "numman.ali@gmail.com": "nummanali", + "rohithsaimidigudla@gmail.com": "whitehatjr1001", "0xNyk@users.noreply.github.com": "0xNyk", "0xnykcd@googlemail.com": "0xNyk", "buraysandro9@gmail.com": "buray", From d42b6a2eddc766c72ede28322d15006b3a078988 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:13:13 -0700 Subject: [PATCH 044/264] =?UTF-8?q?docs(agents):=20refresh=20AGENTS.md=20?= =?UTF-8?q?=E2=80=94=20fix=20stale=20facts,=20expand=20plugins/skills=20se?= =?UTF-8?q?ctions=20(#14763)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes several outright-wrong facts and gaps vs current main: - venv activation: .venv is preferred, venv is fallback (per run_tests.sh) - AIAgent default model is "" (empty, resolved from config), not hardcoded opus - Test suite is ~15k tests / ~700 files, not ~3000 - tools/mcp_tool.py is 2.6k LOC, not 1050 - Remove stale "currently 5" config_version note; the real bump-trigger rule is migration-only, not every new key - Remove MESSAGING_CWD as the messaging cwd — it's been removed in favor of terminal.cwd in config.yaml (gateway bridges to TERMINAL_CWD env var) - .env is secrets-only; non-secret settings belong in config.yaml - simple_term_menu pitfall: existing sites are legacy fallback, rule is no new usage Incomplete/missing sections filled in: - Gateway platforms list updated to reflect actual adapters (matrix, mattermost, email, sms, dingtalk, wecom, weixin, feishu, bluebubbles, webhook, api_server, etc.) 
- New 'Plugins' section covering general plugins, memory-provider plugins, and dashboard/context-engine/image-gen plugin directories — including the May 2026 rule that plugins must not touch core files - New 'Skills' section covering skills/ vs optional-skills/ split and SKILL.md frontmatter fields - Logs section pointing at ~/.hermes/logs/ and 'hermes logs' CLI - Prompt-cache policy now explicitly mentions --now / deferred slash-command invalidation pattern - Two new pitfalls: gateway two-guard dispatch rule, squash-merge-from-stale branch silent revert, don't-wire-dead-code rule Tree layout trimmed to load-bearing entry points — per-file subtrees were ~70% stale so replaced with directory-level notes pointing readers at the filesystem as the source of truth. --- AGENTS.md | 288 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 211 insertions(+), 77 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0f5ce15f28..ae78e005a0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent ## Development Environment ```bash -source venv/bin/activate # ALWAYS activate before running Python +# Prefer .venv; fall back to venv if that's what your checkout has. +source .venv/bin/activate # or: source venv/bin/activate ``` +`scripts/run_tests.sh` probes `.venv` first, then `venv`, then +`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the +main checkout). + ## Project Structure +File counts shift constantly — don't treat the tree below as exhaustive. +The canonical source is the filesystem. The notes call out the load-bearing +entry points you'll actually edit. 
+ ``` hermes-agent/ -├── run_agent.py # AIAgent class — core conversation loop +├── run_agent.py # AIAgent class — core conversation loop (~12k LOC) ├── model_tools.py # Tool orchestration, discover_builtin_tools(), handle_function_call() ├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list -├── cli.py # HermesCLI class — interactive CLI orchestrator +├── cli.py # HermesCLI class — interactive CLI orchestrator (~11k LOC) ├── hermes_state.py # SessionDB — SQLite session store (FTS5 search) -├── agent/ # Agent internals -│ ├── prompt_builder.py # System prompt assembly -│ ├── context_compressor.py # Auto context compression -│ ├── prompt_caching.py # Anthropic prompt caching -│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization) -│ ├── model_metadata.py # Model context lengths, token estimation -│ ├── models_dev.py # models.dev registry integration (provider-aware context) -│ ├── display.py # KawaiiSpinner, tool preview formatting -│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway) -│ └── trajectory.py # Trajectory saving helpers -├── hermes_cli/ # CLI subcommands and setup -│ ├── main.py # Entry point — all `hermes` subcommands -│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration -│ ├── commands.py # Slash command definitions + SlashCommandCompleter -│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval) -│ ├── setup.py # Interactive setup wizard -│ ├── skin_engine.py # Skin/theme engine — CLI visual customization -│ ├── skills_config.py # `hermes skills` — enable/disable skills per platform -│ ├── tools_config.py # `hermes tools` — enable/disable tools per platform -│ ├── skills_hub.py # `/skills` slash command (search, browse, install) -│ ├── models.py # Model catalog, provider model lists -│ ├── model_switch.py # Shared /model switch pipeline (CLI + gateway) -│ └── auth.py # Provider credential resolution -├── tools/ # Tool implementations (one file per tool) -│ ├── registry.py # Central tool 
registry (schemas, handlers, dispatch) -│ ├── approval.py # Dangerous command detection -│ ├── terminal_tool.py # Terminal orchestration -│ ├── process_registry.py # Background process management -│ ├── file_tools.py # File read/write/search/patch -│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl) -│ ├── browser_tool.py # Browserbase browser automation -│ ├── code_execution_tool.py # execute_code sandbox -│ ├── delegate_tool.py # Subagent delegation -│ ├── mcp_tool.py # MCP client (~1050 lines) +├── hermes_constants.py # get_hermes_home(), display_hermes_home() — profile-aware paths +├── hermes_logging.py # setup_logging() — agent.log / errors.log / gateway.log (profile-aware) +├── batch_runner.py # Parallel batch processing +├── agent/ # Agent internals (provider adapters, memory, caching, compression, etc.) +├── hermes_cli/ # CLI subcommands, setup wizard, plugins loader, skin engine +├── tools/ # Tool implementations — auto-discovered via tools/registry.py │ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity) -├── gateway/ # Messaging platform gateway -│ ├── run.py # Main loop, slash commands, message dispatch -│ ├── session.py # SessionStore — conversation persistence -│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot +├── gateway/ # Messaging gateway — run.py + session.py + platforms/ +│ ├── platforms/ # Adapter per platform (telegram, discord, slack, whatsapp, +│ │ # homeassistant, signal, matrix, mattermost, email, sms, +│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles, +│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md. +│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...) +├── plugins/ # Plugin system (see "Plugins" section below) +│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...) +│ ├── context_engine/ # Context-engine plugins +│ └── / # Dashboard, image-gen, disk-cleanup, examples, ... 
+├── optional-skills/ # Heavier/niche skills shipped but NOT active by default +├── skills/ # Built-in skills bundled with the repo ├── ui-tui/ # Ink (React) terminal UI — `hermes --tui` -│ ├── src/entry.tsx # TTY gate + render() -│ ├── src/app.tsx # Main state machine and UI -│ ├── src/gatewayClient.ts # Child process + JSON-RPC bridge -│ ├── src/app/ # Decomposed app logic (event handler, slash handler, stores, hooks) -│ ├── src/components/ # Ink components (branding, markdown, prompts, pickers, etc.) -│ ├── src/hooks/ # useCompletion, useInputHistory, useQueue, useVirtualHistory -│ └── src/lib/ # Pure helpers (history, osc52, text, rpc, messages) +│ └── src/ # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib ├── tui_gateway/ # Python JSON-RPC backend for the TUI -│ ├── entry.py # stdio entrypoint -│ ├── server.py # RPC handlers and session logic -│ ├── render.py # Optional rich/ANSI bridge -│ └── slash_worker.py # Persistent HermesCLI subprocess for slash commands ├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration) -├── cron/ # Scheduler (jobs.py, scheduler.py) +├── cron/ # Scheduler — jobs.py, scheduler.py ├── environments/ # RL training environments (Atropos) -├── tests/ # Pytest suite (~3000 tests) -└── batch_runner.py # Parallel batch processing +├── scripts/ # run_tests.sh, release.py, auxiliary scripts +├── website/ # Docusaurus docs site +└── tests/ # Pytest suite (~15k tests across ~700 files as of Apr 2026) ``` -**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys) +**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only). +**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+), +`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`. +Browse with `hermes logs [--follow] [--level ...] [--session ...]`. 
## File Dependency Chain @@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/ ## AIAgent Class (run_agent.py) +The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks, +session context, budget, credential pool, etc.). The signature below is the +minimum subset you'll usually touch — read `run_agent.py` for the full list. + ```python class AIAgent: def __init__(self, - model: str = "anthropic/claude-opus-4.6", - max_iterations: int = 90, + base_url: str = None, + api_key: str = None, + provider: str = None, + api_mode: str = None, # "chat_completions" | "codex_responses" | ... + model: str = "", # empty → resolved from config/provider later + max_iterations: int = 90, # tool-calling iterations (shared with subagents) enabled_toolsets: list = None, disabled_toolsets: list = None, quiet_mode: bool = False, save_trajectories: bool = False, - platform: str = None, # "cli", "telegram", etc. + platform: str = None, # "cli", "telegram", etc. session_id: str = None, skip_context_files: bool = False, skip_memory: bool = False, - # ... plus provider, api_mode, callbacks, routing params + credential_pool=None, + # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model, + # checkpoints config, prefill_messages, service_tier, reasoning_config, etc. ): ... 
def chat(self, message: str) -> str: @@ -120,10 +113,13 @@ class AIAgent: ### Agent Loop -The core loop is inside `run_conversation()` — entirely synchronous: +The core loop is inside `run_conversation()` — entirely synchronous, with +interrupt checks, budget tracking, and a one-turn grace call: ```python -while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0: +while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \ + or self._budget_grace_call: + if self._interrupt_requested: break response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas) if response.tool_calls: for tool_call in response.tool_calls: @@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining > return response.content ``` -Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`. +Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. +Reasoning content is stored in `assistant_msg["reasoning"]`. --- @@ -280,7 +277,7 @@ The registry handles schema collection, dispatch, availability checking, and err **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state. -**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern. +**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern. --- @@ -288,9 +285,13 @@ The registry handles schema collection, dispatch, availability checking, and err ### config.yaml options: 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py` -2. Bump `_config_version` (currently 5) to trigger migration for existing users +2. 
Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`) + ONLY if you need to actively migrate/transform existing user config + (renaming keys, changing structure). Adding a new key to an existing + section is handled automatically by the deep-merge and does NOT require + a version bump. -### .env variables: +### .env variables (SECRETS ONLY — API keys, tokens, passwords): 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata: ```python "NEW_API_KEY": { @@ -302,13 +303,29 @@ The registry handles schema collection, dispatch, availability checking, and err }, ``` -### Config loaders (two separate systems): +Non-secret settings (timeouts, thresholds, feature flags, paths, display +preferences) belong in `config.yaml`, not `.env`. If internal code needs an +env var mirror for backward compatibility, bridge it from `config.yaml` to +the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`). + +### Config loaders (three paths — know which one you're in): | Loader | Used by | Location | |--------|---------|----------| -| `load_cli_config()` | CLI mode | `cli.py` | -| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` | -| Direct YAML load | Gateway | `gateway/run.py` | +| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML | +| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML | +| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw | + +If you add a new key and the CLI sees it but the gateway doesn't (or vice +versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage. + +### Working directory: +- **CLI** — uses the process's current directory (`os.getcwd()`). +- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this + to the `TERMINAL_CWD` env var for child tools. 
**`MESSAGING_CWD` has been + removed** — the config loader prints a deprecation warning if it's set in + `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is + `terminal.cwd` in `config.yaml`. --- @@ -401,7 +418,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml. --- +## Plugins + +Hermes has two plugin surfaces. Both live under `plugins/` in the repo so +repo-shipped plugins can be discovered alongside user-installed ones in +`~/.hermes/plugins/` and pip-installed entry points. + +### General plugins (`hermes_cli/plugins.py` + `plugins//`) + +`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`, +and pip entry points. Each plugin exposes a `register(ctx)` function that +can: + +- Register Python-callback lifecycle hooks: + `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`, + `on_session_start`, `on_session_end` +- Register new tools via `ctx.register_tool(...)` +- Register CLI subcommands via `ctx.register_cli_command(...)` — the + plugin's argparse tree is wired into `hermes` at startup so + `hermes ` works with no change to `main.py` + +Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py` +(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs +as a side effect of importing `model_tools.py`. Code paths that read plugin +state without importing `model_tools.py` first must call `discover_plugins()` +explicitly (it's idempotent). + +### Memory-provider plugins (`plugins/memory//`) + +Separate discovery system for pluggable memory backends. Current built-in +providers include **honcho, mem0, supermemory, byterover, hindsight, +holographic, openviking, retaindb**. + +Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`) +and is orchestrated by `agent/memory_manager.py`. 
Lifecycle hooks include +`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional +`post_setup(hermes_home, config)` for setup-wizard integration. + +**CLI commands via `plugins/memory//cli.py`:** if a memory plugin +defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds +it at argparse setup time and wires it into `hermes `. The +framework only exposes CLI commands for the **currently active** memory +provider (read from `memory.provider` in config.yaml), so disabled +providers don't clutter `hermes --help`. + +**Rule (Teknium, May 2026):** plugins MUST NOT modify core files +(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.). +If a plugin needs a capability the framework doesn't expose, expand the +generic plugin surface (new hook, new ctx method) — never hardcode +plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded +honcho argparse from `main.py` for exactly this reason. + +### Dashboard / context-engine / image-gen plugin directories + +`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`, +etc. follow the same pattern (ABC + orchestrator + per-plugin directory). +Context engines plug into `agent/context_engine.py`; image-gen providers +into `agent/image_gen_provider.py`. + +--- + +## Skills + +Two parallel surfaces: + +- **`skills/`** — built-in skills shipped and loadable by default. + Organized by category directories (e.g. `skills/github/`, `skills/mlops/`). +- **`optional-skills/`** — heavier or niche skills shipped with the repo but + NOT active by default. Installed explicitly via + `hermes skills install official//`. Adapter lives in + `tools/skills_hub.py` (`OptionalSkillSource`). Categories include + `autonomous-ai-agents`, `blockchain`, `communication`, `creative`, + `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`, + `research`, `security`, `web-development`. 
+ +When reviewing skill PRs, check which directory they target — heavy-dep or +niche skills belong in `optional-skills/`. + +### SKILL.md frontmatter + +Standard fields: `name`, `description`, `version`, `platforms` +(OS-gating list: `[macos]`, `[linux, macos]`, ...), +`metadata.hermes.tags`, `metadata.hermes.category`, +`metadata.hermes.config` (config.yaml settings the skill needs — stored +under `skills.config.`, prompted during setup, injected at load time). + +--- + ## Important Policies + ### Prompt Caching Must Not Break Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:** @@ -411,9 +516,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression. -### Working Directory Behavior -- **CLI**: Uses current directory (`.` → `os.getcwd()`) -- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory) +Slash commands that mutate system-prompt state (skills, tools, memory, etc.) +must be **cache-aware**: default to deferred invalidation (change takes +effect next session), with an opt-in `--now` flag for immediate +invalidation. See `/skills install --now` for the canonical pattern. ### Background Process Notifications (Gateway) @@ -435,7 +541,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.). The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets -`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()` +`HERMES_HOME` before any module imports. All `get_hermes_home()` references automatically scope to the active profile. ### Rules for profile-safe code @@ -492,8 +598,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her for user-facing print/log messages. 
Hardcoding `~/.hermes` breaks profiles — each profile has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575. -### DO NOT use `simple_term_menu` for interactive menus -Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern. +### DO NOT introduce new `simple_term_menu` usage +Existing call sites in `hermes_cli/main.py` remain for legacy fallback only; +the preferred UI is curses (stdlib) because `simple_term_menu` has +ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New +interactive menus must use `hermes_cli/curses_ui.py` — see +`hermes_cli/tools_config.py` for the canonical pattern. ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`. @@ -504,6 +614,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p ### DO NOT hardcode cross-tool references in schema descriptions Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern. +### The gateway has TWO message guards — both must bypass approval/control commands +When an agent is running, messages pass through two sequential guards: +(1) **base adapter** (`gateway/platforms/base.py`) queues messages in +`_pending_messages` when `session_key in self._active_sessions`, and +(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`, +`/queue`, `/status`, `/approve`, `/deny` before they reach +`running_agent.interrupt()`. 
Any new command that must reach the runner +while the agent is blocked (e.g. approval prompts) MUST bypass BOTH +guards and be dispatched inline, not via `_process_message_background()` +(which races session lifecycle). + +### Squash merges from stale branches silently revert recent fixes +Before squash-merging a PR, ensure the branch is up to date with `main` +(`git fetch origin main && git reset --hard origin/main` in the worktree, +then re-apply the PR's commits). A stale branch's version of an unrelated +file will silently overwrite recent fixes on main when squashed. Verify +with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a +red flag. + +### Don't wire in dead code without E2E validation +Unused code that was never shipped was dead for a reason. Before wiring an +unused module into a live code path, E2E test the real resolution chain +with actual imports (not mocks) against a temp `HERMES_HOME`. + ### Tests must not write to `~/.hermes/` The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests. @@ -559,7 +693,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells pytest directly), at minimum activate the venv and pass `-n 4`: ```bash -source venv/bin/activate +source .venv/bin/activate # or: source venv/bin/activate python -m pytest tests/ -q -n 4 ``` From a5e4a86ebe124be1441f4552b11bbc746f6e0cd6 Mon Sep 17 00:00:00 2001 From: Julien Talbot Date: Thu, 23 Apr 2026 16:45:10 +0400 Subject: [PATCH 045/264] feat(xai): add xAI image generation provider (grok-imagine-image) Add xAI as a plugin-based image generation backend using grok-imagine-image. Follows the existing ImageGenProvider ABC pattern used by OpenAI and FAL. 
Changes: - plugins/image_gen/xai/__init__.py: xAI provider implementation - Uses xAI /images/generations endpoint - Supports text-to-image and image editing with reference images - Multiple aspect ratios (1:1, 16:9, 9:16, 4:3, 3:4, 3:2, 2:3) - Multiple resolutions (1K, 2K) - Base64 output saved to cache - Config via config.yaml image_gen.xai section - plugins/image_gen/xai/plugin.yaml: plugin metadata - tests/plugins/image_gen/test_xai_provider.py: 19 unit tests - Provider class (name, display_name, is_available, list_models, setup_schema) - Config (default model, resolution, custom model) - Generate (missing key, success b64/url, API error, timeout, empty response, reference images, auth header) - Registration Requires XAI_API_KEY in ~/.hermes/.env. To use: set image_gen.provider: xai in config.yaml. --- plugins/image_gen/xai/__init__.py | 324 +++++++++++++++++++ plugins/image_gen/xai/plugin.yaml | 7 + tests/plugins/image_gen/test_xai_provider.py | 260 +++++++++++++++ 3 files changed, 591 insertions(+) create mode 100644 plugins/image_gen/xai/__init__.py create mode 100644 plugins/image_gen/xai/plugin.yaml create mode 100644 tests/plugins/image_gen/test_xai_provider.py diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py new file mode 100644 index 0000000000..39e83e5ac8 --- /dev/null +++ b/plugins/image_gen/xai/__init__.py @@ -0,0 +1,324 @@ +"""xAI image generation backend. + +Exposes xAI's ``grok-imagine-image`` model as an +:class:`ImageGenProvider` implementation. + +Features: +- Text-to-image generation +- Image editing with reference images +- Multiple aspect ratios (1:1, 16:9, 9:16, etc.) +- Multiple resolutions (1K, 2K) +- Base64 output saved to cache + +Selection precedence (first hit wins): +1. ``XAI_IMAGE_MODEL`` env var +2. ``image_gen.xai.model`` in ``config.yaml`` +3. 
:data:`DEFAULT_MODEL` +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +import requests + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) +from tools.xai_http import hermes_xai_user_agent + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Model catalog +# --------------------------------------------------------------------------- + +API_MODEL = "grok-imagine-image" + +_MODELS: Dict[str, Dict[str, Any]] = { + "grok-imagine-image": { + "display": "Grok Imagine Image", + "speed": "~5-10s", + "strengths": "Fast, high-quality, supports editing", + }, +} + +DEFAULT_MODEL = "grok-imagine-image" + +# xAI aspect ratios (more options than FAL/OpenAI) +_XAI_ASPECT_RATIOS = { + "landscape": "16:9", + "square": "1:1", + "portrait": "9:16", + "4:3": "4:3", + "3:4": "3:4", + "3:2": "3:2", + "2:3": "2:3", +} + +# xAI resolutions +_XAI_RESOLUTIONS = { + "1k": "1024", + "2k": "2048", +} + +DEFAULT_RESOLUTION = "1k" + + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + + +def _load_xai_config() -> Dict[str, Any]: + """Read ``image_gen.xai`` from config.yaml.""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + xai_section = section.get("xai") if isinstance(section, dict) else None + return xai_section if isinstance(xai_section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen.xai config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which model to use and return ``(model_id, meta)``.""" + env_override = 
os.environ.get("XAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_xai_config() + candidate = cfg.get("model") if isinstance(cfg.get("model"), str) else None + if candidate and candidate in _MODELS: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +def _resolve_resolution() -> str: + """Get configured resolution.""" + cfg = _load_xai_config() + res = cfg.get("resolution") if isinstance(cfg.get("resolution"), str) else None + if res and res in _XAI_RESOLUTIONS: + return res + return DEFAULT_RESOLUTION + + +# --------------------------------------------------------------------------- +# Provider +# --------------------------------------------------------------------------- + + +class XAIImageGenProvider(ImageGenProvider): + """xAI ``grok-imagine-image`` backend.""" + + @property + def name(self) -> str: + return "xai" + + @property + def display_name(self) -> str: + return "xAI (Grok)" + + def is_available(self) -> bool: + return bool(os.getenv("XAI_API_KEY")) + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta.get("display", model_id), + "speed": meta.get("speed", ""), + "strengths": meta.get("strengths", ""), + } + for model_id, meta in _MODELS.items() + ] + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "xAI (Grok)", + "badge": "paid", + "tag": "Native xAI image generation via grok-imagine-image", + "env_vars": [ + { + "key": "XAI_API_KEY", + "prompt": "xAI API key", + "url": "https://console.x.ai/", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + """Generate an image using xAI's grok-imagine-image.""" + api_key = os.getenv("XAI_API_KEY", "").strip() + if not api_key: + return error_response( + error="XAI_API_KEY not set. 
Get one at https://console.x.ai/", + error_type="missing_api_key", + provider="xai", + aspect_ratio=aspect_ratio, + ) + + model_id, meta = _resolve_model() + aspect = resolve_aspect_ratio(aspect_ratio) + xai_ar = _XAI_ASPECT_RATIOS.get(aspect, "1:1") + resolution = _resolve_resolution() + xai_res = _XAI_RESOLUTIONS.get(resolution, "1024") + + # Check for editing mode (reference images) + reference_images = kwargs.get("reference_images", []) + edit_image = kwargs.get("edit_image") + + payload: Dict[str, Any] = { + "model": API_MODEL, + "prompt": prompt, + "aspect_ratio": xai_ar, + "resolution": xai_res, + } + + # Add editing parameters if present + if reference_images: + payload["reference_images"] = reference_images[:5] + if edit_image: + payload["image_url"] = edit_image + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + "User-Agent": hermes_xai_user_agent(), + } + + base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/") + + try: + response = requests.post( + f"{base_url}/images/generations", + headers=headers, + json=payload, + timeout=120, + ) + response.raise_for_status() + except requests.HTTPError as exc: + status = exc.response.status_code if exc.response else 0 + try: + err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300]) + except Exception: + err_msg = exc.response.text[:300] if exc.response else str(exc) + logger.error("xAI image gen failed (%d): %s", status, err_msg) + return error_response( + error=f"xAI image generation failed ({status}): {err_msg}", + error_type="api_error", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + except requests.Timeout: + return error_response( + error="xAI image generation timed out (120s)", + error_type="timeout", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + except requests.ConnectionError as exc: + return error_response( + error=f"xAI connection 
error: {exc}", + error_type="connection_error", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + try: + result = response.json() + except Exception as exc: + return error_response( + error=f"xAI returned invalid JSON: {exc}", + error_type="invalid_response", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + # Parse response — xAI returns data[0].b64_json or data[0].url + data = result.get("data", []) + if not data: + return error_response( + error="xAI returned no image data", + error_type="empty_response", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + first = data[0] + b64 = first.get("b64_json") + url = first.get("url") + + if b64: + try: + saved_path = save_b64_image(b64, prefix=f"xai_{model_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + image_ref = str(saved_path) + elif url: + image_ref = url + else: + return error_response( + error="xAI response contained neither b64_json nor URL", + error_type="empty_response", + provider="xai", + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + extra: Dict[str, Any] = {} + if reference_images: + extra["reference_images"] = len(reference_images) + + return success_response( + image=image_ref, + model=model_id, + prompt=prompt, + aspect_ratio=aspect, + provider="xai", + extra=extra if extra else None, + ) + + +# --------------------------------------------------------------------------- +# Plugin registration +# --------------------------------------------------------------------------- + + +def register(ctx: Any) -> None: + """Register this provider with the image gen registry.""" + ctx.register_image_gen_provider(XAIImageGenProvider()) diff --git a/plugins/image_gen/xai/plugin.yaml b/plugins/image_gen/xai/plugin.yaml new file mode 100644 
index 0000000000..af735846a0 --- /dev/null +++ b/plugins/image_gen/xai/plugin.yaml @@ -0,0 +1,7 @@ +name: xai +version: 1.0.0 +description: "xAI image generation backend (grok-imagine-image). Supports text-to-image and editing." +author: Julien Talbot +kind: backend +requires_env: + - XAI_API_KEY diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py new file mode 100644 index 0000000000..b69e3e18d5 --- /dev/null +++ b/tests/plugins/image_gen/test_xai_provider.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +"""Tests for xAI image generation provider.""" + +from __future__ import annotations + +import json +import os +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _fake_api_key(monkeypatch): + """Ensure XAI_API_KEY is set for all tests.""" + monkeypatch.setenv("XAI_API_KEY", "test-key-12345") + + +# --------------------------------------------------------------------------- +# Provider class tests +# --------------------------------------------------------------------------- + + +class TestXAIImageGenProvider: + def test_name(self): + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + assert provider.name == "xai" + + def test_display_name(self): + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + assert provider.display_name == "xAI (Grok)" + + def test_is_available_with_key(self, monkeypatch): + monkeypatch.setenv("XAI_API_KEY", "sk-xxx") + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + assert provider.is_available() is True + + def test_is_available_without_key(self, monkeypatch): + monkeypatch.delenv("XAI_API_KEY", raising=False) + from plugins.image_gen.xai 
import XAIImageGenProvider + + provider = XAIImageGenProvider() + assert provider.is_available() is False + + def test_list_models(self): + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + models = provider.list_models() + assert len(models) >= 1 + assert models[0]["id"] == "grok-imagine-image" + + def test_default_model(self): + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + assert provider.default_model() == "grok-imagine-image" + + def test_get_setup_schema(self): + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + schema = provider.get_setup_schema() + assert schema["name"] == "xAI (Grok)" + assert schema["badge"] == "paid" + assert len(schema["env_vars"]) == 1 + assert schema["env_vars"][0]["key"] == "XAI_API_KEY" + + +# --------------------------------------------------------------------------- +# Config tests +# --------------------------------------------------------------------------- + + +class TestConfig: + def test_default_model(self): + from plugins.image_gen.xai import _resolve_model + + model_id, meta = _resolve_model() + assert model_id == "grok-imagine-image" + + def test_default_resolution(self): + from plugins.image_gen.xai import _resolve_resolution + + assert _resolve_resolution() == "1k" + + def test_custom_model(self, monkeypatch): + monkeypatch.setenv("XAI_IMAGE_MODEL", "grok-imagine-image") + from plugins.image_gen.xai import _resolve_model + + model_id, _ = _resolve_model() + assert model_id == "grok-imagine-image" + + +# --------------------------------------------------------------------------- +# Generate tests +# --------------------------------------------------------------------------- + + +class TestGenerate: + def test_missing_api_key(self, monkeypatch): + monkeypatch.delenv("XAI_API_KEY", raising=False) + from plugins.image_gen.xai import XAIImageGenProvider + + provider = XAIImageGenProvider() + 
result = provider.generate(prompt="test") + assert result["success"] is False + assert "XAI_API_KEY" in result["error"] + + def test_successful_generation(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": [{"b64_json": "dGVzdC1pbWFnZS1kYXRh"}], # base64 "test-image-data" + } + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp): + with patch("plugins.image_gen.xai.save_b64_image", return_value="/tmp/test.png"): + provider = XAIImageGenProvider() + result = provider.generate(prompt="A cat playing piano") + + assert result["success"] is True + assert result["image"] == "/tmp/test.png" + assert result["provider"] == "xai" + assert result["model"] == "grok-imagine-image" + + def test_successful_url_response(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": [{"url": "https://xai.image/result.png"}], + } + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp): + provider = XAIImageGenProvider() + result = provider.generate(prompt="A cat playing piano") + + assert result["success"] is True + assert result["image"] == "https://xai.image/result.png" + + def test_api_error(self): + import requests as req_lib + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 401 + mock_resp.text = "Unauthorized" + mock_resp.json.return_value = {"error": {"message": "Invalid API key"}} + mock_resp.raise_for_status.side_effect = req_lib.HTTPError(response=mock_resp) + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp): + provider = XAIImageGenProvider() + result = provider.generate(prompt="test") + + assert result["success"] is False + assert 
result["error_type"] == "api_error" + + def test_timeout(self): + import requests as req_lib + + from plugins.image_gen.xai import XAIImageGenProvider + + with patch("plugins.image_gen.xai.requests.post", side_effect=req_lib.Timeout()): + provider = XAIImageGenProvider() + result = provider.generate(prompt="test") + + assert result["success"] is False + assert result["error_type"] == "timeout" + + def test_empty_response(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = {"data": []} + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp): + provider = XAIImageGenProvider() + result = provider.generate(prompt="test") + + assert result["success"] is False + assert result["error_type"] == "empty_response" + + def test_with_reference_images(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": [{"url": "https://xai.image/edited.png"}], + } + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post: + provider = XAIImageGenProvider() + result = provider.generate( + prompt="Edit this image", + reference_images=["https://example.com/ref1.png", "https://example.com/ref2.png"], + ) + + assert result["success"] is True + # Check that reference_images was passed in payload + call_args = mock_post.call_args + payload = call_args.kwargs.get("json") or call_args[1].get("json") + assert "reference_images" in payload + assert len(payload["reference_images"]) == 2 + + def test_auth_header(self): + from plugins.image_gen.xai import XAIImageGenProvider + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.raise_for_status = MagicMock() + mock_resp.json.return_value = { + "data": [{"url": "https://xai.image/test.png"}], + 
} + + with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post: + provider = XAIImageGenProvider() + provider.generate(prompt="test") + + call_args = mock_post.call_args + headers = call_args.kwargs.get("headers") or call_args[1].get("headers") + assert "Bearer test-key-12345" in headers["Authorization"] + assert "Hermes-Agent" in headers["User-Agent"] + + +# --------------------------------------------------------------------------- +# Registration test +# --------------------------------------------------------------------------- + + +class TestRegistration: + def test_register(self): + from plugins.image_gen.xai import XAIImageGenProvider, register + + mock_ctx = MagicMock() + register(mock_ctx) + mock_ctx.register_image_gen_provider.assert_called_once() + provider = mock_ctx.register_image_gen_provider.call_args[0][0] + assert isinstance(provider, XAIImageGenProvider) + assert provider.name == "xai" From 9599271180cb301cabd15920c4d380e2b4c47c58 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Thu, 23 Apr 2026 15:12:05 -0700 Subject: [PATCH 046/264] fix(xai-image): drop unreachable editing code path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent-facing image_generate tool only passes prompt + aspect_ratio to provider.generate() (see tools/image_generation_tool.py:953). The editing block (reference_images / edit_image kwargs) could never fire from the tool surface, and the xAI edits endpoint is /images/edits with a different payload shape anyway — not /images/generations as submitted. - Remove reference_images / edit_image kwargs handling from generate() - Remove matching test_with_reference_images case - Update docstring + plugin.yaml description to text-to-image only - Surface resolution in the success extras Follow-up to PR #14547. Tests: 18/18 pass. 
--- plugins/image_gen/xai/__init__.py | 21 ++++------------- plugins/image_gen/xai/plugin.yaml | 2 +- tests/plugins/image_gen/test_xai_provider.py | 24 -------------------- 3 files changed, 6 insertions(+), 41 deletions(-) diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py index 39e83e5ac8..b1ec4368ef 100644 --- a/plugins/image_gen/xai/__init__.py +++ b/plugins/image_gen/xai/__init__.py @@ -5,7 +5,6 @@ Exposes xAI's ``grok-imagine-image`` model as an Features: - Text-to-image generation -- Image editing with reference images - Multiple aspect ratios (1:1, 16:9, 9:16, etc.) - Multiple resolutions (1K, 2K) - Base64 output saved to cache @@ -46,7 +45,7 @@ _MODELS: Dict[str, Dict[str, Any]] = { "grok-imagine-image": { "display": "Grok Imagine Image", "speed": "~5-10s", - "strengths": "Fast, high-quality, supports editing", + "strengths": "Fast, high-quality", }, } @@ -180,10 +179,6 @@ class XAIImageGenProvider(ImageGenProvider): resolution = _resolve_resolution() xai_res = _XAI_RESOLUTIONS.get(resolution, "1024") - # Check for editing mode (reference images) - reference_images = kwargs.get("reference_images", []) - edit_image = kwargs.get("edit_image") - payload: Dict[str, Any] = { "model": API_MODEL, "prompt": prompt, @@ -191,12 +186,6 @@ class XAIImageGenProvider(ImageGenProvider): "resolution": xai_res, } - # Add editing parameters if present - if reference_images: - payload["reference_images"] = reference_images[:5] - if edit_image: - payload["image_url"] = edit_image - headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", @@ -300,9 +289,9 @@ class XAIImageGenProvider(ImageGenProvider): aspect_ratio=aspect, ) - extra: Dict[str, Any] = {} - if reference_images: - extra["reference_images"] = len(reference_images) + extra: Dict[str, Any] = { + "resolution": xai_res, + } return success_response( image=image_ref, @@ -310,7 +299,7 @@ class XAIImageGenProvider(ImageGenProvider): prompt=prompt, 
aspect_ratio=aspect, provider="xai", - extra=extra if extra else None, + extra=extra, ) diff --git a/plugins/image_gen/xai/plugin.yaml b/plugins/image_gen/xai/plugin.yaml index af735846a0..1bebc7d725 100644 --- a/plugins/image_gen/xai/plugin.yaml +++ b/plugins/image_gen/xai/plugin.yaml @@ -1,6 +1,6 @@ name: xai version: 1.0.0 -description: "xAI image generation backend (grok-imagine-image). Supports text-to-image and editing." +description: "xAI image generation backend (grok-imagine-image). Text-to-image." author: Julien Talbot kind: backend requires_env: diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py index b69e3e18d5..ab1bf88345 100644 --- a/tests/plugins/image_gen/test_xai_provider.py +++ b/tests/plugins/image_gen/test_xai_provider.py @@ -199,30 +199,6 @@ class TestGenerate: assert result["success"] is False assert result["error_type"] == "empty_response" - def test_with_reference_images(self): - from plugins.image_gen.xai import XAIImageGenProvider - - mock_resp = MagicMock() - mock_resp.status_code = 200 - mock_resp.raise_for_status = MagicMock() - mock_resp.json.return_value = { - "data": [{"url": "https://xai.image/edited.png"}], - } - - with patch("plugins.image_gen.xai.requests.post", return_value=mock_resp) as mock_post: - provider = XAIImageGenProvider() - result = provider.generate( - prompt="Edit this image", - reference_images=["https://example.com/ref1.png", "https://example.com/ref2.png"], - ) - - assert result["success"] is True - # Check that reference_images was passed in payload - call_args = mock_post.call_args - payload = call_args.kwargs.get("json") or call_args[1].get("json") - assert "reference_images" in payload - assert len(payload["reference_images"]) == 2 - def test_auth_header(self): from plugins.image_gen.xai import XAIImageGenProvider From 1df35a93b20b92c65e9cb312c3e12d93cb9c2251 Mon Sep 17 00:00:00 2001 From: Devorun <130918800+devorun@users.noreply.github.com> Date: Thu, 9 
Apr 2026 16:57:49 +0300 Subject: [PATCH 047/264] Fix (mixture_of_agents): replace deprecated Gemini model and forward max_tokens to OpenRouter (#6621) --- tools/mixture_of_agents_tool.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/mixture_of_agents_tool.py b/tools/mixture_of_agents_tool.py index 8bbc187928..a34e99aa8f 100644 --- a/tools/mixture_of_agents_tool.py +++ b/tools/mixture_of_agents_tool.py @@ -62,7 +62,7 @@ logger = logging.getLogger(__name__) # Keep this list aligned with current top-tier OpenRouter frontier options. REFERENCE_MODELS = [ "anthropic/claude-opus-4.6", - "google/gemini-3-pro-preview", + "google/gemini-2.5-pro", "openai/gpt-5.4-pro", "deepseek/deepseek-v3.2", ] @@ -129,6 +129,7 @@ async def _run_reference_model_safe( api_params = { "model": model, "messages": [{"role": "user", "content": user_prompt}], + "max_tokens": max_tokens, "extra_body": { "reasoning": { "enabled": True, @@ -203,6 +204,7 @@ async def _run_aggregator_model( {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ], + "max_tokens": max_tokens, "extra_body": { "reasoning": { "enabled": True, From 5e67b384377b7a76a55395d1553d74c94d76ba2f Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 15:12:30 -0700 Subject: [PATCH 048/264] chore(release): map devorun author + convert MoA defaults test to invariant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AUTHOR_MAP entry for 130918800+devorun for #6636 attribution - test_moa_defaults: was a change-detector tied to the exact frontier model list — flips red every OpenRouter churn. Rewritten as an invariant (non-empty, valid vendor/model slugs). 
--- scripts/release.py | 1 + tests/tools/test_mixture_of_agents_tool.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/scripts/release.py b/scripts/release.py index 4d00fd4f1e..dda2a366a8 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ AUTHOR_MAP = { "127238744+teknium1@users.noreply.github.com": "teknium1", "343873859@qq.com": "DrStrangerUJN", "jefferson@heimdallstrategy.com": "Mind-Dragon", + "130918800+devorun@users.noreply.github.com": "devorun", # contributors (from noreply pattern) "david.vv@icloud.com": "davidvv", "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", diff --git a/tests/tools/test_mixture_of_agents_tool.py b/tests/tools/test_mixture_of_agents_tool.py index 84d1ffece5..686922f892 100644 --- a/tests/tools/test_mixture_of_agents_tool.py +++ b/tests/tools/test_mixture_of_agents_tool.py @@ -8,14 +8,17 @@ import pytest moa = importlib.import_module("tools.mixture_of_agents_tool") -def test_moa_defaults_track_current_openrouter_frontier_models(): - assert moa.REFERENCE_MODELS == [ - "anthropic/claude-opus-4.6", - "google/gemini-3-pro-preview", - "openai/gpt-5.4-pro", - "deepseek/deepseek-v3.2", - ] - assert moa.AGGREGATOR_MODEL == "anthropic/claude-opus-4.6" +def test_moa_defaults_are_well_formed(): + # Invariants, not a catalog snapshot: the exact model list churns with + # OpenRouter availability (see PR #6636 where gemini-3-pro-preview was + # removed upstream). What we care about is that the defaults are present + # and valid vendor/model slugs. 
+ assert isinstance(moa.REFERENCE_MODELS, list) + assert len(moa.REFERENCE_MODELS) >= 1 + for m in moa.REFERENCE_MODELS: + assert isinstance(m, str) and "/" in m and not m.startswith("/") + assert isinstance(moa.AGGREGATOR_MODEL, str) + assert "/" in moa.AGGREGATOR_MODEL @pytest.mark.asyncio From 0086fd894d1c4d313f87b134afb1750978f238fa Mon Sep 17 00:00:00 2001 From: say8hi Date: Wed, 8 Apr 2026 15:38:36 +0500 Subject: [PATCH 049/264] feat(cron): support enabled_toolsets per job to reduce token overhead --- cron/scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cron/scheduler.py b/cron/scheduler.py index e7a22dfbe5..9797703744 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -886,6 +886,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: providers_ignored=pr.get("ignore"), providers_order=pr.get("order"), provider_sort=pr.get("sort"), + enabled_toolsets=job.get("enabled_toolsets") or None, disabled_toolsets=["cronjob", "messaging", "clarify"], quiet_mode=True, skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd From 8b79acb8de64009da7f4f6d16d674b8873d80700 Mon Sep 17 00:00:00 2001 From: say8hi Date: Wed, 8 Apr 2026 15:50:46 +0500 Subject: [PATCH 050/264] feat(cron): expose enabled_toolsets in cronjob tool and create_job() --- cron/jobs.py | 7 +++++++ tools/cronjob_tools.py | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/cron/jobs.py b/cron/jobs.py index 8fb3f868a9..4d34b1534b 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -384,6 +384,7 @@ def create_job( provider: Optional[str] = None, base_url: Optional[str] = None, script: Optional[str] = None, + enabled_toolsets: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Create a new cron job. @@ -403,6 +404,9 @@ def create_job( script: Optional path to a Python script whose stdout is injected into the prompt each run. The script runs before the agent turn, and its output is prepended as context. 
Useful for data collection / change detection. + enabled_toolsets: Optional list of toolset names to restrict the agent to. + When set, only tools from these toolsets are loaded, reducing + token overhead. When omitted, all default tools are loaded. Returns: The created job dict @@ -433,6 +437,8 @@ def create_job( normalized_base_url = normalized_base_url or None normalized_script = str(script).strip() if isinstance(script, str) else None normalized_script = normalized_script or None + normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None + normalized_toolsets = normalized_toolsets or None label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job" job = { @@ -464,6 +470,7 @@ def create_job( # Delivery configuration "deliver": deliver, "origin": origin, # Tracks where job was created for "origin" delivery + "enabled_toolsets": normalized_toolsets, } jobs = load_jobs() diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 8a685a8ccb..b6aacf54ec 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -215,6 +215,8 @@ def _format_job(job: Dict[str, Any]) -> Dict[str, Any]: } if job.get("script"): result["script"] = job["script"] + if job.get("enabled_toolsets"): + result["enabled_toolsets"] = job["enabled_toolsets"] return result @@ -234,6 +236,7 @@ def cronjob( base_url: Optional[str] = None, reason: Optional[str] = None, script: Optional[str] = None, + enabled_toolsets: Optional[List[str]] = None, task_id: str = None, ) -> str: """Unified cron job management tool.""" @@ -271,6 +274,7 @@ def cronjob( provider=_normalize_optional_job_value(provider), base_url=_normalize_optional_job_value(base_url, strip_trailing_slash=True), script=_normalize_optional_job_value(script), + enabled_toolsets=enabled_toolsets or None, ) return json.dumps( { @@ -360,6 +364,8 @@ def cronjob( if script_error: return tool_error(script_error, success=False) updates["script"] 
= _normalize_optional_job_value(script) if script else None + if enabled_toolsets is not None: + updates["enabled_toolsets"] = enabled_toolsets or None if repeat is not None: # Normalize: treat 0 or negative as None (infinite) normalized_repeat = None if repeat <= 0 else repeat @@ -459,6 +465,11 @@ Important safety rule: cron-run sessions should not recursively schedule more cr "type": "string", "description": f"Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under {display_hermes_home()}/scripts/. On update, pass empty string to clear." }, + "enabled_toolsets": { + "type": "array", + "items": {"type": "string"}, + "description": "Optional list of toolset names to restrict the job's agent to (e.g. [\"web\", \"terminal\", \"file\", \"delegation\"]). When set, only tools from these toolsets are loaded, significantly reducing input token overhead. When omitted, all default tools are loaded. Infer from the job's prompt — e.g. use \"web\" if it calls web_search, \"terminal\" if it runs scripts, \"file\" if it reads files, \"delegation\" if it calls delegate_task. On update, pass an empty array to clear." 
+ }, }, "required": ["action"] } @@ -503,6 +514,7 @@ registry.register( base_url=args.get("base_url"), reason=args.get("reason"), script=args.get("script"), + enabled_toolsets=args.get("enabled_toolsets"), task_id=kw.get("task_id"), ))(), check_fn=check_cronjob_requirements, From 18d5ba86764b219fbfdad02e337b12e794953b8f Mon Sep 17 00:00:00 2001 From: say8hi Date: Wed, 8 Apr 2026 16:27:51 +0500 Subject: [PATCH 051/264] test(cron): add tests for enabled_toolsets in create_job and run_job --- tests/cron/test_jobs.py | 29 +++++++++++++++++++ tests/cron/test_scheduler.py | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index e0f56b9612..6a9185f072 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -566,6 +566,35 @@ class TestGetDueJobs: assert get_job("oneshot-stale")["next_run_at"] is None +class TestEnabledToolsets: + def test_enabled_toolsets_stored(self, tmp_cron_dir): + job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", "terminal"]) + assert job["enabled_toolsets"] == ["web", "terminal"] + + def test_enabled_toolsets_persisted(self, tmp_cron_dir): + job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", "file"]) + fetched = get_job(job["id"]) + assert fetched["enabled_toolsets"] == ["web", "file"] + + def test_enabled_toolsets_none_when_omitted(self, tmp_cron_dir): + job = create_job(prompt="monitor", schedule="every 1h") + assert job["enabled_toolsets"] is None + + def test_enabled_toolsets_empty_list_normalizes_to_none(self, tmp_cron_dir): + job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=[]) + assert job["enabled_toolsets"] is None + + def test_enabled_toolsets_whitespace_entries_stripped(self, tmp_cron_dir): + job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", " ", "file"]) + assert job["enabled_toolsets"] == ["web", "file"] + + def 
test_enabled_toolsets_updated_via_update_job(self, tmp_cron_dir): + job = create_job(prompt="monitor", schedule="every 1h") + update_job(job["id"], {"enabled_toolsets": ["web", "delegation"]}) + fetched = get_job(job["id"]) + assert fetched["enabled_toolsets"] == ["web", "delegation"] + + class TestSaveJobOutput: def test_creates_output_file(self, tmp_cron_dir): output_file = save_job_output("test123", "# Results\nEverything ok.") diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 524490eb09..421d6859d9 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -673,6 +673,60 @@ class TestRunJobSessionPersistence: assert call_args[0][1] == "cron_complete" fake_db.close.assert_called_once() + def _make_run_job_patches(self, tmp_path): + """Common patches for run_job tests.""" + fake_db = MagicMock() + return fake_db, [ + patch("cron.scheduler._hermes_home", tmp_path), + patch("cron.scheduler._resolve_origin", return_value=None), + patch("dotenv.load_dotenv"), + patch("hermes_state.SessionDB", return_value=fake_db), + patch( + "hermes_cli.runtime_provider.resolve_runtime_provider", + return_value={ + "api_key": "test-key", + "base_url": "https://example.invalid/v1", + "provider": "openrouter", + "api_mode": "chat_completions", + }, + ), + ] + + def test_run_job_passes_enabled_toolsets_to_agent(self, tmp_path): + job = { + "id": "toolset-job", + "name": "test", + "prompt": "hello", + "enabled_toolsets": ["web", "terminal", "file"], + } + fake_db, patches = self._make_run_job_patches(tmp_path) + with patches[0], patches[1], patches[2], patches[3], patches[4], \ + patch("run_agent.AIAgent") as mock_agent_cls: + mock_agent = MagicMock() + mock_agent.run_conversation.return_value = {"final_response": "ok"} + mock_agent_cls.return_value = mock_agent + run_job(job) + + kwargs = mock_agent_cls.call_args.kwargs + assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"] + + def 
test_run_job_enabled_toolsets_none_when_not_set(self, tmp_path): + job = { + "id": "no-toolset-job", + "name": "test", + "prompt": "hello", + } + fake_db, patches = self._make_run_job_patches(tmp_path) + with patches[0], patches[1], patches[2], patches[3], patches[4], \ + patch("run_agent.AIAgent") as mock_agent_cls: + mock_agent = MagicMock() + mock_agent.run_conversation.return_value = {"final_response": "ok"} + mock_agent_cls.return_value = mock_agent + run_job(job) + + kwargs = mock_agent_cls.call_args.kwargs + assert kwargs["enabled_toolsets"] is None + def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path): """Empty final_response should stay empty for delivery logic (issue #2234). From 470389e6a30ab77c92a26c911f96d754209c8589 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 23 Apr 2026 15:14:47 -0700 Subject: [PATCH 052/264] chore(release): map say8hi author for #6130 salvage --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index dda2a366a8..345214460a 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -46,6 +46,7 @@ AUTHOR_MAP = { "343873859@qq.com": "DrStrangerUJN", "jefferson@heimdallstrategy.com": "Mind-Dragon", "130918800+devorun@users.noreply.github.com": "devorun", + "maks.mir@yahoo.com": "say8hi", # contributors (from noreply pattern) "david.vv@icloud.com": "davidvv", "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", From f593c367bec49157efc1c5fe4ccb4c85866588e6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 23 Apr 2026 15:31:01 -0700 Subject: [PATCH 053/264] feat(dashboard): reskin extension points for themes and plugins (#14776) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Themes and plugins can now pull off arbitrary dashboard reskins (cockpit HUD, retro terminal, etc.) without touching core code. 
Themes gain four new fields: - layoutVariant: standard | cockpit | tiled — shell layout selector - assets: {bg, hero, logo, crest, sidebar, header, custom: {...}} — artwork URLs exposed as --theme-asset-* CSS vars - customCSS: raw CSS injected as a scoped + + + + + +``` + +Key implementation patterns: +- **Seeded randomness**: Always `randomSeed()` + `noiseSeed()` for reproducibility +- **Color mode**: Use `colorMode(HSB, 360, 100, 100, 100)` for intuitive color control +- **State separation**: CONFIG for parameters, PALETTE for colors, globals for mutable state +- **Class-based entities**: Particles, agents, shapes as classes with `update()` + `display()` methods +- **Offscreen buffers**: `createGraphics()` for layered composition, trails, masks + +### Step 4: Preview & Iterate + +- Open HTML file directly in browser — no server needed for basic sketches +- For `loadImage()`/`loadFont()` from local files: use `scripts/serve.sh` or `python3 -m http.server` +- Chrome DevTools Performance tab to verify 60fps +- Test at target export resolution, not just the window size +- Adjust parameters until the visual matches the concept from Step 1 + +### Step 5: Export + +| Format | Method | Command | +|--------|--------|---------| +| **PNG** | `saveCanvas('output', 'png')` in `keyPressed()` | Press 's' to save | +| **High-res PNG** | Puppeteer headless capture | `node scripts/export-frames.js sketch.html --width 3840 --height 2160 --frames 1` | +| **GIF** | `saveGif('output', 5)` — captures N seconds | Press 'g' to save | +| **Frame sequence** | `saveFrames('frame', 'png', 10, 30)` — 10s at 30fps | Then `ffmpeg -i frame-%04d.png -c:v libx264 output.mp4` | +| **MP4** | Puppeteer frame capture + ffmpeg | `bash scripts/render.sh sketch.html output.mp4 --duration 30 --fps 30` | +| **SVG** | `createCanvas(w, h, SVG)` with p5.js-svg | `save('output.svg')` | + +### Step 6: Quality Verification + +- **Does it match the vision?** Compare output to the creative concept. 
If it looks generic, go back to Step 1 +- **Resolution check**: Is it sharp at the target display size? No aliasing artifacts? +- **Performance check**: Does it hold 60fps in browser? (30fps minimum for animations) +- **Color check**: Do the colors work together? Test on both light and dark monitors +- **Edge cases**: What happens at canvas edges? On resize? After running for 10 minutes? + +## Critical Implementation Notes + +### Performance — Disable FES First + +The Friendly Error System (FES) adds up to 10x overhead. Disable it in every production sketch: + +```javascript +p5.disableFriendlyErrors = true; // BEFORE setup() + +function setup() { + pixelDensity(1); // prevent 2x-4x overdraw on retina + createCanvas(1920, 1080); +} +``` + +In hot loops (particles, pixel ops), use `Math.*` instead of p5 wrappers — measurably faster: + +```javascript +// In draw() or update() hot paths: +let a = Math.sin(t); // not sin(t) +let r = Math.sqrt(dx*dx+dy*dy); // not dist() — or better: skip sqrt, compare magSq +let v = Math.random(); // not random() — when seed not needed +let m = Math.min(a, b); // not min(a, b) +``` + +Never `console.log()` inside `draw()`. Never manipulate DOM in `draw()`. See `references/troubleshooting.md` § Performance. + +### Seeded Randomness — Always + +Every generative sketch must be reproducible. Same seed, same output. + +```javascript +function setup() { + randomSeed(CONFIG.seed); + noiseSeed(CONFIG.seed); + // All random() and noise() calls now deterministic +} +``` + +Never use `Math.random()` for generative content — only for performance-critical non-visual code. Always `random()` for visual elements. If you need a random seed: `CONFIG.seed = floor(random(99999))`. 
+ +### Generative Art Platform Support (fxhash / Art Blocks) + +For generative art platforms, replace p5's PRNG with the platform's deterministic random: + +```javascript +// fxhash convention +const SEED = $fx.hash; // unique per mint +const rng = $fx.rand; // deterministic PRNG +$fx.features({ palette: 'warm', complexity: 'high' }); + +// In setup(): +randomSeed(SEED); // for p5's noise() +noiseSeed(SEED); + +// Replace random() with rng() for platform determinism +let x = rng() * width; // instead of random(width) +``` + +See `references/export-pipeline.md` § Platform Export. + +### Color Mode — Use HSB + +HSB (Hue, Saturation, Brightness) is dramatically easier to work with than RGB for generative art: + +```javascript +colorMode(HSB, 360, 100, 100, 100); +// Now: fill(hue, sat, bri, alpha) +// Rotate hue: fill((baseHue + offset) % 360, 80, 90) +// Desaturate: fill(hue, sat * 0.3, bri) +// Darken: fill(hue, sat, bri * 0.5) +``` + +Never hardcode raw RGB values. Define a palette object, derive variations procedurally. See `references/color-systems.md`. + +### Noise — Multi-Octave, Not Raw + +Raw `noise(x, y)` looks like smooth blobs. Layer octaves for natural texture: + +```javascript +function fbm(x, y, octaves = 4) { + let val = 0, amp = 1, freq = 1, sum = 0; + for (let i = 0; i < octaves; i++) { + val += noise(x * freq, y * freq) * amp; + sum += amp; + amp *= 0.5; + freq *= 2; + } + return val / sum; +} +``` + +For flowing organic forms, use **domain warping**: feed noise output back as noise input coordinates. See `references/visual-effects.md`. + +### createGraphics() for Layers — Not Optional + +Flat single-pass rendering looks flat. 
Use offscreen buffers for composition: + +```javascript +let bgLayer, fgLayer, trailLayer; +function setup() { + createCanvas(1920, 1080); + bgLayer = createGraphics(width, height); + fgLayer = createGraphics(width, height); + trailLayer = createGraphics(width, height); +} +function draw() { + renderBackground(bgLayer); + renderTrails(trailLayer); // persistent, fading + renderForeground(fgLayer); // cleared each frame + image(bgLayer, 0, 0); + image(trailLayer, 0, 0); + image(fgLayer, 0, 0); +} +``` + +### Performance — Vectorize Where Possible + +p5.js draw calls are expensive. For thousands of particles: + +```javascript +// SLOW: individual shapes +for (let p of particles) { + ellipse(p.x, p.y, p.size); +} + +// FAST: single shape with beginShape() +beginShape(POINTS); +for (let p of particles) { + vertex(p.x, p.y); +} +endShape(); + +// FASTEST: pixel buffer for massive counts +loadPixels(); +for (let p of particles) { + let idx = 4 * (floor(p.y) * width + floor(p.x)); + pixels[idx] = r; pixels[idx+1] = g; pixels[idx+2] = b; pixels[idx+3] = 255; +} +updatePixels(); +``` + +See `references/troubleshooting.md` § Performance. + +### Instance Mode for Multiple Sketches + +Global mode pollutes `window`. For production, use instance mode: + +```javascript +const sketch = (p) => { + p.setup = function() { + p.createCanvas(800, 800); + }; + p.draw = function() { + p.background(0); + p.ellipse(p.mouseX, p.mouseY, 50); + }; +}; +new p5(sketch, 'canvas-container'); +``` + +Required when embedding multiple sketches on one page or integrating with frameworks. 
+ +### WebGL Mode Gotchas + +- `createCanvas(w, h, WEBGL)` — origin is center, not top-left +- Y-axis is inverted (positive Y goes up in WEBGL, down in P2D) +- `translate(-width/2, -height/2)` to get P2D-like coordinates +- `push()`/`pop()` around every transform — matrix stack overflows silently +- `texture()` before `rect()`/`plane()` — not after +- Custom shaders: `createShader(vert, frag)` — test on multiple browsers + +### Export — Key Bindings Convention + +Every sketch should include these in `keyPressed()`: + +```javascript +function keyPressed() { + if (key === 's' || key === 'S') saveCanvas('output', 'png'); + if (key === 'g' || key === 'G') saveGif('output', 5); + if (key === 'r' || key === 'R') { randomSeed(millis()); noiseSeed(millis()); } + if (key === ' ') CONFIG.paused = !CONFIG.paused; +} +``` + +### Headless Video Export — Use noLoop() + +For headless rendering via Puppeteer, the sketch **must** use `noLoop()` in setup. Without it, p5's draw loop runs freely while screenshots are slow — the sketch races ahead and you get skipped/duplicate frames. + +```javascript +function setup() { + createCanvas(1920, 1080); + pixelDensity(1); + noLoop(); // capture script controls frame advance + window._p5Ready = true; // signal readiness to capture script +} +``` + +The bundled `scripts/export-frames.js` detects `_p5Ready` and calls `redraw()` once per capture for exact 1:1 frame correspondence. See `references/export-pipeline.md` § Deterministic Capture. + +For multi-scene videos, use the per-clip architecture: one HTML per scene, render independently, stitch with `ffmpeg -f concat`. See `references/export-pipeline.md` § Per-Clip Architecture. + +### Agent Workflow + +When building p5.js sketches: + +1. **Write the HTML file** — single self-contained file, all code inline +2. **Open in browser** — `open sketch.html` (macOS) or `xdg-open sketch.html` (Linux) +3. 
**Local assets** (fonts, images) require a server: `python3 -m http.server 8080` in the project directory, then open `http://localhost:8080/sketch.html` +4. **Export PNG/GIF** — add `keyPressed()` shortcuts as shown above, tell the user which key to press +5. **Headless export** — `node scripts/export-frames.js sketch.html --frames 300` for automated frame capture (sketch must use `noLoop()` + `_p5Ready`) +6. **MP4 rendering** — `bash scripts/render.sh sketch.html output.mp4 --duration 30` +7. **Iterative refinement** — edit the HTML file, user refreshes browser to see changes +8. **Load references on demand** — use `skill_view(name="p5js", file_path="references/...")` to load specific reference files as needed during implementation + +## Performance Targets + +| Metric | Target | +|--------|--------| +| Frame rate (interactive) | 60fps sustained | +| Frame rate (animated export) | 30fps minimum | +| Particle count (P2D shapes) | 5,000-10,000 at 60fps | +| Particle count (pixel buffer) | 50,000-100,000 at 60fps | +| Canvas resolution | Up to 3840x2160 (export), 1920x1080 (interactive) | +| File size (HTML) | < 100KB (excluding CDN libraries) | +| Load time | < 2s to first frame | + +## References + +| File | Contents | +|------|----------| +| `references/core-api.md` | Canvas setup, coordinate system, draw loop, `push()`/`pop()`, offscreen buffers, composition patterns, `pixelDensity()`, responsive design | +| `references/shapes-and-geometry.md` | 2D primitives, `beginShape()`/`endShape()`, Bezier/Catmull-Rom curves, `vertex()` systems, custom shapes, `p5.Vector`, signed distance fields, SVG path conversion | +| `references/visual-effects.md` | Noise (Perlin, fractal, domain warp, curl), flow fields, particle systems (physics, flocking, trails), pixel manipulation, texture generation (stipple, hatch, halftone), feedback loops, reaction-diffusion | +| `references/animation.md` | Frame-based animation, easing functions, `lerp()`/`map()`, spring physics, state 
machines, timeline sequencing, `millis()`-based timing, transition patterns | +| `references/typography.md` | `text()`, `loadFont()`, `textToPoints()`, kinetic typography, text masks, font metrics, responsive text sizing | +| `references/color-systems.md` | `colorMode()`, HSB/HSL/RGB, `lerpColor()`, `paletteLerp()`, procedural palettes, color harmony, `blendMode()`, gradient rendering, curated palette library | +| `references/webgl-and-3d.md` | WEBGL renderer, 3D primitives, camera, lighting, materials, custom geometry, GLSL shaders (`createShader()`, `createFilterShader()`), framebuffers, post-processing | +| `references/interaction.md` | Mouse events, keyboard state, touch input, DOM elements, `createSlider()`/`createButton()`, audio input (p5.sound FFT/amplitude), scroll-driven animation, responsive events | +| `references/export-pipeline.md` | `saveCanvas()`, `saveGif()`, `saveFrames()`, deterministic headless capture, ffmpeg frame-to-video, CCapture.js, SVG export, per-clip architecture, platform export (fxhash), video gotchas | +| `references/troubleshooting.md` | Performance profiling, per-pixel budgets, common mistakes, browser compatibility, WebGL debugging, font loading issues, pixel density traps, memory leaks, CORS | +| `templates/viewer.html` | Interactive viewer template: seed navigation (prev/next/random/jump), parameter sliders, download PNG, responsive canvas. Start from this for explorable generative art | + +--- + +## Creative Divergence (use only when user requests experimental/creative/unique output) + +If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code. 
+ +- **Conceptual Blending** — when the user names two things to combine or wants hybrid aesthetics +- **SCAMPER** — when the user wants a twist on a known generative art pattern +- **Distance Association** — when the user gives a single concept and wants exploration ("make something about time") + +### Conceptual Blending +1. Name two distinct visual systems (e.g., particle physics + handwriting) +2. Map correspondences (particles = ink drops, forces = pen pressure, fields = letterforms) +3. Blend selectively — keep mappings that produce interesting emergent visuals +4. Code the blend as a unified system, not two systems side-by-side + +### SCAMPER Transformation +Take a known generative pattern (flow field, particle system, L-system, cellular automata) and systematically transform it: +- **Substitute**: replace circles with text characters, lines with gradients +- **Combine**: merge two patterns (flow field + voronoi) +- **Adapt**: apply a 2D pattern to a 3D projection +- **Modify**: exaggerate scale, warp the coordinate space +- **Purpose**: use a physics sim for typography, a sorting algorithm for color +- **Eliminate**: remove the grid, remove color, remove symmetry +- **Reverse**: run the simulation backward, invert the parameter space + +### Distance Association +1. Anchor on the user's concept (e.g., "loneliness") +2. Generate associations at three distances: + - Close (obvious): empty room, single figure, silence + - Medium (interesting): one fish in a school swimming the wrong way, a phone with no notifications, the gap between subway cars + - Far (abstract): prime numbers, asymptotic curves, the color of 3am +3. 
Develop the medium-distance associations — they're specific enough to visualize but unexpected enough to be interesting
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md b/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md
new file mode 100644
index 0000000000..beecb38f08
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/creative/creative-pixel-art.md
@@ -0,0 +1,232 @@
+---
+title: "Pixel Art — Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.)"
+sidebar_label: "Pixel Art"
+description: "Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.)"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Pixel Art
+
+Convert images into retro pixel art with hardware-accurate palettes (NES, Game Boy, PICO-8, C64, etc.), and animate them into short videos. Presets cover arcade, SNES, and 10+ era-correct looks. Use `clarify` to let the user pick a style before generating.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/creative/pixel-art` |
+| Version | `2.0.0` |
+| Author | dodo-reach |
+| License | MIT |
+| Tags | `creative`, `pixel-art`, `arcade`, `snes`, `nes`, `gameboy`, `retro`, `image`, `video` |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Pixel Art
+
+Convert any image into retro pixel art, then optionally animate it into a short
+MP4 or GIF with era-appropriate effects (rain, fireflies, snow, embers).
+ +Two scripts ship with this skill: + +- `scripts/pixel_art.py` — photo → pixel-art PNG (Floyd-Steinberg dithering) +- `scripts/pixel_art_video.py` — pixel-art PNG → animated MP4 (+ optional GIF) + +Each is importable or runnable directly. Presets snap to hardware palettes +when you want era-accurate colors (NES, Game Boy, PICO-8, etc.), or use +adaptive N-color quantization for arcade/SNES-style looks. + +## When to Use + +- User wants retro pixel art from a source image +- User asks for NES / Game Boy / PICO-8 / C64 / arcade / SNES styling +- User wants a short looping animation (rain scene, night sky, snow, etc.) +- Posters, album covers, social posts, sprites, characters, avatars + +## Workflow + +Before generating, confirm the style with the user. Different presets produce +very different outputs and regenerating is costly. + +### Step 1 — Offer a style + +Call `clarify` with 4 representative presets. Pick the set based on what the +user asked for — don't just dump all 14. + +Default menu when the user's intent is unclear: + +```python +clarify( + question="Which pixel-art style do you want?", + choices=[ + "arcade — bold, chunky 80s cabinet feel (16 colors, 8px)", + "nes — Nintendo 8-bit hardware palette (54 colors, 8px)", + "gameboy — 4-shade green Game Boy DMG", + "snes — cleaner 16-bit look (32 colors, 4px)", + ], +) +``` + +When the user already named an era (e.g. "80s arcade", "Gameboy"), skip +`clarify` and use the matching preset directly. + +### Step 2 — Offer animation (optional) + +If the user asked for a video/GIF, or the output might benefit from motion, +ask which scene: + +```python +clarify( + question="Want to animate it? Pick a scene or skip.", + choices=[ + "night — stars + fireflies + leaves", + "urban — rain + neon pulse", + "snow — falling snowflakes", + "skip — just the image", + ], +) +``` + +Do NOT call `clarify` more than twice in a row. One for style, one for scene if +animation is on the table. 
If the user explicitly asked for a specific style +and scene in their message, skip `clarify` entirely. + +### Step 3 — Generate + +Run `pixel_art()` first; if animation was requested, chain into +`pixel_art_video()` on the result. + +## Preset Catalog + +| Preset | Era | Palette | Block | Best for | +|--------|-----|---------|-------|----------| +| `arcade` | 80s arcade | adaptive 16 | 8px | Bold posters, hero art | +| `snes` | 16-bit | adaptive 32 | 4px | Characters, detailed scenes | +| `nes` | 8-bit | NES (54) | 8px | True NES look | +| `gameboy` | DMG handheld | 4 green shades | 8px | Monochrome Game Boy | +| `gameboy_pocket` | Pocket handheld | 4 grey shades | 8px | Mono GB Pocket | +| `pico8` | PICO-8 | 16 fixed | 6px | Fantasy-console look | +| `c64` | Commodore 64 | 16 fixed | 8px | 8-bit home computer | +| `apple2` | Apple II hi-res | 6 fixed | 10px | Extreme retro, 6 colors | +| `teletext` | BBC Teletext | 8 pure | 10px | Chunky primary colors | +| `mspaint` | Windows MS Paint | 24 fixed | 8px | Nostalgic desktop | +| `mono_green` | CRT phosphor | 2 green | 6px | Terminal/CRT aesthetic | +| `mono_amber` | CRT amber | 2 amber | 6px | Amber monitor look | +| `neon` | Cyberpunk | 10 neons | 6px | Vaporwave/cyber | +| `pastel` | Soft pastel | 10 pastels | 6px | Kawaii / gentle | + +Named palettes live in `scripts/palettes.py` (see `references/palettes.md` for +the complete list — 28 named palettes total). 
Any preset can be overridden: + +```python +pixel_art("in.png", "out.png", preset="snes", palette="PICO_8", block=6) +``` + +## Scene Catalog (for video) + +| Scene | Effects | +|-------|---------| +| `night` | Twinkling stars + fireflies + drifting leaves | +| `dusk` | Fireflies + sparkles | +| `tavern` | Dust motes + warm sparkles | +| `indoor` | Dust motes | +| `urban` | Rain + neon pulse | +| `nature` | Leaves + fireflies | +| `magic` | Sparkles + fireflies | +| `storm` | Rain + lightning | +| `underwater` | Bubbles + light sparkles | +| `fire` | Embers + sparkles | +| `snow` | Snowflakes + sparkles | +| `desert` | Heat shimmer + dust | + +## Invocation Patterns + +### Python (import) + +```python +import sys +sys.path.insert(0, "/home/teknium/.hermes/skills/creative/pixel-art/scripts") +from pixel_art import pixel_art +from pixel_art_video import pixel_art_video + +# 1. Convert to pixel art +pixel_art("/path/to/photo.jpg", "/tmp/pixel.png", preset="nes") + +# 2. Animate (optional) +pixel_art_video( + "/tmp/pixel.png", + "/tmp/pixel.mp4", + scene="night", + duration=6, + fps=15, + seed=42, + export_gif=True, +) +``` + +### CLI + +```bash +cd /home/teknium/.hermes/skills/creative/pixel-art/scripts + +python pixel_art.py in.jpg out.png --preset gameboy +python pixel_art.py in.jpg out.png --preset snes --palette PICO_8 --block 6 + +python pixel_art_video.py out.png out.mp4 --scene night --duration 6 --gif +``` + +## Pipeline Rationale + +**Pixel conversion:** +1. Boost contrast/color/sharpness (stronger for smaller palettes) +2. Posterize to simplify tonal regions before quantization +3. Downscale by `block` with `Image.NEAREST` (hard pixels, no interpolation) +4. Quantize with Floyd-Steinberg dithering — against either an adaptive + N-color palette OR a named hardware palette +5. Upscale back with `Image.NEAREST` + +Quantizing AFTER downscale keeps dithering aligned with the final pixel grid. 
+Quantizing before would waste error-diffusion on detail that disappears.
+
+**Video overlay:**
+- Copies the base frame each tick (static background)
+- Overlays stateless-per-frame particle draws (one function per effect)
+- Encodes via ffmpeg `libx264 -pix_fmt yuv420p -crf 18`
+- Optional GIF via `palettegen` + `paletteuse`
+
+## Dependencies
+
+- Python 3.9+
+- Pillow (`pip install Pillow`)
+- ffmpeg on PATH (only needed for video — the Hermes install package provides this)
+
+## Pitfalls
+
+- Palette keys are case-sensitive (`"NES"`, `"PICO_8"`, `"GAMEBOY_ORIGINAL"`).
+- Very small sources (<100px wide) collapse under 8-10px blocks. Upscale the
+  source first if it's tiny.
+- Fractional `block` or `palette` will break quantization — keep them positive ints.
+- Animation particle counts are tuned for ~640x480 canvases. On very large
+  images you may want a second pass with a different seed for density.
+- `mono_green` / `mono_amber` force `color=0.0` (desaturate). If you override
+  and keep chroma, the 2-color palette can produce stripes on smooth regions.
+- `clarify` loop: call it at most twice per turn (style, then scene). Don't
+  pepper the user with more picks.
+
+## Verification
+
+- PNG is created at the output path
+- Clear square pixel blocks visible at the preset's block size
+- Color count matches preset (eyeball the image or run `Image.open(p).getcolors()`)
+- Video is a valid MP4 (`ffprobe` can open it) with non-zero size
+
+## Attribution
+
+Named hardware palettes and the procedural animation loops in `pixel_art_video.py`
+are ported from [pixel-art-studio](https://github.com/Synero/pixel-art-studio)
+(MIT). See `ATTRIBUTION.md` in this skill directory for details.
diff --git a/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md b/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md
new file mode 100644
index 0000000000..838a1c1799
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/creative/creative-popular-web-designs.md
@@ -0,0 +1,212 @@
+---
+title: "Popular Web Designs — 54 production-quality design systems extracted from real websites"
+sidebar_label: "Popular Web Designs"
+description: "54 production-quality design systems extracted from real websites"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Popular Web Designs
+
+54 production-quality design systems extracted from real websites. Load a template to generate HTML/CSS that matches the visual identity of sites like Stripe, Linear, Vercel, Notion, Airbnb, and more. Each template includes colors, typography, components, layout rules, and ready-to-use CSS values.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/creative/popular-web-designs` |
+| Version | `1.0.0` |
+| Author | Hermes Agent + Teknium (design systems sourced from VoltAgent/awesome-design-md) |
+| License | MIT |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+:::
+
+# Popular Web Designs
+
+54 real-world design systems ready for use when generating HTML/CSS. Each template captures a
+site's complete visual language: color palette, typography hierarchy, component styles, spacing
+system, shadows, responsive behavior, and practical agent prompts with exact CSS values.
+
+## How to Use
+
+1. Pick a design from the catalog below
+2. Load it: `skill_view(name="popular-web-designs", file_path="templates/<design>.md")`
+3.
Use the design tokens and component specs when generating HTML +4. Pair with the `generative-widgets` skill to serve the result via cloudflared tunnel + +Each template includes a **Hermes Implementation Notes** block at the top with: +- CDN font substitute and Google Fonts `` tag (ready to paste) +- CSS font-family stacks for primary and monospace +- Reminders to use `write_file` for HTML creation and `browser_vision` for verification + +## HTML Generation Pattern + +```html + + + + + + Page Title + + + + + + + + +``` + +Write the file with `write_file`, serve with the `generative-widgets` workflow (cloudflared tunnel), +and verify the result with `browser_vision` to confirm visual accuracy. + +## Font Substitution Reference + +Most sites use proprietary fonts unavailable via CDN. Each template maps to a Google Fonts +substitute that preserves the design's character. Common mappings: + +| Proprietary Font | CDN Substitute | Character | +|---|---|---| +| Geist / Geist Sans | Geist (on Google Fonts) | Geometric, compressed tracking | +| Geist Mono | Geist Mono (on Google Fonts) | Clean monospace, ligatures | +| sohne-var (Stripe) | Source Sans 3 | Light weight elegance | +| Berkeley Mono | JetBrains Mono | Technical monospace | +| Airbnb Cereal VF | DM Sans | Rounded, friendly geometric | +| Circular (Spotify) | DM Sans | Geometric, warm | +| figmaSans | Inter | Clean humanist | +| Pin Sans (Pinterest) | DM Sans | Friendly, rounded | +| NVIDIA-EMEA | Inter (or Arial system) | Industrial, clean | +| CoinbaseDisplay/Sans | DM Sans | Geometric, trustworthy | +| UberMove | DM Sans | Bold, tight | +| HashiCorp Sans | Inter | Enterprise, neutral | +| waldenburgNormal (Sanity) | Space Grotesk | Geometric, slightly condensed | +| IBM Plex Sans/Mono | IBM Plex Sans/Mono | Available on Google Fonts | +| Rubik (Sentry) | Rubik | Available on Google Fonts | + +When a template's CDN font matches the original (Inter, IBM Plex, Rubik, Geist), no +substitution loss occurs. 
When a substitute is used (DM Sans for Circular, Source Sans 3 +for sohne-var), follow the template's weight, size, and letter-spacing values closely — +those carry more visual identity than the specific font face. + +## Design Catalog + +### AI & Machine Learning + +| Template | Site | Style | +|---|---|---| +| `claude.md` | Anthropic Claude | Warm terracotta accent, clean editorial layout | +| `cohere.md` | Cohere | Vibrant gradients, data-rich dashboard aesthetic | +| `elevenlabs.md` | ElevenLabs | Dark cinematic UI, audio-waveform aesthetics | +| `minimax.md` | Minimax | Bold dark interface with neon accents | +| `mistral.ai.md` | Mistral AI | French-engineered minimalism, purple-toned | +| `ollama.md` | Ollama | Terminal-first, monochrome simplicity | +| `opencode.ai.md` | OpenCode AI | Developer-centric dark theme, full monospace | +| `replicate.md` | Replicate | Clean white canvas, code-forward | +| `runwayml.md` | RunwayML | Cinematic dark UI, media-rich layout | +| `together.ai.md` | Together AI | Technical, blueprint-style design | +| `voltagent.md` | VoltAgent | Void-black canvas, emerald accent, terminal-native | +| `x.ai.md` | xAI | Stark monochrome, futuristic minimalism, full monospace | + +### Developer Tools & Platforms + +| Template | Site | Style | +|---|---|---| +| `cursor.md` | Cursor | Sleek dark interface, gradient accents | +| `expo.md` | Expo | Dark theme, tight letter-spacing, code-centric | +| `linear.app.md` | Linear | Ultra-minimal dark-mode, precise, purple accent | +| `lovable.md` | Lovable | Playful gradients, friendly dev aesthetic | +| `mintlify.md` | Mintlify | Clean, green-accented, reading-optimized | +| `posthog.md` | PostHog | Playful branding, developer-friendly dark UI | +| `raycast.md` | Raycast | Sleek dark chrome, vibrant gradient accents | +| `resend.md` | Resend | Minimal dark theme, monospace accents | +| `sentry.md` | Sentry | Dark dashboard, data-dense, pink-purple accent | +| `supabase.md` | Supabase | Dark emerald 
theme, code-first developer tool | +| `superhuman.md` | Superhuman | Premium dark UI, keyboard-first, purple glow | +| `vercel.md` | Vercel | Black and white precision, Geist font system | +| `warp.md` | Warp | Dark IDE-like interface, block-based command UI | +| `zapier.md` | Zapier | Warm orange, friendly illustration-driven | + +### Infrastructure & Cloud + +| Template | Site | Style | +|---|---|---| +| `clickhouse.md` | ClickHouse | Yellow-accented, technical documentation style | +| `composio.md` | Composio | Modern dark with colorful integration icons | +| `hashicorp.md` | HashiCorp | Enterprise-clean, black and white | +| `mongodb.md` | MongoDB | Green leaf branding, developer documentation focus | +| `sanity.md` | Sanity | Red accent, content-first editorial layout | +| `stripe.md` | Stripe | Signature purple gradients, weight-300 elegance | + +### Design & Productivity + +| Template | Site | Style | +|---|---|---| +| `airtable.md` | Airtable | Colorful, friendly, structured data aesthetic | +| `cal.md` | Cal.com | Clean neutral UI, developer-oriented simplicity | +| `clay.md` | Clay | Organic shapes, soft gradients, art-directed layout | +| `figma.md` | Figma | Vibrant multi-color, playful yet professional | +| `framer.md` | Framer | Bold black and blue, motion-first, design-forward | +| `intercom.md` | Intercom | Friendly blue palette, conversational UI patterns | +| `miro.md` | Miro | Bright yellow accent, infinite canvas aesthetic | +| `notion.md` | Notion | Warm minimalism, serif headings, soft surfaces | +| `pinterest.md` | Pinterest | Red accent, masonry grid, image-first layout | +| `webflow.md` | Webflow | Blue-accented, polished marketing site aesthetic | + +### Fintech & Crypto + +| Template | Site | Style | +|---|---|---| +| `coinbase.md` | Coinbase | Clean blue identity, trust-focused, institutional feel | +| `kraken.md` | Kraken | Purple-accented dark UI, data-dense dashboards | +| `revolut.md` | Revolut | Sleek dark interface, gradient cards, 
fintech precision | +| `wise.md` | Wise | Bright green accent, friendly and clear | + +### Enterprise & Consumer + +| Template | Site | Style | +|---|---|---| +| `airbnb.md` | Airbnb | Warm coral accent, photography-driven, rounded UI | +| `apple.md` | Apple | Premium white space, SF Pro, cinematic imagery | +| `bmw.md` | BMW | Dark premium surfaces, precise engineering aesthetic | +| `ibm.md` | IBM | Carbon design system, structured blue palette | +| `nvidia.md` | NVIDIA | Green-black energy, technical power aesthetic | +| `spacex.md` | SpaceX | Stark black and white, full-bleed imagery, futuristic | +| `spotify.md` | Spotify | Vibrant green on dark, bold type, album-art-driven | +| `uber.md` | Uber | Bold black and white, tight type, urban energy | + +## Choosing a Design + +Match the design to the content: + +- **Developer tools / dashboards:** Linear, Vercel, Supabase, Raycast, Sentry +- **Documentation / content sites:** Mintlify, Notion, Sanity, MongoDB +- **Marketing / landing pages:** Stripe, Framer, Apple, SpaceX +- **Dark mode UIs:** Linear, Cursor, ElevenLabs, Warp, Superhuman +- **Light / clean UIs:** Vercel, Stripe, Notion, Cal.com, Replicate +- **Playful / friendly:** PostHog, Figma, Lovable, Zapier, Miro +- **Premium / luxury:** Apple, BMW, Stripe, Superhuman, Revolut +- **Data-dense / dashboards:** Sentry, Kraken, Cohere, ClickHouse +- **Monospace / terminal aesthetic:** Ollama, OpenCode, x.ai, VoltAgent diff --git a/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md b/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md new file mode 100644 index 0000000000..cd0b7fb148 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/creative/creative-songwriting-and-ai-music.md @@ -0,0 +1,297 @@ +--- +title: "Songwriting And Ai Music" +sidebar_label: "Songwriting And Ai Music" +description: "Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic 
tricks, and lessons learned" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Songwriting And Ai Music + +Songwriting craft, AI music generation prompts (Suno focus), parody/adaptation techniques, phonetic tricks, and lessons learned. These are tools and ideas, not rules. Break any of them when the art calls for it. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/creative/songwriting-and-ai-music` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Songwriting & AI Music Generation + +Everything here is a GUIDELINE, not a rule. Art breaks rules on purpose. +Use what serves the song. Ignore what doesn't. + +--- + +## 1. Song Structure (Pick One or Invent Your Own) + +Common skeletons — mix, modify, or throw out as needed: + +``` +ABABCB Verse/Chorus/Verse/Chorus/Bridge/Chorus (most pop/rock) +AABA Verse/Verse/Bridge/Verse (refrain-based) (jazz standards, ballads) +ABAB Verse/Chorus alternating (simple, direct) +AAA Verse/Verse/Verse (strophic, no chorus) (folk, storytelling) +``` + +The six building blocks: +- Intro — set the mood, pull the listener in +- Verse — the story, the details, the world-building +- Pre-Chorus — optional tension ramp before the payoff +- Chorus — the emotional core, the part people remember +- Bridge — a detour, a shift in perspective or key +- Outro — the farewell, can echo or subvert the rest + +You don't need all of these. Some great songs are just one section +that evolves. Structure serves the emotion, not the other way around. + +--- + +## 2. 
Rhyme, Meter, and Sound + +RHYME TYPES (from tight to loose): +- Perfect: lean/mean +- Family: crate/braid +- Assonance: had/glass (same vowels, different endings) +- Consonance: scene/when (different vowels, similar endings) +- Near/slant: enough to suggest connection without locking it down + +Mix them. All perfect rhymes can sound like a nursery rhyme. +All slant rhymes can sound lazy. The blend is where it lives. + +INTERNAL RHYME: Rhyming within a line, not just at the ends. + "We pruned the lies from bleeding trees / Distilled the storm + from entropy" — "lies/flies," "trees/entropy" create internal echoes. + +METER: The rhythm of stressed vs unstressed syllables. +- Matching syllable counts between parallel lines helps singability +- The STRESSED syllables matter more than total count +- Say it out loud. If you stumble, the meter needs work. +- Intentionally breaking meter can create emphasis or surprise + +--- + +## 3. Emotional Arc and Dynamics + +Think of a song as a journey, not a flat road. + +ENERGY MAPPING (rough idea, not prescription): + Intro: 2-3 | Verse: 5-6 | Pre-Chorus: 7 + Chorus: 8-9 | Bridge: varies | Final Chorus: 9-10 + +The most powerful dynamic trick: CONTRAST. +- Whisper before a scream hits harder than just screaming +- Sparse before dense. Slow before fast. Low before high. +- The drop only works because of the buildup +- Silence is an instrument + +"Whisper to roar to whisper" — start intimate, build to full power, +strip back to vulnerability. Works for ballads, epics, anthems. + +--- + +## 4. 
Writing Lyrics That Work + +SHOW, DON'T TELL (usually): +- "I was sad" = flat +- "Your hoodie's still on the hook by the door" = alive +- But sometimes "I give my life" said plainly IS the power + +THE HOOK: +- The line people remember, hum, repeat +- Usually the title or core phrase +- Works best when melody + lyric + emotion all align +- Place it where it lands hardest (often first/last line of chorus) + +PROSODY — lyrics and music supporting each other: +- Stable feelings (resolution, peace) pair with settled melodies, + perfect rhymes, resolved chords +- Unstable feelings (longing, doubt) pair with wandering melodies, + near-rhymes, unresolved chords +- Verse melody typically sits lower, chorus goes higher +- But flip this if it serves the song + +AVOID (unless you're doing it on purpose): +- Cliches on autopilot ("heart of gold" without earning it) +- Forcing word order to hit a rhyme ("Yoda-speak") +- Same energy in every section (flat dynamics) +- Treating your first draft as sacred — revision is creation + +--- + +## 5. Parody and Adaptation + +When rewriting an existing song with new lyrics: + +THE SKELETON: Map the original's structure first. +- Count syllables per line +- Mark the rhyme scheme (ABAB, AABB, etc.) 
+- Identify which syllables are STRESSED +- Note where held/sustained notes fall + +FITTING NEW WORDS: +- Match stressed syllables to the same beats as the original +- Total syllable count can flex by 1-2 unstressed syllables +- On long held notes, try to match the VOWEL SOUND of the original + (if original holds "LOOOVE" with an "oo" vowel, "FOOOD" fits + better than "LIFE") +- Monosyllabic swaps in key spots keep rhythm intact + (Crime -> Code, Snake -> Noose) +- Sing your new words over the original — if you stumble, revise + +CONCEPT: +- Pick a concept strong enough to sustain the whole song +- Start from the title/hook and build outward +- Generate lots of raw material (puns, phrases, images) FIRST, + then fit the best ones into the structure +- If you need a specific line somewhere, reverse-engineer the + rhyme scheme backward to set it up + +KEEP SOME ORIGINALS: Leaving a few original lines or structures +intact adds recognizability and lets the audience feel the connection. + +--- + +## 6. Suno AI Prompt Engineering + +### Style/Genre Description Field + +FORMULA (adapt as needed): + Genre + Mood + Era + Instruments + Vocal Style + Production + Dynamics + +``` +BAD: "sad rock song" +GOOD: "Cinematic orchestral spy thriller, 1960s Cold War era, smoky + sultry female vocalist, big band jazz, brass section with + trumpets and french horns, sweeping strings, minor key, + vintage analog warmth" +``` + +DESCRIBE THE JOURNEY, not just the genre: +``` +"Begins as a haunting whisper over sparse piano. Gradually layers + in muted brass. Builds through the chorus with full orchestra. + Second verse erupts with raw belting intensity. Outro strips back + to a lone piano and a fragile whisper fading to silence." +``` + +TIPS: +- V4.5+ supports up to 1,000 chars in Style field — use them +- NO artist names or trademarks. Describe the sound instead. 
+ "1960s Cold War spy thriller brass" not "James Bond style" + "90s grunge" not "Nirvana-style" +- Specify BPM and key when you have a preference +- Use Exclude Styles field for what you DON'T want +- Unexpected genre combos can be gold: "bossa nova trap", + "Appalachian gothic", "chiptune jazz" +- Build a vocal PERSONA, not just a gender: + "A weathered torch singer with a smoky alto, slight rasp, + who starts vulnerable and builds to devastating power" + +### Metatags (place in [brackets] inside lyrics field) + +STRUCTURE: + [Intro] [Verse] [Verse 1] [Pre-Chorus] [Chorus] + [Post-Chorus] [Hook] [Bridge] [Interlude] + [Instrumental] [Instrumental Break] [Guitar Solo] + [Breakdown] [Build-up] [Outro] [Silence] [End] + +VOCAL PERFORMANCE: + [Whispered] [Spoken Word] [Belted] [Falsetto] [Powerful] + [Soulful] [Raspy] [Breathy] [Smooth] [Gritty] + [Staccato] [Legato] [Vibrato] [Melismatic] + [Harmonies] [Choir] [Harmonized Chorus] + +DYNAMICS: + [High Energy] [Low Energy] [Building Energy] [Explosive] + [Emotional Climax] [Gradual swell] [Orchestral swell] + [Quiet arrangement] [Falling tension] [Slow Down] + +GENDER: + [Female Vocals] [Male Vocals] + +ATMOSPHERE: + [Melancholic] [Euphoric] [Nostalgic] [Aggressive] + [Dreamy] [Intimate] [Dark Atmosphere] + +SFX: + [Vinyl Crackle] [Rain] [Applause] [Static] [Thunder] + +Put tags in BOTH style field AND lyrics for reinforcement. +Keep to 5-8 tags per section max — too many confuses the AI. +Don't contradict yourself ([Calm] + [Aggressive] in same section). + +### Custom Mode +- Always use Custom Mode for serious work (separate Style + Lyrics) +- Lyrics field limit: ~3,000 chars (~40-60 lines) +- Always add structural tags — without them Suno defaults to + flat verse/chorus/verse with no emotional arc + +--- + +## 7. Phonetic Tricks for AI Singers + +AI vocalists don't read — they pronounce. 
Help them: + +PHONETIC RESPELLING: +- Spell words as they SOUND: "through" -> "thru" +- Proper nouns are highest failure rate — test early +- "Nous" -> "Noose" (forces correct pronunciation) +- Hyphenate to guide syllables: "Re-search", "bio-engineering" + +DELIVERY CONTROL: +- ALL CAPS = louder, more intense +- Vowel extension: "lo-o-o-ove" = sustained/melisma +- Ellipses: "I... need... you" = dramatic pauses +- Hyphenated stretch: "ne-e-ed" = emotional stretch + +ALWAYS: +- Spell out numbers: "24/7" -> "twenty four seven" +- Space acronyms: "AI" -> "A I" or "A-I" +- Test proper nouns/unusual words in a short 30-second clip first +- Once generated, pronunciation is baked in — fix in lyrics BEFORE + +--- + +## 8. Workflow + +1. Write the concept/hook first — what's the emotional core? +2. If adapting, map the original structure (syllables, rhyme, stress) +3. Generate raw material — brainstorm freely before structuring +4. Draft lyrics into the structure +5. Read/sing aloud — catch stumbles, fix meter +6. Build the Suno style description — paint the dynamic journey +7. Add metatags to lyrics for performance direction +8. Generate 3-5 variations minimum — treat them like recording takes +9. Pick the best, use Extend/Continue to build on promising sections +10. If something great happens by accident, keep it + +EXPECT: ~3-5 generations per 1 good result. Revision is normal. +Style can drift in extensions — restate genre/mood when extending. + +--- + +## 9. Lessons Learned + +- Describing the dynamic ARC in the style field matters way more + than just listing genres. "Whisper to roar to whisper" gives + Suno a performance map. +- Keeping some original lines intact in a parody adds recognizability + and emotional weight — the audience feels the ghost of the original. +- The bridge slot in a song is where you can transform imagery. + Swap the original's specific references for your theme's metaphors + while keeping the emotional function (reflection, shift, revelation). 
+- Monosyllabic word swaps in hooks/tags are the cleanest way to + maintain rhythm while changing meaning. +- A strong vocal persona description in the style field makes a + bigger difference than any single metatag. +- Don't be precious about rules. If a line breaks meter but hits + harder, keep it. The feeling is what matters. Craft serves art, + not the other way around. diff --git a/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md b/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md new file mode 100644 index 0000000000..027156ccdd --- /dev/null +++ b/website/docs/user-guide/skills/bundled/data-science/data-science-jupyter-live-kernel.md @@ -0,0 +1,183 @@ +--- +title: "Jupyter Live Kernel — Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb" +sidebar_label: "Jupyter Live Kernel" +description: "Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Jupyter Live Kernel + +Use a live Jupyter kernel for stateful, iterative Python execution via hamelnb. Load this skill when the task involves exploration, iteration, or inspecting intermediate results — data science, ML experimentation, API exploration, or building up complex code step-by-step. Uses terminal to run CLI commands against a live Jupyter kernel. No new tools required. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/data-science/jupyter-live-kernel` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `jupyter`, `notebook`, `repl`, `data-science`, `exploration`, `iterative` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. 
This is what the agent sees as instructions when the skill is active. +::: + +# Jupyter Live Kernel (hamelnb) + +Gives you a **stateful Python REPL** via a live Jupyter kernel. Variables persist +across executions. Use this instead of `execute_code` when you need to build up +state incrementally, explore APIs, inspect DataFrames, or iterate on complex code. + +## When to Use This vs Other Tools + +| Tool | Use When | +|------|----------| +| **This skill** | Iterative exploration, state across steps, data science, ML, "let me try this and check" | +| `execute_code` | One-shot scripts needing hermes tool access (web_search, file ops). Stateless. | +| `terminal` | Shell commands, builds, installs, git, process management | + +**Rule of thumb:** If you'd want a Jupyter notebook for the task, use this skill. + +## Prerequisites + +1. **uv** must be installed (check: `which uv`) +2. **JupyterLab** must be installed: `uv tool install jupyterlab` +3. A Jupyter server must be running (see Setup below) + +## Setup + +The hamelnb script location: +``` +SCRIPT="$HOME/.agent-skills/hamelnb/skills/jupyter-live-kernel/scripts/jupyter_live_kernel.py" +``` + +If not cloned yet: +``` +git clone https://github.com/hamelsmu/hamelnb.git ~/.agent-skills/hamelnb +``` + +### Starting JupyterLab + +Check if a server is already running: +``` +uv run "$SCRIPT" servers +``` + +If no servers found, start one: +``` +jupyter-lab --no-browser --port=8888 --notebook-dir=$HOME/notebooks \ + --IdentityProvider.token='' --ServerApp.password='' > /tmp/jupyter.log 2>&1 & +sleep 3 +``` + +Note: Token/password disabled for local agent access. The server runs headless. 
+
+### Creating a Notebook for REPL Use
+
+If you just need a REPL (no existing notebook), create a minimal notebook file:
+```
+mkdir -p ~/notebooks
+```
+Write a minimal .ipynb JSON file with one empty code cell, then start a kernel
+session via the Jupyter REST API:
+```
+curl -s -X POST http://127.0.0.1:8888/api/sessions \
+  -H "Content-Type: application/json" \
+  -d '{"path":"scratch.ipynb","type":"notebook","name":"scratch.ipynb","kernel":{"name":"python3"}}'
+```
+
+## Core Workflow
+
+All commands return structured JSON. Always use `--compact` to save tokens.
+
+### 1. Discover servers and notebooks
+
+```
+uv run "$SCRIPT" servers --compact
+uv run "$SCRIPT" notebooks --compact
+```
+
+### 2. Execute code (primary operation)
+
+```
+uv run "$SCRIPT" execute --path <notebook-path> --code '<python code>' --compact
+```
+
+State persists across execute calls. Variables, imports, objects all survive.
+
+Multi-line code works with $'...' quoting:
+```
+uv run "$SCRIPT" execute --path scratch.ipynb --code $'import os\nfiles = os.listdir(".")\nprint(f"Found {len(files)} files")' --compact
+```
+
+### 3. Inspect live variables
+
+```
+uv run "$SCRIPT" variables --path <notebook-path> list --compact
+uv run "$SCRIPT" variables --path <notebook-path> preview --name <variable> --compact
+```
+
+### 4. Edit notebook cells
+
+```
+# View current cells
+uv run "$SCRIPT" contents --path <notebook-path> --compact
+
+# Insert a new cell
+uv run "$SCRIPT" edit --path <notebook-path> insert \
+  --at-index <N> --cell-type code --source '<code>' --compact
+
+# Replace cell source (use cell-id from contents output)
+uv run "$SCRIPT" edit --path <notebook-path> replace-source \
+  --cell-id <cell-id> --source '<new code>' --compact
+
+# Delete a cell
+uv run "$SCRIPT" edit --path <notebook-path> delete --cell-id <cell-id> --compact
+```
+
+### 5. Verification (restart + run all)
+
+Only use when the user asks for a clean verification or you need to confirm
+the notebook runs top-to-bottom:
+
+```
+uv run "$SCRIPT" restart-run-all --path <notebook-path> --save-outputs --compact
+```
+
+## Practical Tips from Experience
+
+1.
**First execution after server start may timeout** — the kernel needs a moment + to initialize. If you get a timeout, just retry. + +2. **The kernel Python is JupyterLab's Python** — packages must be installed in + that environment. If you need additional packages, install them into the + JupyterLab tool environment first. + +3. **--compact flag saves significant tokens** — always use it. JSON output can + be very verbose without it. + +4. **For pure REPL use**, create a scratch.ipynb and don't bother with cell editing. + Just use `execute` repeatedly. + +5. **Argument order matters** — subcommand flags like `--path` go BEFORE the + sub-subcommand. E.g.: `variables --path nb.ipynb list` not `variables list --path nb.ipynb`. + +6. **If a session doesn't exist yet**, you need to start one via the REST API + (see Setup section). The tool can't execute without a live kernel session. + +7. **Errors are returned as JSON** with traceback — read the `ename` and `evalue` + fields to understand what went wrong. + +8. **Occasional websocket timeouts** — some operations may timeout on first try, + especially after a kernel restart. Retry once before escalating. + +## Timeout Defaults + +The script has a 30-second default timeout per execution. For long-running +operations, pass `--timeout 120`. Use generous timeouts (60+) for initial +setup or heavy computation. 
diff --git a/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md b/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md new file mode 100644 index 0000000000..8b5b8ade8f --- /dev/null +++ b/website/docs/user-guide/skills/bundled/devops/devops-webhook-subscriptions.md @@ -0,0 +1,221 @@ +--- +title: "Webhook Subscriptions" +sidebar_label: "Webhook Subscriptions" +description: "Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Webhook Subscriptions + +Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost). Use when the user wants external services to trigger agent runs OR push notifications to chats. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/devops/webhook-subscriptions` | +| Version | `1.1.0` | +| Tags | `webhook`, `events`, `automation`, `integrations`, `notifications`, `push` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Webhook Subscriptions + +Create dynamic webhook subscriptions so external services (GitHub, GitLab, Stripe, CI/CD, IoT sensors, monitoring tools) can trigger Hermes agent runs by POSTing events to a URL. + +## Setup (Required First) + +The webhook platform must be enabled before subscriptions can be created. Check with: +```bash +hermes webhook list +``` + +If it says "Webhook platform is not enabled", set it up: + +### Option 1: Setup wizard +```bash +hermes gateway setup +``` +Follow the prompts to enable webhooks, set the port, and set a global HMAC secret. 
+
+### Option 2: Manual config
+Add to `~/.hermes/config.yaml`:
+```yaml
+platforms:
+  webhook:
+    enabled: true
+    extra:
+      host: "0.0.0.0"
+      port: 8644
+      secret: "generate-a-strong-secret-here"
+```
+
+### Option 3: Environment variables
+Add to `~/.hermes/.env`:
+```bash
+WEBHOOK_ENABLED=true
+WEBHOOK_PORT=8644
+WEBHOOK_SECRET=generate-a-strong-secret-here
+```
+
+After configuration, start (or restart) the gateway:
+```bash
+hermes gateway run
+# Or if using systemd:
+systemctl --user restart hermes-gateway
+```
+
+Verify it's running:
+```bash
+curl http://localhost:8644/health
+```
+
+## Commands
+
+All management is via the `hermes webhook` CLI command:
+
+### Create a subscription
+```bash
+hermes webhook subscribe <name> \
+  --prompt "Prompt template with {payload.fields}" \
+  --events "event1,event2" \
+  --description "What this does" \
+  --skills "skill1,skill2" \
+  --deliver telegram \
+  --deliver-chat-id "12345" \
+  --secret "optional-custom-secret"
+```
+
+Returns the webhook URL and HMAC secret. The user configures their service to POST to that URL.
+
+### List subscriptions
+```bash
+hermes webhook list
+```
+
+### Remove a subscription
+```bash
+hermes webhook remove <name>
+```
+
+### Test a subscription
+```bash
+hermes webhook test <name>
+hermes webhook test <name> --payload '{"key": "value"}'
+```
+
+## Prompt Templates
+
+Prompts support `{dot.notation}` for accessing nested payload fields:
+
+- `{issue.title}` — GitHub issue title
+- `{pull_request.user.login}` — PR author
+- `{data.object.amount}` — Stripe payment amount
+- `{sensor.temperature}` — IoT sensor reading
+
+If no prompt is specified, the full JSON payload is dumped into the agent prompt.
+
+## Common Patterns
+
+### GitHub: new issues
+```bash
+hermes webhook subscribe github-issues \
+  --events "issues" \
+  --prompt "New GitHub issue #{issue.number}: {issue.title}\n\nAction: {action}\nAuthor: {issue.user.login}\nBody:\n{issue.body}\n\nPlease triage this issue."
\ + --deliver telegram \ + --deliver-chat-id "-100123456789" +``` + +Then in GitHub repo Settings → Webhooks → Add webhook: +- Payload URL: the returned webhook_url +- Content type: application/json +- Secret: the returned secret +- Events: "Issues" + +### GitHub: PR reviews +```bash +hermes webhook subscribe github-prs \ + --events "pull_request" \ + --prompt "PR #{pull_request.number} {action}: {pull_request.title}\nBy: {pull_request.user.login}\nBranch: {pull_request.head.ref}\n\n{pull_request.body}" \ + --skills "github-code-review" \ + --deliver github_comment +``` + +### Stripe: payment events +```bash +hermes webhook subscribe stripe-payments \ + --events "payment_intent.succeeded,payment_intent.payment_failed" \ + --prompt "Payment {data.object.status}: {data.object.amount} cents from {data.object.receipt_email}" \ + --deliver telegram \ + --deliver-chat-id "-100123456789" +``` + +### CI/CD: build notifications +```bash +hermes webhook subscribe ci-builds \ + --events "pipeline" \ + --prompt "Build {object_attributes.status} on {project.name} branch {object_attributes.ref}\nCommit: {commit.message}" \ + --deliver discord \ + --deliver-chat-id "1234567890" +``` + +### Generic monitoring alert +```bash +hermes webhook subscribe alerts \ + --prompt "Alert: {alert.name}\nSeverity: {alert.severity}\nMessage: {alert.message}\n\nPlease investigate and suggest remediation." \ + --deliver origin +``` + +### Direct delivery (no agent, zero LLM cost) + +For use cases where you just want to push a notification through to a user's chat — no reasoning, no agent loop — add `--deliver-only`. The rendered `--prompt` template becomes the literal message body and is dispatched directly to the target adapter. 
+ +Use this for: +- External service push notifications (Supabase/Firebase webhooks → Telegram) +- Monitoring alerts that should forward verbatim +- Inter-agent pings where one agent is telling another agent's user something +- Any webhook where an LLM round trip would be wasted effort + +```bash +hermes webhook subscribe antenna-matches \ + --deliver telegram \ + --deliver-chat-id "123456789" \ + --deliver-only \ + --prompt "🎉 New match: {match.user_name} matched with you!" \ + --description "Antenna match notifications" +``` + +The POST returns `200 OK` on successful delivery, `502` on target failure — so upstream services can retry intelligently. HMAC auth, rate limits, and idempotency still apply. + +Requires `--deliver` to be a real target (telegram, discord, slack, github_comment, etc.) — `--deliver log` is rejected because log-only direct delivery is pointless. + +## Security + +- Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`) +- The webhook adapter validates signatures on every incoming POST +- Static routes from config.yaml cannot be overwritten by dynamic subscriptions +- Subscriptions persist to `~/.hermes/webhook_subscriptions.json` + +## How It Works + +1. `hermes webhook subscribe` writes to `~/.hermes/webhook_subscriptions.json` +2. The webhook adapter hot-reloads this file on each incoming request (mtime-gated, negligible overhead) +3. When a POST arrives matching a route, the adapter formats the prompt and triggers an agent run +4. The agent's response is delivered to the configured target (Telegram, Discord, GitHub comment, etc.) + +## Troubleshooting + +If webhooks aren't working: + +1. **Is the gateway running?** Check with `systemctl --user status hermes-gateway` or `ps aux | grep gateway` +2. **Is the webhook server listening?** `curl http://localhost:8644/health` should return `{"status": "ok"}` +3. **Check gateway logs:** `grep webhook ~/.hermes/logs/gateway.log | tail -20` +4. 
**Signature mismatch?** Verify the secret in your service matches the one from `hermes webhook list`. GitHub sends `X-Hub-Signature-256`, GitLab sends `X-Gitlab-Token`. +5. **Firewall/NAT?** The webhook URL must be reachable from the service. For local development, use a tunnel (ngrok, cloudflared). +6. **Wrong event type?** Check `--events` filter matches what the service sends. Use `hermes webhook test ` to verify the route works. diff --git a/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md b/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md new file mode 100644 index 0000000000..0ff7e72d9d --- /dev/null +++ b/website/docs/user-guide/skills/bundled/dogfood/dogfood-dogfood.md @@ -0,0 +1,178 @@ +--- +title: "Dogfood" +sidebar_label: "Dogfood" +description: "Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Dogfood + +Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/dogfood` | +| Version | `1.0.0` | +| Tags | `qa`, `testing`, `browser`, `web`, `dogfood` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Dogfood: Systematic Web Application QA Testing + +## Overview + +This skill guides you through systematic exploratory QA testing of web applications using the browser toolset. You will navigate the application, interact with elements, capture evidence of issues, and produce a structured bug report. 
+ +## Prerequisites + +- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`) +- A target URL and testing scope from the user + +## Inputs + +The user provides: +1. **Target URL** — the entry point for testing +2. **Scope** — what areas/features to focus on (or "full site" for comprehensive testing) +3. **Output directory** (optional) — where to save screenshots and the report (default: `./dogfood-output`) + +## Workflow + +Follow this 5-phase systematic workflow: + +### Phase 1: Plan + +1. Create the output directory structure: + ``` + {output_dir}/ + ├── screenshots/ # Evidence screenshots + └── report.md # Final report (generated in Phase 5) + ``` +2. Identify the testing scope based on user input. +3. Build a rough sitemap by planning which pages and features to test: + - Landing/home page + - Navigation links (header, footer, sidebar) + - Key user flows (sign up, login, search, checkout, etc.) + - Forms and interactive elements + - Edge cases (empty states, error pages, 404s) + +### Phase 2: Explore + +For each page or feature in your plan: + +1. **Navigate** to the page: + ``` + browser_navigate(url="https://example.com/page") + ``` + +2. **Take a snapshot** to understand the DOM structure: + ``` + browser_snapshot() + ``` + +3. **Check the console** for JavaScript errors: + ``` + browser_console(clear=true) + ``` + Do this after every navigation and after every significant interaction. Silent JS errors are high-value findings. + +4. **Take an annotated screenshot** to visually assess the page and identify interactive elements: + ``` + browser_vision(question="Describe the page layout, identify any visual issues, broken elements, or accessibility concerns", annotate=true) + ``` + The `annotate=true` flag overlays numbered `[N]` labels on interactive elements. 
Each `[N]` maps to ref `@eN` for subsequent browser commands. + +5. **Test interactive elements** systematically: + - Click buttons and links: `browser_click(ref="@eN")` + - Fill forms: `browser_type(ref="@eN", text="test input")` + - Test keyboard navigation: `browser_press(key="Tab")`, `browser_press(key="Enter")` + - Scroll through content: `browser_scroll(direction="down")` + - Test form validation with invalid inputs + - Test empty submissions + +6. **After each interaction**, check for: + - Console errors: `browser_console()` + - Visual changes: `browser_vision(question="What changed after the interaction?")` + - Expected vs actual behavior + +### Phase 3: Collect Evidence + +For every issue found: + +1. **Take a screenshot** showing the issue: + ``` + browser_vision(question="Capture and describe the issue visible on this page", annotate=false) + ``` + Save the `screenshot_path` from the response — you will reference it in the report. + +2. **Record the details**: + - URL where the issue occurs + - Steps to reproduce + - Expected behavior + - Actual behavior + - Console errors (if any) + - Screenshot path + +3. **Classify the issue** using the issue taxonomy (see `references/issue-taxonomy.md`): + - Severity: Critical / High / Medium / Low + - Category: Functional / Visual / Accessibility / Console / UX / Content + +### Phase 4: Categorize + +1. Review all collected issues. +2. De-duplicate — merge issues that are the same bug manifesting in different places. +3. Assign final severity and category to each issue. +4. Sort by severity (Critical first, then High, Medium, Low). +5. Count issues by severity and category for the executive summary. + +### Phase 5: Report + +Generate the final report using the template at `templates/dogfood-report-template.md`. + +The report must include: +1. **Executive summary** with total issue count, breakdown by severity, and testing scope +2. 
**Per-issue sections** with: + - Issue number and title + - Severity and category badges + - URL where observed + - Description of the issue + - Steps to reproduce + - Expected vs actual behavior + - Screenshot references (use `MEDIA:` for inline images) + - Console errors if relevant +3. **Summary table** of all issues +4. **Testing notes** — what was tested, what was not, any blockers + +Save the report to `{output_dir}/report.md`. + +## Tools Reference + +| Tool | Purpose | +|------|---------| +| `browser_navigate` | Go to a URL | +| `browser_snapshot` | Get DOM text snapshot (accessibility tree) | +| `browser_click` | Click an element by ref (`@eN`) or text | +| `browser_type` | Type into an input field | +| `browser_scroll` | Scroll up/down on the page | +| `browser_back` | Go back in browser history | +| `browser_press` | Press a keyboard key | +| `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels | +| `browser_console` | Get JS console output and errors | + +## Tips + +- **Always check `browser_console()` after navigating and after significant interactions.** Silent JS errors are among the most valuable findings. +- **Use `annotate=true` with `browser_vision`** when you need to reason about interactive element positions or when the snapshot refs are unclear. +- **Test with both valid and invalid inputs** — form validation bugs are common. +- **Scroll through long pages** — content below the fold may have rendering issues. +- **Test navigation flows** — click through multi-step processes end-to-end. +- **Check responsive behavior** by noting any layout issues visible in screenshots. +- **Don't forget edge cases**: empty states, very long text, special characters, rapid clicking. +- When reporting screenshots to the user, include `MEDIA:` so they can see the evidence inline. 
diff --git a/website/docs/user-guide/skills/bundled/email/email-himalaya.md b/website/docs/user-guide/skills/bundled/email/email-himalaya.md new file mode 100644 index 0000000000..55178bdc98 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/email/email-himalaya.md @@ -0,0 +1,293 @@ +--- +title: "Himalaya — CLI to manage emails via IMAP/SMTP" +sidebar_label: "Himalaya" +description: "CLI to manage emails via IMAP/SMTP" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Himalaya + +CLI to manage emails via IMAP/SMTP. Use himalaya to list, read, write, reply, forward, search, and organize emails from the terminal. Supports multiple accounts and message composition with MML (MIME Meta Language). + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/email/himalaya` | +| Version | `1.0.0` | +| Author | community | +| License | MIT | +| Tags | `Email`, `IMAP`, `SMTP`, `CLI`, `Communication` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Himalaya Email CLI + +Himalaya is a CLI email client that lets you manage emails from the terminal using IMAP, SMTP, Notmuch, or Sendmail backends. + +## References + +- `references/configuration.md` (config file setup + IMAP/SMTP authentication) +- `references/message-composition.md` (MML syntax for composing emails) + +## Prerequisites + +1. Himalaya CLI installed (`himalaya --version` to verify) +2. A configuration file at `~/.config/himalaya/config.toml` +3. 
IMAP/SMTP credentials configured (password stored securely) + +### Installation + +```bash +# Pre-built binary (Linux/macOS — recommended) +curl -sSL https://raw.githubusercontent.com/pimalaya/himalaya/master/install.sh | PREFIX=~/.local sh + +# macOS via Homebrew +brew install himalaya + +# Or via cargo (any platform with Rust) +cargo install himalaya --locked +``` + +## Configuration Setup + +Run the interactive wizard to set up an account: + +```bash +himalaya account configure +``` + +Or create `~/.config/himalaya/config.toml` manually: + +```toml +[accounts.personal] +email = "you@example.com" +display-name = "Your Name" +default = true + +backend.type = "imap" +backend.host = "imap.example.com" +backend.port = 993 +backend.encryption.type = "tls" +backend.login = "you@example.com" +backend.auth.type = "password" +backend.auth.cmd = "pass show email/imap" # or use keyring + +message.send.backend.type = "smtp" +message.send.backend.host = "smtp.example.com" +message.send.backend.port = 587 +message.send.backend.encryption.type = "start-tls" +message.send.backend.login = "you@example.com" +message.send.backend.auth.type = "password" +message.send.backend.auth.cmd = "pass show email/smtp" +``` + +## Hermes Integration Notes + +- **Reading, listing, searching, moving, deleting** all work directly through the terminal tool +- **Composing/replying/forwarding** — piped input (`cat << EOF | himalaya template send`) is recommended for reliability. 
Interactive `$EDITOR` mode works with `pty=true` + background + process tool, but requires knowing the editor and its commands +- Use `--output json` for structured output that's easier to parse programmatically +- The `himalaya account configure` wizard requires interactive input — use PTY mode: `terminal(command="himalaya account configure", pty=true)` + +## Common Operations + +### List Folders + +```bash +himalaya folder list +``` + +### List Emails + +List emails in INBOX (default): + +```bash +himalaya envelope list +``` + +List emails in a specific folder: + +```bash +himalaya envelope list --folder "Sent" +``` + +List with pagination: + +```bash +himalaya envelope list --page 1 --page-size 20 +``` + +### Search Emails + +```bash +himalaya envelope list from john@example.com subject meeting +``` + +### Read an Email + +Read email by ID (shows plain text): + +```bash +himalaya message read 42 +``` + +Export raw MIME: + +```bash +himalaya message export 42 --full +``` + +### Reply to an Email + +To reply non-interactively from Hermes, read the original message, compose a reply, and pipe it: + +```bash +# Get the reply template, edit it, and send +himalaya template reply 42 | sed 's/^$/\nYour reply text here\n/' | himalaya template send +``` + +Or build the reply manually: + +```bash +cat << 'EOF' | himalaya template send +From: you@example.com +To: sender@example.com +Subject: Re: Original Subject +In-Reply-To: + +Your reply here. 
+EOF +``` + +Reply-all (interactive — needs $EDITOR, use template approach above instead): + +```bash +himalaya message reply 42 --all +``` + +### Forward an Email + +```bash +# Get forward template and pipe with modifications +himalaya template forward 42 | sed 's/^To:.*/To: newrecipient@example.com/' | himalaya template send +``` + +### Write a New Email + +**Non-interactive (use this from Hermes)** — pipe the message via stdin: + +```bash +cat << 'EOF' | himalaya template send +From: you@example.com +To: recipient@example.com +Subject: Test Message + +Hello from Himalaya! +EOF +``` + +Or with headers flag: + +```bash +himalaya message write -H "To:recipient@example.com" -H "Subject:Test" "Message body here" +``` + +Note: `himalaya message write` without piped input opens `$EDITOR`. This works with `pty=true` + background mode, but piping is simpler and more reliable. + +### Move/Copy Emails + +Move to folder: + +```bash +himalaya message move 42 "Archive" +``` + +Copy to folder: + +```bash +himalaya message copy 42 "Important" +``` + +### Delete an Email + +```bash +himalaya message delete 42 +``` + +### Manage Flags + +Add flag: + +```bash +himalaya flag add 42 --flag seen +``` + +Remove flag: + +```bash +himalaya flag remove 42 --flag seen +``` + +## Multiple Accounts + +List accounts: + +```bash +himalaya account list +``` + +Use a specific account: + +```bash +himalaya --account work envelope list +``` + +## Attachments + +Save attachments from a message: + +```bash +himalaya attachment download 42 +``` + +Save to specific directory: + +```bash +himalaya attachment download 42 --dir ~/Downloads +``` + +## Output Formats + +Most commands support `--output` for structured output: + +```bash +himalaya envelope list --output json +himalaya envelope list --output plain +``` + +## Debugging + +Enable debug logging: + +```bash +RUST_LOG=debug himalaya envelope list +``` + +Full trace with backtrace: + +```bash +RUST_LOG=trace RUST_BACKTRACE=1 himalaya envelope list 
+```
+
+## Tips
+
+- Use `himalaya --help` or `himalaya <command> --help` for detailed usage.
+- Message IDs are relative to the current folder; re-list after folder changes.
+- For composing rich emails with attachments, use MML syntax (see `references/message-composition.md`).
+- Store passwords securely using `pass`, system keyring, or a command that outputs the password.
diff --git a/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md b/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md
new file mode 100644
index 0000000000..d85495a181
--- /dev/null
+++ b/website/docs/user-guide/skills/bundled/gaming/gaming-minecraft-modpack-server.md
@@ -0,0 +1,205 @@
+---
+title: "Minecraft Modpack Server — Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip"
+sidebar_label: "Minecraft Modpack Server"
+description: "Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip"
+---
+
+{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */}
+
+# Minecraft Modpack Server
+
+Set up a modded Minecraft server from a CurseForge/Modrinth server pack zip. Covers NeoForge/Forge install, Java version, JVM tuning, firewall, LAN config, backups, and launch scripts.
+
+## Skill metadata
+
+| | |
+|---|---|
+| Source | Bundled (installed by default) |
+| Path | `skills/gaming/minecraft-modpack-server` |
+
+## Reference: full SKILL.md
+
+:::info
+The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+::: + +# Minecraft Modpack Server Setup + +## When to use +- User wants to set up a modded Minecraft server from a server pack zip +- User needs help with NeoForge/Forge server configuration +- User asks about Minecraft server performance tuning or backups + +## Gather User Preferences First +Before starting setup, ask the user for: +- **Server name / MOTD** — what should it say in the server list? +- **Seed** — specific seed or random? +- **Difficulty** — peaceful / easy / normal / hard? +- **Gamemode** — survival / creative / adventure? +- **Online mode** — true (Mojang auth, legit accounts) or false (LAN/cracked friendly)? +- **Player count** — how many players expected? (affects RAM & view distance tuning) +- **RAM allocation** — or let agent decide based on mod count & available RAM? +- **View distance / simulation distance** — or let agent pick based on player count & hardware? +- **PvP** — on or off? +- **Whitelist** — open server or whitelist only? +- **Backups** — want automated backups? How often? + +Use sensible defaults if the user doesn't care, but always ask before generating the config. + +## Steps + +### 1. Download & Inspect the Pack +```bash +mkdir -p ~/minecraft-server +cd ~/minecraft-server +wget -O serverpack.zip "" +unzip -o serverpack.zip -d server +ls server/ +``` +Look for: `startserver.sh`, installer jar (neoforge/forge), `user_jvm_args.txt`, `mods/` folder. +Check the script to determine: mod loader type, version, and required Java version. + +### 2. Install Java +- Minecraft 1.21+ → Java 21: `sudo apt install openjdk-21-jre-headless` +- Minecraft 1.18-1.20 → Java 17: `sudo apt install openjdk-17-jre-headless` +- Minecraft 1.16 and below → Java 8: `sudo apt install openjdk-8-jre-headless` +- Verify: `java -version` + +### 3. Install the Mod Loader +Most server packs include an install script. 
Use the INSTALL_ONLY env var to install without launching: +```bash +cd ~/minecraft-server/server +ATM10_INSTALL_ONLY=true bash startserver.sh +# Or for generic Forge packs: +# java -jar forge-*-installer.jar --installServer +``` +This downloads libraries, patches the server jar, etc. + +### 4. Accept EULA +```bash +echo "eula=true" > ~/minecraft-server/server/eula.txt +``` + +### 5. Configure server.properties +Key settings for modded/LAN: +```properties +motd=\u00a7b\u00a7lServer Name \u00a7r\u00a78| \u00a7aModpack Name +server-port=25565 +online-mode=true # false for LAN without Mojang auth +enforce-secure-profile=true # match online-mode +difficulty=hard # most modpacks balance around hard +allow-flight=true # REQUIRED for modded (flying mounts/items) +spawn-protection=0 # let everyone build at spawn +max-tick-time=180000 # modded needs longer tick timeout +enable-command-block=true +``` + +Performance settings (scale to hardware): +```properties +# 2 players, beefy machine: +view-distance=16 +simulation-distance=10 + +# 4-6 players, moderate machine: +view-distance=10 +simulation-distance=6 + +# 8+ players or weaker hardware: +view-distance=8 +simulation-distance=4 +``` + +### 6. Tune JVM Args (user_jvm_args.txt) +Scale RAM to player count and mod count. Rule of thumb for modded: +- 100-200 mods: 6-12GB +- 200-350+ mods: 12-24GB +- Leave at least 8GB free for the OS/other tasks + +``` +-Xms12G +-Xmx24G +-XX:+UseG1GC +-XX:+ParallelRefProcEnabled +-XX:MaxGCPauseMillis=200 +-XX:+UnlockExperimentalVMOptions +-XX:+DisableExplicitGC +-XX:+AlwaysPreTouch +-XX:G1NewSizePercent=30 +-XX:G1MaxNewSizePercent=40 +-XX:G1HeapRegionSize=8M +-XX:G1ReservePercent=20 +-XX:G1HeapWastePercent=5 +-XX:G1MixedGCCountTarget=4 +-XX:InitiatingHeapOccupancyPercent=15 +-XX:G1MixedGCLiveThresholdPercent=90 +-XX:G1RSetUpdatingPauseTimePercent=5 +-XX:SurvivorRatio=32 +-XX:+PerfDisableSharedMem +-XX:MaxTenuringThreshold=1 +``` + +### 7. 
Open Firewall
+```bash
+sudo ufw allow 25565/tcp comment "Minecraft Server"
+```
+Check with: `sudo ufw status | grep 25565`
+
+### 8. Create Launch Script
+```bash
+cat > ~/start-minecraft.sh << 'EOF'
+#!/bin/bash
+cd ~/minecraft-server/server
+java @user_jvm_args.txt @libraries/net/neoforged/neoforge/<version>/unix_args.txt nogui
+EOF
+chmod +x ~/start-minecraft.sh
+```
+Note: For Forge (not NeoForge), the args file path differs. Check `startserver.sh` for the exact path.
+
+### 9. Set Up Automated Backups
+Create backup script:
+```bash
+cat > ~/minecraft-server/backup.sh << 'SCRIPT'
+#!/bin/bash
+SERVER_DIR="$HOME/minecraft-server/server"
+BACKUP_DIR="$HOME/minecraft-server/backups"
+WORLD_DIR="$SERVER_DIR/world"
+MAX_BACKUPS=24
+mkdir -p "$BACKUP_DIR"
+[ ! -d "$WORLD_DIR" ] && echo "[BACKUP] No world folder" && exit 0
+TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
+BACKUP_FILE="$BACKUP_DIR/world_${TIMESTAMP}.tar.gz"
+echo "[BACKUP] Starting at $(date)"
+tar -czf "$BACKUP_FILE" -C "$SERVER_DIR" world
+SIZE=$(du -h "$BACKUP_FILE" | cut -f1)
+echo "[BACKUP] Saved: $BACKUP_FILE ($SIZE)"
+BACKUP_COUNT=$(ls -1t "$BACKUP_DIR"/world_*.tar.gz 2>/dev/null | wc -l)
+if [ "$BACKUP_COUNT" -gt "$MAX_BACKUPS" ]; then
+  REMOVE=$((BACKUP_COUNT - MAX_BACKUPS))
+  ls -1t "$BACKUP_DIR"/world_*.tar.gz | tail -n "$REMOVE" | xargs rm -f
+  echo "[BACKUP] Pruned $REMOVE old backup(s)"
+fi
+echo "[BACKUP] Done at $(date)"
+SCRIPT
+chmod +x ~/minecraft-server/backup.sh
+```
+
+Add hourly cron:
+```bash
+(crontab -l 2>/dev/null | grep -v "minecraft-server/backup.sh"; echo "0 * * * * $HOME/minecraft-server/backup.sh >> $HOME/minecraft-server/backups/backup.log 2>&1") | crontab -
+```
+
+## Pitfalls
+- ALWAYS set `allow-flight=true` for modded — mods with jetpacks/flight will kick players otherwise
+- `max-tick-time=180000` or higher — modded servers often have long ticks during worldgen
+- First startup is SLOW (several minutes for big packs) — don't panic
+- "Can't keep up!"
warnings on first launch are normal, settles after initial chunk gen +- If online-mode=false, set enforce-secure-profile=false too or clients get rejected +- The pack's startserver.sh often has an auto-restart loop — make a clean launch script without it +- Delete the world/ folder to regenerate with a new seed +- Some packs have env vars to control behavior (e.g., ATM10 uses ATM10_JAVA, ATM10_RESTART, ATM10_INSTALL_ONLY) + +## Verification +- `pgrep -fa neoforge` or `pgrep -fa minecraft` to check if running +- Check logs: `tail -f ~/minecraft-server/server/logs/latest.log` +- Look for "Done (Xs)!" in the log = server is ready +- Test connection: player adds server IP in Multiplayer diff --git a/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md b/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md new file mode 100644 index 0000000000..ab070f8671 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/gaming/gaming-pokemon-player.md @@ -0,0 +1,235 @@ +--- +title: "Pokemon Player — Play Pokemon games autonomously via headless emulation" +sidebar_label: "Pokemon Player" +description: "Play Pokemon games autonomously via headless emulation" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Pokemon Player + +Play Pokemon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/gaming/pokemon-player` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Pokemon Player + +Play Pokemon games via headless emulation using the `pokemon-agent` package. + +## When to Use +- User says "play pokemon", "start pokemon", "pokemon game" +- User asks about Pokemon Red, Blue, Yellow, FireRed, etc. +- User wants to watch an AI play Pokemon +- User references a ROM file (.gb, .gbc, .gba) + +## Startup Procedure + +### 1. First-time setup (clone, venv, install) +The repo is NousResearch/pokemon-agent on GitHub. Clone it, then +set up a Python 3.10+ virtual environment. Use uv (preferred for speed) +to create the venv and install the package in editable mode with the +pyboy extra. If uv is not available, fall back to python3 -m venv + pip. + +On this machine it is already set up at /home/teknium/pokemon-agent +with a venv ready — just cd there and source .venv/bin/activate. + +You also need a ROM file. Ask the user for theirs. On this machine +one exists at roms/pokemon_red.gb inside that directory. +NEVER download or provide ROM files — always ask the user. + +### 2. Start the game server +From inside the pokemon-agent directory with the venv activated, run +pokemon-agent serve with --rom pointing to the ROM and --port 9876. +Run it in the background with &. +To resume from a saved game, add --load-state with the save name. +Wait 4 seconds for startup, then verify with GET /health. + +### 3. Set up live dashboard for user to watch +Use an SSH reverse tunnel via localhost.run so the user can view +the dashboard in their browser. Connect with ssh, forwarding local +port 9876 to remote port 80 on nokey@localhost.run. Redirect output +to a log file, wait 10 seconds, then grep the log for the .lhr.life +URL. Give the user the URL with /dashboard/ appended. +The tunnel URL changes each time — give the user the new one if restarted. 
+ +## Save and Load + +### When to save +- Every 15-20 turns of gameplay +- ALWAYS before gym battles, rival encounters, or risky fights +- Before entering a new town or dungeon +- Before any action you are unsure about + +### How to save +POST /save with a descriptive name. Good examples: +before_brock, route1_start, mt_moon_entrance, got_cut + +### How to load +POST /load with the save name. + +### List available saves +GET /saves returns all saved states. + +### Loading on server startup +Use --load-state flag when starting the server to auto-load a save. +This is faster than loading via the API after startup. + +## The Gameplay Loop + +### Step 1: OBSERVE — check state AND take a screenshot +GET /state for position, HP, battle, dialog. +GET /screenshot and save to /tmp/pokemon.png, then use vision_analyze. +Always do BOTH — RAM state gives numbers, vision gives spatial awareness. + +### Step 2: ORIENT +- Dialog/text on screen → advance it +- In battle → fight or run +- Party hurt → head to Pokemon Center +- Near objective → navigate carefully + +### Step 3: DECIDE +Priority: dialog > battle > heal > story objective > training > explore + +### Step 4: ACT — move 2-4 steps max, then re-check +POST /action with a SHORT action list (2-4 actions, not 10-15). + +### Step 5: VERIFY — screenshot after every move sequence +Take a screenshot and use vision_analyze to confirm you moved where +intended. This is the MOST IMPORTANT step. Without vision you WILL get lost. 
+ +### Step 6: RECORD progress to memory with PKM: prefix + +### Step 7: SAVE periodically + +## Action Reference +- press_a — confirm, talk, select +- press_b — cancel, close menu +- press_start — open game menu +- walk_up/down/left/right — move one tile +- hold_b_N — hold B for N frames (use for speeding through text) +- wait_60 — wait about 1 second (60 frames) +- a_until_dialog_end — press A repeatedly until dialog clears + +## Critical Tips from Experience + +### USE VISION CONSTANTLY +- Take a screenshot every 2-4 movement steps +- The RAM state tells you position and HP but NOT what is around you +- Ledges, fences, signs, building doors, NPCs — only visible via screenshot +- Ask the vision model specific questions: "what is one tile north of me?" +- When stuck, always screenshot before trying random directions + +### Warp Transitions Need Extra Wait Time +When walking through a door or stairs, the screen fades to black during +the map transition. You MUST wait for it to complete. Add 2-3 wait_60 +actions after any door/stair warp. Without waiting, the position reads +as stale and you will think you are still in the old map. + +### Building Exit Trap +When you exit a building, you appear directly IN FRONT of the door. +If you walk north, you go right back inside. ALWAYS sidestep first +by walking left or right 2 tiles, then proceed in your intended direction. + +### Dialog Handling +Gen 1 text scrolls slowly letter-by-letter. To speed through dialog, +hold B for 120 frames then press A. Repeat as needed. Holding B makes +text display at max speed. Then press A to advance to the next line. +The a_until_dialog_end action checks the RAM dialog flag, but this flag +does not catch ALL text states. If dialog seems stuck, use the manual +hold_b + press_a pattern instead and verify via screenshot. + +### Ledges Are One-Way +Ledges (small cliff edges) can only be jumped DOWN (south), never climbed +UP (north). 
If blocked by a ledge going north, you must go left or right +to find the gap around it. Use vision to identify which direction the +gap is. Ask the vision model explicitly. + +### Navigation Strategy +- Move 2-4 steps at a time, then screenshot to check position +- When entering a new area, screenshot immediately to orient +- Ask the vision model "which direction to [destination]?" +- If stuck for 3+ attempts, screenshot and re-evaluate completely +- Do not spam 10-15 movements — you will overshoot or get stuck + +### Running from Wild Battles +On the battle menu, RUN is bottom-right. To reach it from the default +cursor position (FIGHT, top-left): press down then right to move cursor +to RUN, then press A. Wrap with hold_b to speed through text/animations. + +### Battling (FIGHT) +On the battle menu FIGHT is top-left (default cursor position). +Press A to enter move selection, A again to use the first move. +Then hold B to speed through attack animations and text. + +## Battle Strategy + +### Decision Tree +1. Want to catch? → Weaken then throw Poke Ball +2. Wild you don't need? → RUN +3. Type advantage? → Use super-effective move +4. No advantage? → Use strongest STAB move +5. Low HP? → Switch or use Potion + +### Gen 1 Type Chart (key matchups) +- Water beats Fire, Ground, Rock +- Fire beats Grass, Bug, Ice +- Grass beats Water, Ground, Rock +- Electric beats Water, Flying +- Ground beats Fire, Electric, Rock, Poison +- Psychic beats Fighting, Poison (dominant in Gen 1!) 
+ +### Gen 1 Quirks +- Special stat = both offense AND defense for special moves +- Psychic type is overpowered (Ghost moves bugged) +- Critical hits based on Speed stat +- Wrap/Bind prevent opponent from acting +- Focus Energy bug: REDUCES crit rate instead of raising it + +## Memory Conventions +| Prefix | Purpose | Example | +|--------|---------|---------| +| PKM:OBJECTIVE | Current goal | Get Parcel from Viridian Mart | +| PKM:MAP | Navigation knowledge | Viridian: mart is northeast | +| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty | +| PKM:PROGRESS | Milestone tracker | Beat rival, heading to Viridian | +| PKM:STUCK | Stuck situations | Ledge at y=28 go right to bypass | +| PKM:TEAM | Team notes | Squirtle Lv6, Tackle + Tail Whip | + +## Progression Milestones +- Choose starter +- Deliver Parcel from Viridian Mart, receive Pokedex +- Boulder Badge — Brock (Rock) → use Water/Grass +- Cascade Badge — Misty (Water) → use Grass/Electric +- Thunder Badge — Lt. Surge (Electric) → use Ground +- Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying +- Soul Badge — Koga (Poison) → use Ground/Psychic +- Marsh Badge — Sabrina (Psychic) → hardest gym +- Volcano Badge — Blaine (Fire) → use Water/Ground +- Earth Badge — Giovanni (Ground) → use Water/Grass/Ice +- Elite Four → Champion! + +## Stopping Play +1. Save the game with a descriptive name via POST /save +2. Update memory with PKM:PROGRESS +3. Tell user: "Game saved as [name]! Say 'play pokemon' to resume." +4. 
Kill the server and tunnel background processes + +## Pitfalls +- NEVER download or provide ROM files +- Do NOT send more than 4-5 actions without checking vision +- Always sidestep after exiting buildings before going north +- Always add wait_60 x2-3 after door/stair warps +- Dialog detection via RAM is unreliable — verify with screenshots +- Save BEFORE risky encounters +- The tunnel URL changes each time you restart it diff --git a/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md new file mode 100644 index 0000000000..13c3fe4425 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-codebase-inspection.md @@ -0,0 +1,131 @@ +--- +title: "Codebase Inspection" +sidebar_label: "Codebase Inspection" +description: "Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Codebase Inspection + +Inspect and analyze codebases using pygount for LOC counting, language breakdown, and code-vs-comment ratios. Use when asked to check lines of code, repo size, language composition, or codebase stats. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/codebase-inspection` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `LOC`, `Code Analysis`, `pygount`, `Codebase`, `Metrics`, `Repository` | +| Related skills | [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. 
+::: + +# Codebase Inspection with pygount + +Analyze repositories for lines of code, language breakdown, file counts, and code-vs-comment ratios using `pygount`. + +## When to Use + +- User asks for LOC (lines of code) count +- User wants a language breakdown of a repo +- User asks about codebase size or composition +- User wants code-vs-comment ratios +- General "how big is this repo" questions + +## Prerequisites + +```bash +pip install --break-system-packages pygount 2>/dev/null || pip install pygount +``` + +## 1. Basic Summary (Most Common) + +Get a full language breakdown with file counts, code lines, and comment lines: + +```bash +cd /path/to/repo +pygount --format=summary \ + --folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,.next,.tox,.eggs,*.egg-info" \ + . +``` + +**IMPORTANT:** Always use `--folders-to-skip` to exclude dependency/build directories, otherwise pygount will crawl them and take a very long time or hang. + +## 2. Common Folder Exclusions + +Adjust based on the project type: + +```bash +# Python projects +--folders-to-skip=".git,venv,.venv,__pycache__,.cache,dist,build,.tox,.eggs,.mypy_cache" + +# JavaScript/TypeScript projects +--folders-to-skip=".git,node_modules,dist,build,.next,.cache,.turbo,coverage" + +# General catch-all +--folders-to-skip=".git,node_modules,venv,.venv,__pycache__,.cache,dist,build,.next,.tox,vendor,third_party" +``` + +## 3. Filter by Specific Language + +```bash +# Only count Python files +pygount --suffix=py --format=summary . + +# Only count Python and YAML +pygount --suffix=py,yaml,yml --format=summary . +``` + +## 4. Detailed File-by-File Output + +```bash +# Default format shows per-file breakdown +pygount --folders-to-skip=".git,node_modules,venv" . + +# Sort by code lines (pipe through sort) +pygount --folders-to-skip=".git,node_modules,venv" . | sort -t$'\t' -k1 -nr | head -20 +``` + +## 5. 
Output Formats + +```bash +# Summary table (default recommendation) +pygount --format=summary . + +# JSON output for programmatic use +pygount --format=json . + +# Pipe-friendly: Language, file count, code, docs, empty, string +pygount --format=summary . 2>/dev/null +``` + +## 6. Interpreting Results + +The summary table columns: +- **Language** — detected programming language +- **Files** — number of files of that language +- **Code** — lines of actual code (executable/declarative) +- **Comment** — lines that are comments or documentation +- **%** — percentage of total + +Special pseudo-languages: +- `__empty__` — empty files +- `__binary__` — binary files (images, compiled, etc.) +- `__generated__` — auto-generated files (detected heuristically) +- `__duplicate__` — files with identical content +- `__unknown__` — unrecognized file types + +## Pitfalls + +1. **Always exclude .git, node_modules, venv** — without `--folders-to-skip`, pygount will crawl everything and may take minutes or hang on large dependency trees. +2. **Markdown shows 0 code lines** — pygount classifies all Markdown content as comments, not code. This is expected behavior. +3. **JSON files show low code counts** — pygount may count JSON lines conservatively. For accurate JSON line counts, use `wc -l` directly. +4. **Large monorepos** — for very large repos, consider using `--suffix` to target specific languages rather than scanning everything. 
diff --git a/website/docs/user-guide/skills/bundled/github/github-github-auth.md b/website/docs/user-guide/skills/bundled/github/github-github-auth.md new file mode 100644 index 0000000000..4f7360c43e --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-auth.md @@ -0,0 +1,264 @@ +--- +title: "Github Auth — Set up GitHub authentication for the agent using git (universally available) or the gh CLI" +sidebar_label: "Github Auth" +description: "Set up GitHub authentication for the agent using git (universally available) or the gh CLI" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Auth + +Set up GitHub authentication for the agent using git (universally available) or the gh CLI. Covers HTTPS tokens, SSH keys, credential helpers, and gh auth — with a detection flow to pick the right method automatically. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-auth` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Authentication`, `Git`, `gh-cli`, `SSH`, `Setup` | +| Related skills | [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues), [`github-repo-management`](/docs/user-guide/skills/bundled/github/github-github-repo-management) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Authentication Setup + +This skill sets up authentication so the agent can work with GitHub repositories, PRs, issues, and CI. 
It covers two paths: + +- **`git` (always available)** — uses HTTPS personal access tokens or SSH keys +- **`gh` CLI (if installed)** — richer GitHub API access with a simpler auth flow + +## Detection Flow + +When a user asks you to work with GitHub, run this check first: + +```bash +# Check what's available +git --version +gh --version 2>/dev/null || echo "gh not installed" + +# Check if already authenticated +gh auth status 2>/dev/null || echo "gh not authenticated" +git config --global credential.helper 2>/dev/null || echo "no git credential helper" +``` + +**Decision tree:** +1. If `gh auth status` shows authenticated → you're good, use `gh` for everything +2. If `gh` is installed but not authenticated → use "gh auth" method below +3. If `gh` is not installed → use "git-only" method below (no sudo needed) + +--- + +## Method 1: Git-Only Authentication (No gh, No sudo) + +This works on any machine with `git` installed. No root access needed. + +### Option A: HTTPS with Personal Access Token (Recommended) + +This is the most portable method — works everywhere, no SSH config needed. 
+ +**Step 1: Create a personal access token** + +Tell the user to go to: **https://github.com/settings/tokens** + +- Click "Generate new token (classic)" +- Give it a name like "hermes-agent" +- Select scopes: + - `repo` (full repository access — read, write, push, PRs) + - `workflow` (trigger and manage GitHub Actions) + - `read:org` (if working with organization repos) +- Set expiration (90 days is a good default) +- Copy the token — it won't be shown again + +**Step 2: Configure git to store the token** + +```bash +# Set up the credential helper to cache credentials +# "store" saves to ~/.git-credentials in plaintext (simple, persistent) +git config --global credential.helper store + +# Now do a test operation that triggers auth — git will prompt for credentials +# Username: <your-github-username> +# Password: <paste-the-personal-access-token> +git ls-remote https://github.com/<owner>/<repo>.git +``` + +After entering credentials once, they're saved and reused for all future operations. + +**Alternative: cache helper (credentials expire from memory)** + +```bash +# Cache in memory for 8 hours (28800 seconds) instead of saving to disk +git config --global credential.helper 'cache --timeout=28800' +``` + +**Alternative: set the token directly in the remote URL (per-repo)** + +```bash +# Embed token in the remote URL (avoids credential prompts entirely) +git remote set-url origin https://<username>:<token>@github.com/<owner>/<repo>.git +``` + +**Step 3: Configure git identity** + +```bash +# Required for commits — set name and email +git config --global user.name "Their Name" +git config --global user.email "their-email@example.com" +``` + +**Step 4: Verify** + +```bash +# Test push access (this should work without any prompts now) +git ls-remote https://github.com/<owner>/<repo>.git + +# Verify identity +git config --global user.name +git config --global user.email +``` + +### Option B: SSH Key Authentication + +Good for users who prefer SSH or already have keys set up.
+ +**Step 1: Check for existing SSH keys** + +```bash +ls -la ~/.ssh/id_*.pub 2>/dev/null || echo "No SSH keys found" +``` + +**Step 2: Generate a key if needed** + +```bash +# Generate an ed25519 key (modern, secure, fast) +ssh-keygen -t ed25519 -C "their-email@example.com" -f ~/.ssh/id_ed25519 -N "" + +# Display the public key for them to add to GitHub +cat ~/.ssh/id_ed25519.pub +``` + +Tell the user to add the public key at: **https://github.com/settings/keys** +- Click "New SSH key" +- Paste the public key content +- Give it a title like "hermes-agent-<machine-name>" + +**Step 3: Test the connection** + +```bash +ssh -T git@github.com +# Expected: "Hi <username>! You've successfully authenticated..." +``` + +**Step 4: Configure git to use SSH for GitHub** + +```bash +# Rewrite HTTPS GitHub URLs to SSH automatically +git config --global url."git@github.com:".insteadOf "https://github.com/" +``` + +**Step 5: Configure git identity** + +```bash +git config --global user.name "Their Name" +git config --global user.email "their-email@example.com" +``` + +--- + +## Method 2: gh CLI Authentication + +If `gh` is installed, it handles both API access and git credentials in one step. + +### Interactive Browser Login (Desktop) + +```bash +gh auth login +# Select: GitHub.com +# Select: HTTPS +# Authenticate via browser +``` + +### Token-Based Login (Headless / SSH Servers) + +```bash +echo "<token>" | gh auth login --with-token + +# Set up git credentials through gh +gh auth setup-git +``` + +### Verify + +```bash +gh auth status +``` + +--- + +## Using the GitHub API Without gh + +When `gh` is not available, you can still access the full GitHub API using `curl` with a personal access token. This is how the other GitHub skills implement their fallbacks.
+ +### Setting the Token for API Calls + +```bash +# Option 1: Export as env var (preferred — keeps it out of commands) +export GITHUB_TOKEN="<your-token>" + +# Then use in curl calls: +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/user +``` + +### Extracting the Token from Git Credentials + +If git credentials are already configured (via credential.helper store), the token can be extracted: + +```bash +# Read from git credential store +grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|' +``` + +### Helper: Detect Auth Method + +Use this pattern at the start of any GitHub workflow: + +```bash +# Try gh first, fall back to git + curl +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + echo "AUTH_METHOD=gh" +elif [ -n "$GITHUB_TOKEN" ]; then + echo "AUTH_METHOD=curl" +elif [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + export GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + echo "AUTH_METHOD=curl" +elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + export GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + echo "AUTH_METHOD=curl" +else + echo "AUTH_METHOD=none" + echo "Need to set up authentication first" +fi +``` + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| `git push` asks for password | GitHub disabled password auth.
Use a personal access token as the password, or switch to SSH | +| `remote: Permission to X denied` | Token may lack `repo` scope — regenerate with correct scopes | +| `fatal: Authentication failed` | Cached credentials may be stale — run `git credential reject` then re-authenticate | +| `ssh: connect to host github.com port 22: Connection refused` | Try SSH over HTTPS port: add `Host github.com` with `Port 443` and `Hostname ssh.github.com` to `~/.ssh/config` | +| Credentials not persisting | Check `git config --global credential.helper` — must be `store` or `cache` | +| Multiple GitHub accounts | Use SSH with different keys per host alias in `~/.ssh/config`, or per-repo credential URLs | +| `gh: command not found` + no sudo | Use git-only Method 1 above — no installation needed | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-code-review.md b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md new file mode 100644 index 0000000000..9a18c45e16 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-code-review.md @@ -0,0 +1,498 @@ +--- +title: "Github Code Review" +sidebar_label: "Github Code Review" +description: "Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Code Review + +Review code changes by analyzing git diffs, leaving inline comments on PRs, and performing thorough pre-push review. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-code-review` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Code-Review`, `Pull-Requests`, `Git`, `Quality` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Code Review + +Perform code reviews on local changes before pushing, or review open PRs on GitHub. Most of this skill uses plain `git` — the `gh`/`curl` split only matters for PR-level interactions. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repository + +### Setup (for PR interactions) + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. Reviewing Local Changes (Pre-Push) + +This is pure `git` — works everywhere, no API needed. 
+ +### Get the Diff + +```bash +# Staged changes (what would be committed) +git diff --staged + +# All changes vs main (what a PR would contain) +git diff main...HEAD + +# File names only +git diff main...HEAD --name-only + +# Stat summary (insertions/deletions per file) +git diff main...HEAD --stat +``` + +### Review Strategy + +1. **Get the big picture first:** + +```bash +git diff main...HEAD --stat +git log main..HEAD --oneline +``` + +2. **Review file by file** — use `read_file` on changed files for full context, and the diff to see what changed: + +```bash +git diff main...HEAD -- src/auth/login.py +``` + +3. **Check for common issues:** + +```bash +# Debug statements, TODOs, console.logs left behind +git diff main...HEAD | grep -n "print(\|console\.log\|TODO\|FIXME\|HACK\|XXX\|debugger" + +# Large files accidentally staged +git diff main...HEAD --stat | sort -t'|' -k2 -rn | head -10 + +# Secrets or credential patterns +git diff main...HEAD | grep -in "password\|secret\|api_key\|token.*=\|private_key" + +# Merge conflict markers +git diff main...HEAD | grep -n "<<<<<<\|>>>>>>\|=======" +``` + +4. **Present structured feedback** to the user. + +### Review Output Format + +When reviewing local changes, present findings in this structure: + +``` +## Code Review Summary + +### Critical +- **src/auth.py:45** — SQL injection: user input passed directly to query. + Suggestion: Use parameterized queries. + +### Warnings +- **src/models/user.py:23** — Password stored in plaintext. Use bcrypt or argon2. +- **src/api/routes.py:112** — No rate limiting on login endpoint. + +### Suggestions +- **src/utils/helpers.py:8** — Duplicates logic in `src/core/utils.py:34`. Consolidate. +- **tests/test_auth.py** — Missing edge case: expired token test. + +### Looks Good +- Clean separation of concerns in the middleware layer +- Good test coverage for the happy path +``` + +--- + +## 2. 
Reviewing a Pull Request on GitHub + +### View PR Details + +**With gh:** + +```bash +gh pr view 123 +gh pr diff 123 +gh pr diff 123 --name-only +``` + +**With git + curl:** + +```bash +PR_NUMBER=123 + +# Get PR details +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c " +import sys, json +pr = json.load(sys.stdin) +print(f\"Title: {pr['title']}\") +print(f\"Author: {pr['user']['login']}\") +print(f\"Branch: {pr['head']['ref']} -> {pr['base']['ref']}\") +print(f\"State: {pr['state']}\") +print(f\"Body:\n{pr['body']}\")" + +# List changed files +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/files \ + | python3 -c " +import sys, json +for f in json.load(sys.stdin): + print(f\"{f['status']:10} +{f['additions']:-4} -{f['deletions']:-4} {f['filename']}\")" +``` + +### Check Out PR Locally for Full Review + +This works with plain `git` — no `gh` needed: + +```bash +# Fetch the PR branch and check it out +git fetch origin pull/123/head:pr-123 +git checkout pr-123 + +# Now you can use read_file, search_files, run tests, etc. + +# View diff against the base branch +git diff main...pr-123 +``` + +**With gh (shortcut):** + +```bash +gh pr checkout 123 +``` + +### Leave Comments on a PR + +**General PR comment — with gh:** + +```bash +gh pr comment 123 --body "Overall looks good, a few suggestions below." 
+``` + +**General PR comment — with curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/$PR_NUMBER/comments \ + -d '{"body": "Overall looks good, a few suggestions below."}' +``` + +### Leave Inline Review Comments + +**Single inline comment — with gh (via API):** + +```bash +HEAD_SHA=$(gh pr view 123 --json headRefOid --jq '.headRefOid') + +gh api repos/$OWNER/$REPO/pulls/123/comments \ + --method POST \ + -f body="This could be simplified with a list comprehension." \ + -f path="src/auth/login.py" \ + -f commit_id="$HEAD_SHA" \ + -f line=45 \ + -f side="RIGHT" +``` + +**Single inline comment — with curl:** + +```bash +# Get the head commit SHA +HEAD_SHA=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/comments \ + -d "{ + \"body\": \"This could be simplified with a list comprehension.\", + \"path\": \"src/auth/login.py\", + \"commit_id\": \"$HEAD_SHA\", + \"line\": 45, + \"side\": \"RIGHT\" + }" +``` + +### Submit a Formal Review (Approve / Request Changes) + +**With gh:** + +```bash +gh pr review 123 --approve --body "LGTM!" +gh pr review 123 --request-changes --body "See inline comments." +gh pr review 123 --comment --body "Some suggestions, nothing blocking." 
+``` + +**With curl — multi-comment review submitted atomically:** + +```bash +HEAD_SHA=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/pulls/$PR_NUMBER/reviews \ + -d "{ + \"commit_id\": \"$HEAD_SHA\", + \"event\": \"COMMENT\", + \"body\": \"Code review from Hermes Agent\", + \"comments\": [ + {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"Use parameterized queries to prevent SQL injection.\"}, + {\"path\": \"src/models/user.py\", \"line\": 23, \"body\": \"Hash passwords with bcrypt before storing.\"}, + {\"path\": \"tests/test_auth.py\", \"line\": 1, \"body\": \"Add test for expired token edge case.\"} + ] + }" +``` + +Event values: `"APPROVE"`, `"REQUEST_CHANGES"`, `"COMMENT"` + +The `line` field refers to the line number in the *new* version of the file. For deleted lines, use `"side": "LEFT"`. + +--- + +## 3. Review Checklist + +When performing a code review (local or PR), systematically check: + +### Correctness +- Does the code do what it claims? +- Edge cases handled (empty inputs, nulls, large data, concurrent access)? +- Error paths handled gracefully? + +### Security +- No hardcoded secrets, credentials, or API keys +- Input validation on user-facing inputs +- No SQL injection, XSS, or path traversal +- Auth/authz checks where needed + +### Code Quality +- Clear naming (variables, functions, classes) +- No unnecessary complexity or premature abstraction +- DRY — no duplicated logic that should be extracted +- Functions are focused (single responsibility) + +### Testing +- New code paths tested? +- Happy path and error cases covered? +- Tests readable and maintainable? 
+ +### Performance +- No N+1 queries or unnecessary loops +- Appropriate caching where beneficial +- No blocking operations in async code paths + +### Documentation +- Public APIs documented +- Non-obvious logic has comments explaining "why" +- README updated if behavior changed + +--- + +## 4. Pre-Push Review Workflow + +When the user asks you to "review the code" or "check before pushing": + +1. `git diff main...HEAD --stat` — see scope of changes +2. `git diff main...HEAD` — read the full diff +3. For each changed file, use `read_file` if you need more context +4. Apply the checklist above +5. Present findings in the structured format (Critical / Warnings / Suggestions / Looks Good) +6. If critical issues found, offer to fix them before the user pushes + +--- + +## 5. PR Review Workflow (End-to-End) + +When the user asks you to "review PR #N", "look at this PR", or gives you a PR URL, follow this recipe: + +### Step 1: Set up environment + +```bash +source "${HERMES_HOME:-$HOME/.hermes}/skills/github/github-auth/scripts/gh-env.sh" +# Or run the inline setup block from the top of this skill +``` + +### Step 2: Gather PR context + +Get the PR metadata, description, and list of changed files to understand scope before diving into code. + +**With gh:** +```bash +gh pr view 123 +gh pr diff 123 --name-only +gh pr checks 123 +``` + +**With curl:** +```bash +PR_NUMBER=123 + +# PR details (title, author, description, branch) +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER + +# Changed files with line counts +curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/files +``` + +### Step 3: Check out the PR locally + +This gives you full access to `read_file`, `search_files`, and the ability to run tests. 
+ +```bash +git fetch origin pull/$PR_NUMBER/head:pr-$PR_NUMBER +git checkout pr-$PR_NUMBER +``` + +### Step 4: Read the diff and understand changes + +```bash +# Full diff against the base branch +git diff main...HEAD + +# Or file-by-file for large PRs +git diff main...HEAD --name-only +# Then for each file: +git diff main...HEAD -- path/to/file.py +``` + +For each changed file, use `read_file` to see full context around the changes — diffs alone can miss issues visible only with surrounding code. + +### Step 5: Run automated checks locally (if applicable) + +```bash +# Run tests if there's a test suite +python -m pytest 2>&1 | tail -20 +# or: npm test, cargo test, go test ./..., etc. + +# Run linter if configured +ruff check . 2>&1 | head -30 +# or: eslint, clippy, etc. +``` + +### Step 6: Apply the review checklist (Section 3) + +Go through each category: Correctness, Security, Code Quality, Testing, Performance, Documentation. + +### Step 7: Post the review to GitHub + +Collect your findings and submit them as a formal review with inline comments. + +**With gh:** +```bash +# If no issues — approve +gh pr review $PR_NUMBER --approve --body "Reviewed by Hermes Agent. Code looks clean — good test coverage, no security concerns." + +# If issues found — request changes with inline comments +gh pr review $PR_NUMBER --request-changes --body "Found a few issues — see inline comments." 
+``` + +**With curl — atomic review with multiple inline comments:** +```bash +HEAD_SHA=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['head']['sha'])") + +# Build the review JSON — event is APPROVE, REQUEST_CHANGES, or COMMENT +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$GH_OWNER/$GH_REPO/pulls/$PR_NUMBER/reviews \ + -d "{ + \"commit_id\": \"$HEAD_SHA\", + \"event\": \"REQUEST_CHANGES\", + \"body\": \"## Hermes Agent Review\n\nFound 2 issues, 1 suggestion. See inline comments.\", + \"comments\": [ + {\"path\": \"src/auth.py\", \"line\": 45, \"body\": \"🔴 **Critical:** User input passed directly to SQL query — use parameterized queries.\"}, + {\"path\": \"src/models.py\", \"line\": 23, \"body\": \"⚠️ **Warning:** Password stored without hashing.\"}, + {\"path\": \"src/utils.py\", \"line\": 8, \"body\": \"💡 **Suggestion:** This duplicates logic in core/utils.py:34.\"} + ] + }" +``` + +### Step 8: Also post a summary comment + +In addition to inline comments, leave a top-level summary so the PR author gets the full picture at a glance. Use the review output format from `references/review-output-template.md`. 
+ +**With gh:** +```bash +gh pr comment $PR_NUMBER --body "$(cat <<'EOF' +## Code Review Summary + +**Verdict: Changes Requested** (2 issues, 1 suggestion) + +### 🔴 Critical +- **src/auth.py:45** — SQL injection vulnerability + +### ⚠️ Warnings +- **src/models.py:23** — Plaintext password storage + +### 💡 Suggestions +- **src/utils.py:8** — Duplicated logic, consider consolidating + +### ✅ Looks Good +- Clean API design +- Good error handling in the middleware layer + +--- +*Reviewed by Hermes Agent* +EOF +)" +``` + +### Step 9: Clean up + +```bash +git checkout main +git branch -D pr-$PR_NUMBER +``` + +### Decision: Approve vs Request Changes vs Comment + +- **Approve** — no critical or warning-level issues, only minor suggestions or all clear +- **Request Changes** — any critical or warning-level issue that should be fixed before merge +- **Comment** — observations and suggestions, but nothing blocking (use when you're unsure or the PR is a draft) diff --git a/website/docs/user-guide/skills/bundled/github/github-github-issues.md b/website/docs/user-guide/skills/bundled/github/github-github-issues.md new file mode 100644 index 0000000000..8493663cd5 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-issues.md @@ -0,0 +1,387 @@ +--- +title: "Github Issues — Create, manage, triage, and close GitHub issues" +sidebar_label: "Github Issues" +description: "Create, manage, triage, and close GitHub issues" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Issues + +Create, manage, triage, and close GitHub issues. Search existing issues, add labels, assign people, and link to PRs. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-issues` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Issues`, `Project-Management`, `Bug-Tracking`, `Triage` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Issues Management + +Create, search, triage, and manage GitHub issues. Each section shows `gh` first, then the `curl` fallback. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repo with a GitHub remote, or specify the repo explicitly + +### Setup + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. 
Viewing Issues + +**With gh:** + +```bash +gh issue list +gh issue list --state open --label "bug" +gh issue list --assignee @me +gh issue list --search "authentication error" --state all +gh issue view 42 +``` + +**With curl:** + +```bash +# List open issues +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?state=open&per_page=20" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: # GitHub API returns PRs in /issues too + labels = ', '.join(l['name'] for l in i['labels']) + print(f\"#{i['number']:5} {i['state']:6} {labels:30} {i['title']}\")" + +# Filter by label +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?state=open&labels=bug&per_page=20" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: + print(f\"#{i['number']} {i['title']}\")" + +# View a specific issue +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + | python3 -c " +import sys, json +i = json.load(sys.stdin) +labels = ', '.join(l['name'] for l in i['labels']) +assignees = ', '.join(a['login'] for a in i['assignees']) +print(f\"#{i['number']}: {i['title']}\") +print(f\"State: {i['state']} Labels: {labels} Assignees: {assignees}\") +print(f\"Author: {i['user']['login']} Created: {i['created_at']}\") +print(f\"\n{i['body']}\")" + +# Search issues +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/search/issues?q=authentication+error+repo:$OWNER/$REPO" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin)['items']: + print(f\"#{i['number']} {i['state']:6} {i['title']}\")" +``` + +## 2. Creating Issues + +**With gh:** + +```bash +gh issue create \ + --title "Login redirect ignores ?next= parameter" \ + --body "## Description +After logging in, users always land on /dashboard. 
+ +## Steps to Reproduce +1. Navigate to /settings while logged out +2. Get redirected to /login?next=/settings +3. Log in +4. Actual: redirected to /dashboard (should go to /settings) + +## Expected Behavior +Respect the ?next= query parameter." \ + --label "bug,backend" \ + --assignee "username" +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues \ + -d '{ + "title": "Login redirect ignores ?next= parameter", + "body": "## Description\nAfter logging in, users always land on /dashboard.\n\n## Steps to Reproduce\n1. Navigate to /settings while logged out\n2. Get redirected to /login?next=/settings\n3. Log in\n4. Actual: redirected to /dashboard\n\n## Expected Behavior\nRespect the ?next= query parameter.", + "labels": ["bug", "backend"], + "assignees": ["username"] + }' +``` + +### Bug Report Template + +``` +## Bug Description + + +## Steps to Reproduce +1. +2. + +## Expected Behavior + + +## Actual Behavior + + +## Environment +- OS: +- Version: +``` + +### Feature Request Template + +``` +## Feature Description + + +## Motivation + + +## Proposed Solution + + +## Alternatives Considered + +``` + +## 3. 
Managing Issues + +### Add/Remove Labels + +**With gh:** + +```bash +gh issue edit 42 --add-label "priority:high,bug" +gh issue edit 42 --remove-label "needs-triage" +``` + +**With curl:** + +```bash +# Add labels +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/labels \ + -d '{"labels": ["priority:high", "bug"]}' + +# Remove a label +curl -s -X DELETE \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/labels/needs-triage + +# List available labels in the repo +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/labels \ + | python3 -c " +import sys, json +for l in json.load(sys.stdin): + print(f\" {l['name']:30} {l.get('description', '')}\")" +``` + +### Assignment + +**With gh:** + +```bash +gh issue edit 42 --add-assignee username +gh issue edit 42 --add-assignee @me +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/assignees \ + -d '{"assignees": ["username"]}' +``` + +### Commenting + +**With gh:** + +```bash +gh issue comment 42 --body "Investigated — root cause is in auth middleware. Working on a fix." +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42/comments \ + -d '{"body": "Investigated — root cause is in auth middleware. 
Working on a fix."}' +``` + +### Closing and Reopening + +**With gh:** + +```bash +gh issue close 42 +gh issue close 42 --reason "not planned" +gh issue reopen 42 +``` + +**With curl:** + +```bash +# Close +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + -d '{"state": "closed", "state_reason": "completed"}' + +# Reopen +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/42 \ + -d '{"state": "open"}' +``` + +### Linking Issues to PRs + +Issues are automatically closed when a PR merges with the right keywords in the body: + +``` +Closes #42 +Fixes #42 +Resolves #42 +``` + +To create a branch from an issue: + +**With gh:** + +```bash +gh issue develop 42 --checkout +``` + +**With git (manual equivalent):** + +```bash +git checkout main && git pull origin main +git checkout -b fix/issue-42-login-redirect +``` + +## 4. Issue Triage Workflow + +When asked to triage issues: + +1. **List untriaged issues:** + +```bash +# With gh +gh issue list --label "needs-triage" --state open + +# With curl +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?labels=needs-triage&state=open" \ + | python3 -c " +import sys, json +for i in json.load(sys.stdin): + if 'pull_request' not in i: + print(f\"#{i['number']} {i['title']}\")" +``` + +2. **Read and categorize** each issue (view details, understand the bug/feature) + +3. **Apply labels and priority** (see Managing Issues above) + +4. **Assign** if the owner is clear + +5. **Comment with triage notes** if needed + +## 5. 
Bulk Operations + +For batch operations, combine API calls with shell scripting: + +**With gh:** + +```bash +# Close all issues with a specific label +gh issue list --label "wontfix" --json number --jq '.[].number' | \ + xargs -I {} gh issue close {} --reason "not planned" +``` + +**With curl:** + +```bash +# List issue numbers with a label, then close each +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/issues?labels=wontfix&state=open" \ + | python3 -c "import sys,json; [print(i['number']) for i in json.load(sys.stdin)]" \ + | while read num; do + curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/issues/$num \ + -d '{"state": "closed", "state_reason": "not_planned"}' + echo "Closed #$num" + done +``` + +## Quick Reference Table + +| Action | gh | curl endpoint | +|--------|-----|--------------| +| List issues | `gh issue list` | `GET /repos/{o}/{r}/issues` | +| View issue | `gh issue view N` | `GET /repos/{o}/{r}/issues/N` | +| Create issue | `gh issue create ...` | `POST /repos/{o}/{r}/issues` | +| Add labels | `gh issue edit N --add-label ...` | `POST /repos/{o}/{r}/issues/N/labels` | +| Assign | `gh issue edit N --add-assignee ...` | `POST /repos/{o}/{r}/issues/N/assignees` | +| Comment | `gh issue comment N --body ...` | `POST /repos/{o}/{r}/issues/N/comments` | +| Close | `gh issue close N` | `PATCH /repos/{o}/{r}/issues/N` | +| Search | `gh issue list --search "..."` | `GET /search/issues?q=...` | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md new file mode 100644 index 0000000000..f1a31e1572 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-pr-workflow.md @@ -0,0 +1,384 @@ +--- +title: "Github Pr Workflow" +sidebar_label: "Github Pr Workflow" +description: "Full pull request lifecycle — create branches, 
commit changes, open PRs, monitor CI status, auto-fix failures, and merge" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Pr Workflow + +Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-pr-workflow` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Pull-Requests`, `CI/CD`, `Git`, `Automation`, `Merge` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-code-review`](/docs/user-guide/skills/bundled/github/github-github-code-review) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Pull Request Workflow + +Complete guide for managing the PR lifecycle. Each section shows the `gh` way first, then the `git` + `curl` fallback for machines without `gh`. 
+ +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) +- Inside a git repository with a GitHub remote + +### Quick Auth Detection + +```bash +# Determine which method to use throughout this workflow +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + # Ensure we have a token for API calls + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi +echo "Using: $AUTH" +``` + +### Extracting Owner/Repo from the Git Remote + +Many `curl` commands need `owner/repo`. Extract it from the git remote: + +```bash +# Works for both HTTPS and SSH remote URLs +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +echo "Owner: $OWNER, Repo: $REPO" +``` + +--- + +## 1. Branch Creation + +This part is pure `git` — identical either way: + +```bash +# Make sure you're up to date +git fetch origin +git checkout main && git pull origin main + +# Create and switch to a new branch +git checkout -b feat/add-user-authentication +``` + +Branch naming conventions: +- `feat/description` — new features +- `fix/description` — bug fixes +- `refactor/description` — code restructuring +- `docs/description` — documentation +- `ci/description` — CI/CD changes + +## 2. 
Making Commits + +Use the agent's file tools (`write_file`, `patch`) to make changes, then commit: + +```bash +# Stage specific files +git add src/auth.py src/models/user.py tests/test_auth.py + +# Commit with a conventional commit message +git commit -m "feat: add JWT-based user authentication + +- Add login/register endpoints +- Add User model with password hashing +- Add auth middleware for protected routes +- Add unit tests for auth flow" +``` + +Commit message format (Conventional Commits): +``` +type(scope): short description + +Longer explanation if needed. Wrap at 72 characters. +``` + +Types: `feat`, `fix`, `refactor`, `docs`, `test`, `ci`, `chore`, `perf` + +## 3. Pushing and Creating a PR + +### Push the Branch (same either way) + +```bash +git push -u origin HEAD +``` + +### Create the PR + +**With gh:** + +```bash +gh pr create \ + --title "feat: add JWT-based user authentication" \ + --body "## Summary +- Adds login and register API endpoints +- JWT token generation and validation + +## Test Plan +- [ ] Unit tests pass + +Closes #42" +``` + +Options: `--draft`, `--reviewer user1,user2`, `--label "enhancement"`, `--base develop` + +**With git + curl:** + +```bash +BRANCH=$(git branch --show-current) + +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.v3+json" \ + https://api.github.com/repos/$OWNER/$REPO/pulls \ + -d "{ + \"title\": \"feat: add JWT-based user authentication\", + \"body\": \"## Summary\nAdds login and register API endpoints.\n\nCloses #42\", + \"head\": \"$BRANCH\", + \"base\": \"main\" + }" +``` + +The response JSON includes the PR `number` — save it for later commands. + +To create as a draft, add `"draft": true` to the JSON body. + +## 4. 
Monitoring CI Status + +### Check CI Status + +**With gh:** + +```bash +# One-shot check +gh pr checks + +# Watch until all checks finish (polls every 10s) +gh pr checks --watch +``` + +**With git + curl:** + +```bash +# Get the latest commit SHA on the current branch +SHA=$(git rev-parse HEAD) + +# Query the combined status +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/status \ + | python3 -c " +import sys, json +data = json.load(sys.stdin) +print(f\"Overall: {data['state']}\") +for s in data.get('statuses', []): + print(f\" {s['context']}: {s['state']} - {s.get('description', '')}\")" + +# Also check GitHub Actions check runs (separate endpoint) +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/check-runs \ + | python3 -c " +import sys, json +data = json.load(sys.stdin) +for cr in data.get('check_runs', []): + print(f\" {cr['name']}: {cr['status']} / {cr['conclusion'] or 'pending'}\")" +``` + +### Poll Until Complete (git + curl) + +```bash +# Simple polling loop — check every 30 seconds, up to 10 minutes +SHA=$(git rev-parse HEAD) +for i in $(seq 1 20); do + STATUS=$(curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/commits/$SHA/status \ + | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])") + echo "Check $i: $STATUS" + if [ "$STATUS" = "success" ] || [ "$STATUS" = "failure" ] || [ "$STATUS" = "error" ]; then + break + fi + sleep 30 +done +``` + +## 5. Auto-Fixing CI Failures + +When CI fails, diagnose and fix. This loop works with either auth method. 
+# Get failed job logs (download as zip, extract, read)
+RUN_ID=1234567890   # substitute a failing run id from the list above
+curl -s -L \
+  -H "Authorization: token $GITHUB_TOKEN" \
+  https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/logs \
+  -o /tmp/ci-logs.zip
+cd /tmp && unzip -o ci-logs.zip -d ci-logs && cat ci-logs/*.txt
+```
+
+### Step 2: Fix and Push
+
+After identifying the issue, use file tools (`patch`, `write_file`) to fix it:
+
+```bash
+git add path/to/fixed_file
+git commit -m "fix: resolve CI failure in <short description of the failing area>"
+git push
+```
+```bash
+PR_NUMBER=123   # the PR number returned when the PR was created (Section 3)
+
+# Merge the PR via API (squash)
Create PR (picks gh or curl based on what's available) +# ... (see Section 3) + +# 7. Monitor CI (see Section 4) + +# 8. Merge when green (see Section 6) +``` + +## Useful PR Commands Reference + +| Action | gh | git + curl | +|--------|-----|-----------| +| List my PRs | `gh pr list --author @me` | `curl -s -H "Authorization: token $GITHUB_TOKEN" "https://api.github.com/repos/$OWNER/$REPO/pulls?state=open"` | +| View PR diff | `gh pr diff` | `git diff main...HEAD` (local) or `curl -H "Accept: application/vnd.github.diff" ...` | +| Add comment | `gh pr comment N --body "..."` | `curl -X POST .../issues/N/comments -d '{"body":"..."}'` | +| Request review | `gh pr edit N --add-reviewer user` | `curl -X POST .../pulls/N/requested_reviewers -d '{"reviewers":["user"]}'` | +| Close PR | `gh pr close N` | `curl -X PATCH .../pulls/N -d '{"state":"closed"}'` | +| Check out someone's PR | `gh pr checkout N` | `git fetch origin pull/N/head:pr-N && git checkout pr-N` | diff --git a/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md new file mode 100644 index 0000000000..8392250346 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/github/github-github-repo-management.md @@ -0,0 +1,533 @@ +--- +title: "Github Repo Management — Clone, create, fork, configure, and manage GitHub repositories" +sidebar_label: "Github Repo Management" +description: "Clone, create, fork, configure, and manage GitHub repositories" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Github Repo Management + +Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/github/github-repo-management` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GitHub`, `Repositories`, `Git`, `Releases`, `Secrets`, `Configuration` | +| Related skills | [`github-auth`](/docs/user-guide/skills/bundled/github/github-github-auth), [`github-pr-workflow`](/docs/user-guide/skills/bundled/github/github-github-pr-workflow), [`github-issues`](/docs/user-guide/skills/bundled/github/github-github-issues) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GitHub Repository Management + +Create, clone, fork, configure, and manage GitHub repositories. Each section shows `gh` first, then the `git` + `curl` fallback. + +## Prerequisites + +- Authenticated with GitHub (see `github-auth` skill) + +### Setup + +```bash +if command -v gh &>/dev/null && gh auth status &>/dev/null; then + AUTH="gh" +else + AUTH="git" + if [ -z "$GITHUB_TOKEN" ]; then + if [ -f ~/.hermes/.env ] && grep -q "^GITHUB_TOKEN=" ~/.hermes/.env; then + GITHUB_TOKEN=$(grep "^GITHUB_TOKEN=" ~/.hermes/.env | head -1 | cut -d= -f2 | tr -d '\n\r') + elif grep -q "github.com" ~/.git-credentials 2>/dev/null; then + GITHUB_TOKEN=$(grep "github.com" ~/.git-credentials 2>/dev/null | head -1 | sed 's|https://[^:]*:\([^@]*\)@.*|\1|') + fi + fi +fi + +# Get your GitHub username (needed for several operations) +if [ "$AUTH" = "gh" ]; then + GH_USER=$(gh api user --jq '.login') +else + GH_USER=$(curl -s -H "Authorization: token $GITHUB_TOKEN" https://api.github.com/user | python3 -c "import sys,json; print(json.load(sys.stdin)['login'])") +fi +``` + +If you're inside a repo already: + +```bash +REMOTE_URL=$(git remote get-url origin) +OWNER_REPO=$(echo "$REMOTE_URL" | sed -E 's|.*github\.com[:/]||; 
s|\.git$||') +OWNER=$(echo "$OWNER_REPO" | cut -d/ -f1) +REPO=$(echo "$OWNER_REPO" | cut -d/ -f2) +``` + +--- + +## 1. Cloning Repositories + +Cloning is pure `git` — works identically either way: + +```bash +# Clone via HTTPS (works with credential helper or token-embedded URL) +git clone https://github.com/owner/repo-name.git + +# Clone into a specific directory +git clone https://github.com/owner/repo-name.git ./my-local-dir + +# Shallow clone (faster for large repos) +git clone --depth 1 https://github.com/owner/repo-name.git + +# Clone a specific branch +git clone --branch develop https://github.com/owner/repo-name.git + +# Clone via SSH (if SSH is configured) +git clone git@github.com:owner/repo-name.git +``` + +**With gh (shorthand):** + +```bash +gh repo clone owner/repo-name +gh repo clone owner/repo-name -- --depth 1 +``` + +## 2. Creating Repositories + +**With gh:** + +```bash +# Create a public repo and clone it +gh repo create my-new-project --public --clone + +# Private, with description and license +gh repo create my-new-project --private --description "A useful tool" --license MIT --clone + +# Under an organization +gh repo create my-org/my-new-project --public --clone + +# From existing local directory +cd /path/to/existing/project +gh repo create my-project --source . --public --push +``` + +**With git + curl:** + +```bash +# Create the remote repo via API +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/user/repos \ + -d '{ + "name": "my-new-project", + "description": "A useful tool", + "private": false, + "auto_init": true, + "license_template": "mit" + }' + +# Clone it +git clone https://github.com/$GH_USER/my-new-project.git +cd my-new-project + +# -- OR -- push an existing local directory to the new repo +cd /path/to/existing/project +git init +git add . 
+git commit -m "Initial commit" +git remote add origin https://github.com/$GH_USER/my-new-project.git +git push -u origin main +``` + +To create under an organization: + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/orgs/my-org/repos \ + -d '{"name": "my-new-project", "private": false}' +``` + +### From a Template + +**With gh:** + +```bash +gh repo create my-new-app --template owner/template-repo --public --clone +``` + +**With curl:** + +```bash +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/owner/template-repo/generate \ + -d '{"owner": "'"$GH_USER"'", "name": "my-new-app", "private": false}' +``` + +## 3. Forking Repositories + +**With gh:** + +```bash +gh repo fork owner/repo-name --clone +``` + +**With git + curl:** + +```bash +# Create the fork via API +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/owner/repo-name/forks + +# Wait a moment for GitHub to create it, then clone +sleep 3 +git clone https://github.com/$GH_USER/repo-name.git +cd repo-name + +# Add the original repo as "upstream" remote +git remote add upstream https://github.com/owner/repo-name.git +``` + +### Keeping a Fork in Sync + +```bash +# Pure git — works everywhere +git fetch upstream +git checkout main +git merge upstream/main +git push origin main +``` + +**With gh (shortcut):** + +```bash +gh repo sync $GH_USER/repo-name +``` + +## 4. 
Repository Information + +**With gh:** + +```bash +gh repo view owner/repo-name +gh repo list --limit 20 +gh search repos "machine learning" --language python --sort stars +``` + +**With curl:** + +```bash +# View repo details +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO \ + | python3 -c " +import sys, json +r = json.load(sys.stdin) +print(f\"Name: {r['full_name']}\") +print(f\"Description: {r['description']}\") +print(f\"Stars: {r['stargazers_count']} Forks: {r['forks_count']}\") +print(f\"Default branch: {r['default_branch']}\") +print(f\"Language: {r['language']}\")" + +# List your repos +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/user/repos?per_page=20&sort=updated" \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin): + vis = 'private' if r['private'] else 'public' + print(f\" {r['full_name']:40} {vis:8} {r.get('language', ''):10} ★{r['stargazers_count']}\")" + +# Search repos +curl -s \ + "https://api.github.com/search/repositories?q=machine+learning+language:python&sort=stars&per_page=10" \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin)['items']: + print(f\" {r['full_name']:40} ★{r['stargazers_count']:6} {r['description'][:60] if r['description'] else ''}\")" +``` + +## 5. 
Repository Settings + +**With gh:** + +```bash +gh repo edit --description "Updated description" --visibility public +gh repo edit --enable-wiki=false --enable-issues=true +gh repo edit --default-branch main +gh repo edit --add-topic "machine-learning,python" +gh repo edit --enable-auto-merge +``` + +**With curl:** + +```bash +curl -s -X PATCH \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO \ + -d '{ + "description": "Updated description", + "has_wiki": false, + "has_issues": true, + "allow_auto_merge": true + }' + +# Update topics +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Accept: application/vnd.github.mercy-preview+json" \ + https://api.github.com/repos/$OWNER/$REPO/topics \ + -d '{"names": ["machine-learning", "python", "automation"]}' +``` + +## 6. Branch Protection + +```bash +# View current protection +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/branches/main/protection + +# Set up branch protection +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/branches/main/protection \ + -d '{ + "required_status_checks": { + "strict": true, + "contexts": ["ci/test", "ci/lint"] + }, + "enforce_admins": false, + "required_pull_request_reviews": { + "required_approving_review_count": 1 + }, + "restrictions": null + }' +``` + +## 7. 
Secrets Management (GitHub Actions) + +**With gh:** + +```bash +gh secret set API_KEY --body "your-secret-value" +gh secret set SSH_KEY < ~/.ssh/id_rsa +gh secret list +gh secret delete API_KEY +``` + +**With curl:** + +Secrets require encryption with the repo's public key — more involved via API: + +```bash +# Get the repo's public key for encrypting secrets +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets/public-key + +# Encrypt and set (requires Python with PyNaCl) +python3 -c " +from base64 import b64encode +from nacl import encoding, public +import json, sys + +# Get the public key +key_id = '' +public_key = '' + +# Encrypt +sealed = public.SealedBox( + public.PublicKey(public_key.encode('utf-8'), encoding.Base64Encoder) +).encrypt('your-secret-value'.encode('utf-8')) +print(json.dumps({ + 'encrypted_value': b64encode(sealed).decode('utf-8'), + 'key_id': key_id +}))" + +# Then PUT the encrypted secret +curl -s -X PUT \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets/API_KEY \ + -d '' + +# List secrets (names only, values hidden) +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/secrets \ + | python3 -c " +import sys, json +for s in json.load(sys.stdin)['secrets']: + print(f\" {s['name']:30} updated: {s['updated_at']}\")" +``` + +Note: For secrets, `gh secret set` is dramatically simpler. If setting secrets is needed and `gh` isn't available, recommend installing it for just that operation. + +## 8. 
Releases + +**With gh:** + +```bash +gh release create v1.0.0 --title "v1.0.0" --generate-notes +gh release create v2.0.0-rc1 --draft --prerelease --generate-notes +gh release create v1.0.0 ./dist/binary --title "v1.0.0" --notes "Release notes" +gh release list +gh release download v1.0.0 --dir ./downloads +``` + +**With curl:** + +```bash +# Create a release +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/releases \ + -d '{ + "tag_name": "v1.0.0", + "name": "v1.0.0", + "body": "## Changelog\n- Feature A\n- Bug fix B", + "draft": false, + "prerelease": false, + "generate_release_notes": true + }' + +# List releases +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/releases \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin): + tag = r.get('tag_name', 'no tag') + print(f\" {tag:15} {r['name']:30} {'draft' if r['draft'] else 'published'}\")" + +# Upload a release asset (binary file) +RELEASE_ID= +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Content-Type: application/octet-stream" \ + "https://uploads.github.com/repos/$OWNER/$REPO/releases/$RELEASE_ID/assets?name=binary-amd64" \ + --data-binary @./dist/binary-amd64 +``` + +## 9. 
GitHub Actions Workflows + +**With gh:** + +```bash +gh workflow list +gh run list --limit 10 +gh run view +gh run view --log-failed +gh run rerun +gh run rerun --failed +gh workflow run ci.yml --ref main +gh workflow run deploy.yml -f environment=staging +``` + +**With curl:** + +```bash +# List workflows +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/workflows \ + | python3 -c " +import sys, json +for w in json.load(sys.stdin)['workflows']: + print(f\" {w['id']:10} {w['name']:30} {w['state']}\")" + +# List recent runs +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + "https://api.github.com/repos/$OWNER/$REPO/actions/runs?per_page=10" \ + | python3 -c " +import sys, json +for r in json.load(sys.stdin)['workflow_runs']: + print(f\" Run {r['id']} {r['name']:30} {r['conclusion'] or r['status']}\")" + +# Download failed run logs +RUN_ID= +curl -s -L \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/logs \ + -o /tmp/ci-logs.zip +cd /tmp && unzip -o ci-logs.zip -d ci-logs + +# Re-run a failed workflow +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/rerun + +# Re-run only failed jobs +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/runs/$RUN_ID/rerun-failed-jobs + +# Trigger a workflow manually (workflow_dispatch) +WORKFLOW_ID= +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/$OWNER/$REPO/actions/workflows/$WORKFLOW_ID/dispatches \ + -d '{"ref": "main", "inputs": {"environment": "staging"}}' +``` + +## 10. 
Gists + +**With gh:** + +```bash +gh gist create script.py --public --desc "Useful script" +gh gist list +``` + +**With curl:** + +```bash +# Create a gist +curl -s -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/gists \ + -d '{ + "description": "Useful script", + "public": true, + "files": { + "script.py": {"content": "print(\"hello\")"} + } + }' + +# List your gists +curl -s \ + -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/gists \ + | python3 -c " +import sys, json +for g in json.load(sys.stdin): + files = ', '.join(g['files'].keys()) + print(f\" {g['id']} {g['description'] or '(no desc)':40} {files}\")" +``` + +## Quick Reference Table + +| Action | gh | git + curl | +|--------|-----|-----------| +| Clone | `gh repo clone o/r` | `git clone https://github.com/o/r.git` | +| Create repo | `gh repo create name --public` | `curl POST /user/repos` | +| Fork | `gh repo fork o/r --clone` | `curl POST /repos/o/r/forks` + `git clone` | +| Repo info | `gh repo view o/r` | `curl GET /repos/o/r` | +| Edit settings | `gh repo edit --...` | `curl PATCH /repos/o/r` | +| Create release | `gh release create v1.0` | `curl POST /repos/o/r/releases` | +| List workflows | `gh workflow list` | `curl GET /repos/o/r/actions/workflows` | +| Rerun CI | `gh run rerun ID` | `curl POST /repos/o/r/actions/runs/ID/rerun` | +| Set secret | `gh secret set KEY` | `curl PUT /repos/o/r/actions/secrets/KEY` (+ encryption) | diff --git a/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md new file mode 100644 index 0000000000..267c8c064c --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mcp/mcp-native-mcp.md @@ -0,0 +1,374 @@ +--- +title: "Native Mcp" +sidebar_label: "Native Mcp" +description: "Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools" +--- + +{/* This page 
is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Native Mcp + +Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mcp/native-mcp` | +| Version | `1.0.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `MCP`, `Tools`, `Integrations` | +| Related skills | [`mcporter`](/docs/user-guide/skills/optional/mcp/mcp-mcporter) | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Native MCP Client + +Hermes Agent has a built-in MCP client that connects to MCP servers at startup, discovers their tools, and makes them available as first-class tools the agent can call directly. No bridge CLI needed -- tools from MCP servers appear alongside built-in tools like `terminal`, `read_file`, etc. + +## When to Use + +Use this whenever you want to: +- Connect to MCP servers and use their tools from within Hermes Agent +- Add external capabilities (filesystem access, GitHub, databases, APIs) via MCP +- Run local stdio-based MCP servers (npx, uvx, or any command) +- Connect to remote HTTP/StreamableHTTP MCP servers +- Have MCP tools auto-discovered and available in every conversation + +For ad-hoc, one-off MCP tool calls from the terminal without configuring anything, see the `mcporter` skill instead. + +## Prerequisites + +- **mcp Python package** -- optional dependency; install with `pip install mcp`. If not installed, MCP support is silently disabled. 
+- **Node.js** -- required for `npx`-based MCP servers (most community servers) +- **uv** -- required for `uvx`-based MCP servers (Python-based servers) + +Install the MCP SDK: + +```bash +pip install mcp +# or, if using uv: +uv pip install mcp +``` + +## Quick Start + +Add MCP servers to `~/.hermes/config.yaml` under the `mcp_servers` key: + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] +``` + +Restart Hermes Agent. On startup it will: +1. Connect to the server +2. Discover available tools +3. Register them with the prefix `mcp_time_*` +4. Inject them into all platform toolsets + +You can then use the tools naturally -- just ask the agent to get the current time. + +## Configuration Reference + +Each entry under `mcp_servers` is a server name mapped to its config. There are two transport types: **stdio** (command-based) and **HTTP** (url-based). + +### Stdio Transport (command + args) + +```yaml +mcp_servers: + server_name: + command: "npx" # (required) executable to run + args: ["-y", "pkg-name"] # (optional) command arguments, default: [] + env: # (optional) environment variables for the subprocess + SOME_API_KEY: "value" + timeout: 120 # (optional) per-tool-call timeout in seconds, default: 120 + connect_timeout: 60 # (optional) initial connection timeout in seconds, default: 60 +``` + +### HTTP Transport (url) + +```yaml +mcp_servers: + server_name: + url: "https://my-server.example.com/mcp" # (required) server URL + headers: # (optional) HTTP headers + Authorization: "Bearer sk-..." 
+ timeout: 180 # (optional) per-tool-call timeout in seconds, default: 120 + connect_timeout: 60 # (optional) initial connection timeout in seconds, default: 60 +``` + +### All Config Options + +| Option | Type | Default | Description | +|-------------------|--------|---------|---------------------------------------------------| +| `command` | string | -- | Executable to run (stdio transport, required) | +| `args` | list | `[]` | Arguments passed to the command | +| `env` | dict | `{}` | Extra environment variables for the subprocess | +| `url` | string | -- | Server URL (HTTP transport, required) | +| `headers` | dict | `{}` | HTTP headers sent with every request | +| `timeout` | int | `120` | Per-tool-call timeout in seconds | +| `connect_timeout` | int | `60` | Timeout for initial connection and discovery | + +Note: A server config must have either `command` (stdio) or `url` (HTTP), not both. + +## How It Works + +### Startup Discovery + +When Hermes Agent starts, `discover_mcp_tools()` is called during tool initialization: + +1. Reads `mcp_servers` from `~/.hermes/config.yaml` +2. For each server, spawns a connection in a dedicated background event loop +3. Initializes the MCP session and calls `list_tools()` to discover available tools +4. Registers each tool in the Hermes tool registry + +### Tool Naming Convention + +MCP tools are registered with the naming pattern: + +``` +mcp_{server_name}_{tool_name} +``` + +Hyphens and dots in names are replaced with underscores for LLM API compatibility. + +Examples: +- Server `filesystem`, tool `read_file` → `mcp_filesystem_read_file` +- Server `github`, tool `list-issues` → `mcp_github_list_issues` +- Server `my-api`, tool `fetch.data` → `mcp_my_api_fetch_data` + +### Auto-Injection + +After discovery, MCP tools are automatically injected into all `hermes-*` platform toolsets (CLI, Discord, Telegram, etc.). This means MCP tools are available in every conversation without any additional configuration. 
+ +### Connection Lifecycle + +- Each server runs as a long-lived asyncio Task in a background daemon thread +- Connections persist for the lifetime of the agent process +- If a connection drops, automatic reconnection with exponential backoff kicks in (up to 5 retries, max 60s backoff) +- On agent shutdown, all connections are gracefully closed + +### Idempotency + +`discover_mcp_tools()` is idempotent -- calling it multiple times only connects to servers that aren't already connected. Failed servers are retried on subsequent calls. + +## Transport Types + +### Stdio Transport + +The most common transport. Hermes launches the MCP server as a subprocess and communicates over stdin/stdout. + +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/projects"] +``` + +The subprocess inherits a **filtered** environment (see Security section below) plus any variables you specify in `env`. + +### HTTP / StreamableHTTP Transport + +For remote or shared MCP servers. Requires the `mcp` package to include HTTP client support (`mcp.client.streamable_http`). + +```yaml +mcp_servers: + remote_api: + url: "https://mcp.example.com/mcp" + headers: + Authorization: "Bearer sk-..." +``` + +If HTTP support is not available in your installed `mcp` version, the server will fail with an ImportError and other servers will continue normally. + +## Security + +### Environment Variable Filtering + +For stdio servers, Hermes does NOT pass your full shell environment to MCP subprocesses. Only safe baseline variables are inherited: + +- `PATH`, `HOME`, `USER`, `LANG`, `LC_ALL`, `TERM`, `SHELL`, `TMPDIR` +- Any `XDG_*` variables + +All other environment variables (API keys, tokens, secrets) are excluded unless you explicitly add them via the `env` config key. This prevents accidental credential leakage to untrusted MCP servers. 
+ +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + # Only this token is passed to the subprocess + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..." +``` + +### Credential Stripping in Error Messages + +If an MCP tool call fails, any credential-like patterns in the error message are automatically redacted before being shown to the LLM. This covers: + +- GitHub PATs (`ghp_...`) +- OpenAI-style keys (`sk-...`) +- Bearer tokens +- Generic `token=`, `key=`, `API_KEY=`, `password=`, `secret=` patterns + +## Troubleshooting + +### "MCP SDK not available -- skipping MCP tool discovery" + +The `mcp` Python package is not installed. Install it: + +```bash +pip install mcp +``` + +### "No MCP servers configured" + +No `mcp_servers` key in `~/.hermes/config.yaml`, or it's empty. Add at least one server. + +### "Failed to connect to MCP server 'X'" + +Common causes: +- **Command not found**: The `command` binary isn't on PATH. Ensure `npx`, `uvx`, or the relevant command is installed. +- **Package not found**: For npx servers, the npm package may not exist or may need `-y` in args to auto-install. +- **Timeout**: The server took too long to start. Increase `connect_timeout`. +- **Port conflict**: For HTTP servers, the URL may be unreachable. + +### "MCP server 'X' requires HTTP transport but mcp.client.streamable_http is not available" + +Your `mcp` package version doesn't include HTTP client support. Upgrade: + +```bash +pip install --upgrade mcp +``` + +### Tools not appearing + +- Check that the server is listed under `mcp_servers` (not `mcp` or `servers`) +- Ensure the YAML indentation is correct +- Look at Hermes Agent startup logs for connection messages +- Tool names are prefixed with `mcp_{server}_{tool}` -- look for that pattern + +### Connection keeps dropping + +The client retries up to 5 times with exponential backoff (1s, 2s, 4s, 8s, 16s, capped at 60s). 
If the server is fundamentally unreachable, it gives up after 5 attempts. Check the server process and network connectivity. + +## Examples + +### Time Server (uvx) + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] +``` + +Registers tools like `mcp_time_get_current_time`. + +### Filesystem Server (npx) + +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/documents"] + timeout: 30 +``` + +Registers tools like `mcp_filesystem_read_file`, `mcp_filesystem_write_file`, `mcp_filesystem_list_directory`. + +### GitHub Server with Authentication + +```yaml +mcp_servers: + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxxxxxxxxxxxxxxxxxxx" + timeout: 60 +``` + +Registers tools like `mcp_github_list_issues`, `mcp_github_create_pull_request`, etc. + +### Remote HTTP Server + +```yaml +mcp_servers: + company_api: + url: "https://mcp.mycompany.com/v1/mcp" + headers: + Authorization: "Bearer sk-xxxxxxxxxxxxxxxxxxxx" + X-Team-Id: "engineering" + timeout: 180 + connect_timeout: 30 +``` + +### Multiple Servers + +```yaml +mcp_servers: + time: + command: "uvx" + args: ["mcp-server-time"] + + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"] + + github: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-github"] + env: + GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxxxxxxxxxxxxxxxxxxx" + + company_api: + url: "https://mcp.internal.company.com/mcp" + headers: + Authorization: "Bearer sk-xxxxxxxxxxxxxxxxxxxx" + timeout: 300 +``` + +All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions. + +## Sampling (Server-Initiated LLM Requests) + +Hermes supports MCP's `sampling/createMessage` capability — MCP servers can request LLM completions through the agent during tool execution. 
This enables agent-in-the-loop workflows (data analysis, content generation, decision-making). + +Sampling is **enabled by default**. Configure per server: + +```yaml +mcp_servers: + my_server: + command: "npx" + args: ["-y", "my-mcp-server"] + sampling: + enabled: true # default: true + model: "gemini-3-flash" # model override (optional) + max_tokens_cap: 4096 # max tokens per request + timeout: 30 # LLM call timeout (seconds) + max_rpm: 10 # max requests per minute + allowed_models: [] # model whitelist (empty = all) + max_tool_rounds: 5 # tool loop limit (0 = disable) + log_level: "info" # audit verbosity +``` + +Servers can also include `tools` in sampling requests for multi-turn tool-augmented workflows. The `max_tool_rounds` config prevents infinite tool loops. Per-server audit metrics (requests, errors, tokens, tool use count) are tracked via `get_mcp_status()`. + +Disable sampling for untrusted servers with `sampling: { enabled: false }`. + +## Notes + +- MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop +- Tool results are returned as JSON with either `{"result": "..."}` or `{"error": "..."}` +- The native MCP client is independent of `mcporter` -- you can use both simultaneously +- Server connections are persistent and shared across all conversations in the same agent process +- Adding or removing servers requires restarting the agent (no hot-reload currently) diff --git a/website/docs/user-guide/skills/bundled/media/media-gif-search.md b/website/docs/user-guide/skills/bundled/media/media-gif-search.md new file mode 100644 index 0000000000..67b56645db --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-gif-search.md @@ -0,0 +1,101 @@ +--- +title: "Gif Search — Search and download GIFs from Tenor using curl" +sidebar_label: "Gif Search" +description: "Search and download GIFs from Tenor using curl" +--- + +{/* This page is auto-generated from the skill's SKILL.md 
by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Gif Search + +Search and download GIFs from Tenor using curl. No dependencies beyond curl and jq. Useful for finding reaction GIFs, creating visual content, and sending GIFs in chat. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/gif-search` | +| Version | `1.1.0` | +| Author | Hermes Agent | +| License | MIT | +| Tags | `GIF`, `Media`, `Search`, `Tenor`, `API` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# GIF Search (Tenor API) + +Search and download GIFs directly via the Tenor API using curl. No extra tools needed. + +## Setup + +Set your Tenor API key in your environment (add to `~/.hermes/.env`): + +```bash +TENOR_API_KEY=your_key_here +``` + +Get a free API key at https://developers.google.com/tenor/guides/quickstart — the Google Cloud Console Tenor API key is free and has generous rate limits. 
+ +## Prerequisites + +- `curl` and `jq` (both standard on macOS/Linux) +- `TENOR_API_KEY` environment variable + +## Search for GIFs + +```bash +# Search and get GIF URLs +curl -s "https://tenor.googleapis.com/v2/search?q=thumbs+up&limit=5&key=${TENOR_API_KEY}" | jq -r '.results[].media_formats.gif.url' + +# Get smaller/preview versions +curl -s "https://tenor.googleapis.com/v2/search?q=nice+work&limit=3&key=${TENOR_API_KEY}" | jq -r '.results[].media_formats.tinygif.url' +``` + +## Download a GIF + +```bash +# Search and download the top result +URL=$(curl -s "https://tenor.googleapis.com/v2/search?q=celebration&limit=1&key=${TENOR_API_KEY}" | jq -r '.results[0].media_formats.gif.url') +curl -sL "$URL" -o celebration.gif +``` + +## Get Full Metadata + +```bash +curl -s "https://tenor.googleapis.com/v2/search?q=cat&limit=3&key=${TENOR_API_KEY}" | jq '.results[] | {title: .title, url: .media_formats.gif.url, preview: .media_formats.tinygif.url, dimensions: .media_formats.gif.dims}' +``` + +## API Parameters + +| Parameter | Description | +|-----------|-------------| +| `q` | Search query (URL-encode spaces as `+`) | +| `limit` | Max results (1-50, default 20) | +| `key` | API key (from `$TENOR_API_KEY` env var) | +| `media_filter` | Filter formats: `gif`, `tinygif`, `mp4`, `tinymp4`, `webm` | +| `contentfilter` | Safety: `off`, `low`, `medium`, `high` | +| `locale` | Language: `en_US`, `es`, `fr`, etc. 
| + +## Available Media Formats + +Each result has multiple formats under `.media_formats`: + +| Format | Use case | +|--------|----------| +| `gif` | Full quality GIF | +| `tinygif` | Small preview GIF | +| `mp4` | Video version (smaller file size) | +| `tinymp4` | Small preview video | +| `webm` | WebM video | +| `nanogif` | Tiny thumbnail | + +## Notes + +- URL-encode the query: spaces as `+`, special chars as `%XX` +- For sending in chat, `tinygif` URLs are lighter weight +- GIF URLs can be used directly in markdown: `![alt](url)` diff --git a/website/docs/user-guide/skills/bundled/media/media-heartmula.md b/website/docs/user-guide/skills/bundled/media/media-heartmula.md new file mode 100644 index 0000000000..85dae5e867 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-heartmula.md @@ -0,0 +1,188 @@ +--- +title: "Heartmula — Set up and run HeartMuLa, the open-source music generation model family (Suno-like)" +sidebar_label: "Heartmula" +description: "Set up and run HeartMuLa, the open-source music generation model family (Suno-like)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Heartmula + +Set up and run HeartMuLa, the open-source music generation model family (Suno-like). Generates full songs from lyrics + tags with multilingual support. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/heartmula` | +| Version | `1.0.0` | +| Tags | `music`, `audio`, `generation`, `ai`, `heartmula`, `heartcodec`, `lyrics`, `songs` | +| Related skills | `audiocraft` | +
## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active.
+::: + +# HeartMuLa - Open-Source Music Generation + +## Overview +HeartMuLa is a family of open-source music foundation models (Apache-2.0) that generates music conditioned on lyrics and tags. Comparable to Suno for open-source. Includes: +- **HeartMuLa** - Music language model (3B/7B) for generation from lyrics + tags +- **HeartCodec** - 12.5Hz music codec for high-fidelity audio reconstruction +- **HeartTranscriptor** - Whisper-based lyrics transcription +- **HeartCLAP** - Audio-text alignment model + +## When to Use +- User wants to generate music/songs from text descriptions +- User wants an open-source Suno alternative +- User wants local/offline music generation +- User asks about HeartMuLa, heartlib, or AI music generation + +## Hardware Requirements +- **Minimum**: 8GB VRAM with `--lazy_load true` (loads/unloads models sequentially) +- **Recommended**: 16GB+ VRAM for comfortable single-GPU usage +- **Multi-GPU**: Use `--mula_device cuda:0 --codec_device cuda:1` to split across GPUs +- 3B model with lazy_load peaks at ~6.2GB VRAM + +## Installation Steps + +### 1. Clone Repository +```bash +cd ~/ # or desired directory +git clone https://github.com/HeartMuLa/heartlib.git +cd heartlib +``` + +### 2. Create Virtual Environment (Python 3.10 required) +```bash +uv venv --python 3.10 .venv +. .venv/bin/activate +uv pip install -e . +``` + +### 3. Fix Dependency Compatibility Issues + +**IMPORTANT**: As of Feb 2026, the pinned dependencies have conflicts with newer packages. Apply these fixes: + +```bash +# Upgrade datasets (old version incompatible with current pyarrow) +uv pip install --upgrade datasets + +# Upgrade transformers (needed for huggingface-hub 1.x compatibility) +uv pip install --upgrade transformers +``` + +### 4. 
Patch Source Code (Required for transformers 5.x) + +**Patch 1 - RoPE cache fix** in `src/heartlib/heartmula/modeling_heartmula.py`: + +In the `setup_caches` method of the `HeartMuLa` class, add RoPE reinitialization after the `reset_caches` try/except block and before the `with device:` block: + +```python +# Re-initialize RoPE caches that were skipped during meta-device loading +from torchtune.models.llama3_1._position_embeddings import Llama3ScaledRoPE +for module in self.modules(): + if isinstance(module, Llama3ScaledRoPE) and not module.is_cache_built: + module.rope_init() + module.to(device) +``` + +**Why**: `from_pretrained` creates model on meta device first; `Llama3ScaledRoPE.rope_init()` skips cache building on meta tensors, then never rebuilds after weights are loaded to real device. + +**Patch 2 - HeartCodec loading fix** in `src/heartlib/pipelines/music_generation.py`: + +Add `ignore_mismatched_sizes=True` to ALL `HeartCodec.from_pretrained()` calls (there are 2: the eager load in `__init__` and the lazy load in the `codec` property). + +**Why**: VQ codebook `initted` buffers have shape `[1]` in checkpoint vs `[]` in model. Same data, just scalar vs 0-d tensor. Safe to ignore. + +### 5. Download Model Checkpoints +```bash +cd heartlib # project root +hf download --local-dir './ckpt' 'HeartMuLa/HeartMuLaGen' +hf download --local-dir './ckpt/HeartMuLa-oss-3B' 'HeartMuLa/HeartMuLa-oss-3B-happy-new-year' +hf download --local-dir './ckpt/HeartCodec-oss' 'HeartMuLa/HeartCodec-oss-20260123' +``` + +All 3 can be downloaded in parallel. Total size is several GB. + +## GPU / CUDA + +HeartMuLa uses CUDA by default (`--mula_device cuda --codec_device cuda`). No extra setup needed if the user has an NVIDIA GPU with PyTorch CUDA support installed. 
+ +- The installed `torch==2.4.1` includes CUDA 12.1 support out of the box +- `torchtune` may report version `0.4.0+cpu` — this is just package metadata, it still uses CUDA via PyTorch +- To verify GPU is being used, look for "CUDA memory" lines in the output (e.g. "CUDA memory before unloading: 6.20 GB") +- **No GPU?** You can run on CPU with `--mula_device cpu --codec_device cpu`, but expect generation to be **extremely slow** (potentially 30-60+ minutes for a single song vs ~4 minutes on GPU). CPU mode also requires significant RAM (~12GB+ free). If the user has no NVIDIA GPU, recommend using a cloud GPU service (Google Colab free tier with T4, Lambda Labs, etc.) or the online demo at https://heartmula.github.io/ instead. + +## Usage + +### Basic Generation +```bash +cd heartlib +. .venv/bin/activate +python ./examples/run_music_generation.py \ + --model_path=./ckpt \ + --version="3B" \ + --lyrics="./assets/lyrics.txt" \ + --tags="./assets/tags.txt" \ + --save_path="./assets/output.mp3" \ + --lazy_load true +``` + +### Input Formatting + +**Tags** (comma-separated, no spaces): +``` +piano,happy,wedding,synthesizer,romantic +``` +or +``` +rock,energetic,guitar,drums,male-vocal +``` + +**Lyrics** (use bracketed structural tags): +``` +[Intro] + +[Verse] +Your lyrics here... + +[Chorus] +Chorus lyrics... + +[Bridge] +Bridge lyrics... 
+ +[Outro] +``` + +### Key Parameters +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--max_audio_length_ms` | 240000 | Max length in ms (240s = 4 min) | +| `--topk` | 50 | Top-k sampling | +| `--temperature` | 1.0 | Sampling temperature | +| `--cfg_scale` | 1.5 | Classifier-free guidance scale | +| `--lazy_load` | false | Load/unload models on demand (saves VRAM) | +| `--mula_dtype` | bfloat16 | Dtype for HeartMuLa (bf16 recommended) | +| `--codec_dtype` | float32 | Dtype for HeartCodec (fp32 recommended for quality) | + +### Performance +- RTF (Real-Time Factor) ≈ 1.0 — a 4-minute song takes ~4 minutes to generate +- Output: MP3, 48kHz stereo, 128kbps + +## Pitfalls +1. **Do NOT use bf16 for HeartCodec** — degrades audio quality. Use fp32 (default). +2. **Tags may be ignored** — known issue (#90). Lyrics tend to dominate; experiment with tag ordering. +3. **Triton not available on macOS** — Linux/CUDA only for GPU acceleration. +4. **RTX 5080 incompatibility** reported in upstream issues. +5. The dependency pin conflicts require the manual upgrades and patches described above. + +## Links +- Repo: https://github.com/HeartMuLa/heartlib +- Models: https://huggingface.co/HeartMuLa +- Paper: https://arxiv.org/abs/2601.10547 +- License: Apache-2.0 diff --git a/website/docs/user-guide/skills/bundled/media/media-songsee.md b/website/docs/user-guide/skills/bundled/media/media-songsee.md new file mode 100644 index 0000000000..231b87ea3b --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-songsee.md @@ -0,0 +1,97 @@ +--- +title: "Songsee — Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.)" +sidebar_label: "Songsee" +description: "Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page.
*/} + +# Songsee + +Generate spectrograms and audio feature visualizations (mel, chroma, MFCC, tempogram, etc.) from audio files via CLI. Useful for audio analysis, music production debugging, and visual documentation. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/songsee` | +| Version | `1.0.0` | +| Author | community | +| License | MIT | +| Tags | `Audio`, `Visualization`, `Spectrogram`, `Music`, `Analysis` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# songsee + +Generate spectrograms and multi-panel audio feature visualizations from audio files. + +## Prerequisites + +Requires [Go](https://go.dev/doc/install): +```bash +go install github.com/steipete/songsee/cmd/songsee@latest +``` + +Optional: `ffmpeg` for formats beyond WAV/MP3. + +## Quick Start + +```bash +# Basic spectrogram +songsee track.mp3 + +# Save to specific file +songsee track.mp3 -o spectrogram.png + +# Multi-panel visualization grid +songsee track.mp3 --viz spectrogram,mel,chroma,hpss,selfsim,loudness,tempogram,mfcc,flux + +# Time slice (start at 12.5s, 8s duration) +songsee track.mp3 --start 12.5 --duration 8 -o slice.jpg + +# From stdin +cat track.mp3 | songsee - --format png -o out.png +``` + +## Visualization Types + +Use `--viz` with comma-separated values: + +| Type | Description | +|------|-------------| +| `spectrogram` | Standard frequency spectrogram | +| `mel` | Mel-scaled spectrogram | +| `chroma` | Pitch class distribution | +| `hpss` | Harmonic/percussive separation | +| `selfsim` | Self-similarity matrix | +| `loudness` | Loudness over time | +| `tempogram` | Tempo estimation | +| `mfcc` | Mel-frequency cepstral coefficients | +| `flux` | Spectral flux (onset detection) | + +Multiple `--viz` types render as a grid in a single image. 
+ +## Common Flags + +| Flag | Description | +|------|-------------| +| `--viz` | Visualization types (comma-separated) | +| `--style` | Color palette: `classic`, `magma`, `inferno`, `viridis`, `gray` | +| `--width` / `--height` | Output image dimensions | +| `--window` / `--hop` | FFT window and hop size | +| `--min-freq` / `--max-freq` | Frequency range filter | +| `--start` / `--duration` | Time slice of the audio | +| `--format` | Output format: `jpg` or `png` | +| `-o` | Output file path | + +## Notes + +- WAV and MP3 are decoded natively; other formats require `ffmpeg` +- Output images can be inspected with `vision_analyze` for automated audio analysis +- Useful for comparing audio outputs, debugging synthesis, or documenting audio processing pipelines diff --git a/website/docs/user-guide/skills/bundled/media/media-youtube-content.md b/website/docs/user-guide/skills/bundled/media/media-youtube-content.md new file mode 100644 index 0000000000..e94c755c98 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/media/media-youtube-content.md @@ -0,0 +1,88 @@ +--- +title: "Youtube Content" +sidebar_label: "Youtube Content" +description: "Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Youtube Content + +Fetch YouTube video transcripts and transform them into structured content (chapters, summaries, threads, blog posts). Use when the user shares a YouTube URL or video link, asks to summarize a video, requests a transcript, or wants to extract and reformat content from any YouTube video. 
+ +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/media/youtube-content` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# YouTube Content Tool + +Extract transcripts from YouTube videos and convert them into useful formats. + +## Setup + +```bash +pip install youtube-transcript-api +``` + +## Helper Script + +`SKILL_DIR` is the directory containing this SKILL.md file. The script accepts any standard YouTube URL format, short links (youtu.be), shorts, embeds, live links, or a raw 11-character video ID. + +```bash +# JSON output with metadata +python3 SKILL_DIR/scripts/fetch_transcript.py "https://youtube.com/watch?v=VIDEO_ID" + +# Plain text (good for piping into further processing) +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --text-only + +# With timestamps +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --timestamps + +# Specific language with fallback chain +python3 SKILL_DIR/scripts/fetch_transcript.py "URL" --language tr,en +``` + +## Output Formats + +After fetching the transcript, format it based on what the user asks for: + +- **Chapters**: Group by topic shifts, output timestamped chapter list +- **Summary**: Concise 5-10 sentence overview of the entire video +- **Chapter summaries**: Chapters with a short paragraph summary for each +- **Thread**: Twitter/X thread format — numbered posts, each under 280 chars +- **Blog post**: Full article with title, sections, and key takeaways +- **Quotes**: Notable quotes with timestamps + +### Example — Chapters Output + +``` +00:00 Introduction — host opens with the problem statement +03:45 Background — prior work and why existing solutions fall short +12:20 Core method — walkthrough of the proposed approach +24:10 Results — benchmark comparisons and key takeaways +31:55 Q&A — 
audience questions on scalability and next steps +``` + +## Workflow + +1. **Fetch** the transcript using the helper script with `--text-only --timestamps`. +2. **Validate**: confirm the output is non-empty and in the expected language. If empty, retry without `--language` to get any available transcript. If still empty, tell the user the video likely has transcripts disabled. +3. **Chunk if needed**: if the transcript exceeds ~50K characters, split into overlapping chunks (~40K with 2K overlap) and summarize each chunk before merging. +4. **Transform** into the requested output format. If the user did not specify a format, default to a summary. +5. **Verify**: re-read the transformed output to check for coherence, correct timestamps, and completeness before presenting. + +## Error Handling + +- **Transcript disabled**: tell the user; suggest they check if subtitles are available on the video page. +- **Private/unavailable video**: relay the error and ask the user to verify the URL. +- **No matching language**: retry without `--language` to fetch any available transcript, then note the actual language to the user. +- **Dependency missing**: run `pip install youtube-transcript-api` and retry. diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md new file mode 100644 index 0000000000..0112f747a3 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-lm-evaluation-harness.md @@ -0,0 +1,507 @@ +--- +title: "Evaluating Llms Harness — Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag)" +sidebar_label: "Evaluating Llms Harness" +description: "Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag)" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. 
Edit the source SKILL.md, not this page. */} + +# Evaluating Llms Harness + +Evaluates LLMs across 60+ academic benchmarks (MMLU, HumanEval, GSM8K, TruthfulQA, HellaSwag). Use when benchmarking model quality, comparing models, reporting academic results, or tracking training progress. Industry standard used by EleutherAI, HuggingFace, and major labs. Supports HuggingFace, vLLM, APIs. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/evaluation/lm-evaluation-harness` | +| Version | `1.0.0` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `lm-eval`, `transformers`, `vllm` | +| Tags | `Evaluation`, `LM Evaluation Harness`, `Benchmarking`, `MMLU`, `HumanEval`, `GSM8K`, `EleutherAI`, `Model Quality`, `Academic Benchmarks`, `Industry Standard` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# lm-evaluation-harness - LLM Benchmarking + +## Quick start + +lm-evaluation-harness evaluates LLMs across 60+ academic benchmarks using standardized prompts and metrics. + +**Installation**: +```bash +pip install lm-eval +``` + +**Evaluate any HuggingFace model**: +```bash +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu,gsm8k,hellaswag \ + --device cuda:0 \ + --batch_size 8 +``` + +**View available tasks**: +```bash +lm_eval --tasks list +``` + +## Common workflows + +### Workflow 1: Standard benchmark evaluation + +Evaluate model on core benchmarks (MMLU, GSM8K, HumanEval). 
+ +Copy this checklist: + +``` +Benchmark Evaluation: +- [ ] Step 1: Choose benchmark suite +- [ ] Step 2: Configure model +- [ ] Step 3: Run evaluation +- [ ] Step 4: Analyze results +``` + +**Step 1: Choose benchmark suite** + +**Core reasoning benchmarks**: +- **MMLU** (Massive Multitask Language Understanding) - 57 subjects, multiple choice +- **GSM8K** - Grade school math word problems +- **HellaSwag** - Common sense reasoning +- **TruthfulQA** - Truthfulness and factuality +- **ARC** (AI2 Reasoning Challenge) - Science questions + +**Code benchmarks**: +- **HumanEval** - Python code generation (164 problems) +- **MBPP** (Mostly Basic Python Problems) - Python coding + +**Standard suite** (recommended for model releases): +```bash +--tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge +``` + +**Step 2: Configure model** + +**HuggingFace model**: +```bash +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,dtype=bfloat16 \ + --tasks mmlu \ + --device cuda:0 \ + --batch_size auto # Auto-detect optimal batch size +``` + +**Quantized model (4-bit/8-bit)**: +```bash +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,load_in_4bit=True \ + --tasks mmlu \ + --device cuda:0 +``` + +**Custom checkpoint**: +```bash +lm_eval --model hf \ + --model_args pretrained=/path/to/my-model,tokenizer=/path/to/tokenizer \ + --tasks mmlu \ + --device cuda:0 +``` + +**Step 3: Run evaluation** + +```bash +# Full MMLU evaluation (57 subjects) +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu \ + --num_fewshot 5 \ # 5-shot evaluation (standard) + --batch_size 8 \ + --output_path results/ \ + --log_samples # Save individual predictions + +# Multiple benchmarks at once +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu,gsm8k,hellaswag,truthfulqa,arc_challenge \ + --num_fewshot 5 \ + --batch_size 8 \ + --output_path results/llama2-7b-eval.json +``` + +**Step 4: Analyze 
results** + +Results saved to `results/llama2-7b-eval.json`: + +```json +{ + "results": { + "mmlu": { + "acc": 0.459, + "acc_stderr": 0.004 + }, + "gsm8k": { + "exact_match": 0.142, + "exact_match_stderr": 0.006 + }, + "hellaswag": { + "acc_norm": 0.765, + "acc_norm_stderr": 0.004 + } + }, + "config": { + "model": "hf", + "model_args": "pretrained=meta-llama/Llama-2-7b-hf", + "num_fewshot": 5 + } +} +``` + +### Workflow 2: Track training progress + +Evaluate checkpoints during training. + +``` +Training Progress Tracking: +- [ ] Step 1: Set up periodic evaluation +- [ ] Step 2: Choose quick benchmarks +- [ ] Step 3: Automate evaluation +- [ ] Step 4: Plot learning curves +``` + +**Step 1: Set up periodic evaluation** + +Evaluate every N training steps: + +```bash +#!/bin/bash +# eval_checkpoint.sh + +CHECKPOINT_DIR=$1 +STEP=$2 + +lm_eval --model hf \ + --model_args pretrained=$CHECKPOINT_DIR/checkpoint-$STEP \ + --tasks gsm8k,hellaswag \ + --num_fewshot 0 \ # 0-shot for speed + --batch_size 16 \ + --output_path results/step-$STEP.json +``` + +**Step 2: Choose quick benchmarks** + +Fast benchmarks for frequent evaluation: +- **HellaSwag**: ~10 minutes on 1 GPU +- **GSM8K**: ~5 minutes +- **PIQA**: ~2 minutes + +Avoid for frequent eval (too slow): +- **MMLU**: ~2 hours (57 subjects) +- **HumanEval**: Requires code execution + +**Step 3: Automate evaluation** + +Integrate with training script: + +```python +# In training loop +if step % eval_interval == 0: + model.save_pretrained(f"checkpoints/step-{step}") + + # Run evaluation + os.system(f"./eval_checkpoint.sh checkpoints step-{step}") +``` + +Or use PyTorch Lightning callbacks: + +```python +from pytorch_lightning import Callback + +class EvalHarnessCallback(Callback): + def on_validation_epoch_end(self, trainer, pl_module): + step = trainer.global_step + checkpoint_path = f"checkpoints/step-{step}" + + # Save checkpoint + trainer.save_checkpoint(checkpoint_path) + + # Run lm-eval + os.system(f"lm_eval --model hf 
--model_args pretrained={checkpoint_path} ...") +``` + +**Step 4: Plot learning curves** + +```python +import json +import matplotlib.pyplot as plt + +# Load all results +steps = [] +mmlu_scores = [] + +for file in sorted(glob.glob("results/step-*.json")): + with open(file) as f: + data = json.load(f) + step = int(file.split("-")[1].split(".")[0]) + steps.append(step) + mmlu_scores.append(data["results"]["mmlu"]["acc"]) + +# Plot +plt.plot(steps, mmlu_scores) +plt.xlabel("Training Step") +plt.ylabel("MMLU Accuracy") +plt.title("Training Progress") +plt.savefig("training_curve.png") +``` + +### Workflow 3: Compare multiple models + +Benchmark suite for model comparison. + +``` +Model Comparison: +- [ ] Step 1: Define model list +- [ ] Step 2: Run evaluations +- [ ] Step 3: Generate comparison table +``` + +**Step 1: Define model list** + +```bash +# models.txt +meta-llama/Llama-2-7b-hf +meta-llama/Llama-2-13b-hf +mistralai/Mistral-7B-v0.1 +microsoft/phi-2 +``` + +**Step 2: Run evaluations** + +```bash +#!/bin/bash +# eval_all_models.sh + +TASKS="mmlu,gsm8k,hellaswag,truthfulqa" + +while read model; do + echo "Evaluating $model" + + # Extract model name for output file + model_name=$(echo $model | sed 's/\//-/g') + + lm_eval --model hf \ + --model_args pretrained=$model,dtype=bfloat16 \ + --tasks $TASKS \ + --num_fewshot 5 \ + --batch_size auto \ + --output_path results/$model_name.json + +done < models.txt +``` + +**Step 3: Generate comparison table** + +```python +import json +import pandas as pd + +models = [ + "meta-llama-Llama-2-7b-hf", + "meta-llama-Llama-2-13b-hf", + "mistralai-Mistral-7B-v0.1", + "microsoft-phi-2" +] + +tasks = ["mmlu", "gsm8k", "hellaswag", "truthfulqa"] + +results = [] +for model in models: + with open(f"results/{model}.json") as f: + data = json.load(f) + row = {"Model": model.replace("-", "/")} + for task in tasks: + # Get primary metric for each task + metrics = data["results"][task] + if "acc" in metrics: + row[task.upper()] = 
f"{metrics['acc']:.3f}" + elif "exact_match" in metrics: + row[task.upper()] = f"{metrics['exact_match']:.3f}" + results.append(row) + +df = pd.DataFrame(results) +print(df.to_markdown(index=False)) +``` + +Output: +``` +| Model | MMLU | GSM8K | HELLASWAG | TRUTHFULQA | +|------------------------|-------|-------|-----------|------------| +| meta-llama/Llama-2-7b | 0.459 | 0.142 | 0.765 | 0.391 | +| meta-llama/Llama-2-13b | 0.549 | 0.287 | 0.801 | 0.430 | +| mistralai/Mistral-7B | 0.626 | 0.395 | 0.812 | 0.428 | +| microsoft/phi-2 | 0.560 | 0.613 | 0.682 | 0.447 | +``` + +### Workflow 4: Evaluate with vLLM (faster inference) + +Use vLLM backend for 5-10x faster evaluation. + +``` +vLLM Evaluation: +- [ ] Step 1: Install vLLM +- [ ] Step 2: Configure vLLM backend +- [ ] Step 3: Run evaluation +``` + +**Step 1: Install vLLM** + +```bash +pip install vllm +``` + +**Step 2: Configure vLLM backend** + +```bash +lm_eval --model vllm \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.8 \ + --tasks mmlu \ + --batch_size auto +``` + +**Step 3: Run evaluation** + +vLLM is 5-10× faster than standard HuggingFace: + +```bash +# Standard HF: ~2 hours for MMLU on 7B model +lm_eval --model hf \ + --model_args pretrained=meta-llama/Llama-2-7b-hf \ + --tasks mmlu \ + --batch_size 8 + +# vLLM: ~15-20 minutes for MMLU on 7B model +lm_eval --model vllm \ + --model_args pretrained=meta-llama/Llama-2-7b-hf,tensor_parallel_size=2 \ + --tasks mmlu \ + --batch_size auto +``` + +## When to use vs alternatives + +**Use lm-evaluation-harness when:** +- Benchmarking models for academic papers +- Comparing model quality across standard tasks +- Tracking training progress +- Reporting standardized metrics (everyone uses same prompts) +- Need reproducible evaluation + +**Use alternatives instead:** +- **HELM** (Stanford): Broader evaluation (fairness, efficiency, calibration) +- **AlpacaEval**: Instruction-following evaluation with LLM 
judges +- **MT-Bench**: Conversational multi-turn evaluation +- **Custom scripts**: Domain-specific evaluation + +## Common issues + +**Issue: Evaluation too slow** + +Use vLLM backend: +```bash +lm_eval --model vllm \ + --model_args pretrained=model-name,tensor_parallel_size=2 +``` + +Or reduce fewshot examples: +```bash +--num_fewshot 0 # Instead of 5 +``` + +Or evaluate subset of MMLU: +```bash +--tasks mmlu_stem # Only STEM subjects +``` + +**Issue: Out of memory** + +Reduce batch size: +```bash +--batch_size 1 # Or --batch_size auto +``` + +Use quantization: +```bash +--model_args pretrained=model-name,load_in_8bit=True +``` + +Enable CPU offloading: +```bash +--model_args pretrained=model-name,device_map=auto,offload_folder=offload +``` + +**Issue: Different results than reported** + +Check fewshot count: +```bash +--num_fewshot 5 # Most papers use 5-shot +``` + +Check exact task name: +```bash +--tasks mmlu # Not mmlu_direct or mmlu_fewshot +``` + +Verify model and tokenizer match: +```bash +--model_args pretrained=model-name,tokenizer=same-model-name +``` + +**Issue: HumanEval not executing code** + +Install execution dependencies: +```bash +pip install human-eval +``` + +Enable code execution: +```bash +lm_eval --model hf \ + --model_args pretrained=model-name \ + --tasks humaneval \ + --allow_code_execution # Required for HumanEval +``` + +## Advanced topics + +**Benchmark descriptions**: See [references/benchmark-guide.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/benchmark-guide.md) for detailed description of all 60+ tasks, what they measure, and interpretation. + +**Custom tasks**: See [references/custom-tasks.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/custom-tasks.md) for creating domain-specific evaluation tasks. 
+ +**API evaluation**: See [references/api-evaluation.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/api-evaluation.md) for evaluating OpenAI, Anthropic, and other API models. + +**Multi-GPU strategies**: See [references/distributed-eval.md](https://github.com/NousResearch/hermes-agent/blob/main/skills/mlops/evaluation/lm-evaluation-harness/references/distributed-eval.md) for data parallel and tensor parallel evaluation. + +## Hardware requirements + +- **GPU**: NVIDIA (CUDA 11.8+), works on CPU (very slow) +- **VRAM**: + - 7B model: 16GB (bf16) or 8GB (8-bit) + - 13B model: 28GB (bf16) or 14GB (8-bit) + - 70B model: Requires multi-GPU or quantization +- **Time** (7B model, single A100): + - HellaSwag: 10 minutes + - GSM8K: 5 minutes + - MMLU (full): 2 hours + - HumanEval: 20 minutes + +## Resources + +- GitHub: https://github.com/EleutherAI/lm-evaluation-harness +- Docs: https://github.com/EleutherAI/lm-evaluation-harness/tree/main/docs +- Task library: 60+ tasks including MMLU, GSM8K, HumanEval, TruthfulQA, HellaSwag, ARC, WinoGrande, etc. +- Leaderboard: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard (uses this harness) diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md new file mode 100644 index 0000000000..db8c4d4d71 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-evaluation-weights-and-biases.md @@ -0,0 +1,608 @@ +--- +title: "Weights And Biases" +sidebar_label: "Weights And Biases" +description: "Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - collaborative MLOps platform" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page.
*/} + +# Weights And Biases + +Track ML experiments with automatic logging, visualize training in real-time, optimize hyperparameters with sweeps, and manage model registry with W&B - collaborative MLOps platform + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/evaluation/weights-and-biases` | +| Version | `1.0.0` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `wandb` | +| Tags | `MLOps`, `Weights And Biases`, `WandB`, `Experiment Tracking`, `Hyperparameter Tuning`, `Model Registry`, `Collaboration`, `Real-Time Visualization`, `PyTorch`, `TensorFlow`, `HuggingFace` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Weights & Biases: ML Experiment Tracking & MLOps + +## When to Use This Skill + +Use Weights & Biases (W&B) when you need to: +- **Track ML experiments** with automatic metric logging +- **Visualize training** in real-time dashboards +- **Compare runs** across hyperparameters and configurations +- **Optimize hyperparameters** with automated sweeps +- **Manage model registry** with versioning and lineage +- **Collaborate on ML projects** with team workspaces +- **Track artifacts** (datasets, models, code) with lineage + +**Users**: 200,000+ ML practitioners | **GitHub Stars**: 10.5k+ | **Integrations**: 100+ + +## Installation + +```bash +# Install W&B +pip install wandb + +# Login (creates API key) +wandb login + +# Or set API key programmatically +export WANDB_API_KEY=your_api_key_here +``` + +## Quick Start + +### Basic Experiment Tracking + +```python +import wandb + +# Initialize a run +run = wandb.init( + project="my-project", + config={ + "learning_rate": 0.001, + "epochs": 10, + "batch_size": 32, + "architecture": "ResNet50" + } +) + +# Training loop +for epoch in range(run.config.epochs): + # 
Your training code + train_loss = train_epoch() + val_loss = validate() + + # Log metrics + wandb.log({ + "epoch": epoch, + "train/loss": train_loss, + "val/loss": val_loss, + "train/accuracy": train_acc, + "val/accuracy": val_acc + }) + +# Finish the run +wandb.finish() +``` + +### With PyTorch + +```python +import torch +import wandb + +# Initialize +wandb.init(project="pytorch-demo", config={ + "lr": 0.001, + "epochs": 10 +}) + +# Access config +config = wandb.config + +# Training loop +for epoch in range(config.epochs): + for batch_idx, (data, target) in enumerate(train_loader): + # Forward pass + output = model(data) + loss = criterion(output, target) + + # Backward pass + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # Log every 100 batches + if batch_idx % 100 == 0: + wandb.log({ + "loss": loss.item(), + "epoch": epoch, + "batch": batch_idx + }) + +# Save model +torch.save(model.state_dict(), "model.pth") +wandb.save("model.pth") # Upload to W&B + +wandb.finish() +``` + +## Core Concepts + +### 1. Projects and Runs + +**Project**: Collection of related experiments +**Run**: Single execution of your training script + +```python +# Create/use project +run = wandb.init( + project="image-classification", + name="resnet50-experiment-1", # Optional run name + tags=["baseline", "resnet"], # Organize with tags + notes="First baseline run" # Add notes +) + +# Each run has unique ID +print(f"Run ID: {run.id}") +print(f"Run URL: {run.url}") +``` + +### 2. 
Configuration Tracking + +Track hyperparameters automatically: + +```python +config = { + # Model architecture + "model": "ResNet50", + "pretrained": True, + + # Training params + "learning_rate": 0.001, + "batch_size": 32, + "epochs": 50, + "optimizer": "Adam", + + # Data params + "dataset": "ImageNet", + "augmentation": "standard" +} + +wandb.init(project="my-project", config=config) + +# Access config during training +lr = wandb.config.learning_rate +batch_size = wandb.config.batch_size +``` + +### 3. Metric Logging + +```python +# Log scalars +wandb.log({"loss": 0.5, "accuracy": 0.92}) + +# Log multiple metrics +wandb.log({ + "train/loss": train_loss, + "train/accuracy": train_acc, + "val/loss": val_loss, + "val/accuracy": val_acc, + "learning_rate": current_lr, + "epoch": epoch +}) + +# Log with custom x-axis +wandb.log({"loss": loss}, step=global_step) + +# Log media (images, audio, video) +wandb.log({"examples": [wandb.Image(img) for img in images]}) + +# Log histograms +wandb.log({"gradients": wandb.Histogram(gradients)}) + +# Log tables +table = wandb.Table(columns=["id", "prediction", "ground_truth"]) +wandb.log({"predictions": table}) +``` + +### 4. Model Checkpointing + +```python +import torch +import wandb + +# Save model checkpoint +checkpoint = { + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': loss, +} + +torch.save(checkpoint, 'checkpoint.pth') + +# Upload to W&B +wandb.save('checkpoint.pth') + +# Or use Artifacts (recommended) +artifact = wandb.Artifact('model', type='model') +artifact.add_file('checkpoint.pth') +wandb.log_artifact(artifact) +``` + +## Hyperparameter Sweeps + +Automatically search for optimal hyperparameters. 
+ +### Define Sweep Configuration + +```python +sweep_config = { + 'method': 'bayes', # or 'grid', 'random' + 'metric': { + 'name': 'val/accuracy', + 'goal': 'maximize' + }, + 'parameters': { + 'learning_rate': { + 'distribution': 'log_uniform', + 'min': 1e-5, + 'max': 1e-1 + }, + 'batch_size': { + 'values': [16, 32, 64, 128] + }, + 'optimizer': { + 'values': ['adam', 'sgd', 'rmsprop'] + }, + 'dropout': { + 'distribution': 'uniform', + 'min': 0.1, + 'max': 0.5 + } + } +} + +# Initialize sweep +sweep_id = wandb.sweep(sweep_config, project="my-project") +``` + +### Define Training Function + +```python +def train(): + # Initialize run + run = wandb.init() + + # Access sweep parameters + lr = wandb.config.learning_rate + batch_size = wandb.config.batch_size + optimizer_name = wandb.config.optimizer + + # Build model with sweep config + model = build_model(wandb.config) + optimizer = get_optimizer(optimizer_name, lr) + + # Training loop + for epoch in range(NUM_EPOCHS): + train_loss = train_epoch(model, optimizer, batch_size) + val_acc = validate(model) + + # Log metrics + wandb.log({ + "train/loss": train_loss, + "val/accuracy": val_acc + }) + +# Run sweep +wandb.agent(sweep_id, function=train, count=50) # Run 50 trials +``` + +### Sweep Strategies + +```python +# Grid search - exhaustive +sweep_config = { + 'method': 'grid', + 'parameters': { + 'lr': {'values': [0.001, 0.01, 0.1]}, + 'batch_size': {'values': [16, 32, 64]} + } +} + +# Random search +sweep_config = { + 'method': 'random', + 'parameters': { + 'lr': {'distribution': 'uniform', 'min': 0.0001, 'max': 0.1}, + 'dropout': {'distribution': 'uniform', 'min': 0.1, 'max': 0.5} + } +} + +# Bayesian optimization (recommended) +sweep_config = { + 'method': 'bayes', + 'metric': {'name': 'val/loss', 'goal': 'minimize'}, + 'parameters': { + 'lr': {'distribution': 'log_uniform', 'min': 1e-5, 'max': 1e-1} + } +} +``` + +## Artifacts + +Track datasets, models, and other files with lineage. 
+ +### Log Artifacts + +```python +# Create artifact +artifact = wandb.Artifact( + name='training-dataset', + type='dataset', + description='ImageNet training split', + metadata={'size': '1.2M images', 'split': 'train'} +) + +# Add files +artifact.add_file('data/train.csv') +artifact.add_dir('data/images/') + +# Log artifact +wandb.log_artifact(artifact) +``` + +### Use Artifacts + +```python +# Download and use artifact +run = wandb.init(project="my-project") + +# Download artifact +artifact = run.use_artifact('training-dataset:latest') +artifact_dir = artifact.download() + +# Use the data +data = load_data(f"{artifact_dir}/train.csv") +``` + +### Model Registry + +```python +# Log model as artifact +model_artifact = wandb.Artifact( + name='resnet50-model', + type='model', + metadata={'architecture': 'ResNet50', 'accuracy': 0.95} +) + +model_artifact.add_file('model.pth') +wandb.log_artifact(model_artifact, aliases=['best', 'production']) + +# Link to model registry +run.link_artifact(model_artifact, 'model-registry/production-models') +``` + +## Integration Examples + +### HuggingFace Transformers + +```python +from transformers import Trainer, TrainingArguments +import wandb + +# Initialize W&B +wandb.init(project="hf-transformers") + +# Training arguments with W&B +training_args = TrainingArguments( + output_dir="./results", + report_to="wandb", # Enable W&B logging + run_name="bert-finetuning", + logging_steps=100, + save_steps=500 +) + +# Trainer automatically logs to W&B +trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset +) + +trainer.train() +``` + +### PyTorch Lightning + +```python +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import WandbLogger +import wandb + +# Create W&B logger +wandb_logger = WandbLogger( + project="lightning-demo", + log_model=True # Log model checkpoints +) + +# Use with Trainer +trainer = Trainer( + logger=wandb_logger, + max_epochs=10 +) + 
+trainer.fit(model, datamodule=dm) +``` + +### Keras/TensorFlow + +```python +import wandb +from wandb.keras import WandbCallback + +# Initialize +wandb.init(project="keras-demo") + +# Add callback +model.fit( + x_train, y_train, + validation_data=(x_val, y_val), + epochs=10, + callbacks=[WandbCallback()] # Auto-logs metrics +) +``` + +## Visualization & Analysis + +### Custom Charts + +```python +# Log custom visualizations +import matplotlib.pyplot as plt + +fig, ax = plt.subplots() +ax.plot(x, y) +wandb.log({"custom_plot": wandb.Image(fig)}) + +# Log confusion matrix +wandb.log({"conf_mat": wandb.plot.confusion_matrix( + probs=None, + y_true=ground_truth, + preds=predictions, + class_names=class_names +)}) +``` + +### Reports + +Create shareable reports in W&B UI: +- Combine runs, charts, and text +- Markdown support +- Embeddable visualizations +- Team collaboration + +## Best Practices + +### 1. Organize with Tags and Groups + +```python +wandb.init( + project="my-project", + tags=["baseline", "resnet50", "imagenet"], + group="resnet-experiments", # Group related runs + job_type="train" # Type of job +) +``` + +### 2. Log Everything Relevant + +```python +# Log system metrics +wandb.log({ + "gpu/util": gpu_utilization, + "gpu/memory": gpu_memory_used, + "cpu/util": cpu_utilization +}) + +# Log code version +wandb.log({"git_commit": git_commit_hash}) + +# Log data splits +wandb.log({ + "data/train_size": len(train_dataset), + "data/val_size": len(val_dataset) +}) +``` + +### 3. Use Descriptive Names + +```python +# ✅ Good: Descriptive run names +wandb.init( + project="nlp-classification", + name="bert-base-lr0.001-bs32-epoch10" +) + +# ❌ Bad: Generic names +wandb.init(project="nlp", name="run1") +``` + +### 4. 
Save Important Artifacts + +```python +# Save final model +artifact = wandb.Artifact('final-model', type='model') +artifact.add_file('model.pth') +wandb.log_artifact(artifact) + +# Save predictions for analysis +predictions_table = wandb.Table( + columns=["id", "input", "prediction", "ground_truth"], + data=predictions_data +) +wandb.log({"predictions": predictions_table}) +``` + +### 5. Use Offline Mode for Unstable Connections + +```python +import os + +# Enable offline mode +os.environ["WANDB_MODE"] = "offline" + +wandb.init(project="my-project") +# ... your code ... + +# Sync later +# wandb sync +``` + +## Team Collaboration + +### Share Runs + +```python +# Runs are automatically shareable via URL +run = wandb.init(project="team-project") +print(f"Share this URL: {run.url}") +``` + +### Team Projects + +- Create team account at wandb.ai +- Add team members +- Set project visibility (private/public) +- Use team-level artifacts and model registry + +## Pricing + +- **Free**: Unlimited public projects, 100GB storage +- **Academic**: Free for students/researchers +- **Teams**: $50/seat/month, private projects, unlimited storage +- **Enterprise**: Custom pricing, on-prem options + +## Resources + +- **Documentation**: https://docs.wandb.ai +- **GitHub**: https://github.com/wandb/wandb (10.5k+ stars) +- **Examples**: https://github.com/wandb/examples +- **Community**: https://wandb.ai/community +- **Discord**: https://wandb.me/discord + +## See Also + +- `references/sweeps.md` - Comprehensive hyperparameter optimization guide +- `references/artifacts.md` - Data and model versioning patterns +- `references/integrations.md` - Framework-specific examples diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md b/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md new file mode 100644 index 0000000000..27ab41b5e2 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-huggingface-hub.md @@ -0,0 +1,99 @@ +--- 
+title: "Huggingface Hub" +sidebar_label: "Huggingface Hub" +description: "Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Space..." +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Huggingface Hub + +Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/huggingface-hub` | +| Version | `1.0.0` | +| Author | Hugging Face | +| License | MIT | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# Hugging Face CLI (`hf`) Reference Guide + +The `hf` command is the modern command-line interface for interacting with the Hugging Face Hub, providing tools to manage repositories, models, datasets, and Spaces. + +> **IMPORTANT:** The `hf` command replaces the now deprecated `huggingface-cli` command. + +## Quick Start +* **Installation:** `curl -LsSf https://hf.co/cli/install.sh | bash -s` +* **Help:** Use `hf --help` to view all available functions and real-world examples. +* **Authentication:** Recommended via `HF_TOKEN` environment variable or the `--token` flag. + +--- + +## Core Commands + +### General Operations +* `hf download REPO_ID`: Download files from the Hub. +* `hf upload REPO_ID`: Upload files/folders (recommended for single-commit). +* `hf upload-large-folder REPO_ID LOCAL_PATH`: Recommended for resumable uploads of large directories. +* `hf sync`: Sync files between a local directory and a bucket. +* `hf env` / `hf version`: View environment and version details. 
+ +### Authentication (`hf auth`) +* `login` / `logout`: Manage sessions using tokens from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). +* `list` / `switch`: Manage and toggle between multiple stored access tokens. +* `whoami`: Identify the currently logged-in account. + +### Repository Management (`hf repos`) +* `create` / `delete`: Create or permanently remove repositories. +* `duplicate`: Clone a model, dataset, or Space to a new ID. +* `move`: Transfer a repository between namespaces. +* `branch` / `tag`: Manage Git-like references. +* `delete-files`: Remove specific files using patterns. + +--- + +## Specialized Hub Interactions + +### Datasets & Models +* **Datasets:** `hf datasets list`, `info`, and `parquet` (list parquet URLs). +* **SQL Queries:** `hf datasets sql SQL` — Execute raw SQL via DuckDB against dataset parquet URLs. +* **Models:** `hf models list` and `info`. +* **Papers:** `hf papers list` — View daily papers. + +### Discussions & Pull Requests (`hf discussions`) +* Manage the lifecycle of Hub contributions: `list`, `create`, `info`, `comment`, `close`, `reopen`, and `rename`. +* `diff`: View changes in a PR. +* `merge`: Finalize pull requests. + +### Infrastructure & Compute +* **Endpoints:** Deploy and manage Inference Endpoints (`deploy`, `pause`, `resume`, `scale-to-zero`, `catalog`). +* **Jobs:** Run compute tasks on HF infrastructure. Includes `hf jobs uv` for running Python scripts with inline dependencies and `stats` for resource monitoring. +* **Spaces:** Manage interactive apps. Includes `dev-mode` and `hot-reload` for Python files without full restarts. + +### Storage & Automation +* **Buckets:** Full S3-like bucket management (`create`, `cp`, `mv`, `rm`, `sync`). +* **Cache:** Manage local storage with `list`, `prune` (remove detached revisions), and `verify` (checksum checks). +* **Webhooks:** Automate workflows by managing Hub webhooks (`create`, `watch`, `enable`/`disable`). 
+* **Collections:** Organize Hub items into collections (`add-item`, `update`, `list`). + +--- + +## Advanced Usage & Tips + +### Global Flags +* `--format json`: Produces machine-readable output for automation. +* `-q` / `--quiet`: Limits output to IDs only. + +### Extensions & Skills +* **Extensions:** Extend CLI functionality via GitHub repositories using `hf extensions install REPO_ID`. +* **Skills:** Manage AI assistant skills with `hf skills add`. diff --git a/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md new file mode 100644 index 0000000000..19f08067f8 --- /dev/null +++ b/website/docs/user-guide/skills/bundled/mlops/mlops-inference-llama-cpp.md @@ -0,0 +1,266 @@ +--- +title: "Llama Cpp — llama" +sidebar_label: "Llama Cpp" +description: "llama" +--- + +{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} + +# Llama Cpp + +llama.cpp local GGUF inference + HF Hub model discovery. + +## Skill metadata + +| | | +|---|---| +| Source | Bundled (installed by default) | +| Path | `skills/mlops/inference/llama-cpp` | +| Version | `2.1.2` | +| Author | Orchestra Research | +| License | MIT | +| Dependencies | `llama-cpp-python>=0.2.0` | +| Tags | `llama.cpp`, `GGUF`, `Quantization`, `Hugging Face Hub`, `CPU Inference`, `Apple Silicon`, `Edge Deployment`, `AMD GPUs`, `Intel GPUs`, `NVIDIA`, `URL-first` | + +## Reference: full SKILL.md + +:::info +The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. +::: + +# llama.cpp + GGUF + +Use this skill for local GGUF inference, quant selection, or Hugging Face repo discovery for llama.cpp. 
+ +## When to use + +- Run local models on CPU, Apple Silicon, CUDA, ROCm, or Intel GPUs +- Find the right GGUF for a specific Hugging Face repo +- Build a `llama-server` or `llama-cli` command from the Hub +- Search the Hub for models that already support llama.cpp +- Enumerate available `.gguf` files and sizes for a repo +- Decide between Q4/Q5/Q6/IQ variants for the user's RAM or VRAM + +## Model Discovery workflow + +Prefer URL workflows before asking for `hf`, Python, or custom scripts. + +1. Search for candidate repos on the Hub: + - Base: `https://huggingface.co/models?apps=llama.cpp&sort=trending` + - Add `search=` for a model family + - Add `num_parameters=min:0,max:24B` or similar when the user has size constraints +2. Open the repo with the llama.cpp local-app view: + - `https://huggingface.co/?local-app=llama.cpp` +3. Treat the local-app snippet as the source of truth when it is visible: + - copy the exact `llama-server` or `llama-cli` command + - report the recommended quant exactly as HF shows it +4. Read the same `?local-app=llama.cpp` URL as page text or HTML and extract the section under `Hardware compatibility`: + - prefer its exact quant labels and sizes over generic tables + - keep repo-specific labels such as `UD-Q4_K_M` or `IQ4_NL_XL` + - if that section is not visible in the fetched page source, say so and fall back to the tree API plus generic quant guidance +5. Query the tree API to confirm what actually exists: + - `https://huggingface.co/api/models//tree/main?recursive=true` + - keep entries where `type` is `file` and `path` ends with `.gguf` + - use `path` and `size` as the source of truth for filenames and byte sizes + - separate quantized checkpoints from `mmproj-*.gguf` projector files and `BF16/` shard files + - use `https://huggingface.co//tree/main` only as a human fallback +6. 
If the local-app snippet is not text-visible, reconstruct the command from the repo plus the chosen quant: + - shorthand quant selection: `llama-server -hf :` + - exact-file fallback: `llama-server --hf-repo --hf-file ` +7. Only suggest conversion from Transformers weights if the repo does not already expose GGUF files. + +## Quick start + +### Install llama.cpp + +```bash +# macOS / Linux (simplest) +brew install llama.cpp +``` + +```bash +winget install llama.cpp +``` + +```bash +git clone https://github.com/ggml-org/llama.cpp +cd llama.cpp +cmake -B build +cmake --build build --config Release +``` + +### Run directly from the Hugging Face Hub + +```bash +llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 +``` + +```bash +llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 +``` + +### Run an exact GGUF file from the Hub + +Use this when the tree API shows custom file naming or the exact HF snippet is missing. + +```bash +llama-server \ + --hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \ + --hf-file Phi-3-mini-4k-instruct-q4.gguf \ + -c 4096 +``` + +### OpenAI-compatible server check + +```bash +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + {"role": "user", "content": "Write a limerick about Python exceptions"} + ] + }' +``` + +## Python bindings (llama-cpp-python) + +`pip install llama-cpp-python` (CUDA: `CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir`; Metal: `CMAKE_ARGS="-DGGML_METAL=on" ...`). 
+ +### Basic generation + +```python +from llama_cpp import Llama + +llm = Llama( + model_path="./model-q4_k_m.gguf", + n_ctx=4096, + n_gpu_layers=35, # 0 for CPU, 99 to offload everything + n_threads=8, +) + +out = llm("What is machine learning?", max_tokens=256, temperature=0.7) +print(out["choices"][0]["text"]) +``` + +### Chat + streaming + +```python +llm = Llama( + model_path="./model-q4_k_m.gguf", + n_ctx=4096, + n_gpu_layers=35, + chat_format="llama-3", # or "chatml", "mistral", etc. +) + +resp = llm.create_chat_completion( + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is Python?"}, + ], + max_tokens=256, +) +print(resp["choices"][0]["message"]["content"]) + +# Streaming +for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True): + print(chunk["choices"][0]["text"], end="", flush=True) +``` + +### Embeddings + +```python +llm = Llama(model_path="./model-q4_k_m.gguf", embedding=True, n_gpu_layers=35) +vec = llm.embed("This is a test sentence.") +print(f"Embedding dimension: {len(vec)}") +``` + +You can also load a GGUF straight from the Hub: + +```python +llm = Llama.from_pretrained( + repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF", + filename="*Q4_K_M.gguf", + n_gpu_layers=35, +) +``` + +## Choosing a quant + +Use the Hub page first, generic heuristics second. + +- Prefer the exact quant that HF marks as compatible for the user's hardware profile. +- For general chat, start with `Q4_K_M`. +- For code or technical work, prefer `Q5_K_M` or `Q6_K` if memory allows. +- For very tight RAM budgets, consider `Q3_K_M`, `IQ` variants, or `Q2` variants only if the user explicitly prioritizes fit over quality. +- For multimodal repos, mention `mmproj-*.gguf` separately. The projector is not the main model file. +- Do not normalize repo-native labels. If the page says `UD-Q4_K_M`, report `UD-Q4_K_M`. 
+ +## Extracting available GGUFs from a repo + +When the user asks what GGUFs exist, return: + +- filename +- file size +- quant label +- whether it is a main model or an auxiliary projector + +Ignore unless requested: + +- README +- BF16 shard files +- imatrix blobs or calibration artifacts + +Use the tree API for this step: + +- `https://huggingface.co/api/models//tree/main?recursive=true` + +For a repo like `unsloth/Qwen3.6-35B-A3B-GGUF`, the local-app page can show quant chips such as `UD-Q4_K_M`, `UD-Q5_K_M`, `UD-Q6_K`, and `Q8_0`, while the tree API exposes exact file paths such as `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf` and `Qwen3.6-35B-A3B-Q8_0.gguf` with byte sizes. Use the tree API to turn a quant label into an exact filename. + +## Search patterns + +Use these URL shapes directly: + +```text +https://huggingface.co/models?apps=llama.cpp&sort=trending +https://huggingface.co/models?search=&apps=llama.cpp&sort=trending +https://huggingface.co/models?search=&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending +https://huggingface.co/?local-app=llama.cpp +https://huggingface.co/api/models//tree/main?recursive=true +https://huggingface.co//tree/main +``` + +## Output format + +When answering discovery requests, prefer a compact structured result like: + +```text +Repo: +Recommended quant from HF: