From 857b543543ab5faeef5ba851c3878fe289493ad4 Mon Sep 17 00:00:00 2001 From: Arihant Sethia Date: Wed, 15 Apr 2026 06:12:35 +0000 Subject: [PATCH 001/455] feat: add skill analytics to the dashboard Expose skill usage in analytics so the dashboard and insights output can show which skills the agent loads and manages over time. This adds skill aggregation to the InsightsEngine by extracting `skill_view` and `skill_manage` calls from assistant tool_calls, computing per-skill totals, and including the results in both terminal and gateway insights formatting. It also extends the dashboard analytics API and Analytics page to render a Top Skills table. Terminology is aligned with the skills docs: - Agent Loaded = `skill_view` events - Agent Managed = `skill_manage` actions Architecture: - agent/insights.py collects and aggregates per-skill usage - hermes_cli/web_server.py exposes `skills` on `/api/analytics/usage` - web/src/lib/api.ts adds analytics skill response types - web/src/pages/AnalyticsPage.tsx renders the Top Skills table - web/src/i18n/{en,zh}.ts updates user-facing labels Tests: - tests/agent/test_insights.py covers skill aggregation and formatting - tests/hermes_cli/test_web_server.py covers analytics API contract including the `skills` payload - verified with `cd web && npm run build` Files changed: - agent/insights.py - hermes_cli/web_server.py - tests/agent/test_insights.py - tests/hermes_cli/test_web_server.py - web/src/i18n/en.ts - web/src/i18n/types.ts - web/src/i18n/zh.ts - web/src/lib/api.ts - web/src/pages/AnalyticsPage.tsx --- agent/insights.py | 162 ++++++++++++++++++++++++++++ hermes_cli/web_server.py | 20 +++- tests/agent/test_insights.py | 52 +++++++++ tests/hermes_cli/test_web_server.py | 83 +++++++++++++- web/src/i18n/en.ts | 5 + web/src/i18n/types.ts | 5 + web/src/i18n/zh.ts | 5 + web/src/lib/api.ts | 20 ++++ web/src/pages/AnalyticsPage.tsx | 53 ++++++++- 9 files changed, 399 insertions(+), 6 deletions(-) diff --git a/agent/insights.py 
b/agent/insights.py index a0929c9126..8972f94a83 100644 --- a/agent/insights.py +++ b/agent/insights.py @@ -124,6 +124,7 @@ class InsightsEngine: # Gather raw data sessions = self._get_sessions(cutoff, source) tool_usage = self._get_tool_usage(cutoff, source) + skill_usage = self._get_skill_usage(cutoff, source) message_stats = self._get_message_stats(cutoff, source) if not sessions: @@ -135,6 +136,15 @@ class InsightsEngine: "models": [], "platforms": [], "tools": [], + "skills": { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + }, "activity": {}, "top_sessions": [], } @@ -144,6 +154,7 @@ class InsightsEngine: models = self._compute_model_breakdown(sessions) platforms = self._compute_platform_breakdown(sessions) tools = self._compute_tool_breakdown(tool_usage) + skills = self._compute_skill_breakdown(skill_usage) activity = self._compute_activity_patterns(sessions) top_sessions = self._compute_top_sessions(sessions) @@ -156,6 +167,7 @@ class InsightsEngine: "models": models, "platforms": platforms, "tools": tools, + "skills": skills, "activity": activity, "top_sessions": top_sessions, } @@ -284,6 +296,82 @@ class InsightsEngine: for name, count in tool_counts.most_common() ] + def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]: + """Extract per-skill usage from assistant tool calls.""" + skill_counts: Dict[str, Dict[str, Any]] = {} + + if source: + cursor = self._conn.execute( + """SELECT m.tool_calls, m.timestamp + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE s.started_at >= ? AND s.source = ? + AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""", + (cutoff, source), + ) + else: + cursor = self._conn.execute( + """SELECT m.tool_calls, m.timestamp + FROM messages m + JOIN sessions s ON s.id = m.session_id + WHERE s.started_at >= ? 
+ AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""", + (cutoff,), + ) + + for row in cursor.fetchall(): + try: + calls = row["tool_calls"] + if isinstance(calls, str): + calls = json.loads(calls) + if not isinstance(calls, list): + continue + except (json.JSONDecodeError, TypeError): + continue + + timestamp = row["timestamp"] + for call in calls: + if not isinstance(call, dict): + continue + func = call.get("function", {}) + tool_name = func.get("name") + if tool_name not in {"skill_view", "skill_manage"}: + continue + + args = func.get("arguments") + if isinstance(args, str): + try: + args = json.loads(args) + except (json.JSONDecodeError, TypeError): + continue + if not isinstance(args, dict): + continue + + skill_name = args.get("name") + if not isinstance(skill_name, str) or not skill_name.strip(): + continue + + entry = skill_counts.setdefault( + skill_name, + { + "skill": skill_name, + "view_count": 0, + "manage_count": 0, + "last_used_at": None, + }, + ) + if tool_name == "skill_view": + entry["view_count"] += 1 + else: + entry["manage_count"] += 1 + + if timestamp is not None and ( + entry["last_used_at"] is None or timestamp > entry["last_used_at"] + ): + entry["last_used_at"] = timestamp + + return list(skill_counts.values()) + def _get_message_stats(self, cutoff: float, source: str = None) -> Dict: """Get aggregate message statistics.""" if source: @@ -475,6 +563,46 @@ class InsightsEngine: }) return result + def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]: + """Process per-skill usage into summary + ranked list.""" + total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0 + total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0 + total_skill_actions = total_skill_loads + total_skill_edits + + top_skills = [] + for skill in skill_usage: + total_count = skill["view_count"] + skill["manage_count"] + percentage = (total_count / total_skill_actions * 100) if 
total_skill_actions else 0 + top_skills.append({ + "skill": skill["skill"], + "view_count": skill["view_count"], + "manage_count": skill["manage_count"], + "total_count": total_count, + "percentage": percentage, + "last_used_at": skill.get("last_used_at"), + }) + + top_skills.sort( + key=lambda s: ( + s["total_count"], + s["view_count"], + s["manage_count"], + s["last_used_at"] or 0, + s["skill"], + ), + reverse=True, + ) + + return { + "summary": { + "total_skill_loads": total_skill_loads, + "total_skill_edits": total_skill_edits, + "total_skill_actions": total_skill_actions, + "distinct_skills_used": len(skill_usage), + }, + "top_skills": top_skills, + } + def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict: """Analyze activity patterns by day of week and hour.""" day_counts = Counter() # 0=Monday ... 6=Sunday @@ -682,6 +810,28 @@ class InsightsEngine: lines.append(f" ... and {len(report['tools']) - 15} more tools") lines.append("") + # Skill usage + skills = report.get("skills", {}) + top_skills = skills.get("top_skills", []) + if top_skills: + lines.append(" 🧠 Top Skills") + lines.append(" " + "─" * 56) + lines.append(f" {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}") + for skill in top_skills[:10]: + last_used = "—" + if skill.get("last_used_at"): + last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d") + lines.append( + f" {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}" + ) + summary = skills.get("summary", {}) + lines.append( + f" Distinct skills: {summary.get('distinct_skills_used', 0)} " + f"Loads: {summary.get('total_skill_loads', 0):,} " + f"Edits: {summary.get('total_skill_edits', 0):,}" + ) + lines.append("") + # Activity patterns act = report.get("activity", {}) if act.get("by_day"): @@ -774,6 +924,18 @@ class InsightsEngine: lines.append(f" {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)") lines.append("") + skills = report.get("skills", {}) + 
if skills.get("top_skills"): + lines.append("**🧠 Top Skills:**") + for skill in skills["top_skills"][:5]: + suffix = "" + if skill.get("last_used_at"): + suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}" + lines.append( + f" {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}" + ) + lines.append("") + # Activity summary act = report.get("activity", {}) if act.get("busiest_day") and act.get("busiest_hour"): diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 22265faa51..f18afbf866 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -1977,6 +1977,8 @@ async def update_config_raw(body: RawConfigUpdate): @app.get("/api/analytics/usage") async def get_usage_analytics(days: int = 30): from hermes_state import SessionDB + from agent.insights import InsightsEngine + db = SessionDB() try: cutoff = time.time() - (days * 86400) @@ -2016,8 +2018,24 @@ async def get_usage_analytics(days: int = 30): FROM sessions WHERE started_at > ? """, (cutoff,)) totals = dict(cur3.fetchone()) + insights_report = InsightsEngine(db).generate(days=days) + skills = insights_report.get("skills", { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + }) - return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days} + return { + "daily": daily, + "by_model": by_model, + "totals": totals, + "period_days": days, + "skills": skills, + } finally: db.close() diff --git a/tests/agent/test_insights.py b/tests/agent/test_insights.py index 885e34fec0..7ca8a9792f 100644 --- a/tests/agent/test_insights.py +++ b/tests/agent/test_insights.py @@ -51,6 +51,12 @@ def populated_db(db): db.append_message("s1", role="assistant", content="I found the bug. 
Let me fix it.", tool_calls=[{"function": {"name": "patch"}}]) db.append_message("s1", role="tool", content="patched successfully", tool_name="patch") + db.append_message( + "s1", + role="assistant", + content="Let me load the PR workflow skill.", + tool_calls=[{"function": {"name": "skill_view", "arguments": '{"name":"github-pr-workflow"}'}}], + ) db.append_message("s1", role="user", content="Thanks!") db.append_message("s1", role="assistant", content="You're welcome!") @@ -88,6 +94,12 @@ def populated_db(db): db.append_message("s3", role="assistant", content="And search files", tool_calls=[{"function": {"name": "search_files"}}]) db.append_message("s3", role="tool", content="found stuff", tool_name="search_files") + db.append_message( + "s3", + role="assistant", + content="Load the debugging skill.", + tool_calls=[{"function": {"name": "skill_view", "arguments": '{"name":"systematic-debugging"}'}}], + ) # Session 4: Discord, same model as s1, ended, 1 day ago db.create_session( @@ -100,6 +112,15 @@ def populated_db(db): db.update_token_counts("s4", input_tokens=10000, output_tokens=5000) db.append_message("s4", role="user", content="Quick question") db.append_message("s4", role="assistant", content="Sure, go ahead") + db.append_message( + "s4", + role="assistant", + content="Load and update GitHub skills.", + tool_calls=[ + {"function": {"name": "skill_view", "arguments": '{"name":"github-pr-workflow"}'}}, + {"function": {"name": "skill_manage", "arguments": '{"name":"github-code-review"}'}}, + ], + ) # Session 5: Old session, 45 days ago (should be excluded from 30-day window) db.create_session( @@ -332,6 +353,35 @@ class TestInsightsPopulated: total_pct = sum(t["percentage"] for t in tools) assert total_pct == pytest.approx(100.0, abs=0.1) + def test_skill_breakdown(self, populated_db): + engine = InsightsEngine(populated_db) + report = engine.generate(days=30) + skills = report["skills"] + + assert skills["summary"]["distinct_skills_used"] == 3 + assert 
skills["summary"]["total_skill_loads"] == 3 + assert skills["summary"]["total_skill_edits"] == 1 + assert skills["summary"]["total_skill_actions"] == 4 + + top_skill = skills["top_skills"][0] + assert top_skill["skill"] == "github-pr-workflow" + assert top_skill["view_count"] == 2 + assert top_skill["manage_count"] == 0 + assert top_skill["total_count"] == 2 + assert top_skill["last_used_at"] is not None + + def test_skill_breakdown_respects_days_filter(self, populated_db): + engine = InsightsEngine(populated_db) + report = engine.generate(days=3) + skills = report["skills"] + + assert skills["summary"]["distinct_skills_used"] == 2 + assert skills["summary"]["total_skill_loads"] == 2 + assert skills["summary"]["total_skill_edits"] == 1 + + skill_names = [s["skill"] for s in skills["top_skills"]] + assert "systematic-debugging" not in skill_names + def test_activity_patterns(self, populated_db): engine = InsightsEngine(populated_db) report = engine.generate(days=30) @@ -401,6 +451,7 @@ class TestTerminalFormatting: assert "Overview" in text assert "Models Used" in text assert "Top Tools" in text + assert "Top Skills" in text assert "Activity Patterns" in text assert "Notable Sessions" in text @@ -467,6 +518,7 @@ class TestGatewayFormatting: text = engine.format_gateway(report) assert "$" in text + assert "Top Skills" in text assert "Est. 
cost" in text def test_gateway_format_shows_models(self, populated_db): diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index 365e3d0fe1..fa7ce62b25 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -101,14 +101,19 @@ class TestWebServerEndpoints: """Test the FastAPI REST endpoints using Starlette TestClient.""" @pytest.fixture(autouse=True) - def _setup_test_client(self): - """Create a TestClient — import is deferred to avoid requiring fastapi.""" + def _setup_test_client(self, monkeypatch, _isolate_hermes_home): + """Create a TestClient and isolate the state DB under the test HERMES_HOME.""" try: from starlette.testclient import TestClient except ImportError: pytest.skip("fastapi/starlette not installed") + import hermes_state + from hermes_constants import get_hermes_home from hermes_cli.web_server import app, _SESSION_TOKEN + + monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db") + self.client = TestClient(app) self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}" @@ -511,12 +516,18 @@ class TestNewEndpoints: """Tests for session detail, logs, cron, skills, tools, raw config, analytics.""" @pytest.fixture(autouse=True) - def _setup(self): + def _setup(self, monkeypatch, _isolate_hermes_home): try: from starlette.testclient import TestClient except ImportError: pytest.skip("fastapi/starlette not installed") + + import hermes_state + from hermes_constants import get_hermes_home from hermes_cli.web_server import app, _SESSION_TOKEN + + monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db") + self.client = TestClient(app) self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}" @@ -692,8 +703,74 @@ class TestNewEndpoints: assert "daily" in data assert "by_model" in data assert "totals" in data + assert "skills" in data assert isinstance(data["daily"], list) assert "total_sessions" in data["totals"] + 
assert data["skills"] == { + "summary": { + "total_skill_loads": 0, + "total_skill_edits": 0, + "total_skill_actions": 0, + "distinct_skills_used": 0, + }, + "top_skills": [], + } + + def test_analytics_usage_includes_skill_breakdown(self): + from hermes_state import SessionDB + + db = SessionDB() + try: + db.create_session( + session_id="skills-analytics-test", + source="cli", + model="anthropic/claude-sonnet-4", + ) + db.update_token_counts( + "skills-analytics-test", + input_tokens=120, + output_tokens=45, + ) + db.append_message( + "skills-analytics-test", + role="assistant", + content="Loading and updating skills.", + tool_calls=[ + { + "function": { + "name": "skill_view", + "arguments": '{"name":"github-pr-workflow"}', + } + }, + { + "function": { + "name": "skill_manage", + "arguments": '{"name":"github-code-review"}', + } + }, + ], + ) + finally: + db.close() + + resp = self.client.get("/api/analytics/usage?days=7") + assert resp.status_code == 200 + + data = resp.json() + assert data["skills"]["summary"] == { + "total_skill_loads": 1, + "total_skill_edits": 1, + "total_skill_actions": 2, + "distinct_skills_used": 2, + } + assert len(data["skills"]["top_skills"]) == 2 + + top_skill = data["skills"]["top_skills"][0] + assert top_skill["skill"] == "github-pr-workflow" + assert top_skill["view_count"] == 1 + assert top_skill["manage_count"] == 0 + assert top_skill["total_count"] == 1 + assert top_skill["last_used_at"] is not None def test_session_token_endpoint_removed(self): """GET /api/auth/session-token no longer exists.""" diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index 3bf693f218..b15be08a4c 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -115,6 +115,11 @@ export const en: Translations = { dailyTokenUsage: "Daily Token Usage", dailyBreakdown: "Daily Breakdown", perModelBreakdown: "Per-Model Breakdown", + topSkills: "Top Skills", + skill: "Skill", + loads: "Agent Loaded", + edits: "Agent Managed", + lastUsed: "Last Used", input: 
"Input", output: "Output", total: "Total", diff --git a/web/src/i18n/types.ts b/web/src/i18n/types.ts index 34813c68f3..3996fd1f0b 100644 --- a/web/src/i18n/types.ts +++ b/web/src/i18n/types.ts @@ -120,6 +120,11 @@ export interface Translations { dailyTokenUsage: string; dailyBreakdown: string; perModelBreakdown: string; + topSkills: string; + skill: string; + loads: string; + edits: string; + lastUsed: string; input: string; output: string; total: string; diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index 18cb3ee38e..c4e334a885 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -115,6 +115,11 @@ export const zh: Translations = { dailyTokenUsage: "每日 Token 用量", dailyBreakdown: "每日明细", perModelBreakdown: "模型用量明细", + topSkills: "常用技能", + skill: "技能", + loads: "代理加载", + edits: "代理管理", + lastUsed: "最近使用", input: "输入", output: "输出", total: "总计", diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index e610439938..b82c7808c1 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -283,6 +283,22 @@ export interface AnalyticsModelEntry { sessions: number; } +export interface AnalyticsSkillEntry { + skill: string; + view_count: number; + manage_count: number; + total_count: number; + percentage: number; + last_used_at: number | null; +} + +export interface AnalyticsSkillsSummary { + total_skill_loads: number; + total_skill_edits: number; + total_skill_actions: number; + distinct_skills_used: number; +} + export interface AnalyticsResponse { daily: AnalyticsDailyEntry[]; by_model: AnalyticsModelEntry[]; @@ -295,6 +311,10 @@ export interface AnalyticsResponse { total_actual_cost: number; total_sessions: number; }; + skills: { + summary: AnalyticsSkillsSummary; + top_skills: AnalyticsSkillEntry[]; + }; } export interface CronJob { diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx index 2f947cbb6a..c9efd70ac7 100644 --- a/web/src/pages/AnalyticsPage.tsx +++ b/web/src/pages/AnalyticsPage.tsx @@ -1,12 +1,14 @@ import { 
useEffect, useState, useCallback } from "react"; import { BarChart3, + Brain, Cpu, Hash, TrendingUp, } from "lucide-react"; import { api } from "@/lib/api"; -import type { AnalyticsResponse, AnalyticsDailyEntry, AnalyticsModelEntry } from "@/lib/api"; +import type { AnalyticsResponse, AnalyticsDailyEntry, AnalyticsModelEntry, AnalyticsSkillEntry } from "@/lib/api"; +import { timeAgo } from "@/lib/utils"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Button } from "@/components/ui/button"; import { useI18n } from "@/i18n"; @@ -227,6 +229,52 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) { ); } +function SkillTable({ skills }: { skills: AnalyticsSkillEntry[] }) { + const { t } = useI18n(); + if (skills.length === 0) return null; + + return ( + + +
+ + {t.analytics.topSkills} +
+
+ +
+ + + + + + + + + + + + {skills.map((skill) => ( + + + + + + + + ))} + +
{t.analytics.skill}{t.analytics.loads}{t.analytics.edits}{t.analytics.total}{t.analytics.lastUsed}
+ {skill.skill} + {skill.view_count}{skill.manage_count}{skill.total_count} + {skill.last_used_at ? timeAgo(skill.last_used_at) : "—"} +
+
+
+
+ ); +} + export default function AnalyticsPage() { const [days, setDays] = useState(30); const [data, setData] = useState(null); @@ -310,10 +358,11 @@ export default function AnalyticsPage() { {/* Tables */} + )} - {data && data.daily.length === 0 && data.by_model.length === 0 && ( + {data && data.daily.length === 0 && data.by_model.length === 0 && data.skills.top_skills.length === 0 && (
From bf5d7462ba33028b34cbbf500ca268b8684a0e9c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 18 Apr 2026 22:30:10 -0700 Subject: [PATCH 002/455] fix(tui): reject history-mutating commands while session is running (#12416) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes silent data loss in the TUI when /undo, /compress, /retry, or rollback.restore runs during an in-flight agent turn. The version- guard at prompt.submit:1449 would fail the version check and silently skip writing the agent's result — UI showed the assistant reply but DB / backend history never received it, causing UI↔backend desync that persisted across session resume. Changes (tui_gateway/server.py): - session.undo, session.compress, /retry, rollback.restore (full-history only — file-scoped rollbacks still allowed): reject with 4009 when session.running is True. Users can /interrupt first. - prompt.submit: on history_version mismatch (defensive backstop), attach a 'warning' field to message.complete and log to stderr instead of silently dropping the agent's output. The UI can surface the warning to the user; the operator can spot it in logs. Tests (tests/test_tui_gateway_server.py): 6 new cases. - test_session_undo_rejects_while_running - test_session_undo_allowed_when_idle (regression guard) - test_session_compress_rejects_while_running - test_rollback_restore_rejects_full_history_while_running - test_prompt_submit_history_version_mismatch_surfaces_warning - test_prompt_submit_history_version_match_persists_normally (regression) Validated: against unpatched server.py the three 'rejects_while_running' tests fail and the version-mismatch test fails (no 'warning' field). With the fix, all 6 pass, all 33 tests in the file pass, 74 TUI tests in total pass. Live E2E against the live Python environment confirmed all 5 patches present and guards enforce 4009 exactly as designed. 
--- tests/test_tui_gateway_server.py | 166 +++++++++++++++++++++++++++++++ tui_gateway/server.py | 43 +++++++- 2 files changed, 208 insertions(+), 1 deletion(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 35bc3f449b..8831efb896 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -546,3 +546,169 @@ def test_session_info_includes_mcp_servers(monkeypatch): assert info["mcp_servers"] == fake_status + +# --------------------------------------------------------------------------- +# History-mutating commands must reject while session.running is True. +# Without these guards, prompt.submit's post-run history write either +# clobbers the mutation (version matches) or silently drops the agent's +# output (version mismatch) — both produce UI<->backend state desync. +# --------------------------------------------------------------------------- + + +def test_session_undo_rejects_while_running(): + """Fix for TUI silent-drop #1: /undo must not mutate history + while the agent is mid-turn — would either clobber the undo or + cause prompt.submit to silently drop the agent's response.""" + server._sessions["sid"] = _session(running=True, history=[ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "hello"}, + ]) + try: + resp = server.handle_request( + {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}} + ) + assert resp.get("error"), "session.undo should reject while running" + assert resp["error"]["code"] == 4009 + assert "session busy" in resp["error"]["message"] + # History must be unchanged + assert len(server._sessions["sid"]["history"]) == 2 + finally: + server._sessions.pop("sid", None) + + +def test_session_undo_allowed_when_idle(): + """Regression guard: when not running, /undo still works.""" + server._sessions["sid"] = _session(running=False, history=[ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "hello"}, + ]) + try: + resp 
= server.handle_request( + {"id": "1", "method": "session.undo", "params": {"session_id": "sid"}} + ) + assert resp.get("result"), f"got error: {resp.get('error')}" + assert resp["result"]["removed"] == 2 + assert server._sessions["sid"]["history"] == [] + finally: + server._sessions.pop("sid", None) + + +def test_session_compress_rejects_while_running(monkeypatch): + server._sessions["sid"] = _session(running=True) + try: + resp = server.handle_request( + {"id": "1", "method": "session.compress", "params": {"session_id": "sid"}} + ) + assert resp.get("error") + assert resp["error"]["code"] == 4009 + finally: + server._sessions.pop("sid", None) + + +def test_rollback_restore_rejects_full_history_while_running(monkeypatch): + """Full-history rollback must reject; file-scoped rollback still allowed.""" + server._sessions["sid"] = _session(running=True) + try: + resp = server.handle_request( + {"id": "1", "method": "rollback.restore", "params": {"session_id": "sid", "hash": "abc"}} + ) + assert resp.get("error"), "full-history rollback should reject while running" + assert resp["error"]["code"] == 4009 + finally: + server._sessions.pop("sid", None) + + +def test_prompt_submit_history_version_mismatch_surfaces_warning(monkeypatch): + """Fix for TUI silent-drop #2: the defensive backstop at prompt.submit + must attach a 'warning' to message.complete when history was + mutated externally during the turn (instead of silently dropping + the agent's output).""" + # Agent bumps history_version itself mid-run to simulate an external + # mutation slipping past the guards. + session_ref = {"s": None} + + class _RacyAgent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + # Simulate: something external bumped history_version + # while we were running. 
+ with session_ref["s"]["history_lock"]: + session_ref["s"]["history_version"] += 1 + return {"final_response": "agent reply", "messages": [{"role": "assistant", "content": "agent reply"}]} + + class _ImmediateThread: + def __init__(self, target=None, daemon=None): + self._target = target + + def start(self): + self._target() + + server._sessions["sid"] = _session(agent=_RacyAgent()) + session_ref["s"] = server._sessions["sid"] + emits: list[tuple] = [] + try: + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_get_usage", lambda _a: {}) + monkeypatch.setattr(server, "render_message", lambda _t, _c: "") + monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a)) + + resp = server.handle_request( + {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}} + ) + assert resp.get("result"), f"got error: {resp.get('error')}" + + # History should NOT contain the agent's output (version mismatch) + assert server._sessions["sid"]["history"] == [] + + # message.complete must carry a 'warning' so the UI / operator + # knows the output was not persisted. 
+ complete_calls = [a for a in emits if a[0] == "message.complete"] + assert len(complete_calls) == 1 + _, _, payload = complete_calls[0] + assert "warning" in payload, ( + "message.complete must include a 'warning' field on " + "history_version mismatch — otherwise the UI silently " + "shows output that was never persisted" + ) + assert "not saved" in payload["warning"].lower() or "changed" in payload["warning"].lower() + finally: + server._sessions.pop("sid", None) + + +def test_prompt_submit_history_version_match_persists_normally(monkeypatch): + """Regression guard: the backstop does not affect the happy path.""" + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + return {"final_response": "reply", "messages": [{"role": "assistant", "content": "reply"}]} + + class _ImmediateThread: + def __init__(self, target=None, daemon=None): + self._target = target + + def start(self): + self._target() + + server._sessions["sid"] = _session(agent=_Agent()) + emits: list[tuple] = [] + try: + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_get_usage", lambda _a: {}) + monkeypatch.setattr(server, "render_message", lambda _t, _c: "") + monkeypatch.setattr(server, "_emit", lambda *a: emits.append(a)) + + resp = server.handle_request( + {"id": "1", "method": "prompt.submit", "params": {"session_id": "sid", "text": "hi"}} + ) + assert resp.get("result") + + # History was written + assert server._sessions["sid"]["history"] == [{"role": "assistant", "content": "reply"}] + assert server._sessions["sid"]["history_version"] == 1 + + # No warning should be attached + complete_calls = [a for a in emits if a[0] == "message.complete"] + assert len(complete_calls) == 1 + _, _, payload = complete_calls[0] + assert "warning" not in payload + finally: + server._sessions.pop("sid", None) + diff --git a/tui_gateway/server.py b/tui_gateway/server.py index d86db00066..c58c65763e 100644 --- 
a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1224,6 +1224,13 @@ def _(rid, params: dict) -> dict: session, err = _sess(params, rid) if err: return err + # Reject during an in-flight turn. If we mutated history while + # the agent thread is running, prompt.submit's post-run history + # write would either clobber the undo (version matches) or + # silently drop the agent's output (version mismatch, see below). + # Neither is what the user wants — make them /interrupt first. + if session.get("running"): + return _err(rid, 4009, "session busy — /interrupt the current turn before /undo") removed = 0 with session["history_lock"]: history = session.get("history", []) @@ -1243,6 +1250,8 @@ def _(rid, params: dict) -> dict: session, err = _sess(params, rid) if err: return err + if session.get("running"): + return _err(rid, 4009, "session busy — /interrupt the current turn before /compress") try: with session["history_lock"]: removed, usage = _compress_session_history(session, str(params.get("focus_topic", "") or "").strip()) @@ -1443,12 +1452,33 @@ def _(rid, params: dict) -> dict: ) last_reasoning = None + status_note = None if isinstance(result, dict): if isinstance(result.get("messages"), list): with session["history_lock"]: - if int(session.get("history_version", 0)) == history_version: + current_version = int(session.get("history_version", 0)) + if current_version == history_version: session["history"] = result["messages"] session["history_version"] = history_version + 1 + else: + # History mutated externally during the turn + # (undo/compress/retry/rollback now guard on + # session.running, but this is the defensive + # backstop for any path that slips past). + # Surface the desync rather than silently + # dropping the agent's output — the UI can + # show the response and warn that it was + # not persisted. 
+ print( + f"[tui_gateway] prompt.submit: history_version mismatch " + f"(expected={history_version} current={current_version}) — " + f"agent output NOT written to session history", + file=sys.stderr, + ) + status_note = ( + "History changed during this turn — the response above is visible " + "but was not saved to session history." + ) raw = result.get("final_response", "") status = "interrupted" if result.get("interrupted") else "error" if result.get("error") else "complete" lr = result.get("last_reasoning") @@ -1461,6 +1491,8 @@ def _(rid, params: dict) -> dict: payload = {"text": raw, "usage": _get_usage(agent), "status": status} if last_reasoning: payload["reasoning"] = last_reasoning + if status_note: + payload["warning"] = status_note rendered = render_message(raw, cols) if rendered: payload["rendered"] = rendered @@ -2168,6 +2200,8 @@ def _(rid, params: dict) -> dict: if name == "retry": if not session: return _err(rid, 4001, "no active session to retry") + if session.get("running"): + return _err(rid, 4009, "session busy — /interrupt the current turn before /retry") history = session.get("history", []) if not history: return _err(rid, 4018, "no previous user message to retry") @@ -2578,6 +2612,13 @@ def _(rid, params: dict) -> dict: file_path = params.get("file_path", "") if not target: return _err(rid, 4014, "hash required") + # Full-history rollback mutates session history. Rejecting during + # an in-flight turn prevents prompt.submit from silently dropping + # the agent's output (version mismatch path) or clobbering the + # rollback (version-matches path). A file-scoped rollback only + # touches disk, so we allow it. 
+ if not file_path and session.get("running"): + return _err(rid, 4009, "session busy — /interrupt the current turn before full rollback.restore") try: def go(mgr, cwd): resolved = _resolve_checkpoint_hash(mgr, cwd, target) From 78586ce036baab8c294e55a1ef0a279c47a447ed Mon Sep 17 00:00:00 2001 From: Erosika Date: Sat, 18 Apr 2026 09:35:42 -0400 Subject: [PATCH 003/455] =?UTF-8?q?fix(honcho):=20dialectic=20lifecycle=20?= =?UTF-8?q?=E2=80=94=20defaults,=20retry,=20prewarm=20consumption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several correctness and cost-safety fixes to the Honcho dialectic path after a multi-turn investigation surfaced a chain of silent failures: - dialecticCadence default flipped 3 → 1. PR #10619 changed this from 1 to 3 for cost, but existing installs with no explicit config silently went from per-turn dialectic to every-3-turns on upgrade. Restores pre-#10619 behavior; 3+ remains available for cost-conscious setups. Docs + wizard + status output updated to match. - Session-start prewarm now consumed. Previously fired a .chat() on init whose result landed in HonchoSessionManager._dialectic_cache and was never read — pop_dialectic_result had zero call sites. Turn 1 paid for a duplicate synchronous dialectic. Prewarm now writes directly to the plugin's _prefetch_result via _prefetch_lock so turn 1 consumes it with no extra call. - Prewarm is now dialecticDepth-aware. A single-pass prewarm can return weak output on cold peers; the multi-pass audit/reconcile cycle is exactly the case dialecticDepth was built for. Prewarm now runs the full configured depth in the background. - Silent dialectic failure no longer burns the cadence window. _last_dialectic_turn now advances only when the result is non-empty. Empty result → next eligible turn retries immediately instead of waiting the full cadence gap. - Thread pile-up guard. 
queue_prefetch skips when a prior dialectic thread is still in-flight, preventing stacked races on _prefetch_result. - First-turn sync timeout is recoverable. Previously on timeout the background thread's result was stored in a dead local list. Now the thread writes into _prefetch_result under lock so the next turn picks it up. - Cadence gate applies uniformly. At cadence=1 the old "cadence > 1" guard let first-turn sync + same-turn queue_prefetch both fire. Gate now always applies. - Restored query-length reasoning-level scaling, dropped in 9a0ab34c. Scales dialecticReasoningLevel up on longer queries (+1 at ≥120 chars, +2 at ≥400), clamped at reasoningLevelCap. Two new config keys: `reasoningHeuristic` (bool, default true) and `reasoningLevelCap` (string, default "high"; previously parsed but never enforced). Respects dialecticDepthLevels and proportional lighter-early passes. - Restored short-prompt skip, dropped in ef7f3156. One-word acknowledgements ("ok", "y", "thanks") and slash commands bypass both injection and dialectic fire. - Purged dead code in session.py: prefetch_dialectic, _dialectic_cache, set_dialectic_result, pop_dialectic_result — all unused after prewarm refactor. Tests: 542 passed across honcho_plugin/, agent/test_memory_provider.py, and run_agent/test_run_agent.py. 
New coverage: - TestTrivialPromptHeuristic (classifier + prefetch/queue skip) - TestDialecticCadenceAdvancesOnSuccess (empty-result retry, pile-up guard) - TestSessionStartDialecticPrewarm (prewarm consumed, sync fallback) - TestReasoningHeuristic (length bumps, cap clamp, interaction with depth) - TestDialecticLifecycleSmoke (end-to-end 8-turn session walk) --- .../autonomous-ai-agents/honcho/SKILL.md | 6 +- plugins/memory/honcho/__init__.py | 199 ++++++-- plugins/memory/honcho/cli.py | 8 +- plugins/memory/honcho/client.py | 18 + plugins/memory/honcho/session.py | 46 +- tests/agent/test_memory_provider.py | 2 - tests/honcho_plugin/test_async_memory.py | 7 - tests/honcho_plugin/test_session.py | 478 +++++++++++++++++- website/docs/user-guide/features/honcho.md | 4 +- .../user-guide/features/memory-providers.md | 4 +- 10 files changed, 665 insertions(+), 107 deletions(-) diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index c60d2c6356..5d03a54985 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic API calls | +| `dialecticCadence` | `1` | Min turns between dialectic API calls | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | -Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn. +Higher cadence values reduce API calls and cost. `dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost. 
### Depth (how many) @@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic LLM calls | +| `dialecticCadence` | `1` | Min turns between dialectic LLM calls | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index ca44ce6019..ac0f60279a 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -206,10 +206,11 @@ class HonchoMemoryProvider(MemoryProvider): self._turn_count = 0 self._injection_frequency = "every-turn" # or "first-turn" self._context_cadence = 1 # minimum turns between context API calls - self._dialectic_cadence = 3 # minimum turns between dialectic API calls + self._dialectic_cadence = 1 # minimum turns between dialectic API calls self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3) self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels - self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high" + self._reasoning_heuristic: bool = True # scale base level by query length + self._reasoning_level_cap: str = "high" # ceiling for auto-selected level self._last_context_turn = -999 self._last_dialectic_turn = -999 @@ -305,12 +306,12 @@ class HonchoMemoryProvider(MemoryProvider): raw = cfg.raw or {} self._injection_frequency = 
raw.get("injectionFrequency", "every-turn") self._context_cadence = int(raw.get("contextCadence", 1)) - self._dialectic_cadence = int(raw.get("dialecticCadence", 3)) + self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3)) self._dialectic_depth_levels = cfg.dialectic_depth_levels - cap = raw.get("reasoningLevelCap") - if cap and cap in ("minimal", "low", "medium", "high"): - self._reasoning_level_cap = cap + self._reasoning_heuristic = cfg.reasoning_heuristic + if cfg.reasoning_level_cap in self._LEVEL_ORDER: + self._reasoning_level_cap = cfg.reasoning_level_cap except Exception as e: logger.debug("Honcho cost-awareness config parse error: %s", e) @@ -391,14 +392,42 @@ class HonchoMemoryProvider(MemoryProvider): except Exception as e: logger.debug("Honcho memory file migration skipped: %s", e) - # ----- B7: Pre-warming context at init ----- + # ----- B7: Pre-warming at init ----- + # Context prewarm: warms peer.context() cache (base layer), consumed + # via pop_context_result() in prefetch(). + # Dialectic prewarm: fires a depth-aware cycle against the plugin's + # own _prefetch_result so turn 1 can consume it directly. Without this + # the first-turn sync path pays for a duplicate .chat() — and at + # depth>1 a single-pass session-start dialectic often returns weak + # output that multi-pass audit/reconciliation is meant to catch. if self._recall_mode in ("context", "hybrid"): try: self._manager.prefetch_context(self._session_key) - self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?") - logger.debug("Honcho pre-warm threads started for session: %s", self._session_key) except Exception as e: - logger.debug("Honcho pre-warm failed: %s", e) + logger.debug("Honcho context prewarm failed: %s", e) + + _prewarm_query = ( + "Summarize what you know about this user. " + "Focus on preferences, current projects, and working style." 
+ ) + + def _prewarm_dialectic() -> None: + try: + r = self._run_dialectic_depth(_prewarm_query) + except Exception as exc: + logger.debug("Honcho dialectic prewarm failed: %s", exc) + return + if r and r.strip(): + with self._prefetch_lock: + self._prefetch_result = r + # Treat prewarm as turn 0 so cadence gating starts clean. + self._last_dialectic_turn = 0 + + self._prefetch_thread = threading.Thread( + target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" + ) + self._prefetch_thread.start() + logger.debug("Honcho pre-warm started for session: %s", self._session_key) def _ensure_session(self) -> bool: """Lazily initialize the Honcho session (for tools-only mode). @@ -526,6 +555,11 @@ class HonchoMemoryProvider(MemoryProvider): if self._injection_frequency == "first-turn" and self._turn_count > 1: return "" + # Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal, + # so injecting user context there just burns tokens and can derail the reply. + if self._is_trivial_prompt(query): + return "" + parts = [] # ----- Layer 1: Base context (representation + card) ----- @@ -560,37 +594,46 @@ class HonchoMemoryProvider(MemoryProvider): # On the very first turn, no queue_prefetch() has run yet so the # dialectic result is empty. Run with a bounded timeout so a slow # Honcho connection doesn't block the first response indefinitely. - # On timeout the result is skipped and queue_prefetch() will pick it - # up at the next cadence-allowed turn. + # On timeout we let the thread keep running and write its result into + # _prefetch_result under the lock, so the next turn picks it up. + # + # Skip if the session-start prewarm already filled _prefetch_result — + # firing another .chat() would be duplicate work. 
+ with self._prefetch_lock: + _prewarm_landed = bool(self._prefetch_result) + if _prewarm_landed and self._last_dialectic_turn == -999: + self._last_dialectic_turn = self._turn_count + if self._last_dialectic_turn == -999 and query: _first_turn_timeout = ( self._config.timeout if self._config and self._config.timeout else 8.0 ) - _result_holder: list[str] = [] + _fired_at = self._turn_count def _run_first_turn() -> None: try: - _result_holder.append(self._run_dialectic_depth(query)) + r = self._run_dialectic_depth(query) except Exception as exc: logger.debug("Honcho first-turn dialectic failed: %s", exc) - - _t = threading.Thread(target=_run_first_turn, daemon=True) - _t.start() - _t.join(timeout=_first_turn_timeout) - if not _t.is_alive(): - first_turn_dialectic = _result_holder[0] if _result_holder else "" - if first_turn_dialectic and first_turn_dialectic.strip(): + return + if r and r.strip(): with self._prefetch_lock: - self._prefetch_result = first_turn_dialectic - self._last_dialectic_turn = self._turn_count - else: + self._prefetch_result = r + # Only advance cadence on a non-empty result so failures + # don't burn a 3-turn cooldown on nothing. + self._last_dialectic_turn = _fired_at + + self._prefetch_thread = threading.Thread( + target=_run_first_turn, daemon=True, name="honcho-prefetch-first" + ) + self._prefetch_thread.start() + self._prefetch_thread.join(timeout=_first_turn_timeout) + if self._prefetch_thread.is_alive(): logger.debug( - "Honcho first-turn dialectic timed out (%.1fs) — " - "will inject at next cadence-allowed turn", + "Honcho first-turn dialectic still running after %.1fs — " + "will surface on next turn", _first_turn_timeout, ) - # Don't update _last_dialectic_turn: queue_prefetch() will - # retry at the next cadence-allowed turn via the async path. 
if self._prefetch_thread and self._prefetch_thread.is_alive(): self._prefetch_thread.join(timeout=3.0) @@ -641,6 +684,10 @@ class HonchoMemoryProvider(MemoryProvider): if self._recall_mode == "tools": return + # Trivial prompts don't warrant either a context refresh or a dialectic call. + if self._is_trivial_prompt(query): + return + # ----- Context refresh (base layer) — independent cadence ----- if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence: self._last_context_turn = self._turn_count @@ -650,23 +697,35 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho context prefetch failed: %s", e) # ----- Dialectic prefetch (supplement layer) ----- - # B5: cadence check — skip if too soon since last dialectic call - if self._dialectic_cadence > 1: - if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: - logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", - self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) - return + # Guard against thread pile-up: if a prior dialectic is still in flight, + # let it finish instead of stacking races on _prefetch_result. + if self._prefetch_thread and self._prefetch_thread.is_alive(): + logger.debug("Honcho dialectic prefetch skipped: prior thread still running") + return - self._last_dialectic_turn = self._turn_count + # B5: cadence check — skip if too soon since last *successful* dialectic call. + # The gate applies uniformly (including cadence=1): "every turn" means once + # per turn, not twice on the same turn when first-turn sync already fired. 
+ if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: + logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", + self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) + return + + # Advance cadence only on a non-empty result — otherwise a silent failure + # (empty dialectic, transient API error) would burn the full cadence window + # before the next retry, making it look like dialectic "never fires again". + _fired_at = self._turn_count def _run(): try: result = self._run_dialectic_depth(query) - if result and result.strip(): - with self._prefetch_lock: - self._prefetch_result = result except Exception as e: logger.debug("Honcho prefetch failed: %s", e) + return + if result and result.strip(): + with self._prefetch_lock: + self._prefetch_result = result + self._last_dialectic_turn = _fired_at self._prefetch_thread = threading.Thread( target=_run, daemon=True, name="honcho-prefetch" @@ -692,11 +751,42 @@ class HonchoMemoryProvider(MemoryProvider): _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max") - def _resolve_pass_level(self, pass_idx: int) -> str: + # Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior). + # Promoted to class constants so tests can override without widening the + # config surface. Bump to config fields only if real use shows they're needed. + _HEURISTIC_LENGTH_MEDIUM = 120 + _HEURISTIC_LENGTH_HIGH = 400 + + def _apply_reasoning_heuristic(self, base: str, query: str) -> str: + """Scale `base` up by query length, clamped at reasoning_level_cap. + + Char-count heuristic: +1 at >=120 chars, +2 at >=400. Ceiling is + reasoning_level_cap (default 'high' — 'max' is reserved for + explicit tool-path selection). 
+ """ + if not self._reasoning_heuristic or not query: + return base + if base not in self._LEVEL_ORDER: + return base + n = len(query) + if n < self._HEURISTIC_LENGTH_MEDIUM: + bump = 0 + elif n < self._HEURISTIC_LENGTH_HIGH: + bump = 1 + else: + bump = 2 + base_idx = self._LEVEL_ORDER.index(base) + cap_idx = self._LEVEL_ORDER.index(self._reasoning_level_cap) + return self._LEVEL_ORDER[min(base_idx + bump, cap_idx)] + + def _resolve_pass_level(self, pass_idx: int, query: str = "") -> str: """Resolve reasoning level for a given pass index. - Uses dialecticDepthLevels if configured, otherwise proportional - defaults relative to dialecticReasoningLevel. + Precedence: + 1. dialecticDepthLevels (explicit per-pass) — wins absolutely + 2. _PROPORTIONAL_LEVELS table (depth>1 lighter-early passes) + 3. Base level = dialecticReasoningLevel, optionally scaled by the + reasoning heuristic when the mapping falls through to 'base' """ if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels): return self._dialectic_depth_levels[pass_idx] @@ -704,7 +794,7 @@ class HonchoMemoryProvider(MemoryProvider): base = (self._config.dialectic_reasoning_level if self._config else "low") mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx)) if mapping is None or mapping == "base": - return base + return self._apply_reasoning_heuristic(base, query) return mapping def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str: @@ -791,7 +881,7 @@ class HonchoMemoryProvider(MemoryProvider): break prompt = self._build_dialectic_prompt(i, results, is_cold) - level = self._resolve_pass_level(i) + level = self._resolve_pass_level(i, query=query) logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s", self._dialectic_depth, i, level, is_cold) @@ -808,6 +898,29 @@ class HonchoMemoryProvider(MemoryProvider): return r return "" + # Prompts that carry no semantic signal — trivial acknowledgements, slash + # 
commands, empty input. Skipping injection here saves tokens and prevents + # stale user-model context from derailing one-word replies. + _TRIVIAL_PROMPT_RE = re.compile( + r'^(yes|no|ok|okay|sure|thanks|thank you|y|n|yep|nope|yeah|nah|' + r'continue|go ahead|do it|proceed|got it|cool|nice|great|done|next|lgtm|k)$', + re.IGNORECASE, + ) + + @classmethod + def _is_trivial_prompt(cls, text: str) -> bool: + """Return True if the prompt is too trivial to warrant context injection.""" + if not text: + return True + stripped = text.strip() + if not stripped: + return True + if stripped.startswith("/"): + return True + if cls._TRIVIAL_PROMPT_RE.match(stripped): + return True + return False + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: """Track turn count for cadence and injection_frequency logic.""" self._turn_count = turn_number diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 536d34002d..478bf39d8a 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -460,17 +460,17 @@ def cmd_setup(args) -> None: pass # keep current # --- 7b. Dialectic cadence --- - current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3") + current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.") + print(" 1 = every turn (default), 3+ = sparse (cost-saving).") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) if val >= 1: hermes_host["dialecticCadence"] = val except (ValueError, TypeError): - hermes_host["dialecticCadence"] = 3 + hermes_host["dialecticCadence"] = 1 # --- 8. 
Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") @@ -636,7 +636,7 @@ def cmd_status(args) -> None: print(f" Recall mode: {hcfg.recall_mode}") print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens") raw = getattr(hcfg, "raw", None) or {} - dialectic_cadence = raw.get("dialecticCadence") or 3 + dialectic_cadence = raw.get("dialecticCadence") or 1 print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Write freq: {hcfg.write_frequency}") diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 2474d3a2b6..136b1e60dc 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -251,6 +251,14 @@ class HonchoClientConfig: # matching dialectic_depth length. When None, uses proportional defaults # derived from dialectic_reasoning_level. dialectic_depth_levels: list[str] | None = None + # Reasoning-level heuristic for auto-injected dialectic calls. When true, + # scales the base level up on longer queries (restored from pre-9a0ab34c + # behavior; see plugins/memory/honcho/__init__.py for thresholds). + # Never auto-selects a level above reasoning_level_cap. + reasoning_heuristic: bool = True + # Ceiling for heuristic-selected reasoning level. "max" is reserved for + # explicit tool-path selection; default "high" matches the old behavior.
+ reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) message_max_chars: int = 25000 @@ -446,6 +454,16 @@ class HonchoClientConfig: raw.get("dialecticDepthLevels"), depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")), ), + reasoning_heuristic=_resolve_bool( + host_block.get("reasoningHeuristic"), + raw.get("reasoningHeuristic"), + default=True, + ), + reasoning_level_cap=( + host_block.get("reasoningLevelCap") + or raw.get("reasoningLevelCap") + or "high" + ), message_max_chars=int( host_block.get("messageMaxChars") or raw.get("messageMaxChars") diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py index fd91ee3b3b..7344b517e4 100644 --- a/plugins/memory/honcho/session.py +++ b/plugins/memory/honcho/session.py @@ -100,9 +100,11 @@ class HonchoSessionManager: self._write_frequency = write_frequency self._turn_counter: int = 0 - # Prefetch caches: session_key → last result (consumed once per turn) + # Prefetch cache: session_key → last context result (consumed once per turn). + # Dialectic results are cached on the plugin side (HonchoMemoryProvider + # ._prefetch_result) so session-start prewarm and turn-driven fires share + # one source of truth; see __init__.py _do_session_init for the prewarm. self._context_cache: dict[str, dict] = {} - self._dialectic_cache: dict[str, str] = {} self._prefetch_cache_lock = threading.Lock() self._dialectic_reasoning_level: str = ( config.dialectic_reasoning_level if config else "low" @@ -499,8 +501,8 @@ class HonchoSessionManager: Query Honcho's dialectic endpoint about a peer. Runs an LLM on Honcho's backend against the target peer's full - representation. Higher latency than context() — call async via - prefetch_dialectic() to avoid blocking the response. + representation. 
Higher latency than context() — callers run this in + a background thread (see HonchoMemoryProvider) to avoid blocking. Args: session_key: The session key to query against. @@ -555,42 +557,6 @@ class HonchoSessionManager: logger.warning("Honcho dialectic query failed: %s", e) return "" - def prefetch_dialectic(self, session_key: str, query: str) -> None: - """ - Fire a dialectic_query in a background thread, caching the result. - - Non-blocking. The result is available via pop_dialectic_result() - on the next call (typically the following turn). Reasoning level - is selected dynamically based on query complexity. - - Args: - session_key: The session key to query against. - query: The user's current message, used as the query. - """ - def _run(): - result = self.dialectic_query(session_key, query) - if result: - self.set_dialectic_result(session_key, result) - - t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) - t.start() - - def set_dialectic_result(self, session_key: str, result: str) -> None: - """Store a prefetched dialectic result in a thread-safe way.""" - if not result: - return - with self._prefetch_cache_lock: - self._dialectic_cache[session_key] = result - - def pop_dialectic_result(self, session_key: str) -> str: - """ - Return and clear the cached dialectic result for this session. - - Returns empty string if no result is ready yet. - """ - with self._prefetch_cache_lock: - return self._dialectic_cache.pop(session_key, "") - def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: """ Fire get_prefetch_context in a background thread, caching the result. 
diff --git a/tests/agent/test_memory_provider.py b/tests/agent/test_memory_provider.py index 9301960b71..5cd0d8ab41 100644 --- a/tests/agent/test_memory_provider.py +++ b/tests/agent/test_memory_provider.py @@ -971,8 +971,6 @@ class TestHonchoCadenceTracking: class FakeManager: def prefetch_context(self, key, query=None): pass - def prefetch_dialectic(self, key, query): - pass p._manager = FakeManager() diff --git a/tests/honcho_plugin/test_async_memory.py b/tests/honcho_plugin/test_async_memory.py index 936f478846..5df8d27454 100644 --- a/tests/honcho_plugin/test_async_memory.py +++ b/tests/honcho_plugin/test_async_memory.py @@ -460,10 +460,3 @@ class TestPrefetchCacheAccessors: assert mgr.pop_context_result("cli:test") == payload assert mgr.pop_context_result("cli:test") == {} - def test_set_and_pop_dialectic_result(self): - mgr = _make_manager(write_frequency="turn") - - mgr.set_dialectic_result("cli:test", "Resume with toolset cleanup") - - assert mgr.pop_dialectic_result("cli:test") == "Resume with toolset cleanup" - assert mgr.pop_dialectic_result("cli:test") == "" diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index 9784959d37..b0282b1969 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -815,6 +815,24 @@ class TestDialecticInputGuard: # --------------------------------------------------------------------------- +def _settle_prewarm(provider): + """Wait for the session-start prewarm dialectic thread, then return the + provider to a clean 'nothing fired yet' state so cadence/first-turn/ + trivial-prompt tests can assert from a known baseline.""" + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=3.0) + with provider._prefetch_lock: + provider._prefetch_result = "" + provider._prefetch_thread = None + provider._last_dialectic_turn = -999 + if getattr(provider, "_manager", None) is not None: + try: + provider._manager.dialectic_query.reset_mock() + 
provider._manager.prefetch_context.reset_mock() + except AttributeError: + pass + + class TestDialecticCadenceDefaults: """Regression tests for dialectic_cadence default value.""" @@ -840,12 +858,15 @@ class TestDialecticCadenceDefaults: patch("hermes_constants.get_hermes_home", return_value=MagicMock()): provider.initialize(session_id="test-session-001") + _settle_prewarm(provider) return provider - def test_default_is_3(self): - """Default dialectic_cadence should be 3 to avoid per-turn LLM calls.""" + def test_default_is_1(self): + """Default dialectic_cadence should be 1 (every turn) — restored from + pre-#10619 behavior to avoid a silent regression on upgrade for users + who never set dialecticCadence explicitly.""" provider = self._make_provider() - assert provider._dialectic_cadence == 3 + assert provider._dialectic_cadence == 1 def test_config_override(self): """dialecticCadence from config overrides the default.""" @@ -908,6 +929,7 @@ class TestDialecticDepth: patch("hermes_constants.get_hermes_home", return_value=MagicMock()): provider.initialize(session_id="test-session-001") + _settle_prewarm(provider) return provider def test_default_depth_is_1(self): @@ -1062,7 +1084,8 @@ class TestDialecticDepth: provider.prefetch("hello") assert provider._manager.dialectic_query.call_count == 1 - # Now queue_prefetch on same turn should skip (cadence: 0 - 0 < 3) + # Now queue_prefetch on same turn should skip — _last_dialectic_turn + # was just set to _turn_count by the sync path, so (0 - 0 = 0) < cadence. 
provider._manager.dialectic_query.reset_mock() provider.queue_prefetch("hello") assert provider._manager.dialectic_query.call_count == 0 @@ -1083,6 +1106,453 @@ class TestDialecticDepth: assert provider._manager.dialectic_query.call_count == 1 +# --------------------------------------------------------------------------- +# Trivial-prompt heuristic + dialectic cadence silent-failure guards +# --------------------------------------------------------------------------- + + +class TestTrivialPromptHeuristic: + """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection. + + Restored after accidental removal during the two-layer prefetch refactor. + """ + + @staticmethod + def _make_provider(): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + cfg = HonchoClientConfig(api_key="test-key", enabled=True, recall_mode="hybrid") + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_session = MagicMock() + mock_session.messages = [] + mock_manager.get_or_create.return_value = mock_session + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-session-trivial") + _settle_prewarm(provider) + return provider + + def test_classifier_catches_common_trivial_forms(self): + for t in ("ok", "OK", " ok ", "y", "yes", "sure", "thanks", "lgtm", "/help", "", " "): + assert HonchoMemoryProvider._is_trivial_prompt(t), f"expected trivial: {t!r}" + + def test_classifier_lets_substantive_prompts_through(self): + for t in ("hello world", "what's my name", "explain this", "ok so what's next"): + assert not HonchoMemoryProvider._is_trivial_prompt(t), f"expected 
non-trivial: {t!r}" + + def test_prefetch_skips_on_trivial_prompt(self): + provider = self._make_provider() + provider._session_key = "test" + provider._base_context_cache = "cached base" + provider._last_dialectic_turn = 0 + provider._turn_count = 5 + + assert provider.prefetch("ok") == "" + assert provider.prefetch("/help") == "" + # Dialectic should not have fired + assert provider._manager.dialectic_query.call_count == 0 + + def test_queue_prefetch_skips_on_trivial_prompt(self): + provider = self._make_provider() + provider._session_key = "test" + provider._turn_count = 10 + provider._last_dialectic_turn = -999 # would otherwise fire + # initialize() pre-warms; clear call counts before the assertion. + provider._manager.prefetch_context.reset_mock() + provider._manager.dialectic_query.reset_mock() + + provider.queue_prefetch("y") + # Trivial prompts short-circuit both context refresh and dialectic fire. + assert provider._manager.prefetch_context.call_count == 0 + assert provider._manager.dialectic_query.call_count == 0 + + +class TestDialecticCadenceAdvancesOnSuccess: + """Cadence tracker must only advance when the dialectic call actually returned. + + A silent failure (empty result, API blip) used to burn the full cadence window + before retrying — making it look like dialectic 'never fires again'. 
+ """ + + @staticmethod + def _make_provider(): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + cfg = HonchoClientConfig( + api_key="test-key", enabled=True, recall_mode="hybrid", dialectic_depth=1, + ) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_session = MagicMock() + mock_session.messages = [] + mock_manager.get_or_create.return_value = mock_session + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-session-retry") + _settle_prewarm(provider) + return provider + + def test_empty_dialectic_result_does_not_advance_cadence(self): + import time as _time + provider = self._make_provider() + provider._session_key = "test" + provider._manager.dialectic_query.return_value = "" # silent failure + provider._turn_count = 5 + provider._last_dialectic_turn = 0 # would fire (5 - 0 = 5 ≥ 3) + + provider.queue_prefetch("hello") + # wait for the background thread to settle + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=2.0) + + # Dialectic call was attempted + assert provider._manager.dialectic_query.call_count == 1 + # But cadence tracker did NOT advance — next turn should retry + assert provider._last_dialectic_turn == 0 + + def test_non_empty_dialectic_result_advances_cadence(self): + provider = self._make_provider() + provider._session_key = "test" + provider._manager.dialectic_query.return_value = "real synthesis output" + provider._turn_count = 5 + provider._last_dialectic_turn = 0 + + provider.queue_prefetch("hello") + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=2.0) + + assert 
provider._last_dialectic_turn == 5 + + def test_in_flight_thread_is_not_stacked(self): + import threading as _threading + provider = self._make_provider() + provider._session_key = "test" + provider._turn_count = 10 + provider._last_dialectic_turn = 0 + + # Simulate a prior thread still running + hold = _threading.Event() + + def _block(): + hold.wait(timeout=5.0) + + stale = _threading.Thread(target=_block, daemon=True) + stale.start() + provider._prefetch_thread = stale + + provider.queue_prefetch("hello") + # Should have short-circuited — no new dialectic call + assert provider._manager.dialectic_query.call_count == 0 + hold.set() + stale.join(timeout=2.0) + + +class TestSessionStartDialecticPrewarm: + """Session-start prewarm fires a depth-aware dialectic whose result is + consumed by turn 1 — no duplicate .chat() and no dead-cache orphaning.""" + + @staticmethod + def _make_provider(cfg_extra=None, dialectic_result="prewarm synthesis"): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid") + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_manager.get_or_create.return_value = MagicMock(messages=[]) + mock_manager.get_prefetch_context.return_value = None + mock_manager.pop_context_result.return_value = None + mock_manager.dialectic_query.return_value = dialectic_result + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-prewarm") + return provider + + def 
test_prewarm_populates_prefetch_result(self): + p = self._make_provider() + # Wait for prewarm thread to land + if p._prefetch_thread: + p._prefetch_thread.join(timeout=3.0) + with p._prefetch_lock: + assert p._prefetch_result == "prewarm synthesis" + assert p._last_dialectic_turn == 0 + + def test_turn1_consumes_prewarm_without_duplicate_dialectic(self): + """With prewarm result already in _prefetch_result, turn 1 prefetch + should NOT fire another dialectic.""" + p = self._make_provider() + if p._prefetch_thread: + p._prefetch_thread.join(timeout=3.0) + p._manager.dialectic_query.reset_mock() + p._session_key = "test-prewarm" + p._base_context_cache = "" + p._turn_count = 1 + + result = p.prefetch("hello world") + assert "prewarm synthesis" in result + # The sync first-turn path must NOT have fired another .chat() + assert p._manager.dialectic_query.call_count == 0 + + def test_turn1_falls_back_to_sync_when_prewarm_missing(self): + """If the prewarm produced nothing (empty graph, API blip), turn 1 + still fires its own sync dialectic.""" + p = self._make_provider(dialectic_result="") # prewarm returns empty + if p._prefetch_thread: + p._prefetch_thread.join(timeout=3.0) + with p._prefetch_lock: + assert p._prefetch_result == "" # prewarm landed nothing + # Switch dialectic_query to return something on the sync first-turn call + p._manager.dialectic_query.return_value = "sync recovery" + p._manager.dialectic_query.reset_mock() + p._session_key = "test-prewarm" + p._base_context_cache = "" + p._turn_count = 1 + + result = p.prefetch("hello world") + assert "sync recovery" in result + assert p._manager.dialectic_query.call_count == 1 + + +class TestDialecticLifecycleSmoke: + """End-to-end smoke: walks a realistic multi-turn session through every + behavior we care about — prewarm → turn 1 consume → trivial skip → cadence + fire → silent-failure retry → heuristic bump → session-end flush. 
+ + This is the 'velvet circuit' test: one provider, one flow, one set of + assertions. If the suite above lies about intent, this one catches it. + """ + + @staticmethod + def _make_provider(cfg_extra=None): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict( + api_key="test-key", enabled=True, recall_mode="hybrid", + dialectic_reasoning_level="low", reasoning_heuristic=True, + reasoning_level_cap="high", dialectic_depth=1, + ) + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_session = MagicMock() + mock_session.messages = [] + mock_manager.get_or_create.return_value = mock_session + mock_manager.get_prefetch_context.return_value = None + mock_manager.pop_context_result.return_value = None + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + return provider, mock_manager, cfg + + def _await_thread(self, provider): + if provider._prefetch_thread: + provider._prefetch_thread.join(timeout=3.0) + + def test_full_multi_turn_session(self): + """Walks init → turns 1..8 → session end. Asserts at every step that + the plugin did exactly what it should and nothing more. + + Uses dialecticCadence=3 so we can exercise skip-turns between fires + and the silent-failure retry path without their gates tripping each + other. Trivial + slash skips apply independent of cadence. + """ + from unittest.mock import patch, MagicMock + provider, mgr, cfg = self._make_provider( + cfg_extra={"raw": {"dialecticCadence": 3}} + ) + + # Program the dialectic responses in the exact order they'll be requested. 
+ # An extra or missing call fails the test — strong smoke signal. + responses = iter([ + "prewarm: user is eri, works on hermes", # session-start prewarm + "cadence fire: long query synthesis", # turn 4 queue_prefetch + "", # turn 7 fire: silent failure + "retry success: fresh synthesis", # turn 8 queue_prefetch retry + ]) + mgr.dialectic_query.side_effect = lambda *a, **kw: next(responses) + + # ---- init: prewarm fires ---- + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mgr), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="smoke-test") + + self._await_thread(provider) + with provider._prefetch_lock: + assert provider._prefetch_result.startswith("prewarm"), \ + "session-start prewarm must land in _prefetch_result" + assert provider._last_dialectic_turn == 0, "prewarm marks turn 0" + assert mgr.dialectic_query.call_count == 1 + + # ---- turn 1: consume prewarm, no duplicate dialectic ---- + provider.on_turn_start(1, "hey") + inject1 = provider.prefetch("hey") + assert "prewarm" in inject1, "turn 1 must surface prewarm" + provider.sync_turn("hey", "hi there") + provider.queue_prefetch("hey") # cadence gate: (1-0)<3 → skip + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 1, \ + "turn 1 must not fire — prewarm covered it and cadence skips" + + # ---- turn 2: trivial 'ok' → skip everything ---- + mgr.prefetch_context.reset_mock() + provider.on_turn_start(2, "ok") + assert provider.prefetch("ok") == "", "trivial prompt must short-circuit injection" + provider.sync_turn("ok", "cool") + provider.queue_prefetch("ok") + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 1, "trivial must not fire dialectic" + assert mgr.prefetch_context.call_count == 0, 
"trivial must not fire context refresh" + + # ---- turn 3: slash '/help' → also skip ---- + provider.on_turn_start(3, "/help") + assert provider.prefetch("/help") == "" + provider.queue_prefetch("/help") + assert mgr.dialectic_query.call_count == 1 + + # ---- turn 4: long query → cadence fires + heuristic bumps ---- + long_q = "walk me through " + ("x " * 100) # ~200 chars → heuristic +1 + provider.on_turn_start(4, long_q) + provider.prefetch(long_q) + provider.sync_turn(long_q, "sure") + provider.queue_prefetch(long_q) # (4-0)≥3 → fires + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 2, "turn 4 cadence fire" + _, kwargs = mgr.dialectic_query.call_args + assert kwargs.get("reasoning_level") in ("medium", "high"), \ + f"long query must bump reasoning level above 'low'; got {kwargs.get('reasoning_level')}" + assert provider._last_dialectic_turn == 4, "cadence tracker advances on success" + + # ---- turns 5–6: cadence cooldown, no fires ---- + for t in (5, 6): + provider.on_turn_start(t, "tell me more") + provider.queue_prefetch("tell me more") + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 2, "turns 5–6 blocked by cadence window" + + # ---- turn 7: fires but silent failure (empty dialectic) ---- + provider.on_turn_start(7, "and then what") + provider.queue_prefetch("and then what") # (7-4)≥3 → fires + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 3, "turn 7 fires" + assert provider._last_dialectic_turn == 4, \ + "silent failure must NOT burn the cadence window" + + # ---- turn 8: retries because cadence didn't advance ---- + provider.on_turn_start(8, "try again") + provider.queue_prefetch("try again") # (8-4)≥3 → fires again + self._await_thread(provider) + assert mgr.dialectic_query.call_count == 4, \ + "turn 8 retries because turn 7's empty result didn't advance cadence" + assert provider._last_dialectic_turn == 8, "retry success advances" + + # ---- session end: flush messages ---- + 
provider.on_session_end([]) + mgr.flush_all.assert_called() + + +class TestReasoningHeuristic: + """Restored char-count heuristic for auto-injected dialectic reasoning level. + + Pre-9a0ab34c behavior: scale base up by query length, capped at + reasoning_level_cap. 'max' is reserved for explicit tool-path selection. + """ + + @staticmethod + def _make_provider(cfg_extra=None): + from unittest.mock import patch, MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict( + api_key="test-key", enabled=True, recall_mode="hybrid", + dialectic_reasoning_level="low", reasoning_heuristic=True, + reasoning_level_cap="high", + ) + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_manager.get_or_create.return_value = MagicMock(messages=[]) + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-heuristic") + _settle_prewarm(provider) + return provider + + def test_short_query_stays_at_base(self): + p = self._make_provider() + assert p._apply_reasoning_heuristic("low", "hey") == "low" + + def test_medium_query_bumps_one_level(self): + p = self._make_provider() + q = "x" * 150 + assert p._apply_reasoning_heuristic("low", q) == "medium" + + def test_long_query_bumps_two_levels(self): + p = self._make_provider() + q = "x" * 500 + assert p._apply_reasoning_heuristic("low", q) == "high" + + def test_bump_respects_cap(self): + p = self._make_provider(cfg_extra={"reasoning_level_cap": "medium"}) + q = "x" * 500 # would hit 'high' without the cap + assert p._apply_reasoning_heuristic("low", q) == "medium" + + def 
test_max_never_auto_selected_with_default_cap(self): + p = self._make_provider(cfg_extra={"dialectic_reasoning_level": "high"}) + q = "x" * 500 # base=high, bump would push to 'max' + assert p._apply_reasoning_heuristic("high", q) == "high" + + def test_heuristic_disabled_returns_base(self): + p = self._make_provider(cfg_extra={"reasoning_heuristic": False}) + q = "x" * 500 + assert p._apply_reasoning_heuristic("low", q) == "low" + + def test_resolve_pass_level_applies_heuristic_at_base_mapping(self): + """Depth=1, pass 0 maps to 'base' → heuristic applies.""" + p = self._make_provider() + q = "x" * 150 + assert p._resolve_pass_level(0, query=q) == "medium" + + def test_resolve_pass_level_does_not_touch_explicit_per_pass(self): + """dialecticDepthLevels wins absolutely — no heuristic scaling.""" + p = self._make_provider(cfg_extra={"dialectic_depth_levels": ["minimal"]}) + q = "x" * 500 # heuristic would otherwise bump to 'high' + assert p._resolve_pass_level(0, query=q) == "minimal" + + def test_resolve_pass_level_does_not_touch_lighter_passes(self): + """Depth 3 pass 0 is hardcoded 'minimal' — heuristic must not bump it.""" + p = self._make_provider(cfg_extra={"dialectic_depth": 3}) + q = "x" * 500 + assert p._resolve_pass_level(0, query=q) == "minimal" + # But the 'base' pass (idx 1 for depth 3) does get heuristic + assert p._resolve_pass_level(1, query=q) == "high" + + # --------------------------------------------------------------------------- # set_peer_card None guard # --------------------------------------------------------------------------- diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 2040949d25..906a7c030e 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs: | Knob | Controls | Default | |------|----------|---------| | `contextCadence` | Turns between `context()` 
API calls (base layer refresh) | `1` | -| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` | +| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` | | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` | These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes. @@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho |-----|---------|-------------| | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | +| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. 
Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index f571c7d48f..181f30f7fa 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -82,7 +82,7 @@ hermes memory setup # select "honcho" | `workspace` | host key | Shared workspace ID | | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | +| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. 
Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p }, "dialecticReasoningLevel": "low", "dialecticDynamic": true, - "dialecticCadence": 3, + "dialecticCadence": 1, "dialecticDepth": 1, "dialecticMaxChars": 600, "contextCadence": 1, From 5f9907c11616f30a03356900b8831b1fc98e7d31 Mon Sep 17 00:00:00 2001 From: Erosika Date: Sat, 18 Apr 2026 11:01:45 -0400 Subject: [PATCH 004/455] chore(honcho): drop docs from PR scope, scrub commentary - Revert website/docs and SKILL.md changes; docs unification handled separately - Scrub commit/PR refs and process narration from code comments and test docstrings (no behavior change) --- .../autonomous-ai-agents/honcho/SKILL.md | 6 ++-- plugins/memory/honcho/__init__.py | 27 ++++++-------- plugins/memory/honcho/cli.py | 2 +- plugins/memory/honcho/client.py | 10 +++--- tests/honcho_plugin/test_session.py | 35 ++++++------------- website/docs/user-guide/features/honcho.md | 4 +-- .../user-guide/features/memory-providers.md | 4 +-- 7 files changed, 33 insertions(+), 55 deletions(-) diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index 5d03a54985..c60d2c6356 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` | Min turns between dialectic API calls | +| `dialecticCadence` | `3` | Min turns between dialectic API calls | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | -Higher cadence values reduce API calls and cost. 
`dialecticCadence: 1` (default) fires every turn; set to `3` or higher to throttle for cost. +Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn. ### Depth (how many) @@ -368,7 +368,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` | Min turns between dialectic LLM calls | +| `dialecticCadence` | `3` | Min turns between dialectic LLM calls | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index ac0f60279a..51345b8e92 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -393,13 +393,10 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho memory file migration skipped: %s", e) # ----- B7: Pre-warming at init ----- - # Context prewarm: warms peer.context() cache (base layer), consumed - # via pop_context_result() in prefetch(). - # Dialectic prewarm: fires a depth-aware cycle against the plugin's - # own _prefetch_result so turn 1 can consume it directly. Without this - # the first-turn sync path pays for a duplicate .chat() — and at - # depth>1 a single-pass session-start dialectic often returns weak - # output that multi-pass audit/reconciliation is meant to catch. 
+ # Context prewarm warms peer.context() (base layer), consumed via + # pop_context_result() in prefetch(). Dialectic prewarm runs the + # full configured depth and writes into _prefetch_result so turn 1 + # consumes the result directly. if self._recall_mode in ("context", "hybrid"): try: self._manager.prefetch_context(self._session_key) @@ -555,8 +552,7 @@ class HonchoMemoryProvider(MemoryProvider): if self._injection_frequency == "first-turn" and self._turn_count > 1: return "" - # Skip trivial prompts — "ok", "yes", slash commands carry no semantic signal, - # so injecting user context there just burns tokens and can derail the reply. + # Trivial prompts ("ok", "yes", slash commands) carry no semantic signal. if self._is_trivial_prompt(query): return "" @@ -619,8 +615,8 @@ class HonchoMemoryProvider(MemoryProvider): if r and r.strip(): with self._prefetch_lock: self._prefetch_result = r - # Only advance cadence on a non-empty result so failures - # don't burn a 3-turn cooldown on nothing. + # Advance cadence only on a non-empty result so the next + # turn retries when the call returned nothing. self._last_dialectic_turn = _fired_at self._prefetch_thread = threading.Thread( @@ -711,9 +707,8 @@ class HonchoMemoryProvider(MemoryProvider): self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) return - # Advance cadence only on a non-empty result — otherwise a silent failure - # (empty dialectic, transient API error) would burn the full cadence window - # before the next retry, making it look like dialectic "never fires again". + # Cadence advances only on a non-empty result so empty returns + # (transient API error, sparse representation) retry next turn. _fired_at = self._turn_count def _run(): @@ -751,9 +746,7 @@ class HonchoMemoryProvider(MemoryProvider): _LEVEL_ORDER = ("minimal", "low", "medium", "high", "max") - # Reasoning-level heuristic thresholds (restored from pre-9a0ab34c behavior). 
- # Promoted to class constants so tests can override without widening the - # config surface. Bump to config fields only if real use shows they're needed. + # Char-count thresholds for the query-length reasoning heuristic. _HEURISTIC_LENGTH_MEDIUM = 120 _HEURISTIC_LENGTH_HIGH = 400 diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 478bf39d8a..5cd25bfbab 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -463,7 +463,7 @@ def cmd_setup(args) -> None: current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn (default), 3+ = sparse (cost-saving).") + print(" 1 = every turn (default), 3+ = sparse.") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 136b1e60dc..346c2b76e6 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -251,13 +251,11 @@ class HonchoClientConfig: # matching dialectic_depth length. When None, uses proportional defaults # derived from dialectic_reasoning_level. dialectic_depth_levels: list[str] | None = None - # Reasoning-level heuristic for auto-injected dialectic calls. When true, - # scales the base level up on longer queries (restored from pre-#10619 - # behavior; see plugins/memory/honcho/__init__.py for thresholds). - # Never auto-selects a level above reasoning_level_cap. + # When true, the auto-injected dialectic scales reasoning level up on + # longer queries. See HonchoMemoryProvider for thresholds. reasoning_heuristic: bool = True - # Ceiling for heuristic-selected reasoning level. "max" is reserved for - # explicit tool-path selection; default "high" matches the old behavior. + # Ceiling for the heuristic-selected reasoning level. 
"max" is reserved + # for explicit tool-path selection. reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index b0282b1969..83db3f24dc 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -862,9 +862,7 @@ class TestDialecticCadenceDefaults: return provider def test_default_is_1(self): - """Default dialectic_cadence should be 1 (every turn) — restored from - pre-#10619 behavior to avoid a silent regression on upgrade for users - who never set dialecticCadence explicitly.""" + """Default dialectic_cadence is 1 — fires every turn unless overridden.""" provider = self._make_provider() assert provider._dialectic_cadence == 1 @@ -1112,10 +1110,7 @@ class TestDialecticDepth: class TestTrivialPromptHeuristic: - """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection. - - Restored after accidental removal during the two-layer prefetch refactor. - """ + """Trivial prompts ('ok', 'y', slash commands) must short-circuit injection.""" @staticmethod def _make_provider(): @@ -1173,11 +1168,9 @@ class TestTrivialPromptHeuristic: class TestDialecticCadenceAdvancesOnSuccess: - """Cadence tracker must only advance when the dialectic call actually returned. - - A silent failure (empty result, API blip) used to burn the full cadence window - before retrying — making it look like dialectic 'never fires again'. - """ + """Cadence tracker advances only when the dialectic call returns a + non-empty result. 
Empty results (transient API error, sparse representation) + must retry on the next eligible turn instead of waiting the full cadence.""" @staticmethod def _make_provider(): @@ -1329,13 +1322,9 @@ class TestSessionStartDialecticPrewarm: class TestDialecticLifecycleSmoke: - """End-to-end smoke: walks a realistic multi-turn session through every - behavior we care about — prewarm → turn 1 consume → trivial skip → cadence - fire → silent-failure retry → heuristic bump → session-end flush. - - This is the 'velvet circuit' test: one provider, one flow, one set of - assertions. If the suite above lies about intent, this one catches it. - """ + """End-to-end smoke walking a multi-turn session through prewarm, + turn 1 consume, trivial skip, cadence fire, empty-result retry, + heuristic bump, and session-end flush.""" @staticmethod def _make_provider(cfg_extra=None): @@ -1473,11 +1462,9 @@ class TestDialecticLifecycleSmoke: class TestReasoningHeuristic: - """Restored char-count heuristic for auto-injected dialectic reasoning level. - - Pre-9a0ab34c behavior: scale base up by query length, capped at - reasoning_level_cap. 'max' is reserved for explicit tool-path selection. - """ + """Char-count heuristic that scales the auto-injected reasoning level by + query length, clamped at reasoning_level_cap. 
'max' is reserved for + explicit tool-path selection.""" @staticmethod def _make_provider(cfg_extra=None): diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 906a7c030e..2040949d25 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs: | Knob | Controls | Default | |------|----------|---------| | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` | -| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` | +| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` | | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` | These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes. @@ -104,7 +104,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho |-----|---------|-------------| | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | +| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. 
Clamped to 1–3 | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index 181f30f7fa..f571c7d48f 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -82,7 +82,7 @@ hermes memory setup # select "honcho" | `workspace` | host key | Shared workspace ID | | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `1` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | +| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. 
Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -181,7 +181,7 @@ This inherits settings from the default `hermes` host block and creates new AI p }, "dialecticReasoningLevel": "low", "dialecticDynamic": true, - "dialecticCadence": 1, + "dialecticCadence": 3, "dialecticDepth": 1, "dialecticMaxChars": 600, "contextCadence": 1, From 098efde848a1253033fedf04e8184ef843115e11 Mon Sep 17 00:00:00 2001 From: Erosika Date: Sat, 18 Apr 2026 12:45:04 -0400 Subject: [PATCH 005/455] docs(honcho): wizard cadence default 2, prewarm/depth + observation + multi-peer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - cli: setup wizard pre-fills dialecticCadence=2 (code default stays 1 so unset → every turn) - honcho.md: fix stale dialecticCadence default in tables, add Session-Start Prewarm subsection (depth runs at init), add Query-Adaptive Reasoning Level subsection, expand Observation section with directional vs unified semantics and per-peer patterns - memory-providers.md: fix stale default, rename Multi-agent/Profiles to Multi-peer setup, add concrete walkthrough for new profiles and sync, document observation toggles + presets, link to honcho.md - SKILL.md: fix stale defaults, add Depth at session start callout --- .../autonomous-ai-agents/honcho/SKILL.md | 8 ++- plugins/memory/honcho/cli.py | 6 +- website/docs/user-guide/features/honcho.md | 47 ++++++++++++++- .../user-guide/features/memory-providers.md | 59 ++++++++++++++++--- 4 files changed, 103 insertions(+), 17 deletions(-) diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index c60d2c6356..e79875aa07 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,10 +145,10 @@ Controls **how often** dialectic and context calls happen. 
| Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic API calls | +| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic API calls. Unset → every turn; wizard pre-fills `2` | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | -Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn. +Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn. ### Depth (how many) @@ -180,6 +180,8 @@ If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived This keeps earlier passes cheap while using full depth on the final synthesis. +**Depth at session start.** The session-start prewarm runs the full configured `dialecticDepth` in the background before turn 1. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. Turn 1 consumes the prewarm result directly; if prewarm hasn't landed in time, turn 1 falls back to a synchronous call with a bounded timeout. + ### Level (how hard) Controls the **intensity** of each dialectic reasoning round. @@ -368,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. 
| | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `3` | Min turns between dialectic LLM calls | +| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic LLM calls | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 5cd25bfbab..c73dd66f39 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -460,17 +460,17 @@ def cmd_setup(args) -> None: pass # keep current # --- 7b. Dialectic cadence --- - current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "1") + current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn (default), 3+ = sparse.") + print(" 1 = every turn, 2 = every other turn (wizard default), 3+ = sparse.") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) if val >= 1: hermes_host["dialecticCadence"] = val except (ValueError, TypeError): - hermes_host["dialecticCadence"] = 1 + hermes_host["dialecticCadence"] = 2 # --- 8. 
Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 2040949d25..bf4b5c6bc3 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs: | Knob | Controls | Default | |------|----------|---------| | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` | -| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `3` | +| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` (code default) / `2` (setup wizard default) | | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` | These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes. @@ -94,6 +94,14 @@ Each pass uses a proportional reasoning level (lighter early passes, base level Passes bail out early if the prior pass returned strong signal (long, structured output), so depth 3 doesn't always mean 3 LLM calls. +### Session-Start Prewarm + +On session init, Honcho fires a dialectic call in the background at the full configured `dialecticDepth` and hands the result directly to turn 1's context assembly. A single-pass prewarm on a cold peer often returns thin output — multi-pass depth runs the audit/reconcile cycle before the user ever speaks. If prewarm hasn't landed by turn 1, turn 1 falls back to a synchronous call with a bounded timeout. 
+ +### Query-Adaptive Reasoning Level + +The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. `"max"` is reserved for explicit tool-path selection via `honcho_reasoning`. + ## Configuration Options Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho.json` (profile-local). The setup wizard handles this for you. @@ -104,7 +112,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho |-----|---------|-------------| | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). In `tools` mode, irrelevant — model calls explicitly | +| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Code default fires every turn when the key is unset; the setup wizard pre-fills `2`. In `tools` mode, irrelevant — model calls explicitly | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -142,6 +150,41 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho In `tools` mode, the model is fully in control — it calls `honcho_reasoning` when it wants, at whatever `reasoning_level` it picks. 
Cadence and budget settings only apply to modes with auto-injection (`hybrid` and `context`). +## Observation (Directional vs. Unified) + +Honcho models a conversation as peers exchanging messages. Each peer has two observation toggles that map 1:1 to Honcho's `SessionPeerConfig`: + +| Toggle | Effect | +|--------|--------| +| `observeMe` | Honcho builds a representation of this peer from its own messages | +| `observeOthers` | This peer observes the other peer's messages (feeds cross-peer reasoning) | + +Two peers × two toggles = four flags. `observationMode` is a shorthand preset: + +| Preset | User flags | AI flags | Semantics | +|--------|-----------|----------|-----------| +| `"directional"` (default) | me: on, others: on | me: on, others: on | Full mutual observation. Enables cross-peer dialectic — "what does the AI know about the user, based on what the user said and the AI replied." | +| `"unified"` | me: on, others: off | me: off, others: on | Shared-pool semantics — the AI observes the user's messages only, the user peer only self-models. Single-observer pool. | + +Override the preset with an explicit `observation` block for per-peer control: + +```json +"observation": { + "user": { "observeMe": true, "observeOthers": true }, + "ai": { "observeMe": true, "observeOthers": false } +} +``` + +Common patterns: + +| Intent | Config | +|--------|--------| +| Full observation (most users) | `"observationMode": "directional"` | +| AI shouldn't re-model the user from its own replies | `"ai": {"observeMe": true, "observeOthers": false}` | +| Strong persona the AI peer shouldn't update from self-observation | `"ai": {"observeMe": false, "observeOthers": true}` | + +Server-side toggles set via the Honcho dashboard win over local defaults — Hermes syncs them back at session init. 
+ ## Tools When Honcho is active as the memory provider, five tools become available: diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index f571c7d48f..b2469a13ee 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -82,7 +82,7 @@ hermes memory setup # select "honcho" | `workspace` | host key | Shared workspace ID | | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `3` | Minimum turns between `peer.chat()` LLM calls. Only applies to `hybrid`/`context` modes | +| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls. Unset → every turn; wizard pre-fills `2`. Only applies to `hybrid`/`context` modes | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -140,23 +140,64 @@ hermes memory setup # select "honcho" If you previously used `hermes honcho setup`, your config and all server-side data are intact. Just re-enable through the setup wizard again or manually set `memory.provider: honcho` to reactivate via the new system. ::: -**Multi-agent / Profiles:** +**Multi-peer setup:** -Each Hermes profile gets its own Honcho AI peer while sharing the same workspace -- all profiles see the same user representation, but each agent builds its own identity and observations. 
+Honcho models conversations as peers exchanging messages — one user peer plus one AI peer per Hermes profile, all sharing a workspace. The workspace is the shared environment: the user peer is global across profiles, each AI peer is its own identity. Every AI peer builds an independent representation / card from its own observations, so a `coder` profile stays code-oriented while a `writer` profile stays editorial against the same user. + +The mapping: + +| Concept | What it is | +|---------|-----------| +| **Workspace** | Shared environment. All Hermes profiles under one workspace see the same user identity. | +| **User peer** (`peerName`) | The human. Shared across profiles in the workspace. | +| **AI peer** (`aiPeer`) | One per Hermes profile. Host key `hermes` → default; `hermes.` for others. | +| **Observation** | Per-peer toggles controlling what Honcho models from whose messages. `directional` (default, all four on) or `unified` (single-observer pool). | + +### New profile, fresh Honcho peer ```bash -hermes profile create coder --clone # creates honcho peer "coder", inherits config from default +hermes profile create coder --clone ``` -What `--clone` does: creates a `hermes.coder` host block in `honcho.json` with `aiPeer: "coder"`, shared `workspace`, inherited `peerName`, `recallMode`, `writeFrequency`, `observation`, etc. The peer is eagerly created in Honcho so it exists before first message. +`--clone` creates a `hermes.coder` host block in `honcho.json` with `aiPeer: "coder"`, shared `workspace`, inherited `peerName`, `recallMode`, `writeFrequency`, `observation`, etc. The AI peer is eagerly created in Honcho so it exists before the first message. 
-For profiles created before Honcho was set up: +### Existing profiles, backfill Honcho peers ```bash -hermes honcho sync # scans all profiles, creates host blocks for any missing ones +hermes honcho sync ``` -This inherits settings from the default `hermes` host block and creates new AI peers for each profile. Idempotent -- skips profiles that already have a host block. +Scans every Hermes profile, creates host blocks for any profile without one, inherits settings from the default `hermes` block, and creates the new AI peers eagerly. Idempotent — skips profiles that already have a host block. + +### Per-profile observation + +Each host block can override the observation config independently. Example: a code-focused profile where the AI peer observes the user but doesn't self-model: + +```json +"hermes.coder": { + "aiPeer": "coder", + "observation": { + "user": { "observeMe": true, "observeOthers": true }, + "ai": { "observeMe": false, "observeOthers": true } + } +} +``` + +**Observation toggles (one set per peer):** + +| Toggle | Effect | +|--------|--------| +| `observeMe` | Honcho builds a representation of this peer from its own messages | +| `observeOthers` | This peer observes the other peer's messages (feeds cross-peer reasoning) | + +Presets via `observationMode`: + +- **`"directional"`** (default) — all four flags on. Full mutual observation; enables cross-peer dialectic. +- **`"unified"`** — user `observeMe: true`, AI `observeOthers: true`, rest false. Single-observer pool; AI models the user but not itself, user peer only self-models. + +Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — synced back at session init. + +See the [Honcho page](./honcho.md#observation-directional-vs-unified) for the full observation reference.
Full honcho.json example (multi-profile) @@ -181,7 +222,7 @@ This inherits settings from the default `hermes` host block and creates new AI p }, "dialecticReasoningLevel": "low", "dialecticDynamic": true, - "dialecticCadence": 3, + "dialecticCadence": 2, "dialecticDepth": 1, "dialecticMaxChars": 600, "contextCadence": 1, From c630dfcdac4a64a3d55aa8724c7ca3bdd7e64b85 Mon Sep 17 00:00:00 2001 From: Erosika Date: Sat, 18 Apr 2026 13:07:09 -0400 Subject: [PATCH 006/455] =?UTF-8?q?feat(honcho):=20dialectic=20liveness=20?= =?UTF-8?q?=E2=80=94=20stale-thread=20watchdog,=20stale-result=20discard,?= =?UTF-8?q?=20empty-streak=20backoff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hardens the dialectic lifecycle against three failure modes that could leave the prefetch pipeline stuck or injecting stale content: - Stale-thread watchdog: _thread_is_live() treats any prefetch thread older than timeout × 2.0 as dead. A hung Honcho call can no longer block subsequent fires indefinitely. - Stale-result discard: pending _prefetch_result is tagged with its fire turn. prefetch() discards the result if more than cadence × 2 turns passed before a consumer read it (e.g. a run of trivial-prompt turns between fire and read). - Empty-streak backoff: consecutive empty dialectic returns widen the effective cadence (dialectic_cadence + streak, capped at cadence × 8). A healthy fire resets the streak. Prevents the plugin from hammering the backend every turn when the peer graph is cold. - liveness_snapshot() on the provider exposes current turn, last fire, pending fire-at, empty streak, effective cadence, and thread status for in-process diagnostics. - system_prompt_block: nudge the model that honcho_reasoning accepts reasoning_level minimal/low/medium/high/max per call. - hermes honcho status: surface base reasoning level, cap, and heuristic toggle so config drift is visible at a glance. Tests: 550 passed. 
- TestDialecticLiveness (8 tests): stale-thread recovery, stale-result discard, fresh-result retention, backoff widening, backoff ceiling, streak reset on success, streak increment on empty, snapshot shape. - Existing TestDialecticCadenceAdvancesOnSuccess::test_in_flight_thread_is_not_stacked updated to set _prefetch_thread_started_at so it tests the fresh-thread-blocks branch (stale path covered separately). - test_cli TestCmdStatus fake updated with the new config attrs surfaced in the status block. --- plugins/memory/honcho/__init__.py | 120 +++++++++++++++++++-- plugins/memory/honcho/cli.py | 3 + tests/honcho_plugin/test_cli.py | 3 + tests/honcho_plugin/test_session.py | 156 +++++++++++++++++++++++++++- 4 files changed, 266 insertions(+), 16 deletions(-) diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index 51345b8e92..68fa868855 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -19,6 +19,7 @@ import json import logging import re import threading +import time from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider @@ -214,6 +215,11 @@ class HonchoMemoryProvider(MemoryProvider): self._last_context_turn = -999 self._last_dialectic_turn = -999 + # Liveness + observability state + self._prefetch_thread_started_at: float = 0.0 # monotonic ts of current thread + self._prefetch_result_fired_at: int = -999 # turn the pending result was fired at + self._dialectic_empty_streak: int = 0 # consecutive empty returns + # Port #1957: lazy session init for tools-only mode self._session_initialized = False self._lazy_init_kwargs: Optional[dict] = None @@ -413,13 +419,19 @@ class HonchoMemoryProvider(MemoryProvider): r = self._run_dialectic_depth(_prewarm_query) except Exception as exc: logger.debug("Honcho dialectic prewarm failed: %s", exc) + self._dialectic_empty_streak += 1 return if r and r.strip(): with self._prefetch_lock: self._prefetch_result = r + 
self._prefetch_result_fired_at = 0 # Treat prewarm as turn 0 so cadence gating starts clean. self._last_dialectic_turn = 0 + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + self._prefetch_thread_started_at = time.monotonic() self._prefetch_thread = threading.Thread( target=_prewarm_dialectic, daemon=True, name="honcho-prewarm-dialectic" ) @@ -513,7 +525,8 @@ class HonchoMemoryProvider(MemoryProvider): "# Honcho Memory\n" "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, " "honcho_search for raw excerpts, honcho_context for raw peer context, " - "honcho_reasoning for synthesized answers, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user. " "No automatic context injection — you must use tools to access memory." ) @@ -523,7 +536,8 @@ class HonchoMemoryProvider(MemoryProvider): "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. " "Use honcho_profile for a quick factual snapshot, " "honcho_search for raw excerpts, honcho_context for raw peer context, " - "honcho_reasoning for synthesized answers, " + "honcho_reasoning for synthesized answers (pass reasoning_level " + "minimal/low/medium/high/max — you pick the depth per call), " "honcho_conclude to save facts about the user." ) @@ -611,14 +625,20 @@ class HonchoMemoryProvider(MemoryProvider): r = self._run_dialectic_depth(query) except Exception as exc: logger.debug("Honcho first-turn dialectic failed: %s", exc) + self._dialectic_empty_streak += 1 return if r and r.strip(): with self._prefetch_lock: self._prefetch_result = r + self._prefetch_result_fired_at = _fired_at # Advance cadence only on a non-empty result so the next # turn retries when the call returned nothing. 
self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + self._prefetch_thread_started_at = time.monotonic() self._prefetch_thread = threading.Thread( target=_run_first_turn, daemon=True, name="honcho-prefetch-first" ) @@ -635,7 +655,21 @@ class HonchoMemoryProvider(MemoryProvider): self._prefetch_thread.join(timeout=3.0) with self._prefetch_lock: dialectic_result = self._prefetch_result + fired_at = self._prefetch_result_fired_at self._prefetch_result = "" + self._prefetch_result_fired_at = -999 + + # Discard stale pending results: if the fire happened more than + # cadence × multiplier turns ago (e.g. a run of trivial-prompt turns + # passed without consumption), the content likely no longer tracks + # the current conversational pivot. + stale_limit = self._dialectic_cadence * self._STALE_RESULT_MULTIPLIER + if dialectic_result and fired_at >= 0 and (self._turn_count - fired_at) > stale_limit: + logger.debug( + "Honcho pending dialectic discarded as stale: fired_at=%d, " + "turn=%d, limit=%d", fired_at, self._turn_count, stale_limit, + ) + dialectic_result = "" if dialectic_result and dialectic_result.strip(): parts.append(dialectic_result) @@ -693,18 +727,23 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho context prefetch failed: %s", e) # ----- Dialectic prefetch (supplement layer) ----- - # Guard against thread pile-up: if a prior dialectic is still in flight, - # let it finish instead of stacking races on _prefetch_result. - if self._prefetch_thread and self._prefetch_thread.is_alive(): + # Thread-alive guard with stale-thread recovery: a hung Honcho call + # older than timeout × multiplier is treated as dead so it can't + # block subsequent fires. + if self._thread_is_live(): logger.debug("Honcho dialectic prefetch skipped: prior thread still running") return - # B5: cadence check — skip if too soon since last *successful* dialectic call. 
- # The gate applies uniformly (including cadence=1): "every turn" means once - # per turn, not twice on the same turn when first-turn sync already fired. - if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence: - logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d", - self._dialectic_cadence, self._turn_count - self._last_dialectic_turn) + # Cadence gate, widened by the empty-streak backoff so a persistently + # silent backend doesn't retry every turn forever. + effective = self._effective_cadence() + if (self._turn_count - self._last_dialectic_turn) < effective: + logger.debug( + "Honcho dialectic prefetch skipped: effective cadence %d " + "(base %d, empty streak %d), turns since last: %d", + effective, self._dialectic_cadence, self._dialectic_empty_streak, + self._turn_count - self._last_dialectic_turn, + ) return # Cadence advances only on a non-empty result so empty returns @@ -716,12 +755,18 @@ class HonchoMemoryProvider(MemoryProvider): result = self._run_dialectic_depth(query) except Exception as e: logger.debug("Honcho prefetch failed: %s", e) + self._dialectic_empty_streak += 1 return if result and result.strip(): with self._prefetch_lock: self._prefetch_result = result + self._prefetch_result_fired_at = _fired_at self._last_dialectic_turn = _fired_at + self._dialectic_empty_streak = 0 + else: + self._dialectic_empty_streak += 1 + self._prefetch_thread_started_at = time.monotonic() self._prefetch_thread = threading.Thread( target=_run, daemon=True, name="honcho-prefetch" ) @@ -750,6 +795,59 @@ class HonchoMemoryProvider(MemoryProvider): _HEURISTIC_LENGTH_MEDIUM = 120 _HEURISTIC_LENGTH_HIGH = 400 + # Liveness constants. A thread older than timeout × multiplier is treated + # as dead so a hung Honcho call can't block future retries indefinitely. 
+ _STALE_THREAD_MULTIPLIER = 2.0 + # Pending result whose fire-turn is older than cadence × multiplier is + # discarded on read so we don't inject context for a stale conversational + # pivot after a gap of trivial-prompt turns. + _STALE_RESULT_MULTIPLIER = 2 + # Cap on the empty-streak backoff so a persistently silent backend + # eventually settles on a ceiling instead of unbounded widening. + _BACKOFF_MAX = 8 + + def _thread_is_live(self) -> bool: + """Thread-alive guard that treats threads older than the stale + threshold as dead, so a hung Honcho request can't block new fires.""" + if not self._prefetch_thread or not self._prefetch_thread.is_alive(): + return False + timeout = (self._config.timeout if self._config and self._config.timeout else 8.0) + age = time.monotonic() - self._prefetch_thread_started_at + if age > timeout * self._STALE_THREAD_MULTIPLIER: + logger.debug( + "Honcho prefetch thread age %.1fs exceeds stale threshold " + "%.1fs — treating as dead", age, timeout * self._STALE_THREAD_MULTIPLIER, + ) + return False + return True + + def _effective_cadence(self) -> int: + """Cadence plus empty-streak backoff, capped at _BACKOFF_MAX × base.""" + if self._dialectic_empty_streak <= 0: + return self._dialectic_cadence + widened = self._dialectic_cadence + self._dialectic_empty_streak + ceiling = self._dialectic_cadence * self._BACKOFF_MAX + return min(widened, ceiling) + + def liveness_snapshot(self) -> dict: + """In-process snapshot of dialectic liveness state for diagnostics. + + Returns current turn, last successful dialectic turn, pending-result + fire turn, empty streak, effective cadence, and thread status. 
+ """ + thread_age = None + if self._prefetch_thread and self._prefetch_thread.is_alive(): + thread_age = time.monotonic() - self._prefetch_thread_started_at + return { + "turn_count": self._turn_count, + "last_dialectic_turn": self._last_dialectic_turn, + "pending_result_fired_at": self._prefetch_result_fired_at, + "empty_streak": self._dialectic_empty_streak, + "effective_cadence": self._effective_cadence(), + "thread_alive": thread_age is not None, + "thread_age_seconds": thread_age, + } + def _apply_reasoning_heuristic(self, base: str, query: str) -> str: """Scale `base` up by query length, clamped at reasoning_level_cap. diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index c73dd66f39..eb21c48eaa 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -638,6 +638,9 @@ def cmd_status(args) -> None: raw = getattr(hcfg, "raw", None) or {} dialectic_cadence = raw.get("dialecticCadence") or 1 print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}") + reasoning_cap = raw.get("reasoningLevelCap") or hcfg.reasoning_level_cap + heuristic_on = "on" if hcfg.reasoning_heuristic else "off" + print(f" Reasoning: base={hcfg.dialectic_reasoning_level}, cap={reasoning_cap}, heuristic={heuristic_on}") print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})") print(f" Write freq: {hcfg.write_frequency}") diff --git a/tests/honcho_plugin/test_cli.py b/tests/honcho_plugin/test_cli.py index 006d687dc1..a6fc39ea7c 100644 --- a/tests/honcho_plugin/test_cli.py +++ b/tests/honcho_plugin/test_cli.py @@ -26,6 +26,9 @@ class TestCmdStatus: write_frequency = "async" session_strategy = "per-session" context_tokens = 800 + dialectic_reasoning_level = "low" + reasoning_level_cap = "high" + reasoning_heuristic = True def resolve_session_name(self): return "hermes" diff --git a/tests/honcho_plugin/test_session.py 
b/tests/honcho_plugin/test_session.py index 83db3f24dc..37f54b5410 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -823,8 +823,11 @@ def _settle_prewarm(provider): provider._prefetch_thread.join(timeout=3.0) with provider._prefetch_lock: provider._prefetch_result = "" + provider._prefetch_result_fired_at = -999 provider._prefetch_thread = None + provider._prefetch_thread_started_at = 0.0 provider._last_dialectic_turn = -999 + provider._dialectic_empty_streak = 0 if getattr(provider, "_manager", None) is not None: try: provider._manager.dialectic_query.reset_mock() @@ -1227,26 +1230,28 @@ class TestDialecticCadenceAdvancesOnSuccess: def test_in_flight_thread_is_not_stacked(self): import threading as _threading + import time as _time provider = self._make_provider() provider._session_key = "test" provider._turn_count = 10 provider._last_dialectic_turn = 0 - # Simulate a prior thread still running + # Simulate a prior thread still running (fresh, not stale) hold = _threading.Event() def _block(): hold.wait(timeout=5.0) - stale = _threading.Thread(target=_block, daemon=True) - stale.start() - provider._prefetch_thread = stale + fresh = _threading.Thread(target=_block, daemon=True) + fresh.start() + provider._prefetch_thread = fresh + provider._prefetch_thread_started_at = _time.monotonic() # fresh start provider.queue_prefetch("hello") # Should have short-circuited — no new dialectic call assert provider._manager.dialectic_query.call_count == 0 hold.set() - stale.join(timeout=2.0) + fresh.join(timeout=2.0) class TestSessionStartDialecticPrewarm: @@ -1321,6 +1326,147 @@ class TestSessionStartDialecticPrewarm: assert p._manager.dialectic_query.call_count == 1 +class TestDialecticLiveness: + """Liveness + observability: stale-thread recovery, stale-result discard, + empty-streak backoff, and the snapshot method used for diagnostics.""" + + @staticmethod + def _make_provider(cfg_extra=None): + from unittest.mock import patch, 
MagicMock + from plugins.memory.honcho.client import HonchoClientConfig + + defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid", timeout=2.0) + if cfg_extra: + defaults.update(cfg_extra) + cfg = HonchoClientConfig(**defaults) + provider = HonchoMemoryProvider() + mock_manager = MagicMock() + mock_manager.get_or_create.return_value = MagicMock(messages=[]) + mock_manager.get_prefetch_context.return_value = None + mock_manager.pop_context_result.return_value = None + mock_manager.dialectic_query.return_value = "" # default: silent + + with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ + patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("hermes_constants.get_hermes_home", return_value=MagicMock()): + provider.initialize(session_id="test-liveness") + _settle_prewarm(provider) + return provider + + def test_stale_thread_is_treated_as_dead(self): + """A thread older than timeout × multiplier no longer blocks new fires.""" + import threading as _threading + p = self._make_provider() + p._session_key = "test" + p._turn_count = 10 + p._last_dialectic_turn = 0 + p._manager.dialectic_query.return_value = "fresh synthesis" + + # Plant an alive thread with an old timestamp (stale) + hold = _threading.Event() + stuck = _threading.Thread(target=lambda: hold.wait(timeout=10.0), daemon=True) + stuck.start() + p._prefetch_thread = stuck + # timeout=2.0, multiplier=2.0, so anything older than 4s is stale + p._prefetch_thread_started_at = 0.0 # very old (1970 monotonic baseline) + + p.queue_prefetch("hello") + # New thread should have been spawned since stuck one is stale + assert p._prefetch_thread is not stuck, "stale thread must be recycled" + if p._prefetch_thread: + p._prefetch_thread.join(timeout=2.0) + assert p._manager.dialectic_query.call_count == 1 + hold.set() + 
stuck.join(timeout=2.0) + + def test_stale_pending_result_is_discarded_on_read(self): + """A pending dialectic result from many turns ago is discarded + instead of injected against a fresh conversational pivot.""" + p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}}) + p._session_key = "test" + p._base_context_cache = "base ctx" + with p._prefetch_lock: + p._prefetch_result = "ancient synthesis" + p._prefetch_result_fired_at = 1 + # cadence=2, multiplier=2 → stale after 4 turns since fire + p._turn_count = 10 + p._last_dialectic_turn = 1 # prevents sync first-turn path + + result = p.prefetch("what's new") + assert "ancient synthesis" not in result, "stale pending must be discarded" + # Cache slot cleared + with p._prefetch_lock: + assert p._prefetch_result == "" + assert p._prefetch_result_fired_at == -999 + + def test_fresh_pending_result_is_kept(self): + """A pending result within the staleness window is injected normally.""" + p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 3}}) + p._session_key = "test" + p._base_context_cache = "" + with p._prefetch_lock: + p._prefetch_result = "recent synthesis" + p._prefetch_result_fired_at = 8 + p._turn_count = 9 # 1 turn since fire, well within cadence × 2 = 6 + p._last_dialectic_turn = 8 + + result = p.prefetch("what's new") + assert "recent synthesis" in result + + def test_empty_streak_widens_effective_cadence(self): + """After N empty returns, the gate waits cadence + N turns.""" + p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}}) + p._dialectic_empty_streak = 3 + # cadence=1, streak=3 → effective = 4 + assert p._effective_cadence() == 4 + + def test_backoff_is_capped(self): + """Effective cadence is capped at cadence × _BACKOFF_MAX.""" + p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 2}}) + p._dialectic_empty_streak = 100 + # cadence=2, ceiling = 2 × 8 = 16 + assert p._effective_cadence() == 16 + + def test_success_resets_empty_streak(self): + """A 
non-empty result zeroes the streak so healthy operation restores + the base cadence immediately.""" + p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}}) + p._session_key = "test" + p._dialectic_empty_streak = 5 + p._turn_count = 10 + p._last_dialectic_turn = 0 + p._manager.dialectic_query.return_value = "real output" + + p.queue_prefetch("hello") + if p._prefetch_thread: + p._prefetch_thread.join(timeout=2.0) + assert p._dialectic_empty_streak == 0 + assert p._last_dialectic_turn == 10 + + def test_empty_result_increments_streak(self): + p = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 1}}) + p._session_key = "test" + p._turn_count = 5 + p._last_dialectic_turn = 0 + p._manager.dialectic_query.return_value = "" # empty + + p.queue_prefetch("hello") + if p._prefetch_thread: + p._prefetch_thread.join(timeout=2.0) + assert p._dialectic_empty_streak == 1 + assert p._last_dialectic_turn == 0 # cadence not advanced + + def test_liveness_snapshot_shape(self): + p = self._make_provider() + snap = p.liveness_snapshot() + for key in ( + "turn_count", "last_dialectic_turn", "pending_result_fired_at", + "empty_streak", "effective_cadence", "thread_alive", "thread_age_seconds", + ): + assert key in snap + + class TestDialecticLifecycleSmoke: """End-to-end smoke walking a multi-turn session through prewarm, turn 1 consume, trivial skip, cadence fire, empty-result retry, From ba7da73ca931bcdaf64de294c8c9551e0b3615b1 Mon Sep 17 00:00:00 2001 From: Erosika Date: Sat, 18 Apr 2026 13:17:44 -0400 Subject: [PATCH 007/455] test(honcho): drop two first-turn tests subsumed by prewarm + smoke coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - TestDialecticDepth::test_first_turn_runs_dialectic_synchronously: covered by TestSessionStartDialecticPrewarm::test_turn1_falls_back_to_sync_when_prewarm_missing (more realistic — exercises the empty-prewarm → sync-fallback path) - 
TestDialecticDepth::test_first_turn_dialectic_does_not_double_fire: covered by TestDialecticLifecycleSmoke (turn 1 flow) and TestDialecticCadenceAdvancesOnSuccess::test_empty_dialectic_result_does_not_advance_cadence Both predate the prewarm refactor and test paths that are now fallback behaviors already covered elsewhere. --- tests/honcho_plugin/test_session.py | 41 ----------------------------- 1 file changed, 41 deletions(-) diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index 37f54b5410..7b5ac7e3d0 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -1050,47 +1050,6 @@ class TestDialecticDepth: assert provider._manager.dialectic_query.call_count == 2 assert "Synthesis" in result - def test_first_turn_runs_dialectic_synchronously(self): - """First turn should fire the dialectic synchronously (cold start).""" - from unittest.mock import MagicMock, patch - provider = self._make_provider(cfg_extra={"dialectic_depth": 1}) - provider._manager = MagicMock() - provider._manager.dialectic_query.return_value = "cold start synthesis" - provider._manager.get_prefetch_context.return_value = None - provider._manager.pop_context_result.return_value = None - provider._session_key = "test" - provider._base_context_cache = "" # cold start - provider._last_dialectic_turn = -999 # never fired - - result = provider.prefetch("hello world") - assert "cold start synthesis" in result - assert provider._manager.dialectic_query.call_count == 1 - # After first-turn sync, _last_dialectic_turn should be updated - assert provider._last_dialectic_turn != -999 - - def test_first_turn_dialectic_does_not_double_fire(self): - """After first-turn sync dialectic, queue_prefetch should skip (cadence).""" - from unittest.mock import MagicMock - provider = self._make_provider(cfg_extra={"dialectic_depth": 1}) - provider._manager = MagicMock() - provider._manager.dialectic_query.return_value = "cold start synthesis" - 
provider._manager.get_prefetch_context.return_value = None - provider._manager.pop_context_result.return_value = None - provider._session_key = "test" - provider._base_context_cache = "" - provider._last_dialectic_turn = -999 - provider._turn_count = 0 - - # First turn fires sync dialectic - provider.prefetch("hello") - assert provider._manager.dialectic_query.call_count == 1 - - # Now queue_prefetch on same turn should skip — _last_dialectic_turn - # was just set to _turn_count by the sync path, so (0 - 0 = 0) < cadence. - provider._manager.dialectic_query.reset_mock() - provider.queue_prefetch("hello") - assert provider._manager.dialectic_query.call_count == 0 - def test_run_dialectic_depth_bails_early_on_strong_signal(self): """Depth 2 skips pass 1 when pass 0 returns strong signal.""" from unittest.mock import MagicMock From 5b6792f04d973f996fcb981ae570e674472c3d4d Mon Sep 17 00:00:00 2001 From: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:49:31 +0800 Subject: [PATCH 008/455] fix(honcho): scope gateway sessions by runtime user id --- plugins/memory/honcho/__init__.py | 9 +--- plugins/memory/honcho/session.py | 9 +++- tests/agent/test_memory_user_id.py | 65 +++++++++++++++++++++++++---- tests/honcho_plugin/test_session.py | 21 +++++----- 4 files changed, 75 insertions(+), 29 deletions(-) diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index 68fa868855..d104deb5d5 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -293,14 +293,6 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho not configured — plugin inactive") return - # Override peer_name with gateway user_id for per-user memory scoping. - # Only when no explicit peerName was configured — an explicit peerName - # means the user chose their identity; a raw user_id (e.g. Telegram - # chat ID) should not silently replace it. 
- _gw_user_id = kwargs.get("user_id") - if _gw_user_id and not cfg.peer_name: - cfg.peer_name = _gw_user_id - self._config = cfg # ----- B1: recall_mode from config ----- @@ -359,6 +351,7 @@ class HonchoMemoryProvider(MemoryProvider): honcho=client, config=cfg, context_tokens=cfg.context_tokens, + runtime_user_peer_name=kwargs.get("user_id") or None, ) # ----- B3: resolve_session_name ----- diff --git a/plugins/memory/honcho/session.py b/plugins/memory/honcho/session.py index 7344b517e4..79625b5cd5 100644 --- a/plugins/memory/honcho/session.py +++ b/plugins/memory/honcho/session.py @@ -78,6 +78,7 @@ class HonchoSessionManager: honcho: Honcho | None = None, context_tokens: int | None = None, config: Any | None = None, + runtime_user_peer_name: str | None = None, ): """ Initialize the session manager. @@ -87,10 +88,12 @@ class HonchoSessionManager: context_tokens: Max tokens for context() calls (None = Honcho default). config: HonchoClientConfig from global config (provides peer_name, ai_peer, write_frequency, observation, etc.). + runtime_user_peer_name: Gateway user identity for per-user memory scoping. """ self._honcho = honcho self._context_tokens = context_tokens self._config = config + self._runtime_user_peer_name = runtime_user_peer_name self._cache: dict[str, HonchoSession] = {} self._peers_cache: dict[str, Any] = {} self._sessions_cache: dict[str, Any] = {} @@ -274,8 +277,10 @@ class HonchoSessionManager: logger.debug("Local session cache hit: %s", key) return self._cache[key] - # Use peer names from global config when available - if self._config and self._config.peer_name: + # Gateway sessions should use the runtime user identity when available. 
+ if self._runtime_user_peer_name: + user_peer_id = self._sanitize_id(self._runtime_user_peer_name) + elif self._config and self._config.peer_name: user_peer_id = self._sanitize_id(self._config.peer_name) else: # Fallback: derive from session key diff --git a/tests/agent/test_memory_user_id.py b/tests/agent/test_memory_user_id.py index c1b82208d0..d33753bd2e 100644 --- a/tests/agent/test_memory_user_id.py +++ b/tests/agent/test_memory_user_id.py @@ -208,34 +208,81 @@ class TestMem0UserIdScoping: class TestHonchoUserIdScoping: - """Verify Honcho plugin uses gateway user_id for peer_name when provided.""" + """Verify Honcho plugin keeps runtime user scoping separate from config peer_name.""" - def test_gateway_user_id_overrides_peer_name(self): - """When user_id is in kwargs and no explicit peer_name, user_id should be used.""" + def test_gateway_user_id_is_passed_as_runtime_peer(self): + """Gateway user_id should scope Honcho sessions without mutating config peer_name.""" from plugins.memory.honcho import HonchoMemoryProvider provider = HonchoMemoryProvider() - # Create a mock config with NO explicit peer_name mock_cfg = MagicMock() mock_cfg.enabled = True mock_cfg.api_key = "test-key" mock_cfg.base_url = None - mock_cfg.peer_name = "" # No explicit peer_name — user_id should fill it - mock_cfg.recall_mode = "tools" # Use tools mode to defer session init + mock_cfg.peer_name = "static-user" + mock_cfg.recall_mode = "context" + mock_cfg.context_tokens = None + mock_cfg.raw = {} + mock_cfg.dialectic_depth = 1 + mock_cfg.dialectic_depth_levels = None + mock_cfg.init_on_session_start = False + mock_cfg.ai_peer = "hermes" + mock_cfg.resolve_session_name.return_value = "test-sess" + mock_cfg.session_strategy = "shared" with patch( "plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=mock_cfg, - ): + ), patch( + "plugins.memory.honcho.client.get_honcho_client", + return_value=MagicMock(), + ), patch( + 
"plugins.memory.honcho.session.HonchoSessionManager", + ) as mock_manager_cls: + mock_manager = MagicMock() + mock_manager.get_or_create.return_value = MagicMock(messages=[]) + mock_manager_cls.return_value = mock_manager provider.initialize( session_id="test-sess", user_id="discord_user_789", platform="discord", ) - # The config's peer_name should have been overridden with the user_id - assert mock_cfg.peer_name == "discord_user_789" + assert mock_cfg.peer_name == "static-user" + assert mock_manager_cls.call_args.kwargs["runtime_user_peer_name"] == "discord_user_789" + + def test_session_manager_prefers_runtime_user_id_over_config_peer_name(self): + """Session manager should isolate gateway users even when config peer_name is static.""" + from plugins.memory.honcho.session import HonchoSessionManager + + mock_cfg = MagicMock() + mock_cfg.peer_name = "static-user" + mock_cfg.ai_peer = "hermes" + mock_cfg.write_frequency = "sync" + mock_cfg.dialectic_reasoning_level = "low" + mock_cfg.dialectic_dynamic = True + mock_cfg.dialectic_max_chars = 600 + mock_cfg.observation_mode = "directional" + mock_cfg.user_observe_me = True + mock_cfg.user_observe_others = True + mock_cfg.ai_observe_me = True + mock_cfg.ai_observe_others = True + + manager = HonchoSessionManager( + honcho=MagicMock(), + config=mock_cfg, + runtime_user_peer_name="discord_user_789", + ) + + with patch.object(manager, "_get_or_create_peer", return_value=MagicMock()), patch.object( + manager, + "_get_or_create_honcho_session", + return_value=(MagicMock(), []), + ): + session = manager.get_or_create("discord:channel-1") + + assert session.user_peer_id == "discord_user_789" def test_no_user_id_preserves_config_peer_name(self): """Without user_id, the config peer_name should be preserved.""" diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index 7b5ac7e3d0..f2a6602929 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -568,15 
+568,15 @@ class TestToolsModeInitBehavior: with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \ patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \ - patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \ + patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager) as mock_manager_cls, \ patch("hermes_constants.get_hermes_home", return_value=MagicMock()): provider.initialize(session_id="test-session-001", **init_kwargs) - return provider, cfg + return provider, cfg, mock_manager_cls def test_tools_lazy_default(self): """tools + initOnSessionStart=false → session NOT initialized after initialize().""" - provider, _ = self._make_provider_with_config( + provider, _, _ = self._make_provider_with_config( recall_mode="tools", init_on_session_start=False, ) assert provider._session_initialized is False @@ -585,7 +585,7 @@ class TestToolsModeInitBehavior: def test_tools_eager_init(self): """tools + initOnSessionStart=true → session IS initialized after initialize().""" - provider, _ = self._make_provider_with_config( + provider, _, _ = self._make_provider_with_config( recall_mode="tools", init_on_session_start=True, ) assert provider._session_initialized is True @@ -593,33 +593,34 @@ class TestToolsModeInitBehavior: def test_tools_eager_prefetch_still_empty(self): """tools mode with eager init still returns empty from prefetch() (no auto-injection).""" - provider, _ = self._make_provider_with_config( + provider, _, _ = self._make_provider_with_config( recall_mode="tools", init_on_session_start=True, ) assert provider.prefetch("test query") == "" def test_tools_lazy_prefetch_empty(self): """tools mode with lazy init also returns empty from prefetch().""" - provider, _ = self._make_provider_with_config( + provider, _, _ = self._make_provider_with_config( recall_mode="tools", init_on_session_start=False, ) assert 
provider.prefetch("test query") == "" def test_explicit_peer_name_not_overridden_by_user_id(self): """Explicit peerName in config must not be replaced by gateway user_id.""" - _, cfg = self._make_provider_with_config( + _, cfg, _ = self._make_provider_with_config( recall_mode="tools", init_on_session_start=True, peer_name="Kathie", user_id="8439114563", ) assert cfg.peer_name == "Kathie" def test_user_id_used_when_no_peer_name(self): - """Gateway user_id is used as peer_name when no explicit peerName configured.""" - _, cfg = self._make_provider_with_config( + """Gateway user_id is passed separately from config peer_name.""" + _, cfg, mock_manager_cls = self._make_provider_with_config( recall_mode="tools", init_on_session_start=True, peer_name=None, user_id="8439114563", ) - assert cfg.peer_name == "8439114563" + assert cfg.peer_name is None + assert mock_manager_cls.call_args.kwargs["runtime_user_peer_name"] == "8439114563" class TestPerSessionMigrateGuard: From 21d5ef2f1742b4a8bd5fb69c07eda79cefdc57ab Mon Sep 17 00:00:00 2001 From: Erosika Date: Sat, 18 Apr 2026 13:49:50 -0400 Subject: [PATCH 009/455] feat(honcho): wizard cadence default 2, surface reasoning level, backwards-compat fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setup wizard now always writes dialecticCadence=2 on new configs and surfaces the reasoning level as an explicit step with all five options (minimal / low / medium / high / max), always writing dialecticReasoningLevel. Code keeps a backwards-compat fallback of 1 when dialecticCadence is unset so existing honcho.json configs that predate the setting keep firing every turn on upgrade. New setups via the wizard get 2 explicitly; docs show 2 as the default. 
Also scrubs editorial lines from code and docs ("max is reserved for explicit tool-path selection", "Unset → every turn; wizard pre-fills 2", and similar process-exposing phrasing) and adds an inline link to app.honcho.dev where the server-side observation sync is mentioned in honcho.md. Recommended cadence range updated to 1-5 across docs and wizard copy. --- .../autonomous-ai-agents/honcho/SKILL.md | 4 ++-- plugins/memory/honcho/__init__.py | 10 +++++---- plugins/memory/honcho/cli.py | 22 ++++++++++++++++++- plugins/memory/honcho/client.py | 3 +-- tests/honcho_plugin/test_session.py | 9 ++++---- website/docs/user-guide/features/honcho.md | 8 +++---- .../user-guide/features/memory-providers.md | 2 +- 7 files changed, 40 insertions(+), 18 deletions(-) diff --git a/optional-skills/autonomous-ai-agents/honcho/SKILL.md b/optional-skills/autonomous-ai-agents/honcho/SKILL.md index e79875aa07..1c099ca605 100644 --- a/optional-skills/autonomous-ai-agents/honcho/SKILL.md +++ b/optional-skills/autonomous-ai-agents/honcho/SKILL.md @@ -145,7 +145,7 @@ Controls **how often** dialectic and context calls happen. | Key | Default | Description | |-----|---------|-------------| | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic API calls. Unset → every turn; wizard pre-fills `2` | +| `dialecticCadence` | `2` | Min turns between dialectic API calls. Recommended 1–5 | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection | Higher cadence values fire the dialectic LLM less often. `dialecticCadence: 2` means the engine fires every other turn. Setting it to `1` fires every turn. @@ -370,7 +370,7 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso | `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). 
Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. | | `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` | | `contextCadence` | `1` | Min turns between context API calls | -| `dialecticCadence` | `1` (wizard: `2`) | Min turns between dialectic LLM calls | +| `dialecticCadence` | `2` | Min turns between dialectic LLM calls (recommended 1–5) | The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions. diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index d104deb5d5..6ca32c1dcb 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -207,7 +207,7 @@ class HonchoMemoryProvider(MemoryProvider): self._turn_count = 0 self._injection_frequency = "every-turn" # or "first-turn" self._context_cadence = 1 # minimum turns between context API calls - self._dialectic_cadence = 1 # minimum turns between dialectic API calls + self._dialectic_cadence = 1 # backwards-compat fallback; wizard writes 2 on new configs self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3) self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels self._reasoning_heuristic: bool = True # scale base level by query length @@ -304,6 +304,10 @@ class HonchoMemoryProvider(MemoryProvider): raw = cfg.raw or {} self._injection_frequency = raw.get("injectionFrequency", "every-turn") self._context_cadence = int(raw.get("contextCadence", 1)) + # Backwards-compat: unset dialecticCadence falls back to 1 + # (every turn) so existing honcho.json configs without the key + # behave as they did before. New setups via `hermes honcho setup` + # get dialecticCadence=2 written explicitly by the wizard. 
self._dialectic_cadence = int(raw.get("dialecticCadence", 1)) self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3)) self._dialectic_depth_levels = cfg.dialectic_depth_levels @@ -844,9 +848,7 @@ class HonchoMemoryProvider(MemoryProvider): def _apply_reasoning_heuristic(self, base: str, query: str) -> str: """Scale `base` up by query length, clamped at reasoning_level_cap. - Char-count heuristic: +1 at >=120 chars, +2 at >=400. Ceiling is - reasoning_level_cap (default 'high' — 'max' is reserved for - explicit tool-path selection). + Char-count heuristic: +1 at >=120 chars, +2 at >=400. """ if not self._reasoning_heuristic or not query: return base diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index eb21c48eaa..5c829a4c98 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -463,7 +463,8 @@ def cmd_setup(args) -> None: current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "2") print("\n Dialectic cadence:") print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).") - print(" 1 = every turn, 2 = every other turn (wizard default), 3+ = sparse.") + print(" 1 = every turn, 2 = every other turn, 3+ = sparser.") + print(" Recommended: 1-5.") new_dialectic = _prompt("Dialectic cadence", default=current_dialectic) try: val = int(new_dialectic) @@ -472,6 +473,25 @@ def cmd_setup(args) -> None: except (ValueError, TypeError): hermes_host["dialecticCadence"] = 2 + # --- 7c. 
Dialectic reasoning level --- + current_reasoning = ( + hermes_host.get("dialecticReasoningLevel") + or cfg.get("dialecticReasoningLevel") + or "low" + ) + print("\n Dialectic reasoning level:") + print(" Depth Honcho uses when synthesizing user context on auto-injected calls.") + print(" minimal -- quick factual lookups") + print(" low -- straightforward questions (default)") + print(" medium -- multi-aspect synthesis") + print(" high -- complex behavioral patterns") + print(" max -- thorough audit-level analysis") + new_reasoning = _prompt("Reasoning level", default=current_reasoning) + if new_reasoning in ("minimal", "low", "medium", "high", "max"): + hermes_host["dialecticReasoningLevel"] = new_reasoning + else: + hermes_host["dialecticReasoningLevel"] = "low" + # --- 8. Session strategy --- current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") print("\n Session strategy:") diff --git a/plugins/memory/honcho/client.py b/plugins/memory/honcho/client.py index 346c2b76e6..fef2e2d58f 100644 --- a/plugins/memory/honcho/client.py +++ b/plugins/memory/honcho/client.py @@ -254,8 +254,7 @@ class HonchoClientConfig: # When true, the auto-injected dialectic scales reasoning level up on # longer queries. See HonchoMemoryProvider for thresholds. reasoning_heuristic: bool = True - # Ceiling for the heuristic-selected reasoning level. "max" is reserved - # for explicit tool-path selection. + # Ceiling for the heuristic-selected reasoning level. 
reasoning_level_cap: str = "high" # Honcho API limits — configurable for self-hosted instances # Max chars per message sent via add_messages() (Honcho cloud: 25000) diff --git a/tests/honcho_plugin/test_session.py b/tests/honcho_plugin/test_session.py index f2a6602929..2542611831 100644 --- a/tests/honcho_plugin/test_session.py +++ b/tests/honcho_plugin/test_session.py @@ -865,8 +865,10 @@ class TestDialecticCadenceDefaults: _settle_prewarm(provider) return provider - def test_default_is_1(self): - """Default dialectic_cadence is 1 — fires every turn unless overridden.""" + def test_unset_falls_back_to_1(self): + """Unset dialecticCadence falls back to 1 (every turn) for backwards + compatibility with existing configs that predate the setting. The + setup wizard writes 2 explicitly on new configs.""" provider = self._make_provider() assert provider._dialectic_cadence == 1 @@ -1569,8 +1571,7 @@ class TestDialecticLifecycleSmoke: class TestReasoningHeuristic: """Char-count heuristic that scales the auto-injected reasoning level by - query length, clamped at reasoning_level_cap. 
'max' is reserved for - explicit tool-path selection.""" + query length, clamped at reasoning_level_cap.""" @staticmethod def _make_provider(cfg_extra=None): diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index bf4b5c6bc3..60e82b4b08 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -77,7 +77,7 @@ Cost and depth are controlled by three independent knobs: | Knob | Controls | Default | |------|----------|---------| | `contextCadence` | Turns between `context()` API calls (base layer refresh) | `1` | -| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `1` (code default) / `2` (setup wizard default) | +| `dialecticCadence` | Turns between `peer.chat()` LLM calls (dialectic layer refresh) | `2` (recommended 1–5) | | `dialecticDepth` | Number of `.chat()` passes per dialectic invocation (1–3) | `1` | These are orthogonal — you can have frequent context refreshes with infrequent dialectic, or deep multi-pass dialectic at low frequency. Example: `contextCadence: 1, dialecticCadence: 5, dialecticDepth: 2` refreshes base context every turn, runs dialectic every 5 turns, and each dialectic run makes 2 passes. @@ -100,7 +100,7 @@ On session init, Honcho fires a dialectic call in the background at the full con ### Query-Adaptive Reasoning Level -The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. `"max"` is reserved for explicit tool-path selection via `honcho_reasoning`. +The auto-injected dialectic scales `dialecticReasoningLevel` by query length: +1 level at ≥120 chars, +2 at ≥400, clamped at `reasoningLevelCap` (default `"high"`). Disable with `reasoningHeuristic: false` to pin every auto call to `dialecticReasoningLevel`. 
Available levels: `minimal`, `low`, `medium`, `high`, `max`. ## Configuration Options @@ -112,7 +112,7 @@ Honcho is configured in `~/.honcho/config.json` (global) or `$HERMES_HOME/honcho |-----|---------|-------------| | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Set to an integer (e.g. 1200) to cap. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Code default fires every turn when the key is unset; the setup wizard pre-fills `2`. In `tools` mode, irrelevant — model calls explicitly | +| `dialecticCadence` | `2` | Minimum turns between `peer.chat()` LLM calls (dialectic layer). Recommended 1–5. In `tools` mode, irrelevant — model calls explicitly | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped to 1–3 | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | @@ -183,7 +183,7 @@ Common patterns: | AI shouldn't re-model the user from its own replies | `"ai": {"observeMe": true, "observeOthers": false}` | | Strong persona the AI peer shouldn't update from self-observation | `"ai": {"observeMe": false, "observeOthers": true}` | -Server-side toggles set via the Honcho dashboard win over local defaults — Hermes syncs them back at session init. +Server-side toggles set via the [Honcho dashboard](https://app.honcho.dev) win over local defaults — Hermes syncs them back at session init. 
## Tools diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index b2469a13ee..d11c36657a 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -82,7 +82,7 @@ hermes memory setup # select "honcho" | `workspace` | host key | Shared workspace ID | | `contextTokens` | `null` (uncapped) | Token budget for auto-injected context per turn. Truncates at word boundaries | | `contextCadence` | `1` | Minimum turns between `context()` API calls (base layer refresh) | -| `dialecticCadence` | `1` (wizard sets `2`) | Minimum turns between `peer.chat()` LLM calls. Unset → every turn; wizard pre-fills `2`. Only applies to `hybrid`/`context` modes | +| `dialecticCadence` | `2` | Minimum turns between `peer.chat()` LLM calls. Recommended 1–5. Only applies to `hybrid`/`context` modes | | `dialecticDepth` | `1` | Number of `.chat()` passes per dialectic invocation. Clamped 1–3. Pass 0: cold/warm prompt, pass 1: self-audit, pass 2: reconciliation | | `dialecticDepthLevels` | `null` | Optional array of reasoning levels per pass, e.g. `["minimal", "low", "medium"]`. Overrides proportional defaults | | `dialecticReasoningLevel` | `'low'` | Base reasoning level: `minimal`, `low`, `medium`, `high`, `max` | From 0a8d48809f15157431f373e0add4f1a1be76af4b Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 11:01:26 +0530 Subject: [PATCH 010/455] chore: add LeonSGP43 numeric noreply email to AUTHOR_MAP The cherry-picked commit from #11434 uses the 154585401+ prefixed noreply format. Add it alongside the existing bare entry so the contributor audit passes. 
--- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 90c2a13d0b..b153140057 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -215,6 +215,7 @@ AUTHOR_MAP = { "ziliangpeng@users.noreply.github.com": "ziliangpeng", "centripetal-star@users.noreply.github.com": "centripetal-star", "LeonSGP43@users.noreply.github.com": "LeonSGP43", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", From 7b1a11b97179222c3fc9a721d614eae2d5f4c9f3 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 17:37:02 -0600 Subject: [PATCH 011/455] fix(memory): keep Honcho provider opt-in --- run_agent.py | 25 ------------- tests/run_agent/test_memory_provider_init.py | 39 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 25 deletions(-) create mode 100644 tests/run_agent/test_memory_provider_init.py diff --git a/run_agent.py b/run_agent.py index c87bd35152..0106488098 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1306,31 +1306,6 @@ class AIAgent: try: _mem_provider_name = mem_config.get("provider", "") if mem_config else "" - # Auto-migrate: if Honcho was actively configured (enabled + - # credentials) but memory.provider is not set, activate the - # honcho plugin automatically. Just having the config file - # is not enough — the user may have disabled Honcho or the - # file may be from a different tool. 
- if not _mem_provider_name: - try: - from plugins.memory.honcho.client import HonchoClientConfig as _HCC - _hcfg = _HCC.from_global_config() - if _hcfg.enabled and (_hcfg.api_key or _hcfg.base_url): - _mem_provider_name = "honcho" - # Persist so this only auto-migrates once - try: - from hermes_cli.config import load_config as _lc, save_config as _sc - _cfg = _lc() - _cfg.setdefault("memory", {})["provider"] = "honcho" - _sc(_cfg) - except Exception: - pass - if not self.quiet_mode: - print(" ✓ Auto-migrated Honcho to memory provider plugin.") - print(" Your config and data are preserved.\n") - except Exception: - pass - if _mem_provider_name: from agent.memory_manager import MemoryManager as _MemoryManager from plugins.memory import load_memory_provider as _load_mem diff --git a/tests/run_agent/test_memory_provider_init.py b/tests/run_agent/test_memory_provider_init.py new file mode 100644 index 0000000000..89431db85d --- /dev/null +++ b/tests/run_agent/test_memory_provider_init.py @@ -0,0 +1,39 @@ +"""Regression tests for memory provider selection during AIAgent init.""" + +from types import SimpleNamespace +from unittest.mock import patch + + +def test_blank_memory_provider_does_not_auto_enable_honcho(): + """Blank memory.provider should remain opt-out even if Honcho fallback looks configured.""" + cfg = {"memory": {"provider": ""}, "agent": {}} + honcho_cfg = SimpleNamespace(enabled=True, api_key="stale-key", base_url=None) + + with ( + patch("hermes_cli.config.load_config", return_value=cfg), + patch("hermes_cli.config.save_config") as save_config, + patch( + "plugins.memory.honcho.client.HonchoClientConfig.from_global_config", + return_value=honcho_cfg, + ) as from_global_config, + patch("plugins.memory.load_memory_provider") as load_memory_provider, + patch("agent.model_metadata.get_model_context_length", return_value=204_800), + patch("run_agent.get_tool_definitions", return_value=[]), + patch("run_agent.check_toolset_requirements", return_value={}), + 
patch("run_agent.OpenAI"), + ): + from run_agent import AIAgent + + agent = AIAgent( + api_key="test-key-1234567890", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=False, + ) + + assert agent._memory_manager is None + from_global_config.assert_not_called() + load_memory_provider.assert_not_called() + save_config.assert_not_called() + From d66414a844b780467b33ea9c861cf07c098ab73b Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 15:54:05 -0600 Subject: [PATCH 012/455] docs(custom-providers): use key_env in examples --- hermes_cli/config.py | 4 ++-- website/docs/integrations/providers.md | 8 ++++---- website/docs/user-guide/features/fallback-providers.md | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d53899b135..1dedc1710a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2861,7 +2861,7 @@ _FALLBACK_COMMENT = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. # # fallback_model: # provider: openrouter @@ -2905,7 +2905,7 @@ _COMMENTED_SECTIONS = """ # minimax (MINIMAX_API_KEY) — MiniMax # minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) # -# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# For custom OpenAI-compatible endpoints, add base_url and key_env. 
# # fallback_model: # provider: openrouter diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index 4f536ec749..9d32fc21ec 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -1052,11 +1052,11 @@ custom_providers: # api_key omitted — Hermes uses "no-key-required" for keyless local servers - name: work base_url: https://gpu-server.internal.corp/v1 - api_key: corp-api-key + key_env: CORP_API_KEY api_mode: chat_completions # optional, auto-detected from URL - name: anthropic-proxy base_url: https://proxy.example.com/anthropic - api_key: proxy-key + key_env: ANTHROPIC_PROXY_KEY api_mode: anthropic_messages # for Anthropic-compatible proxies ``` @@ -1154,7 +1154,7 @@ fallback_model: provider: openrouter # required model: anthropic/claude-sonnet-4 # required # base_url: http://localhost:8000/v1 # optional, for custom endpoints - # api_key_env: MY_CUSTOM_KEY # optional, env var name for custom endpoint API key + # key_env: MY_CUSTOM_KEY # optional, env var name for custom endpoint API key ``` When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session. @@ -1178,7 +1178,7 @@ smart_model_routing: provider: openrouter model: google/gemini-2.5-flash # base_url: http://localhost:8000/v1 # optional custom endpoint - # api_key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key + # key_env: MY_CUSTOM_KEY # optional env var name for that endpoint's API key ``` How it works: diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 2e9bcad99b..01e5524f6a 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -61,18 +61,18 @@ Both `provider` and `model` are **required**. 
If either is missing, the fallback | Arcee AI | `arcee` | `ARCEEAI_API_KEY` | | Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` | | Hugging Face | `huggingface` | `HF_TOKEN` | -| Custom endpoint | `custom` | `base_url` + `api_key_env` (see below) | +| Custom endpoint | `custom` | `base_url` + `key_env` (see below) | ### Custom Endpoint Fallback -For a custom OpenAI-compatible endpoint, add `base_url` and optionally `api_key_env`: +For a custom OpenAI-compatible endpoint, add `base_url` and optionally `key_env`: ```yaml fallback_model: provider: custom model: my-local-model base_url: http://localhost:8000/v1 - api_key_env: MY_LOCAL_KEY # env var name containing the API key + key_env: MY_LOCAL_KEY # env var name containing the API key ``` ### When Fallback Triggers @@ -128,7 +128,7 @@ fallback_model: provider: custom model: llama-3.1-70b base_url: http://localhost:8000/v1 - api_key_env: LOCAL_API_KEY + key_env: LOCAL_API_KEY ``` **Codex OAuth as fallback:** From ce410521b3d21d71f28e0dd041df872ffbd8344f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:03:10 -0700 Subject: [PATCH 013/455] feat(browser): add browser_cdp raw DevTools Protocol passthrough (#12369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agents can now send arbitrary CDP commands to the browser. The tool is gated on a reachable CDP endpoint at session start — it only appears in the toolset when BROWSER_CDP_URL is set (from '/browser connect') or 'browser.cdp_url' is configured in config.yaml. Backends that don't currently expose CDP to the Python side (Camofox, default local agent-browser, cloud providers whose per-session cdp_url is not yet surfaced) do not see the tool at all. Tool schema description links to the CDP method reference at https://chromedevtools.github.io/devtools-protocol/ so the agent can web_extract specific method docs on demand. Stateless per call. 
Browser-level methods (Target.*, Browser.*, Storage.*) omit target_id. Page-level methods attach to the target with flatten=true and dispatch the method on the returned sessionId. Clean errors when the endpoint becomes unreachable mid-session or the URL isn't a WebSocket. Tests: 19 unit (mock CDP server + gate checks) + E2E against real headless Chrome (Target.getTargets, Browser.getVersion, Runtime.evaluate with target_id, Page.navigate + re-eval, bogus method, bogus target_id, missing endpoint) + E2E of the check_fn gate (tool hidden without CDP URL, visible with it, hidden again after unset). --- tests/tools/test_browser_cdp_tool.py | 408 ++++++++++++++++++ tools/browser_cdp_tool.py | 416 +++++++++++++++++++ toolsets.py | 8 +- website/docs/reference/tools-reference.md | 5 +- website/docs/reference/toolsets-reference.md | 2 +- website/docs/user-guide/features/browser.md | 30 ++ 6 files changed, 862 insertions(+), 7 deletions(-) create mode 100644 tests/tools/test_browser_cdp_tool.py create mode 100644 tools/browser_cdp_tool.py diff --git a/tests/tools/test_browser_cdp_tool.py b/tests/tools/test_browser_cdp_tool.py new file mode 100644 index 0000000000..e7e187ceb0 --- /dev/null +++ b/tests/tools/test_browser_cdp_tool.py @@ -0,0 +1,408 @@ +"""Unit tests for browser_cdp tool. + +Uses a tiny in-process ``websockets`` server to simulate a CDP endpoint — +gives real protocol coverage (connect, send, recv, close) without needing +a real Chrome instance. +""" +from __future__ import annotations + +import asyncio +import json +import threading +import time +from typing import Any, Dict, List + +import pytest + +import websockets +from websockets.asyncio.server import serve + +from tools import browser_cdp_tool + + +# --------------------------------------------------------------------------- +# In-process CDP mock server +# --------------------------------------------------------------------------- + + +class _CDPServer: + """A tiny CDP-over-WebSocket mock. 
+ + Each client gets a greeting-free stream. The server replies to each + inbound request whose ``id`` is set, using the registered handler for + that method. If no handler is registered, returns a generic CDP error. + """ + + def __init__(self) -> None: + self._handlers: Dict[str, Any] = {} + self._responses: List[Dict[str, Any]] = [] + self._loop: asyncio.AbstractEventLoop | None = None + self._server: Any = None + self._thread: threading.Thread | None = None + self._host = "127.0.0.1" + self._port = 0 + + # --- handler registration -------------------------------------------- + + def on(self, method: str, handler): + """Register a handler ``handler(params, session_id) -> dict or Exception``.""" + self._handlers[method] = handler + + # --- lifecycle ------------------------------------------------------- + + def start(self) -> str: + ready = threading.Event() + + def _run() -> None: + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) + + async def _handler(ws): + try: + async for raw in ws: + msg = json.loads(raw) + call_id = msg.get("id") + method = msg.get("method", "") + params = msg.get("params", {}) or {} + session_id = msg.get("sessionId") + self._responses.append(msg) + + fn = self._handlers.get(method) + if fn is None: + reply = { + "id": call_id, + "error": { + "code": -32601, + "message": f"No handler for {method}", + }, + } + else: + try: + result = fn(params, session_id) + if isinstance(result, Exception): + raise result + reply = {"id": call_id, "result": result} + except Exception as exc: + reply = { + "id": call_id, + "error": {"code": -1, "message": str(exc)}, + } + if session_id: + reply["sessionId"] = session_id + await ws.send(json.dumps(reply)) + except websockets.exceptions.ConnectionClosed: + pass + + async def _serve() -> None: + self._server = await serve(_handler, self._host, 0) + sock = next(iter(self._server.sockets)) + self._port = sock.getsockname()[1] + ready.set() + await self._server.wait_closed() + + try: 
+ self._loop.run_until_complete(_serve()) + finally: + self._loop.close() + + self._thread = threading.Thread(target=_run, daemon=True) + self._thread.start() + if not ready.wait(timeout=5.0): + raise RuntimeError("CDP mock server failed to start within 5s") + return f"ws://{self._host}:{self._port}/devtools/browser/mock" + + def stop(self) -> None: + if self._loop and self._server: + def _close() -> None: + self._server.close() + + self._loop.call_soon_threadsafe(_close) + if self._thread: + self._thread.join(timeout=3.0) + + def received(self) -> List[Dict[str, Any]]: + return list(self._responses) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def cdp_server(monkeypatch): + """Start a CDP mock and route tool resolution to it.""" + server = _CDPServer() + ws_url = server.start() + monkeypatch.setattr( + browser_cdp_tool, "_resolve_cdp_endpoint", lambda: ws_url + ) + try: + yield server + finally: + server.stop() + + +# --------------------------------------------------------------------------- +# Input validation +# --------------------------------------------------------------------------- + + +def test_missing_method_returns_error(): + result = json.loads(browser_cdp_tool.browser_cdp(method="")) + assert "error" in result + assert "method" in result["error"].lower() + assert result.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL + + +def test_non_string_method_returns_error(): + result = json.loads(browser_cdp_tool.browser_cdp(method=123)) # type: ignore[arg-type] + assert "error" in result + assert "method" in result["error"].lower() + + +def test_non_dict_params_returns_error(monkeypatch): + monkeypatch.setattr( + browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "ws://localhost:9999" + ) + result = json.loads( + browser_cdp_tool.browser_cdp(method="Target.getTargets", params="not-a-dict") # type: 
ignore[arg-type] + ) + assert "error" in result + assert "object" in result["error"].lower() or "dict" in result["error"].lower() + + +# --------------------------------------------------------------------------- +# Endpoint resolution +# --------------------------------------------------------------------------- + + +def test_no_endpoint_returns_helpful_error(monkeypatch): + monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "") + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert "error" in result + assert "/browser connect" in result["error"] + assert result.get("cdp_docs") == browser_cdp_tool.CDP_DOCS_URL + + +def test_non_ws_endpoint_returns_error(monkeypatch): + monkeypatch.setattr( + browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "http://localhost:9222" + ) + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert "error" in result + assert "WebSocket" in result["error"] + + +def test_websockets_missing_returns_error(monkeypatch): + monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False) + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert "error" in result + assert "websockets" in result["error"].lower() + + +# --------------------------------------------------------------------------- +# Happy-path: browser-level call +# --------------------------------------------------------------------------- + + +def test_browser_level_success(cdp_server): + cdp_server.on( + "Target.getTargets", + lambda params, sid: { + "targetInfos": [ + {"targetId": "A", "type": "page", "title": "Tab 1", "url": "about:blank"}, + {"targetId": "B", "type": "page", "title": "Tab 2", "url": "https://a.test"}, + ] + }, + ) + result = json.loads(browser_cdp_tool.browser_cdp(method="Target.getTargets")) + assert result["success"] is True + assert result["method"] == "Target.getTargets" + assert "target_id" not in result + assert 
len(result["result"]["targetInfos"]) == 2 + # Verify the server actually received exactly one call (no extra traffic) + calls = cdp_server.received() + assert len(calls) == 1 + assert calls[0]["method"] == "Target.getTargets" + assert "sessionId" not in calls[0] + + +def test_empty_params_sends_empty_object(cdp_server): + cdp_server.on("Browser.getVersion", lambda params, sid: {"product": "Mock/1.0"}) + json.loads(browser_cdp_tool.browser_cdp(method="Browser.getVersion")) + assert cdp_server.received()[0]["params"] == {} + + +# --------------------------------------------------------------------------- +# Happy-path: target-attached call +# --------------------------------------------------------------------------- + + +def test_target_attach_then_call(cdp_server): + cdp_server.on( + "Target.attachToTarget", + lambda params, sid: {"sessionId": f"sess-{params['targetId']}"}, + ) + cdp_server.on( + "Runtime.evaluate", + lambda params, sid: { + "result": {"type": "string", "value": f"evaluated[{sid}]"}, + }, + ) + result = json.loads( + browser_cdp_tool.browser_cdp( + method="Runtime.evaluate", + params={"expression": "document.title", "returnByValue": True}, + target_id="tab-A", + ) + ) + assert result["success"] is True + assert result["target_id"] == "tab-A" + assert result["result"]["result"]["value"] == "evaluated[sess-tab-A]" + + calls = cdp_server.received() + # First call: attach + assert calls[0]["method"] == "Target.attachToTarget" + assert calls[0]["params"] == {"targetId": "tab-A", "flatten": True} + # Second call: dispatched method on the session + assert calls[1]["method"] == "Runtime.evaluate" + assert calls[1]["sessionId"] == "sess-tab-A" + + +# --------------------------------------------------------------------------- +# CDP error responses +# --------------------------------------------------------------------------- + + +def test_cdp_method_error_returns_tool_error(cdp_server): + # No handler registered -> server returns CDP error + result = 
json.loads( + browser_cdp_tool.browser_cdp(method="NonExistent.method") + ) + assert "error" in result + assert "CDP error" in result["error"] + assert result.get("method") == "NonExistent.method" + + +def test_attach_failure_returns_tool_error(cdp_server): + # Target.attachToTarget has no handler -> server errors on attach + result = json.loads( + browser_cdp_tool.browser_cdp( + method="Runtime.evaluate", + params={"expression": "1+1"}, + target_id="missing", + ) + ) + assert "error" in result + assert "Target.attachToTarget" in result["error"] + + +# --------------------------------------------------------------------------- +# Timeouts +# --------------------------------------------------------------------------- + + +def test_timeout_when_server_never_replies(cdp_server): + # Register a handler that blocks forever + def slow(params, sid): + time.sleep(10) + return {} + + cdp_server.on("Page.slowMethod", slow) + result = json.loads( + browser_cdp_tool.browser_cdp( + method="Page.slowMethod", timeout=0.5 + ) + ) + assert "error" in result + assert "tim" in result["error"].lower() + + +# --------------------------------------------------------------------------- +# Timeout clamping +# --------------------------------------------------------------------------- + + +def test_timeout_clamped_above_max(cdp_server): + cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"}) + # timeout=10_000 should be clamped to 300 but still succeed + result = json.loads( + browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout=10_000) + ) + assert result["success"] is True + + +def test_invalid_timeout_falls_back_to_default(cdp_server): + cdp_server.on("Browser.getVersion", lambda p, s: {"product": "ok"}) + result = json.loads( + browser_cdp_tool.browser_cdp(method="Browser.getVersion", timeout="nope") # type: ignore[arg-type] + ) + assert result["success"] is True + + +# --------------------------------------------------------------------------- +# Registry 
integration +# --------------------------------------------------------------------------- + + +def test_registered_in_browser_toolset(): + from tools.registry import registry + + entry = registry.get_entry("browser_cdp") + assert entry is not None + assert entry.toolset == "browser" + assert entry.schema["name"] == "browser_cdp" + assert entry.schema["parameters"]["required"] == ["method"] + assert "Chrome DevTools Protocol" in entry.schema["description"] + assert browser_cdp_tool.CDP_DOCS_URL in entry.schema["description"] + + +def test_dispatch_through_registry(cdp_server): + from tools.registry import registry + + cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": []}) + raw = registry.dispatch( + "browser_cdp", {"method": "Target.getTargets"}, task_id="t1" + ) + result = json.loads(raw) + assert result["success"] is True + assert result["method"] == "Target.getTargets" + + +# --------------------------------------------------------------------------- +# check_fn gating +# --------------------------------------------------------------------------- + + +def test_check_fn_false_when_no_cdp_url(monkeypatch): + """Gate closes when no CDP URL is set — even if the browser toolset is + otherwise configured.""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "check_browser_requirements", lambda: True) + monkeypatch.setattr(bt, "_get_cdp_override", lambda: "") + assert browser_cdp_tool._browser_cdp_check() is False + + +def test_check_fn_true_when_cdp_url_set(monkeypatch): + """Gate opens as soon as a CDP URL is resolvable.""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "check_browser_requirements", lambda: True) + monkeypatch.setattr( + bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x" + ) + assert browser_cdp_tool._browser_cdp_check() is True + + +def test_check_fn_false_when_browser_requirements_fail(monkeypatch): + """Even with a CDP URL, gate closes if the overall browser toolset is + unavailable 
(e.g. agent-browser not installed).""" + import tools.browser_tool as bt + + monkeypatch.setattr(bt, "check_browser_requirements", lambda: False) + monkeypatch.setattr( + bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x" + ) + assert browser_cdp_tool._browser_cdp_check() is False diff --git a/tools/browser_cdp_tool.py b/tools/browser_cdp_tool.py new file mode 100644 index 0000000000..7817b9c35a --- /dev/null +++ b/tools/browser_cdp_tool.py @@ -0,0 +1,416 @@ +#!/usr/bin/env python3 +""" +Raw Chrome DevTools Protocol (CDP) passthrough tool. + +Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to +the browser's DevTools WebSocket endpoint. Works when a CDP URL is +configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or +``browser.cdp_url`` in ``config.yaml`` — or when a CDP-backed cloud provider +session is active. + +This is the escape hatch for browser operations not covered by the main +browser tool surface (``browser_navigate``, ``browser_click``, +``browser_console``, etc.) — handling native dialogs, iframe-scoped +evaluation, cookie/network control, low-level tab management, etc. + +Method reference: https://chromedevtools.github.io/devtools-protocol/ +""" +from __future__ import annotations + +import asyncio +import json +import logging +import os +from typing import Any, Dict, Optional + +from tools.registry import registry, tool_error + +logger = logging.getLogger(__name__) + +CDP_DOCS_URL = "https://chromedevtools.github.io/devtools-protocol/" + +# ``websockets`` is a transitive dependency of hermes-agent (via fal_client +# and firecrawl-py) and is already imported by gateway/platforms/feishu.py. +# Wrap the import so a clean error surfaces if the package is ever absent. 
+try: + import websockets + from websockets.exceptions import WebSocketException + + _WS_AVAILABLE = True +except ImportError: + websockets = None # type: ignore[assignment] + WebSocketException = Exception # type: ignore[assignment,misc] + _WS_AVAILABLE = False + + +# --------------------------------------------------------------------------- +# Async-from-sync bridge (matches the pattern in homeassistant_tool.py) +# --------------------------------------------------------------------------- + + +def _run_async(coro): + """Run an async coroutine from a sync handler, safe inside or outside a loop.""" + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop and loop.is_running(): + import concurrent.futures + + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + future = pool.submit(asyncio.run, coro) + return future.result() + return asyncio.run(coro) + + +# --------------------------------------------------------------------------- +# Endpoint resolution +# --------------------------------------------------------------------------- + + +def _resolve_cdp_endpoint() -> str: + """Return the normalized CDP WebSocket URL, or empty string if unavailable. + + Delegates to ``tools.browser_tool._get_cdp_override`` so precedence stays + consistent with the rest of the browser tool surface: + + 1. ``BROWSER_CDP_URL`` env var (live override from ``/browser connect``) + 2. 
``browser.cdp_url`` in ``config.yaml`` + """ + try: + from tools.browser_tool import _get_cdp_override # type: ignore[import-not-found] + + return (_get_cdp_override() or "").strip() + except Exception as exc: # pragma: no cover — defensive + logger.debug("browser_cdp: failed to resolve CDP endpoint: %s", exc) + return "" + + +# --------------------------------------------------------------------------- +# Core CDP call +# --------------------------------------------------------------------------- + + +async def _cdp_call( + ws_url: str, + method: str, + params: Dict[str, Any], + target_id: Optional[str], + timeout: float, +) -> Dict[str, Any]: + """Make a single CDP call, optionally attaching to a target first. + + When ``target_id`` is provided, we call ``Target.attachToTarget`` with + ``flatten=True`` to multiplex a page-level session over the same + browser-level WebSocket, then send ``method`` with that ``sessionId``. + When ``target_id`` is None, ``method`` is sent at browser level — which + works for ``Target.*``, ``Browser.*``, ``Storage.*`` and a few other + globally-scoped domains. + """ + assert websockets is not None # guarded by _WS_AVAILABLE at call-site + + async with websockets.connect( + ws_url, + max_size=None, # CDP responses (e.g. 
DOM.getDocument) can be large + open_timeout=timeout, + close_timeout=5, + ping_interval=None, # CDP server doesn't expect pings + ) as ws: + next_id = 1 + session_id: Optional[str] = None + + # --- Step 1: attach to target if requested --- + if target_id: + attach_id = next_id + next_id += 1 + await ws.send( + json.dumps( + { + "id": attach_id, + "method": "Target.attachToTarget", + "params": {"targetId": target_id, "flatten": True}, + } + ) + ) + deadline = asyncio.get_event_loop().time() + timeout + while True: + remaining = deadline - asyncio.get_event_loop().time() + if remaining <= 0: + raise TimeoutError( + f"Timed out attaching to target {target_id}" + ) + raw = await asyncio.wait_for(ws.recv(), timeout=remaining) + msg = json.loads(raw) + if msg.get("id") == attach_id: + if "error" in msg: + raise RuntimeError( + f"Target.attachToTarget failed: {msg['error']}" + ) + session_id = msg.get("result", {}).get("sessionId") + if not session_id: + raise RuntimeError( + "Target.attachToTarget did not return a sessionId" + ) + break + # Ignore events (messages without "id") while waiting + + # --- Step 2: dispatch the real method --- + call_id = next_id + next_id += 1 + req: Dict[str, Any] = { + "id": call_id, + "method": method, + "params": params or {}, + } + if session_id: + req["sessionId"] = session_id + await ws.send(json.dumps(req)) + + deadline = asyncio.get_event_loop().time() + timeout + while True: + remaining = deadline - asyncio.get_event_loop().time() + if remaining <= 0: + raise TimeoutError( + f"Timed out waiting for response to {method}" + ) + raw = await asyncio.wait_for(ws.recv(), timeout=remaining) + msg = json.loads(raw) + if msg.get("id") == call_id: + if "error" in msg: + raise RuntimeError(f"CDP error: {msg['error']}") + return msg.get("result", {}) + # Ignore events / out-of-order responses + + +# --------------------------------------------------------------------------- +# Public tool function +# 
--------------------------------------------------------------------------- + + +def browser_cdp( + method: str, + params: Optional[Dict[str, Any]] = None, + target_id: Optional[str] = None, + timeout: float = 30.0, + task_id: Optional[str] = None, +) -> str: + """Send a raw CDP command. See ``CDP_DOCS_URL`` for method documentation. + + Args: + method: CDP method name, e.g. ``"Target.getTargets"``. + params: Method-specific parameters; defaults to ``{}``. + target_id: Optional target/tab ID for page-level methods. When set, + we first attach to the target (``flatten=True``) and send + ``method`` with the resulting ``sessionId``. + timeout: Seconds to wait for the call to complete. + task_id: Unused (tool is stateless) — accepted for uniformity with + other browser tools. + + Returns: + JSON string ``{"success": True, "method": ..., "result": {...}}`` on + success, or ``{"error": "..."}`` on failure. + """ + del task_id # unused — stateless + + if not method or not isinstance(method, str): + return tool_error( + "'method' is required (e.g. 'Target.getTargets')", + cdp_docs=CDP_DOCS_URL, + ) + + if not _WS_AVAILABLE: + return tool_error( + "The 'websockets' Python package is required but not installed. " + "Install it with: pip install websockets" + ) + + endpoint = _resolve_cdp_endpoint() + if not endpoint: + return tool_error( + "No CDP endpoint is available. Run '/browser connect' to attach " + "to a running Chrome, or set 'browser.cdp_url' in config.yaml. " + "The Camofox backend is REST-only and does not expose CDP.", + cdp_docs=CDP_DOCS_URL, + ) + + if not endpoint.startswith(("ws://", "wss://")): + return tool_error( + f"CDP endpoint is not a WebSocket URL: {endpoint!r}. " + "Expected ws://... or wss://... — the /browser connect " + "resolver should have rewritten this. Check that Chrome is " + "actually listening on the debug port." 
+ ) + + call_params: Dict[str, Any] = params or {} + if not isinstance(call_params, dict): + return tool_error( + f"'params' must be an object/dict, got {type(call_params).__name__}" + ) + + try: + safe_timeout = float(timeout) if timeout else 30.0 + except (TypeError, ValueError): + safe_timeout = 30.0 + safe_timeout = max(1.0, min(safe_timeout, 300.0)) + + try: + result = _run_async( + _cdp_call(endpoint, method, call_params, target_id, safe_timeout) + ) + except asyncio.TimeoutError as exc: + return tool_error( + f"CDP call timed out after {safe_timeout}s: {exc}", + method=method, + ) + except TimeoutError as exc: + return tool_error(str(exc), method=method) + except RuntimeError as exc: + return tool_error(str(exc), method=method) + except WebSocketException as exc: + return tool_error( + f"WebSocket error talking to CDP at {endpoint}: {exc}. The " + "browser may have disconnected — try '/browser connect' again.", + method=method, + ) + except Exception as exc: # pragma: no cover — unexpected + logger.exception("browser_cdp unexpected error") + return tool_error( + f"Unexpected error: {type(exc).__name__}: {exc}", + method=method, + ) + + payload: Dict[str, Any] = { + "success": True, + "method": method, + "result": result, + } + if target_id: + payload["target_id"] = target_id + return json.dumps(payload, ensure_ascii=False) + + +# --------------------------------------------------------------------------- +# Registry +# --------------------------------------------------------------------------- + + +BROWSER_CDP_SCHEMA: Dict[str, Any] = { + "name": "browser_cdp", + "description": ( + "Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for " + "browser operations not covered by browser_navigate, browser_click, " + "browser_console, etc.\n\n" + "**Requires a reachable CDP endpoint.** Available when the user has " + "run '/browser connect' to attach to a running Chrome, or when " + "'browser.cdp_url' is set in config.yaml. 
Not currently wired up for " + "cloud backends (Browserbase, Browser Use, Firecrawl) — those expose " + "CDP per session but live-session routing is a follow-up. Camofox is " + "REST-only and will never support CDP. If the tool is in your toolset " + "at all, a CDP endpoint is already reachable.\n\n" + f"**CDP method reference:** {CDP_DOCS_URL} — use web_extract on a " + "method's URL (e.g. '/tot/Page/#method-handleJavaScriptDialog') " + "to look up parameters and return shape.\n\n" + "**Common patterns:**\n" + "- List tabs: method='Target.getTargets', params={}\n" + "- Handle a native JS dialog: method='Page.handleJavaScriptDialog', " + "params={'accept': true, 'promptText': ''}, target_id=\n" + "- Get all cookies: method='Network.getAllCookies', params={}\n" + "- Eval in a specific tab: method='Runtime.evaluate', " + "params={'expression': '...', 'returnByValue': true}, " + "target_id=\n" + "- Set viewport for a tab: method='Emulation.setDeviceMetricsOverride', " + "params={'width': 1280, 'height': 720, 'deviceScaleFactor': 1, " + "'mobile': false}, target_id=\n\n" + "**Usage rules:**\n" + "- Browser-level methods (Target.*, Browser.*, Storage.*): omit " + "target_id.\n" + "- Page-level methods (Page.*, Runtime.*, DOM.*, Emulation.*, " + "Network.* scoped to a tab): pass target_id from Target.getTargets.\n" + "- Each call is independent — sessions and event subscriptions do " + "not persist between calls. For stateful workflows, prefer the " + "dedicated browser tools." + ), + "parameters": { + "type": "object", + "properties": { + "method": { + "type": "string", + "description": ( + "CDP method name, e.g. 'Target.getTargets', " + "'Runtime.evaluate', 'Page.handleJavaScriptDialog'." + ), + }, + "params": { + "type": "object", + "description": ( + "Method-specific parameters as a JSON object. Omit or " + "pass {} for methods that take no parameters." + ), + "additionalProperties": True, + }, + "target_id": { + "type": "string", + "description": ( + "Optional. 
Target/tab ID from Target.getTargets result " + "(each entry's 'targetId'). Required for page-level " + "methods; must be omitted for browser-level methods." + ), + }, + "timeout": { + "type": "number", + "description": ( + "Timeout in seconds (default 30, max 300)." + ), + "default": 30, + }, + }, + "required": ["method"], + }, +} + + +def _browser_cdp_check() -> bool: + """Availability check for browser_cdp. + + The tool is only offered when the Python side can actually reach a CDP + endpoint right now — meaning a static URL is set via ``/browser connect`` + (``BROWSER_CDP_URL``) or ``browser.cdp_url`` in ``config.yaml``. + + Backends that do *not* currently expose CDP to us — Camofox (REST-only), + the default local agent-browser mode (Playwright hides its internal CDP + port), and cloud providers whose per-session ``cdp_url`` is not yet + surfaced — are gated out so the model doesn't see a tool that would + reliably fail. Cloud-provider CDP routing is a follow-up. + + Kept in a thin wrapper so the registration statement stays at module top + level (the tool-discovery AST scan only picks up top-level + ``registry.register(...)`` calls). 
+ """ + try: + from tools.browser_tool import ( # type: ignore[import-not-found] + _get_cdp_override, + check_browser_requirements, + ) + except ImportError as exc: # pragma: no cover — defensive + logger.debug("browser_cdp check: browser_tool import failed: %s", exc) + return False + if not check_browser_requirements(): + return False + return bool(_get_cdp_override()) + + +registry.register( + name="browser_cdp", + toolset="browser", + schema=BROWSER_CDP_SCHEMA, + handler=lambda args, **kw: browser_cdp( + method=args.get("method", ""), + params=args.get("params"), + target_id=args.get("target_id"), + timeout=args.get("timeout", 30.0), + task_id=kw.get("task_id"), + ), + check_fn=_browser_cdp_check, + emoji="🧪", +) diff --git a/toolsets.py b/toolsets.py index 6ac8d0782d..d9f353e1f2 100644 --- a/toolsets.py +++ b/toolsets.py @@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [ "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", + "browser_vision", "browser_console", "browser_cdp", # Text-to-speech "text_to_speech", # Planning & memory @@ -115,7 +115,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", "web_search" + "browser_vision", "browser_console", "browser_cdp", "web_search" ], "includes": [] }, @@ -249,7 +249,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", "browser_get_images", - "browser_vision", "browser_console", + "browser_vision", "browser_console", "browser_cdp", "todo", "memory", "session_search", "execute_code", "delegate_task", @@ -274,7 +274,7 @@ TOOLSETS = { "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", "browser_press", 
"browser_get_images", - "browser_vision", "browser_console", + "browser_vision", "browser_console", "browser_cdp", # Planning & memory "todo", "memory", # Session history search diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 40d44627ec..c255c8f6a4 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool # Built-in Tools Reference -This page documents all 52 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. +This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. -**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets. +**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets. :::tip MCP Tools In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration. @@ -19,6 +19,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server | Tool | Description | Requires environment | |------|-------------|----------------------| | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — | +| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. 
Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — | | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — | | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — | | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — | diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index 7593a3fdcf..bb911004e1 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -52,7 +52,7 @@ Or in-session: | Toolset | Tools | Purpose | |---------|-------|---------| -| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. | +| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. 
| | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. | | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. | | `cronjob` | `cronjob` | Schedule and manage recurring tasks. | diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index 5b2462d2e3..d6624bf7d1 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -327,6 +327,36 @@ Check the browser console for any JavaScript errors Use `clear=True` to clear the console after reading, so subsequent calls only show new messages. +### `browser_cdp` + +Raw Chrome DevTools Protocol passthrough — the escape hatch for browser operations not covered by the other tools. Use for native dialog handling, iframe-scoped evaluation, cookie/network control, or any CDP verb the agent needs. + +**Only available when a CDP endpoint is reachable at session start** — meaning `/browser connect` has attached to a running Chrome, or `browser.cdp_url` is set in `config.yaml`. The default local agent-browser mode, Camofox, and cloud providers (Browserbase, Browser Use, Firecrawl) do not currently expose CDP to this tool — cloud providers have per-session CDP URLs but live-session routing is a follow-up. + +**CDP method reference:** https://chromedevtools.github.io/devtools-protocol/ — the agent can `web_extract` a specific method's page to look up parameters and return shape. 
+ +Common patterns: + +``` +# List tabs (browser-level, no target_id) +browser_cdp(method="Target.getTargets") + +# Handle a native JS dialog on a tab +browser_cdp(method="Page.handleJavaScriptDialog", + params={"accept": true, "promptText": ""}, + target_id="") + +# Evaluate JS in a specific tab +browser_cdp(method="Runtime.evaluate", + params={"expression": "document.title", "returnByValue": true}, + target_id="") + +# Get all cookies +browser_cdp(method="Network.getAllCookies") +``` + +Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`, and `Network.*` calls scoped to a tab) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls. + ## Practical Examples ### Filling Out a Web Form From dca439fe9213f86c83fdd43f70bf6e1750902b54 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:03:58 -0700 Subject: [PATCH 014/455] fix(tui): scope session.interrupt pending-prompt release to the calling session (#12441) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit session.interrupt on session A was blast-resolving pending clarify/sudo/secret prompts on ALL sessions sharing the same tui_gateway process. Other sessions' agent threads unblocked with empty-string answers as if the user had cancelled — silent cross-session corruption. Root cause: _pending and _answers were globals keyed by random rid with no record of the owning session. _clear_pending() iterated every entry, so the session.interrupt handler had no way to limit the release to its own sid. Fix: - tui_gateway/server.py: _pending now maps rid to (sid, Event) tuples. _clear_pending takes an optional sid argument and filters by owner_sid when provided. session.interrupt passes the calling sid so unrelated sessions are untouched. _clear_pending(None) remains the shutdown path for completeness.
- _block and _respond updated to pack/unpack the new tuple format. Tests (tests/test_tui_gateway_server.py): 4 new cases. - test_interrupt_only_clears_own_session_pending: two sessions with pending prompts, interrupting one must not release the other. - test_interrupt_clears_multiple_own_pending: same-sid multi-prompt release works. - test_clear_pending_without_sid_clears_all: shutdown path preserved. - test_respond_unpacks_sid_tuple_correctly: _respond handles the tuple format. Also updated tests/tui_gateway/test_protocol.py to use the new tuple format for test_block_and_respond and test_clear_pending. Live E2E against the live Python environment confirmed cross-session isolation: interrupting sid_a released its own pending prompt without touching sid_b's. All 78 related tests pass. --- tests/test_tui_gateway_server.py | 116 +++++++++++++++++++++++++++++ tests/tui_gateway/test_protocol.py | 7 +- tui_gateway/server.py | 32 +++++--- 3 files changed, 144 insertions(+), 11 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 8831efb896..07a68ac9e9 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -712,3 +712,119 @@ def test_prompt_submit_history_version_match_persists_normally(monkeypatch): finally: server._sessions.pop("sid", None) + +# --------------------------------------------------------------------------- +# session.interrupt must only cancel pending prompts owned by the calling +# session — it must not blast-resolve clarify/sudo/secret prompts on +# unrelated sessions sharing the same tui_gateway process. Without +# session scoping the other sessions' prompts silently resolve to empty +# strings, unblocking their agent threads as if the user cancelled. 
+# --------------------------------------------------------------------------- + + +def test_interrupt_only_clears_own_session_pending(): + """session.interrupt on session A must NOT release pending prompts + that belong to session B.""" + import types + + session_a = _session() + session_a["agent"] = types.SimpleNamespace(interrupt=lambda: None) + session_b = _session() + session_b["agent"] = types.SimpleNamespace(interrupt=lambda: None) + server._sessions["sid_a"] = session_a + server._sessions["sid_b"] = session_b + + try: + # Simulate pending prompts on both sessions (what _block creates + # while a clarify/sudo/secret request is outstanding). + ev_a = threading.Event() + ev_b = threading.Event() + server._pending["rid-a"] = ("sid_a", ev_a) + server._pending["rid-b"] = ("sid_b", ev_b) + server._answers.clear() + + # Interrupt session A. + resp = server.handle_request( + {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid_a"}} + ) + assert resp.get("result"), f"got error: {resp.get('error')}" + + # Session A's pending must be released to empty. + assert ev_a.is_set(), "sid_a pending Event should be set after interrupt" + assert server._answers.get("rid-a") == "" + + # Session B's pending MUST remain untouched — no cross-session blast. 
+ assert not ev_b.is_set(), ( + "CRITICAL: session.interrupt on sid_a released a pending prompt " + "belonging to sid_b — other sessions' clarify/sudo/secret " + "prompts are being silently cancelled" + ) + assert "rid-b" not in server._answers + finally: + server._sessions.pop("sid_a", None) + server._sessions.pop("sid_b", None) + server._pending.pop("rid-a", None) + server._pending.pop("rid-b", None) + server._answers.pop("rid-a", None) + server._answers.pop("rid-b", None) + + +def test_interrupt_clears_multiple_own_pending(): + """When a single session has multiple pending prompts (uncommon but + possible via nested tool calls), interrupt must release all of them.""" + import types + + sess = _session() + sess["agent"] = types.SimpleNamespace(interrupt=lambda: None) + server._sessions["sid"] = sess + + try: + ev1, ev2 = threading.Event(), threading.Event() + server._pending["r1"] = ("sid", ev1) + server._pending["r2"] = ("sid", ev2) + + resp = server.handle_request( + {"id": "1", "method": "session.interrupt", "params": {"session_id": "sid"}} + ) + assert resp.get("result") + assert ev1.is_set() and ev2.is_set() + assert server._answers.get("r1") == "" and server._answers.get("r2") == "" + finally: + server._sessions.pop("sid", None) + for key in ("r1", "r2"): + server._pending.pop(key, None) + server._answers.pop(key, None) + + +def test_clear_pending_without_sid_clears_all(): + """_clear_pending(None) is the shutdown path — must still release + every pending prompt regardless of owning session.""" + ev1, ev2, ev3 = threading.Event(), threading.Event(), threading.Event() + server._pending["a"] = ("sid_x", ev1) + server._pending["b"] = ("sid_y", ev2) + server._pending["c"] = ("sid_z", ev3) + try: + server._clear_pending(None) + assert ev1.is_set() and ev2.is_set() and ev3.is_set() + finally: + for key in ("a", "b", "c"): + server._pending.pop(key, None) + server._answers.pop(key, None) + + +def test_respond_unpacks_sid_tuple_correctly(): + """After the (sid, 
Event) tuple change, _respond must still work.""" + ev = threading.Event() + server._pending["rid-x"] = ("sid_x", ev) + try: + resp = server.handle_request( + {"id": "1", "method": "clarify.respond", + "params": {"request_id": "rid-x", "answer": "the answer"}} + ) + assert resp.get("result") + assert ev.is_set() + assert server._answers.get("rid-x") == "the answer" + finally: + server._pending.pop("rid-x", None) + server._answers.pop("rid-x", None) + diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index eb51cccfec..926dfadf17 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -120,7 +120,9 @@ def test_block_and_respond(capture): rid = next(iter(server._pending)) server._answers[rid] = "my_answer" - server._pending[rid].set() + # _pending values are (sid, Event) tuples — unpack to set the Event + _, ev = server._pending[rid] + ev.set() threading.Event().wait(0.1) assert result[0] == "my_answer" @@ -128,7 +130,8 @@ def test_block_and_respond(capture): def test_clear_pending(server): ev = threading.Event() - server._pending["r1"] = ev + # _pending values are (sid, Event) tuples + server._pending["r1"] = ("sid-x", ev) server._clear_pending() assert ev.is_set() diff --git a/tui_gateway/server.py b/tui_gateway/server.py index c58c65763e..921f868a3c 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -27,7 +27,7 @@ from tui_gateway.render import make_stream_renderer, render_diff, render_message _sessions: dict[str, dict] = {} _methods: dict[str, callable] = {} -_pending: dict[str, threading.Event] = {} +_pending: dict[str, tuple[str, threading.Event]] = {} _answers: dict[str, str] = {} _db = None _stdout_lock = threading.Lock() @@ -296,7 +296,7 @@ def _enable_gateway_prompts() -> None: def _block(event: str, sid: str, payload: dict, timeout: int = 300) -> str: rid = uuid.uuid4().hex[:8] ev = threading.Event() - _pending[rid] = ev + _pending[rid] = (sid, ev) payload["request_id"] = 
rid _emit(event, sid, payload) ev.wait(timeout=timeout) @@ -304,10 +304,19 @@ def _block(event: str, sid: str, payload: dict, timeout: int = 300) -> str: return _answers.pop(rid, "") -def _clear_pending(): - for rid, ev in list(_pending.items()): - _answers[rid] = "" - ev.set() +def _clear_pending(sid: str | None = None) -> None: + """Release pending prompts with an empty answer. + + When *sid* is provided, only prompts owned by that session are + released — critical for session.interrupt, which must not + collaterally cancel clarify/sudo/secret prompts on unrelated + sessions sharing the same tui_gateway process. When *sid* is + None, every pending prompt is released (used during shutdown). + """ + for rid, (owner_sid, ev) in list(_pending.items()): + if sid is None or owner_sid == sid: + _answers[rid] = "" + ev.set() # ── Agent factory ──────────────────────────────────────────────────── @@ -1345,7 +1354,11 @@ def _(rid, params: dict) -> dict: return err if hasattr(session["agent"], "interrupt"): session["agent"].interrupt() - _clear_pending() + # Scope the pending-prompt release to THIS session. A global + # _clear_pending() would collaterally cancel clarify/sudo/secret + # prompts on unrelated sessions sharing the same tui_gateway + # process, silently resolving them to empty strings. 
+ _clear_pending(params.get("session_id", "")) try: from tools.approval import resolve_gateway_approval resolve_gateway_approval(session["session_key"], "deny", resolve_all=True) @@ -1684,9 +1697,10 @@ def _(rid, params: dict) -> dict: def _respond(rid, params, key): r = params.get("request_id", "") - ev = _pending.get(r) - if not ev: + entry = _pending.get(r) + if not entry: return _err(rid, 4009, f"no pending {key} request") + _, ev = entry _answers[r] = params.get(key, "") ev.set() return _ok(rid, {"status": "ok"}) From 7c10761dd2a2c4e79485f0817011eef6e52dae59 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:09:38 -0700 Subject: [PATCH 015/455] fix(discord): shield text-batch flush from follow-up cancel (#12444) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Discord splits a long message at 2000 chars, _enqueue_text_event buffers each chunk and schedules a _flush_text_batch task with a short delay. If another chunk lands while the prior flush task is already inside handle_message, _enqueue_text_event calls prior_task.cancel() — and without asyncio.shield, CancelledError propagates from the flush task into handle_message → the agent's streaming request, aborting the response the user was waiting on. Reproducer: user sends a 3000-char prompt (split by Discord into 2 messages). Chunk 1 lands, flush delay starts, chunk 2 lands during the brief window when chunk 1's flush has already committed to handle_message. Agent's current streaming response is cancelled with CancelledError, user sees a truncated or missing reply. Fix (gateway/platforms/discord.py): - Wrap the handle_message call in asyncio.shield so the inner dispatch is protected from the outer task's cancel. - Add an except asyncio.CancelledError clause so the outer task still exits cleanly when cancel lands during the sleep window (before the pop) — semantics for that path are unchanged. 
The new flush task spawned by the follow-up chunk still handles its own batch via the normal pending-message / active-session machinery in base.py, so follow-ups are not lost. Tests: tests/gateway/test_text_batching.py — test_shield_protects_handle_message_from_cancel. Tracks a distinct first_handle_cancelled event so the assertion fails cleanly when the shield is missing (verified by stashing the fix and re-running). Live E2E on the live-loaded DiscordAdapter: first_handle_cancelled: False (shield worked) first_handle_completed: True (handle_message ran to completion) --- gateway/platforms/discord.py | 15 ++++++- tests/gateway/test_text_batching.py | 64 +++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index b1585637ff..1ec831b66d 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -3265,7 +3265,20 @@ class DiscordAdapter(BasePlatformAdapter): "[Discord] Flushing text batch %s (%d chars)", key, len(event.text or ""), ) - await self.handle_message(event) + # Shield the downstream dispatch so that a subsequent chunk + # arriving while handle_message is mid-flight cannot cancel + # the running agent turn. _enqueue_text_event always cancels + # the prior flush task when a new chunk lands; without this + # shield, CancelledError would propagate from our task down + # into handle_message → the agent's streaming request, + # aborting the response the user was waiting on. The new + # chunk is handled by the fresh flush task regardless. + await asyncio.shield(self.handle_message(event)) + except asyncio.CancelledError: + # Only reached if cancel landed before the pop — the shielded + # handle_message is unaffected either way. Let the task exit + # cleanly so the finally block cleans up. 
+ pass finally: if self._pending_text_batch_tasks.get(key) is current_task: self._pending_text_batch_tasks.pop(key, None) diff --git a/tests/gateway/test_text_batching.py b/tests/gateway/test_text_batching.py index 56bc602ef0..1ad89ffd05 100644 --- a/tests/gateway/test_text_batching.py +++ b/tests/gateway/test_text_batching.py @@ -148,6 +148,70 @@ class TestDiscordTextBatching: await asyncio.sleep(0.25) adapter.handle_message.assert_called_once() + @pytest.mark.asyncio + async def test_shield_protects_handle_message_from_cancel(self): + """Regression guard: a follow-up chunk arriving while + handle_message is mid-flight must NOT cancel the running + dispatch. _enqueue_text_event fires prior_task.cancel() on + every new chunk; without asyncio.shield around handle_message + the cancel propagates into the agent's streaming request and + aborts the response. + """ + adapter = _make_discord_adapter() + + handle_started = asyncio.Event() + release_handle = asyncio.Event() + first_handle_cancelled = asyncio.Event() + first_handle_completed = asyncio.Event() + call_count = [0] + + async def slow_handle(event): + call_count[0] += 1 + # Only the first call (batch 1) is the one we're protecting. + if call_count[0] == 1: + handle_started.set() + try: + await release_handle.wait() + first_handle_completed.set() + except asyncio.CancelledError: + first_handle_cancelled.set() + raise + # Second call (batch 2) returns immediately — not the subject + # of this test. + + adapter.handle_message = slow_handle + + # Prime batch 1 and wait for it to land inside handle_message. + adapter._enqueue_text_event(_make_event("batch 1", Platform.DISCORD)) + await asyncio.wait_for(handle_started.wait(), timeout=1.0) + + # A new chunk arrives — _enqueue_text_event fires + # prior_task.cancel() on batch 1's flush task, which is + # currently awaiting inside handle_message. + adapter._enqueue_text_event(_make_event("batch 2 follow-up", Platform.DISCORD)) + + # Let the cancel propagate. 
+ await asyncio.sleep(0.05) + + # CRITICAL ASSERTION: batch 1's handle_message must NOT have + # been cancelled. Without asyncio.shield this assertion fails + # because CancelledError propagates from the flush task's + # `await self.handle_message(event)` into slow_handle. + assert not first_handle_cancelled.is_set(), ( + "handle_message for batch 1 was cancelled by a follow-up " + "chunk — asyncio.shield is missing or broken" + ) + + # Release batch 1's handle_message and let it complete. + release_handle.set() + await asyncio.wait_for(first_handle_completed.wait(), timeout=1.0) + assert first_handle_completed.is_set() + + # Cleanup + for task in list(adapter._pending_text_batch_tasks.values()): + task.cancel() + await asyncio.sleep(0.01) + # ===================================================================== # Matrix text batching From 3ade655999afe1f88e00fd3219bc141988e8c0d3 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:07:37 -0600 Subject: [PATCH 016/455] fix(whatsapp): log allowlist drops in bridge --- scripts/whatsapp-bridge/bridge.js | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 70cf8e95d9..9af85caeea 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -26,7 +26,7 @@ import path from 'path'; import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync } from 'fs'; import { randomBytes } from 'crypto'; import qrcode from 'qrcode-terminal'; -import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js'; +import { expandWhatsAppIdentifiers, matchesAllowedUser, parseAllowedUsers } from './allowlist.js'; // Parse CLI args const args = process.argv.slice(2); @@ -229,6 +229,15 @@ async function startSocket() { // Check allowlist for messages from others (resolve LID ↔ phone aliases) if (!msg.key.fromMe && !matchesAllowedUser(senderId, 
ALLOWED_USERS, SESSION_DIR)) { + try { + console.log(JSON.stringify({ + event: 'ignored', + reason: 'allowlist_mismatch', + chatId, + senderId, + senderAliases: Array.from(expandWhatsAppIdentifiers(senderId, SESSION_DIR)), + })); + } catch {} continue; } From 361675018f436a95c0353a2755d7cfdd3b0ac44a Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 16:44:40 -0600 Subject: [PATCH 017/455] fix(setup): stop hardcoding max-iterations copy --- hermes_cli/setup.py | 4 ++- tests/hermes_cli/test_setup_agent_settings.py | 29 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tests/hermes_cli/test_setup_agent_settings.py diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 8f6b633c6a..f969bd4bd1 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1460,7 +1460,9 @@ def setup_agent_settings(config: dict): ) print_info("Maximum tool-calling iterations per conversation.") print_info("Higher = more complex tasks, but costs more tokens.") - print_info("Default is 90, which works for most tasks. Use 150+ for open exploration.") + print_info( + f"Press Enter to keep {current_max}. Use 90 for most tasks or 150+ for open exploration." 
+ ) max_iter_str = prompt("Max iterations", current_max) try: diff --git a/tests/hermes_cli/test_setup_agent_settings.py b/tests/hermes_cli/test_setup_agent_settings.py new file mode 100644 index 0000000000..868be7508c --- /dev/null +++ b/tests/hermes_cli/test_setup_agent_settings.py @@ -0,0 +1,29 @@ +"""Tests for agent-settings copy in the interactive setup wizard.""" + +from hermes_cli.setup import setup_agent_settings + + +def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys): + """The helper text should match the value shown in the prompt.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + config = { + "agent": {"max_turns": 90}, + "display": {"tool_progress": "all"}, + "compression": {"threshold": 0.50}, + "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4}, + } + + prompt_answers = iter(["60", "all", "0.5"]) + + monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "") + monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers)) + monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4) + monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None) + monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None) + + setup_agent_settings(config) + + out = capsys.readouterr().out + assert "Press Enter to keep 60." 
in out + assert "Default is 90" not in out From cd59af17cc095da08b223a9378c4a1621f7c0393 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 14:28:50 -0600 Subject: [PATCH 018/455] fix(agent): silence quiet_mode in python library use --- run_agent.py | 17 +++++++------ tests/run_agent/test_run_agent.py | 40 +++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/run_agent.py b/run_agent.py index 0106488098..050faeea4f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1916,13 +1916,16 @@ class AIAgent: def _should_emit_quiet_tool_messages(self) -> bool: """Return True when quiet-mode tool summaries should print directly. - When the caller provides ``tool_progress_callback`` (for example the CLI - TUI or a gateway progress renderer), that callback owns progress display. - Emitting quiet-mode summary lines here duplicates progress and leaks tool - previews into flows that are expected to stay silent, such as - ``hermes chat -q``. + Quiet mode is used by both the interactive CLI and embedded/library + callers. The CLI may still want compact progress hints when no callback + owns rendering. Embedded/library callers, on the other hand, expect + quiet mode to be truly silent. """ - return self.quiet_mode and not self.tool_progress_callback + return ( + self.quiet_mode + and not self.tool_progress_callback + and getattr(self, "platform", "") == "cli" + ) def _emit_status(self, message: str) -> None: """Emit a lifecycle status message to both CLI and gateway channels. 
@@ -11184,7 +11187,7 @@ class AIAgent: self._last_content_tools_all_housekeeping = _all_housekeeping if _all_housekeeping and self._has_stream_consumers(): self._mute_post_response = True - elif self.quiet_mode: + elif self.quiet_mode and getattr(self, "platform", "") == "cli": clean = self._strip_think_blocks(turn_content).strip() if clean: relayed = False diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index d30445cf45..bedb7bbf48 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1285,6 +1285,7 @@ class TestExecuteToolCalls: tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1") mock_msg = _mock_assistant_msg(content="", tool_calls=[tc]) messages = [] + agent.platform = "cli" agent.tool_progress_callback = None with patch("run_agent.handle_function_call", return_value="search result"), \ @@ -1296,6 +1297,21 @@ class TestExecuteToolCalls: assert len(messages) == 1 assert messages[0]["role"] == "tool" + def test_quiet_tool_output_suppressed_without_progress_callback_for_non_cli_agent(self, agent): + tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1") + mock_msg = _mock_assistant_msg(content="", tool_calls=[tc]) + messages = [] + agent.platform = None + agent.tool_progress_callback = None + + with patch("run_agent.handle_function_call", return_value="search result"), \ + patch.object(agent, "_safe_print") as mock_print: + agent._execute_tool_calls(mock_msg, messages, "task-1") + + mock_print.assert_not_called() + assert len(messages) == 1 + assert messages[0]["role"] == "tool" + def test_vprint_suppressed_in_parseable_quiet_mode(self, agent): agent.suppress_status_output = True @@ -1876,6 +1892,30 @@ class TestRunConversation: assert all("message_count" in c and "messages" not in c for c in pre_request_calls) assert all("usage" in c and "response" not in c for c in post_request_calls) + def 
test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent): + self._setup_agent(agent) + agent.platform = None + tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1") + resp1 = _mock_response( + content="I'll search for that.", + finish_reason="tool_calls", + tool_calls=[tc], + ) + resp2 = _mock_response(content="Done searching", finish_reason="stop") + agent.client.chat.completions.create.side_effect = [resp1, resp2] + + with ( + patch("run_agent.handle_function_call", return_value="search result"), + patch.object(agent, "_safe_print") as mock_print, + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("search something") + + assert result["final_response"] == "Done searching" + mock_print.assert_not_called() + def test_interrupt_breaks_loop(self, agent): self._setup_agent(agent) From 175cf7e6bb4e629a5f121c8e6f3a56a5903105b7 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 12:57:17 +0530 Subject: [PATCH 019/455] fix: tighten quiet-mode salvage follow-ups Follow-up for the helix4u easy-fix salvage batch: - route remaining context-engine quiet-mode output through _should_emit_quiet_tool_messages() so non-CLI/library callers stay silent consistently - drop the extra senderAliases computation from WhatsApp allowlist-drop logging and remove the now-unused import This keeps the batch scoped to the intended fixes while avoiding leaked quiet-mode output and unnecessary duplicate work in the bridge. 
--- run_agent.py | 15 ++++----------- scripts/whatsapp-bridge/bridge.js | 3 +-- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/run_agent.py b/run_agent.py index 050faeea4f..8e1fbfed19 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8325,7 +8325,7 @@ class AIAgent: elif self._context_engine_tool_names and function_name in self._context_engine_tool_names: # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.) spinner = None - if self.quiet_mode and not self.tool_progress_callback: + if self._should_emit_quiet_tool_messages(): face = random.choice(KawaiiSpinner.get_waiting_faces()) emoji = _get_tool_emoji(function_name) preview = _build_tool_preview(function_name, function_args) or function_name @@ -8343,7 +8343,7 @@ class AIAgent: cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result) if spinner: spinner.stop(cute_msg) - elif self.quiet_mode: + elif self._should_emit_quiet_tool_messages(): self._vprint(f" {cute_msg}") elif self._memory_manager and self._memory_manager.has_tool(function_name): # Memory provider tools (hindsight_retain, honcho_search, etc.) @@ -11187,17 +11187,10 @@ class AIAgent: self._last_content_tools_all_housekeeping = _all_housekeeping if _all_housekeeping and self._has_stream_consumers(): self._mute_post_response = True - elif self.quiet_mode and getattr(self, "platform", "") == "cli": + elif self._should_emit_quiet_tool_messages(): clean = self._strip_think_blocks(turn_content).strip() if clean: - relayed = False - if ( - self.tool_progress_callback - and getattr(self, "platform", "") == "tui" - ): - relayed = True - if not relayed: - self._vprint(f" ┊ 💬 {clean}") + self._vprint(f" ┊ 💬 {clean}") # Pop thinking-only prefill message(s) before appending # (tool-call path — same rationale as the final-response path). 
diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 9af85caeea..401651c8a8 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -26,7 +26,7 @@ import path from 'path'; import { mkdirSync, readFileSync, writeFileSync, existsSync, readdirSync } from 'fs'; import { randomBytes } from 'crypto'; import qrcode from 'qrcode-terminal'; -import { expandWhatsAppIdentifiers, matchesAllowedUser, parseAllowedUsers } from './allowlist.js'; +import { matchesAllowedUser, parseAllowedUsers } from './allowlist.js'; // Parse CLI args const args = process.argv.slice(2); @@ -235,7 +235,6 @@ async function startSocket() { reason: 'allowlist_mismatch', chatId, senderId, - senderAliases: Array.from(expandWhatsAppIdentifiers(senderId, SESSION_DIR)), })); } catch {} continue; From c94d26c69bf57539f8a53936854b1a8925d70262 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:51:14 -0600 Subject: [PATCH 020/455] fix(cli): sanitize interactive command output --- cli.py | 111 ++++++++++++++++--------------- tests/cli/test_gquota_command.py | 21 ++++++ tests/cli/test_quick_commands.py | 14 ++++ 3 files changed, 94 insertions(+), 52 deletions(-) create mode 100644 tests/cli/test_gquota_command.py diff --git a/cli.py b/cli.py index c9ce95e9f2..e814e35b12 100644 --- a/cli.py +++ b/cli.py @@ -1810,7 +1810,7 @@ class HermesCLI: mcp_names = set((CLI_CONFIG.get("mcp_servers") or {}).keys()) invalid = [t for t in toolsets if not validate_toolset(t) and t not in mcp_names] if invalid: - self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") + self._console_print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") # Filesystem checkpoints: CLI flag > config cp_cfg = CLI_CONFIG.get("checkpoints", {}) @@ -2261,7 +2261,7 @@ class HermesCLI: normalized_model = normalize_model_for_provider(current_model, resolved_provider) if normalized_model 
and normalized_model != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]" ) self.model = normalized_model @@ -2277,7 +2277,7 @@ class HermesCLI: canonical = normalize_copilot_model_id(current_model, api_key=self.api_key) if canonical and canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Normalized Copilot model '{current_model}' to '{canonical}'.[/]" ) self.model = canonical @@ -2299,7 +2299,7 @@ class HermesCLI: canonical = normalize_opencode_model_id(resolved_provider, current_model) if canonical and canonical != current_model: if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]" ) self.model = canonical @@ -2321,7 +2321,7 @@ class HermesCLI: if "/" in current_model: slug = current_model.split("/", 1)[1] if not self._model_is_default: - self.console.print( + self._console_print( f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; " f"using '{slug}' for OpenAI Codex.[/]" ) @@ -3070,7 +3070,7 @@ class HermesCLI: use_compact = self.compact or term_width < 80 if use_compact: - self.console.print(_build_compact_banner()) + self._console_print(_build_compact_banner()) self._show_status() else: # Get tools for display @@ -3095,25 +3095,25 @@ class HermesCLI: # Warn about very low context lengths (common with local servers) if ctx_len and ctx_len <= 8192: - self.console.print() - self.console.print( + self._console_print() + self._console_print( f"[yellow]⚠️ Context length is only {ctx_len:,} tokens — " f"this is likely too low for agent use with tools.[/]" ) - self.console.print( + self._console_print( "[dim] Hermes needs 16k–32k minimum. 
Tool schemas + system prompt alone use ~4k–8k.[/]" ) base_url = getattr(self, "base_url", "") or "" if "11434" in base_url or "ollama" in base_url.lower(): - self.console.print( + self._console_print( "[dim] Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]" ) elif "1234" in base_url: - self.console.print( + self._console_print( "[dim] LM Studio fix: Set context length in model settings → reload model[/]" ) else: - self.console.print( + self._console_print( "[dim] Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]" ) @@ -3122,20 +3122,20 @@ class HermesCLI: model_name = getattr(self, "model", "") or "" if is_nous_hermes_non_agentic(model_name): - self.console.print() - self.console.print( + self._console_print() + self._console_print( "[bold yellow]⚠ Nous Research Hermes 3 & 4 models are NOT agentic and are not " "designed for use with Hermes Agent.[/]" ) - self.console.print( + self._console_print( "[dim] They lack tool-calling capabilities required for agent workflows. " "Consider using an agentic model (Claude, GPT, Gemini, DeepSeek, etc.).[/]" ) - self.console.print( + self._console_print( "[dim] Switch with: /model sonnet or /model gpt5[/]" ) - self.console.print() + self._console_print() def _preload_resumed_session(self) -> bool: """Load a resumed session's history from the DB early (before first chat). 
@@ -3153,10 +3153,10 @@ class HermesCLI: session_meta = self._session_db.get_session(self.session_id) if not session_meta: - self.console.print( + self._console_print( f"[bold red]Session not found: {self.session_id}[/]" ) - self.console.print( + self._console_print( "[dim]Use a session ID from a previous CLI run " "(hermes sessions list).[/]" ) @@ -3171,7 +3171,7 @@ class HermesCLI: if session_meta.get("title"): title_part = f' "{session_meta["title"]}"' accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]" f"{title_part} " f"({msg_count} user message{'s' if msg_count != 1 else ''}, " @@ -3179,7 +3179,7 @@ class HermesCLI: ) else: accent_color = _accent_hex() - self.console.print( + self._console_print( f"[{accent_color}]Session {self.session_id} found but has no " f"messages. Starting fresh.[/]" ) @@ -3354,7 +3354,7 @@ class HermesCLI: padding=(0, 1), style=_history_text_c, ) - self.console.print(panel) + self._console_print(panel) def _try_attach_clipboard_image(self) -> bool: """Check clipboard for an image and attach it if found. 
@@ -3790,14 +3790,14 @@ class HermesCLI: api_key_missing = [u for u in unavailable if u["missing_vars"]] if api_key_missing: - self.console.print() - self.console.print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") + self._console_print() + self._console_print("[yellow]⚠️ Some tools disabled (missing API keys):[/]") for item in api_key_missing: tools_str = ", ".join(item["tools"][:2]) # Show first 2 tools if len(item["tools"]) > 2: tools_str += f", +{len(item['tools'])-2} more" - self.console.print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") - self.console.print("[dim] Run 'hermes setup' to configure[/]") + self._console_print(f" [dim]• {item['name']}[/] [dim italic]({', '.join(item['missing_vars'])})[/]") + self._console_print("[dim] Run 'hermes setup' to configure[/]") except Exception: pass # Don't crash on import errors @@ -3835,7 +3835,7 @@ class HermesCLI: if self._provider_source: provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]" - self.console.print( + self._console_print( f" {api_indicator} [{accent_color}]{model_short}[/] " f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]" f"{toolsets_info}{provider_info}" @@ -3892,7 +3892,7 @@ class HermesCLI: f"Tokens: {total_tokens:,}", f"Agent Running: {'Yes' if is_running else 'No'}", ]) - self.console.print("\n".join(lines), highlight=False, markup=False) + self._console_print("\n".join(lines), highlight=False, markup=False) def _fast_command_available(self) -> bool: try: @@ -5090,8 +5090,15 @@ class HermesCLI: print(" To change model or provider, use: hermes model") + def _output_console(self): + """Use prompt_toolkit-safe Rich rendering once the TUI is live.""" + if getattr(self, "_app", None): + return ChatConsole() + return self.console - + def _console_print(self, *args, **kwargs): + """Print through the active command-safe console.""" + self._output_console().print(*args, **kwargs) @staticmethod def 
_resolve_personality_prompt(value) -> str: @@ -5111,14 +5118,14 @@ class HermesCLI: from agent.google_oauth import get_valid_access_token, GoogleOAuthError, load_credentials from agent.google_code_assist import retrieve_user_quota, CodeAssistError except ImportError as exc: - self.console.print(f" [red]Gemini modules unavailable: {exc}[/]") + self._console_print(f" [red]Gemini modules unavailable: {exc}[/]") return try: access_token = get_valid_access_token() except GoogleOAuthError as exc: - self.console.print(f" [yellow]{exc}[/]") - self.console.print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") + self._console_print(f" [yellow]{exc}[/]") + self._console_print(" Run [bold]/model[/] and pick 'Google Gemini (OAuth)' to sign in.") return creds = load_credentials() @@ -5127,18 +5134,18 @@ class HermesCLI: try: buckets = retrieve_user_quota(access_token, project_id=project_id) except CodeAssistError as exc: - self.console.print(f" [red]Quota lookup failed:[/] {exc}") + self._console_print(f" [red]Quota lookup failed:[/] {exc}") return if not buckets: - self.console.print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") + self._console_print(" [dim]No quota buckets reported (account may be on legacy/unmetered tier).[/]") return # Sort for stable display, group by model buckets.sort(key=lambda b: (b.model_id, b.token_type)) - self.console.print() - self.console.print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") - self.console.print() + self._console_print() + self._console_print(f" [bold]Gemini Code Assist quota[/] (project: {project_id or '(auto / free-tier)'})") + self._console_print() for b in buckets: pct = max(0.0, min(1.0, b.remaining_fraction)) width = 20 @@ -5148,8 +5155,8 @@ class HermesCLI: header = b.model_id if b.token_type: header += f" [{b.token_type}]" - self.console.print(f" {header:40s} {bar} {pct_str}") - self.console.print() + self._console_print(f" 
{header:40s} {bar} {pct_str}") + self._console_print() def _handle_personality_command(self, cmd: str): """Handle the /personality command to set predefined personalities.""" @@ -5597,7 +5604,7 @@ class HermesCLI: _tip_color = get_active_skin().get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass elif canonical == "history": @@ -5691,7 +5698,7 @@ class HermesCLI: elif canonical == "statusbar": self._status_bar_visible = not self._status_bar_visible state = "visible" if self._status_bar_visible else "hidden" - self.console.print(f" Status bar {state}") + self._console_print(f" Status bar {state}") elif canonical == "verbose": self._toggle_verbose() elif canonical == "yolo": @@ -5814,15 +5821,15 @@ class HermesCLI: ) output = result.stdout.strip() or result.stderr.strip() if output: - self.console.print(_rich_text_from_ansi(output)) + self._console_print(_rich_text_from_ansi(output)) else: - self.console.print("[dim]Command returned no output[/]") + self._console_print("[dim]Command returned no output[/]") except subprocess.TimeoutExpired: - self.console.print("[bold red]Quick command timed out (30s)[/]") + self._console_print("[bold red]Quick command timed out (30s)[/]") except Exception as e: - self.console.print(f"[bold red]Quick command error: {e}[/]") + self._console_print(f"[bold red]Quick command error: {e}[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") elif qcmd.get("type") == "alias": target = qcmd.get("target", "").strip() if target: @@ -5831,9 +5838,9 @@ class HermesCLI: aliased_command = f"{target} {user_args}".strip() return self.process_command(aliased_command) else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no target 
defined[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") + self._console_print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") # Check for plugin-registered slash commands elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names(): from hermes_cli.plugins import get_plugin_command_handler @@ -8603,7 +8610,7 @@ class HermesCLI: except Exception: _welcome_text = "Welcome to Hermes Agent! Type your message or /help for commands." _welcome_color = "#FFF8DC" - self.console.print(f"[{_welcome_color}]{_welcome_text}[/]") + self._console_print(f"[{_welcome_color}]{_welcome_text}[/]") # Show a random tip to help users discover features try: from hermes_cli.tips import get_random_tip @@ -8612,16 +8619,16 @@ class HermesCLI: _tip_color = _welcome_skin.get_color("banner_dim", "#B8860B") except Exception: _tip_color = "#B8860B" - self.console.print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") + self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]") except Exception: pass # Tips are non-critical — never break startup if self.preloaded_skills and not self._startup_skills_line_shown: skills_label = ", ".join(self.preloaded_skills) - self.console.print( + self._console_print( f"[bold {_accent_hex()}]Activated skills:[/] {skills_label}" ) self._startup_skills_line_shown = True - self.console.print() + self._console_print() # State for async operation self._agent_running = False diff --git a/tests/cli/test_gquota_command.py b/tests/cli/test_gquota_command.py new file mode 100644 index 0000000000..0740e00126 --- /dev/null +++ b/tests/cli/test_gquota_command.py @@ -0,0 +1,21 @@ +from unittest.mock import MagicMock, patch + + +def test_gquota_uses_chat_console_when_tui_is_live(): + from agent.google_oauth import GoogleOAuthError + from cli import HermesCLI + + cli = 
HermesCLI.__new__(HermesCLI) + cli.console = MagicMock() + cli._app = object() + + live_console = MagicMock() + + with patch("cli.ChatConsole", return_value=live_console), \ + patch("agent.google_oauth.get_valid_access_token", side_effect=GoogleOAuthError("No Google OAuth credentials found")), \ + patch("agent.google_oauth.load_credentials", return_value=None), \ + patch("agent.google_code_assist.retrieve_user_quota"): + cli._handle_gquota_command("/gquota") + + assert live_console.print.call_count == 2 + cli.console.print.assert_not_called() diff --git a/tests/cli/test_quick_commands.py b/tests/cli/test_quick_commands.py index 7a89d4ca28..1c94cb1b02 100644 --- a/tests/cli/test_quick_commands.py +++ b/tests/cli/test_quick_commands.py @@ -33,6 +33,20 @@ class TestCLIQuickCommands: printed = self._printed_plain(cli.console.print.call_args[0][0]) assert printed == "daily-note" + def test_exec_command_uses_chat_console_when_tui_is_live(self): + cli = self._make_cli({"dn": {"type": "exec", "command": "echo daily-note"}}) + cli._app = object() + live_console = MagicMock() + + with patch("cli.ChatConsole", return_value=live_console): + result = cli.process_command("/dn") + + assert result is True + live_console.print.assert_called_once() + printed = self._printed_plain(live_console.print.call_args[0][0]) + assert printed == "daily-note" + cli.console.print.assert_not_called() + def test_exec_command_stderr_shown_on_no_stdout(self): cli = self._make_cli({"err": {"type": "exec", "command": "echo error >&2"}}) result = cli.process_command("/err") From e0171314030fa5fad2e7e7e96c116c98a0178e33 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sat, 18 Apr 2026 19:30:07 -0700 Subject: [PATCH 021/455] =?UTF-8?q?feat(cron):=20add=20wakeAgent=20gate=20?= =?UTF-8?q?=E2=80=94=20scripts=20can=20skip=20the=20agent=20entirely?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the existing cron script hook with a wake gate ported from nanoclaw 
#1232. When a cron job's pre-check Python script (already sandboxed to HERMES_HOME/scripts/) writes a JSON line like ```json {"wakeAgent": false} ``` on its last stdout line, `run_job()` returns the SILENT marker and skips the agent entirely — no LLM call, no delivery, no tokens spent. Useful for frequent polls (every 1-5 min) that only need to wake the agent when something has genuinely changed. Any other script output (non-JSON, missing key, non-dict, `wakeAgent: true`, truthy/falsy non-False values) behaves as before: stdout is injected as context and the agent runs normally. Strict `False` is required to skip — avoids accidental gating from arbitrary JSON. Refactor: - New pure helper `_parse_wake_gate(script_output)` in cron/scheduler.py - `_build_job_prompt` accepts optional `prerun_script` tuple so the script runs exactly once per job (run_job runs it for the gate check, reuses the output for prompt injection) - `run_job` short-circuits with SILENT_MARKER when gate fires Script failures (success=False) still cannot trigger the gate — the failure is reported as context to the agent as before. This replaces the approach in closed PR #3837, which inlined bash scripts via tempfile and lost the path-traversal/scripts-dir sandbox that main's impl has. The wake-gate idea (the one net-new capability) is ported on top of the existing sandboxed Python-script model. 
Tests: - 11 pure unit tests for _parse_wake_gate (empty, whitespace, non-JSON, non-dict JSON, missing key, truthy/falsy non-False, multi-line, trailing blanks, non-last-line JSON) - 5 integration tests for run_job wake-gate (skip returns SILENT, wake-true passes through, script-runs-only-once, script failure doesn't gate, no-script regression) - Full tests/cron/ suite: 194/194 pass --- cron/scheduler.py | 69 +++++++++++++- tests/cron/test_scheduler.py | 174 +++++++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 4 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 8938063c7f..6e93fc02fe 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -564,15 +564,53 @@ def _run_job_script(script_path: str) -> tuple[bool, str]: return False, f"Script execution failed: {exc}" -def _build_job_prompt(job: dict) -> str: - """Build the effective prompt for a cron job, optionally loading one or more skills first.""" +def _parse_wake_gate(script_output: str) -> bool: + """Parse the last non-empty stdout line of a cron job's pre-check script + as a wake gate. + + The convention (ported from nanoclaw #1232): if the last stdout line is + JSON like ``{"wakeAgent": false}``, the agent is skipped entirely — no + LLM run, no delivery. Any other output (non-JSON, missing flag, gate + absent, or ``wakeAgent: true``) means wake the agent normally. + + Returns True if the agent should wake, False to skip. 
+ """ + if not script_output: + return True + stripped_lines = [line for line in script_output.splitlines() if line.strip()] + if not stripped_lines: + return True + last_line = stripped_lines[-1].strip() + try: + gate = json.loads(last_line) + except (json.JSONDecodeError, ValueError): + return True + if not isinstance(gate, dict): + return True + return gate.get("wakeAgent", True) is not False + + +def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: + """Build the effective prompt for a cron job, optionally loading one or more skills first. + + Args: + job: The cron job dict. + prerun_script: Optional ``(success, stdout)`` from a script that has + already been executed by the caller (e.g. for a wake-gate check). + When provided, the script is not re-executed and the cached + result is used for prompt injection. When omitted, the script + (if any) runs inline as before. + """ prompt = job.get("prompt", "") skills = job.get("skills") # Run data-collection script if configured, inject output as context. script_path = job.get("script") if script_path: - success, script_output = _run_job_script(script_path) + if prerun_script is not None: + success, script_output = prerun_script + else: + success, script_output = _run_job_script(script_path) if success: if script_output: prompt = ( @@ -674,7 +712,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: job_id = job["id"] job_name = job["name"] - prompt = _build_job_prompt(job) + + # Wake-gate: if this job has a pre-check script, run it BEFORE building + # the prompt so a ``{"wakeAgent": false}`` response can short-circuit + # the whole agent run. We pass the result into _build_job_prompt so + # the script is only executed once. 
+ prerun_script = None + script_path = job.get("script") + if script_path: + prerun_script = _run_job_script(script_path) + _ran_ok, _script_output = prerun_script + if _ran_ok and not _parse_wake_gate(_script_output): + logger.info( + "Job '%s' (ID: %s): wakeAgent=false, skipping agent run", + job_name, job_id, + ) + silent_doc = ( + f"# Cron Job: {job_name}\n\n" + f"**Job ID:** {job_id}\n" + f"**Run Time:** {_hermes_now().strftime('%Y-%m-%d %H:%M:%S')}\n\n" + "Script gate returned `wakeAgent=false` — agent skipped.\n" + ) + return True, silent_doc, SILENT_MARKER, None + + prompt = _build_job_prompt(job, prerun_script=prerun_script) origin = _resolve_origin(job) _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 2717584e46..b889ede372 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1175,6 +1175,180 @@ class TestBuildJobPromptSilentHint: assert system_pos < prompt_pos +class TestParseWakeGate: + """Unit tests for _parse_wake_gate — pure function, no side effects.""" + + def test_empty_output_wakes(self): + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate("") is True + assert _parse_wake_gate(None) is True + + def test_whitespace_only_wakes(self): + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate(" \n\n \t\n") is True + + def test_non_json_last_line_wakes(self): + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate("hello world") is True + assert _parse_wake_gate("line 1\nline 2\nplain text") is True + + def test_json_non_dict_wakes(self): + """Bare arrays, numbers, strings must not be interpreted as a gate.""" + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate("[1, 2, 3]") is True + assert _parse_wake_gate("42") is True + assert _parse_wake_gate('"wakeAgent"') is True + + def test_wake_gate_false_skips(self): + from cron.scheduler import 
_parse_wake_gate + assert _parse_wake_gate('{"wakeAgent": false}') is False + + def test_wake_gate_true_wakes(self): + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate('{"wakeAgent": true}') is True + + def test_wake_gate_missing_wakes(self): + """A JSON dict without a wakeAgent key defaults to waking.""" + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate('{"data": {"foo": "bar"}}') is True + + def test_non_boolean_false_still_wakes(self): + """Only strict ``False`` skips — truthy/falsy shortcuts are too risky.""" + from cron.scheduler import _parse_wake_gate + assert _parse_wake_gate('{"wakeAgent": 0}') is True + assert _parse_wake_gate('{"wakeAgent": null}') is True + assert _parse_wake_gate('{"wakeAgent": ""}') is True + + def test_only_last_non_empty_line_parsed(self): + from cron.scheduler import _parse_wake_gate + multi = 'some log output\nmore output\n{"wakeAgent": false}' + assert _parse_wake_gate(multi) is False + + def test_trailing_blank_lines_ignored(self): + from cron.scheduler import _parse_wake_gate + multi = '{"wakeAgent": false}\n\n\n' + assert _parse_wake_gate(multi) is False + + def test_non_last_json_line_does_not_gate(self): + """A JSON gate on an earlier line with plain text after it does NOT trigger.""" + from cron.scheduler import _parse_wake_gate + multi = '{"wakeAgent": false}\nactually this is the real output' + assert _parse_wake_gate(multi) is True + + +class TestRunJobWakeGate: + """Integration tests for run_job wake-gate short-circuit.""" + + def _make_job(self, name="wake-gate-test", script="check.py"): + """Minimal valid cron job dict for run_job.""" + return { + "id": f"job_{name}", + "name": name, + "prompt": "Do a thing", + "schedule": "*/5 * * * *", + "script": script, + } + + def test_wake_false_skips_agent_and_returns_silent(self, caplog): + """When _run_job_script output ends with {wakeAgent: false}, the agent + is not invoked and run_job returns the SILENT marker so delivery is + 
suppressed.""" + from cron.scheduler import SILENT_MARKER + import cron.scheduler as scheduler + + with patch.object(scheduler, "_run_job_script", + return_value=(True, '{"wakeAgent": false}')), \ + patch("run_agent.AIAgent") as agent_cls: + success, doc, final, err = scheduler.run_job(self._make_job()) + + assert success is True + assert err is None + assert final == SILENT_MARKER + assert "Script gate returned `wakeAgent=false`" in doc + agent_cls.assert_not_called() + + def test_wake_true_runs_agent_with_injected_output(self): + """When the script returns {wakeAgent: true, data: ...}, the agent is + invoked and the data line still shows up in the prompt.""" + import cron.scheduler as scheduler + + script_output = '{"wakeAgent": true, "data": {"new": 3}}' + agent = MagicMock() + agent.run_conversation = MagicMock(return_value={ + "final_response": "ok", "messages": [] + }) + with patch.object(scheduler, "_run_job_script", + return_value=(True, script_output)), \ + patch("run_agent.AIAgent", return_value=agent) as agent_cls: + success, doc, final, err = scheduler.run_job(self._make_job()) + + agent_cls.assert_called_once() + # The script output should be visible in the prompt passed to + # run_conversation. 
+ call_kwargs = agent.run_conversation.call_args + prompt_arg = call_kwargs.args[0] if call_kwargs.args else call_kwargs.kwargs.get("user_message", "") + assert script_output in prompt_arg + assert success is True + assert err is None + + def test_script_runs_only_once_on_wake(self): + """Wake-true path must not re-run the script inside _build_job_prompt + (script would execute twice otherwise, wasting work and risking + double-side-effects).""" + import cron.scheduler as scheduler + + call_count = 0 + def _script_stub(path): + nonlocal call_count + call_count += 1 + return (True, "regular output") + + agent = MagicMock() + agent.run_conversation = MagicMock(return_value={ + "final_response": "ok", "messages": [] + }) + with patch.object(scheduler, "_run_job_script", side_effect=_script_stub), \ + patch("run_agent.AIAgent", return_value=agent): + scheduler.run_job(self._make_job()) + + assert call_count == 1, f"script ran {call_count}x, expected exactly 1" + + def test_script_failure_does_not_trigger_gate(self): + """If _run_job_script returns success=False, the gate is NOT evaluated + and the agent still runs (the failure is reported as context).""" + import cron.scheduler as scheduler + + # Malicious or broken script whose stderr happens to contain the + # gate JSON — we must NOT honor it because ran_ok is False. 
+ agent = MagicMock() + agent.run_conversation = MagicMock(return_value={ + "final_response": "ok", "messages": [] + }) + with patch.object(scheduler, "_run_job_script", + return_value=(False, '{"wakeAgent": false}')), \ + patch("run_agent.AIAgent", return_value=agent) as agent_cls: + success, doc, final, err = scheduler.run_job(self._make_job()) + + agent_cls.assert_called_once() # Agent DID wake despite the gate-like text + + def test_no_script_path_runs_agent_normally(self): + """Regression: jobs without a script still work.""" + import cron.scheduler as scheduler + + agent = MagicMock() + agent.run_conversation = MagicMock(return_value={ + "final_response": "ok", "messages": [] + }) + job = self._make_job(script=None) + job.pop("script", None) + with patch.object(scheduler, "_run_job_script") as script_fn, \ + patch("run_agent.AIAgent", return_value=agent) as agent_cls: + scheduler.run_job(job) + + script_fn.assert_not_called() + agent_cls.assert_called_once() + + class TestBuildJobPromptMissingSkill: """Verify that a missing skill logs a warning and does not crash the job.""" From 1d1e1277e496f3b8d2742e4c8ce83b47dde5fa23 Mon Sep 17 00:00:00 2001 From: konsisumer Date: Sat, 18 Apr 2026 07:10:05 +0200 Subject: [PATCH 022/455] fix(gateway): flush undelivered tail before segment reset to preserve streamed text (#8124) When a streaming edit fails mid-stream (flood control, transport error) and a tool boundary arrives before the fallback threshold is reached, the pre-boundary tail in `_accumulated` was silently discarded by `_reset_segment_state`. The user saw a frozen partial message and missing words on the other side of the tool call. Flush the undelivered tail as a continuation message before the reset, computed relative to the last successfully-delivered prefix so we don't duplicate content the user already saw. 
--- gateway/stream_consumer.py | 48 ++++++++++++++++++++++ tests/gateway/test_stream_consumer.py | 59 ++++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 2 deletions(-) diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index ae00aee392..146715b164 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -430,6 +430,21 @@ class GatewayStreamConsumer: # a real string like "msg_1", not "__no_edit__", so that case # still resets and creates a fresh segment as intended.) if got_segment_break: + # If the segment-break edit failed to deliver the + # accumulated content (flood control that has not yet + # promoted to fallback mode, or fallback mode itself), + # _accumulated still holds pre-boundary text the user + # never saw. Flush that tail as a continuation message + # before the reset below wipes _accumulated — otherwise + # text generated before the tool boundary is silently + # dropped (issue #8124). + if ( + self._accumulated + and not current_update_visible + and self._message_id + and self._message_id != "__no_edit__" + ): + await self._flush_segment_tail_on_edit_failure() self._reset_segment_state(preserve_no_edit=True) await asyncio.sleep(0.05) # Small yield to not busy-loop @@ -620,6 +635,39 @@ class GatewayStreamConsumer: err_lower = err.lower() return "flood" in err_lower or "retry after" in err_lower or "rate" in err_lower + async def _flush_segment_tail_on_edit_failure(self) -> None: + """Deliver un-sent tail content before a segment-break reset. + + When an edit fails (flood control, transport error) and a tool + boundary arrives before the next retry, ``_accumulated`` holds text + that was generated but never shown to the user. Without this flush, + the segment reset would discard that tail and leave a frozen cursor + in the partial message. 
+ + Sends the tail that sits after the last successfully-delivered + prefix as a new message, and best-effort strips the stuck cursor + from the previous partial message. + """ + if not self._fallback_final_send: + await self._try_strip_cursor() + visible = self._fallback_prefix or self._visible_prefix() + tail = self._accumulated + if visible and tail.startswith(visible): + tail = tail[len(visible):].lstrip() + tail = self._clean_for_display(tail) + if not tail.strip(): + return + try: + result = await self.adapter.send( + chat_id=self.chat_id, + content=tail, + metadata=self.metadata, + ) + if result.success: + self._already_sent = True + except Exception as e: + logger.error("Segment-break tail flush error: %s", e) + async def _try_strip_cursor(self) -> None: """Best-effort edit to remove the cursor from the last visible message. diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index 99ac4dc188..3063196f41 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -502,11 +502,13 @@ class TestSegmentBreakOnToolBoundary: @pytest.mark.asyncio async def test_segment_break_clears_failed_edit_fallback_state(self): - """A tool boundary after edit failure must not duplicate the next segment.""" + """A tool boundary after edit failure must flush the undelivered tail + without duplicating the prefix the user already saw (#8124).""" adapter = MagicMock() send_results = [ SimpleNamespace(success=True, message_id="msg_1"), SimpleNamespace(success=True, message_id="msg_2"), + SimpleNamespace(success=True, message_id="msg_3"), ] adapter.send = AsyncMock(side_effect=send_results) adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=False, error="flood_control:6")) @@ -526,7 +528,60 @@ class TestSegmentBreakOnToolBoundary: await task sent_texts = [call[1]["content"] for call in adapter.send.call_args_list] - assert sent_texts == ["Hello ▉", "Next segment"] + # The undelivered 
"world" tail must reach the user, and the next + # segment must not duplicate "Hello" that was already visible. + assert sent_texts == ["Hello ▉", "world", "Next segment"] + + @pytest.mark.asyncio + async def test_segment_break_after_mid_stream_edit_failure_preserves_tail(self): + """Regression for #8124: when an earlier edit succeeded but later edits + fail (persistent flood control) and a tool boundary arrives before the + fallback threshold is reached, the pre-boundary tail must still be + delivered — not silently dropped by the segment reset.""" + adapter = MagicMock() + # msg_1 for the initial partial, msg_2 for the flushed tail, + # msg_3 for the post-boundary segment. + send_results = [ + SimpleNamespace(success=True, message_id="msg_1"), + SimpleNamespace(success=True, message_id="msg_2"), + SimpleNamespace(success=True, message_id="msg_3"), + ] + adapter.send = AsyncMock(side_effect=send_results) + + # First two edits succeed, everything after fails with flood control + # — simulating Telegram's "edit once then get rate-limited" pattern. 
+ edit_results = [ + SimpleNamespace(success=True), # "Hello world ▉" — succeeds + SimpleNamespace(success=False, error="flood_control:6.0"), # "Hello world more ▉" — flood triggered + SimpleNamespace(success=False, error="flood_control:6.0"), # finalize edit at segment break + SimpleNamespace(success=False, error="flood_control:6.0"), # cursor-strip attempt + ] + adapter.edit_message = AsyncMock(side_effect=edit_results + [edit_results[-1]] * 10) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉") + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + consumer.on_delta("Hello") + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.08) + consumer.on_delta(" world") + await asyncio.sleep(0.08) + consumer.on_delta(" more") + await asyncio.sleep(0.08) + consumer.on_delta(None) # tool boundary + consumer.on_delta("Here is the tool result.") + consumer.finish() + await task + + sent_texts = [call[1]["content"] for call in adapter.send.call_args_list] + # "more" must have been delivered, not dropped. + all_text = " ".join(sent_texts) + assert "more" in all_text, ( + f"Pre-boundary tail 'more' was silently dropped: sends={sent_texts}" + ) + # Post-boundary text must also reach the user. + assert "Here is the tool result." in all_text @pytest.mark.asyncio async def test_no_message_id_enters_fallback_mode(self): From 62ce6a38ae8de84b7af5772672009f11ada1ef0e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:48:42 -0700 Subject: [PATCH 023/455] fix(gateway): cancel_background_tasks must drain late-arrivals (#12471) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During gateway shutdown, a message arriving while cancel_background_tasks is mid-await (inside asyncio.gather) spawns a fresh _process_message_background task via handle_message and adds it to self._background_tasks. 
The original implementation's _background_tasks.clear() at the end of cancel_background_tasks dropped the reference; the task ran untracked against a disconnecting adapter, logged send-failures, and lingered until it completed on its own. Fix: wrap the cancel+gather in a bounded loop (MAX_DRAIN_ROUNDS=5). If new tasks appeared during the gather, cancel them in the next round. The .clear() at the end is preserved as a safety net for any task that appeared after MAX_DRAIN_ROUNDS — but in practice the drain stabilizes in 1-2 rounds. Tests: tests/gateway/test_cancel_background_drain.py — 3 cases. - test_cancel_background_tasks_drains_late_arrivals: spawn M1, start cancel, inject M2 during M1's shielded cleanup, verify M2 is cancelled. - test_cancel_background_tasks_handles_no_tasks: no-op path still terminates cleanly. - test_cancel_background_tasks_bounded_rounds: baseline — single task cancels in one round, loop terminates. Regression-guard validated: against the unpatched implementation, the late-arrival test fails with exactly the expected message ('task leaked'). With the fix it passes. Blast radius is shutdown-only; the audit classified this as MED. Shipping because the fix is small and the hygiene is worth it. While investigating the audit's other MEDs (busy-handler double-ack, Discord ExecApprovalView double-resolve, UpdatePromptView double-resolve), I verified all three were false positives — the check-and-set patterns have no await between them, so they're atomic on single-threaded asyncio. No fix needed for those. 
--- gateway/platforms/base.py | 24 ++- tests/gateway/test_cancel_background_drain.py | 148 ++++++++++++++++++ 2 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 tests/gateway/test_cancel_background_drain.py diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 65f7226e10..645a642ba1 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -2033,12 +2033,26 @@ class BasePlatformAdapter(ABC): Used during gateway shutdown/replacement so active sessions from the old process do not keep running after adapters are being torn down. """ - tasks = [task for task in self._background_tasks if not task.done()] - for task in tasks: - self._expected_cancelled_tasks.add(task) - task.cancel() - if tasks: + # Loop until no new tasks appear. Without this, a message + # arriving during the `await asyncio.gather` below would spawn + # a fresh _process_message_background task (added to + # self._background_tasks at line ~1668 via handle_message), + # and the _background_tasks.clear() at the end of this method + # would drop the reference — the task runs untracked against a + # disconnecting adapter, logs send-failures, and may linger + # until it completes on its own. Retrying the drain until the + # task set stabilizes closes the window. + MAX_DRAIN_ROUNDS = 5 + for _ in range(MAX_DRAIN_ROUNDS): + tasks = [task for task in self._background_tasks if not task.done()] + if not tasks: + break + for task in tasks: + self._expected_cancelled_tasks.add(task) + task.cancel() await asyncio.gather(*tasks, return_exceptions=True) + # Loop: late-arrival tasks spawned during the gather above + # will be in self._background_tasks now. Re-check. 
self._background_tasks.clear() self._expected_cancelled_tasks.clear() self._pending_messages.clear() diff --git a/tests/gateway/test_cancel_background_drain.py b/tests/gateway/test_cancel_background_drain.py new file mode 100644 index 0000000000..c95fdc062e --- /dev/null +++ b/tests/gateway/test_cancel_background_drain.py @@ -0,0 +1,148 @@ +"""Regression test: cancel_background_tasks must drain late-arrival tasks. + +During gateway shutdown, a message arriving while +cancel_background_tasks is mid-await can spawn a fresh +_process_message_background task via handle_message, which is added +to self._background_tasks. Without the re-drain loop, the subsequent +_background_tasks.clear() drops the reference; the task runs +untracked against a disconnecting adapter. +""" + +import asyncio +from unittest.mock import AsyncMock + +import pytest + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType +from gateway.session import SessionSource, build_session_key + + +class _StubAdapter(BasePlatformAdapter): + async def connect(self): + pass + + async def disconnect(self): + pass + + async def send(self, chat_id, text, **kwargs): + return None + + async def get_chat_info(self, chat_id): + return {} + + +def _make_adapter(): + adapter = _StubAdapter(PlatformConfig(enabled=True, token="t"), Platform.TELEGRAM) + adapter._send_with_retry = AsyncMock(return_value=None) + return adapter + + +def _event(text, cid="42"): + return MessageEvent( + text=text, + message_type=MessageType.TEXT, + source=SessionSource(platform=Platform.TELEGRAM, chat_id=cid, chat_type="dm"), + ) + + +@pytest.mark.asyncio +async def test_cancel_background_tasks_drains_late_arrivals(): + """A message that arrives during the gather window must be picked + up by the re-drain loop, not leaked as an untracked task.""" + adapter = _make_adapter() + sk = build_session_key( + SessionSource(platform=Platform.TELEGRAM, chat_id="42", 
chat_type="dm") + ) + + m1_started = asyncio.Event() + m1_cleanup_running = asyncio.Event() + m2_started = asyncio.Event() + m2_cancelled = asyncio.Event() + + async def handler(event): + if event.text == "M1": + m1_started.set() + try: + await asyncio.sleep(10) + except asyncio.CancelledError: + m1_cleanup_running.set() + # Widen the gather window with a shielded cleanup + # delay so M2 can get injected during it. + await asyncio.shield(asyncio.sleep(0.2)) + raise + else: # M2 — the late arrival + m2_started.set() + try: + await asyncio.sleep(10) + except asyncio.CancelledError: + m2_cancelled.set() + raise + + adapter._message_handler = handler + + # Spawn M1. + await adapter.handle_message(_event("M1")) + await asyncio.wait_for(m1_started.wait(), timeout=1.0) + + # Kick off shutdown. This will cancel M1 and await its cleanup. + cancel_task = asyncio.create_task(adapter.cancel_background_tasks()) + + # Wait until M1's cleanup is running (inside the shielded sleep). + # This is the race window: cancel_task is awaiting gather, M1 is + # shielded in cleanup, the _active_sessions entry has been cleared + # by M1's own finally. + await asyncio.wait_for(m1_cleanup_running.wait(), timeout=1.0) + + # Clear the active-session entry (M1's finally hasn't fully run yet, + # but in production the platform dispatcher would deliver a new + # message that takes the no-active-session spawn path). For this + # repro, make it deterministic. + adapter._active_sessions.pop(sk, None) + + # Inject late arrival — spawns a fresh _process_message_background + # task and adds it to _background_tasks while cancel_task is still + # in gather. + await adapter.handle_message(_event("M2")) + await asyncio.wait_for(m2_started.wait(), timeout=1.0) + + # Let cancel_task finish. Round 1's gather completes when M1's + # shielded cleanup finishes. Round 2 should pick up M2. + await asyncio.wait_for(cancel_task, timeout=5.0) + + # Assert M2 was drained, not leaked. 
+ assert m2_cancelled.is_set(), ( + "Late-arrival M2 was NOT cancelled by cancel_background_tasks — " + "the re-drain loop is missing and the task leaked" + ) + assert adapter._background_tasks == set() + + +@pytest.mark.asyncio +async def test_cancel_background_tasks_handles_no_tasks(): + """Regression guard: no tasks, no hang, no error.""" + adapter = _make_adapter() + await adapter.cancel_background_tasks() + assert adapter._background_tasks == set() + + +@pytest.mark.asyncio +async def test_cancel_background_tasks_bounded_rounds(): + """Regression guard: the drain loop is bounded — it does not spin + forever even if late-arrival tasks keep getting spawned.""" + adapter = _make_adapter() + + # Single well-behaved task that cancels cleanly — baseline check + # that the loop terminates in one round. + async def quick(): + try: + await asyncio.sleep(10) + except asyncio.CancelledError: + raise + + task = asyncio.create_task(quick()) + adapter._background_tasks.add(task) + + await adapter.cancel_background_tasks() + assert task.done() + assert adapter._background_tasks == set() From b668c09ab2e4a4edeceea04da9521329669b9391 Mon Sep 17 00:00:00 2001 From: Tranquil-Flow Date: Sun, 19 Apr 2026 01:48:33 -0700 Subject: [PATCH 024/455] fix(gateway): strip cursor from frozen message on empty fallback continuation (#7183) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When _send_fallback_final() is called with nothing new to deliver (the visible partial already matches final_text), the last edit may still show the cursor character because fallback mode was entered after a failed edit. Before this fix the early-return path left _already_sent = True without attempting to strip the cursor, so the message stayed frozen with a visible ▉ permanently. Adds a best-effort edit inside the empty-continuation branch to clean the cursor off the last-sent text. Harmless when fallback mode wasn't actually armed or when the cursor isn't present. 
If the strip edit itself fails (flood still active), we return without crashing and without corrupting _last_sent_text. Adapted from PR #7429 onto current main — the surrounding fallback block grew the #10807 stale-prefix handling since #7429 was written, so the cursor strip lives in the new else-branch where we still return early. 3 unit tests covering: cursor stripped on empty continuation, no edit attempted when cursor is not configured, cursor-strip edit failure handled without crash. Originally proposed as PR #7429. --- gateway/stream_consumer.py | 24 ++++++++ tests/gateway/test_stream_consumer.py | 84 +++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index 146715b164..78e365712d 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -571,6 +571,30 @@ class GatewayStreamConsumer: if final_text.strip() and final_text != self._visible_prefix(): continuation = final_text else: + # Defence-in-depth for #7183: the last edit may still show the + # cursor character because fallback mode was entered after an + # edit failure left it stuck. Try one final edit to strip it + # so the message doesn't freeze with a visible ▉. Best-effort + # — if this edit also fails (flood control still active), + # _try_strip_cursor has already been called on fallback entry + # and the adaptive-backoff retries will have had their shot. 
+ if ( + self._message_id + and self._last_sent_text + and self.cfg.cursor + and self._last_sent_text.endswith(self.cfg.cursor) + ): + clean_text = self._last_sent_text[:-len(self.cfg.cursor)] + try: + result = await self.adapter.edit_message( + chat_id=self.chat_id, + message_id=self._message_id, + content=clean_text, + ) + if result.success: + self._last_sent_text = clean_text + except Exception: + pass self._already_sent = True self._final_response_sent = True return diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index 3063196f41..0a0e0631db 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -1216,3 +1216,87 @@ class TestBufferOnlyMode: # text, the consumer may send then edit, or just send once at got_done. # The key assertion: this doesn't break. assert adapter.send.call_count >= 1 + + +# ── Cursor stripping on fallback (#7183) ──────────────────────────────────── + + +class TestCursorStrippingOnFallback: + """Regression: cursor must be stripped when fallback continuation is empty (#7183). + + When _send_fallback_final is called with nothing new to deliver (the visible + partial already matches final_text), the last edit may still show the cursor + character because fallback mode was entered after a failed edit. Before the + fix this would leave the message permanently frozen with a visible ▉. 
+ """ + + @pytest.mark.asyncio + async def test_cursor_stripped_when_continuation_empty(self): + """_send_fallback_final must attempt a final edit to strip the cursor.""" + adapter = MagicMock() + adapter.MAX_MESSAGE_LENGTH = 4096 + adapter.edit_message = AsyncMock( + return_value=SimpleNamespace(success=True, message_id="msg-1") + ) + + consumer = GatewayStreamConsumer( + adapter, "chat-1", + config=StreamConsumerConfig(cursor=" ▉"), + ) + consumer._message_id = "msg-1" + consumer._last_sent_text = "Hello world ▉" + consumer._fallback_final_send = False + + await consumer._send_fallback_final("Hello world") + + adapter.edit_message.assert_called_once() + call_args = adapter.edit_message.call_args + assert call_args.kwargs["content"] == "Hello world" + assert consumer._already_sent is True + # _last_sent_text should reflect the cleaned text after a successful strip + assert consumer._last_sent_text == "Hello world" + + @pytest.mark.asyncio + async def test_cursor_not_stripped_when_no_cursor_configured(self): + """No edit attempted when cursor is not configured.""" + adapter = MagicMock() + adapter.MAX_MESSAGE_LENGTH = 4096 + adapter.edit_message = AsyncMock() + + consumer = GatewayStreamConsumer( + adapter, "chat-1", + config=StreamConsumerConfig(cursor=""), + ) + consumer._message_id = "msg-1" + consumer._last_sent_text = "Hello world" + consumer._fallback_final_send = False + + await consumer._send_fallback_final("Hello world") + + adapter.edit_message.assert_not_called() + assert consumer._already_sent is True + + @pytest.mark.asyncio + async def test_cursor_strip_edit_failure_handled(self): + """If the cursor-stripping edit itself fails, it must not crash and + must not corrupt _last_sent_text.""" + adapter = MagicMock() + adapter.MAX_MESSAGE_LENGTH = 4096 + adapter.edit_message = AsyncMock( + return_value=SimpleNamespace(success=False, error="flood_control") + ) + + consumer = GatewayStreamConsumer( + adapter, "chat-1", + config=StreamConsumerConfig(cursor=" 
▉"), + ) + consumer._message_id = "msg-1" + consumer._last_sent_text = "Hello ▉" + consumer._fallback_final_send = False + + await consumer._send_fallback_final("Hello") + + # Should still set already_sent despite the cursor-strip edit failure + assert consumer._already_sent is True + # _last_sent_text must NOT be updated when the edit failed + assert consumer._last_sent_text == "Hello ▉" From 588333908c52b9eb372fdd2a411062f14d797094 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Wed, 8 Apr 2026 21:13:28 -0600 Subject: [PATCH 025/455] fix(telegram): warn on docker-only media paths --- gateway/platforms/telegram.py | 9 +++- gateway/run.py | 50 +++++++++++++++++++ hermes_cli/config.py | 6 ++- tests/gateway/test_runner_startup_failures.py | 21 ++++++++ tests/gateway/test_telegram_documents.py | 13 +++++ website/docs/user-guide/configuration.md | 18 ++++++- website/docs/user-guide/messaging/telegram.md | 32 ++++++++++++ 7 files changed, 146 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index f71614054c..d1935c8090 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1759,7 +1759,14 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(file_path): - return SendResult(success=False, error=f"File not found: {file_path}") + error = f"File not found: {file_path}" + if file_path.startswith(("/workspace/", "/output/")): + error += ( + " (path may only exist inside the Docker sandbox. 
" + "Bind-mount a host directory and emit the host-visible " + "path in MEDIA: for gateway file delivery.)" + ) + return SendResult(success=False, error=error) display_name = file_name or os.path.basename(file_path) _thread = self._metadata_thread_id(metadata) diff --git a/gateway/run.py b/gateway/run.py index b72e95eb83..d7dcaf1451 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -96,6 +96,10 @@ from hermes_cli.env_loader import load_hermes_dotenv _env_path = _hermes_home / '.env' load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env') + +_DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P.+):(?P/[^:]+?)(?::(?P[^:]+))?$") +_DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"} + # Bridge config.yaml values into the environment so os.getenv() picks them up. # config.yaml is authoritative for terminal settings — overrides .env. _config_path = _hermes_home / 'config.yaml' @@ -585,6 +589,7 @@ class GatewayRunner: def __init__(self, config: Optional[GatewayConfig] = None): self.config = config or load_gateway_config() self.adapters: Dict[Platform, BasePlatformAdapter] = {} + self._warn_if_docker_media_delivery_is_likely_misconfigured() # Load ephemeral config from config.yaml / env vars. # Both are injected at API-call time only and never persisted. @@ -691,6 +696,51 @@ class GatewayRunner: self._background_tasks: set = set() + def _warn_if_docker_media_delivery_is_likely_misconfigured(self) -> None: + """Warn when Docker-backed gateway setups lack an obvious output bind mount. + + MEDIA delivery happens in the gateway process, so paths emitted by the model + must be readable from the host. A plain container-local path like + `/workspace/report.txt` often exists only inside Docker. 
+ """ + if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker": + return + + connected = self.config.get_connected_platforms() + messaging_platforms = [p for p in connected if p not in {Platform.LOCAL, Platform.API_SERVER, Platform.WEBHOOK}] + if not messaging_platforms: + return + + raw_volumes = os.getenv("TERMINAL_DOCKER_VOLUMES", "").strip() + volumes: List[str] = [] + if raw_volumes: + try: + parsed = json.loads(raw_volumes) + if isinstance(parsed, list): + volumes = [str(v) for v in parsed if isinstance(v, str)] + except Exception: + logger.debug("Could not parse TERMINAL_DOCKER_VOLUMES for gateway media warning", exc_info=True) + + has_explicit_output_mount = False + for spec in volumes: + match = _DOCKER_VOLUME_SPEC_RE.match(spec) + if not match: + continue + container_path = match.group("container") + if container_path in _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS: + has_explicit_output_mount = True + break + + if has_explicit_output_mount: + return + + logger.warning( + "Docker backend is enabled for the messaging gateway but no explicit host-visible " + "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. " + "MEDIA file delivery can fail for files that only exist inside the container, such as " + "'/workspace/...'." + ) + # -- Setup skill availability ---------------------------------------- diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 1dedc1710a..786ff622d9 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -403,7 +403,11 @@ DEFAULT_CONFIG = { "container_persistent": True, # Persist filesystem across sessions # Docker volume mounts — share host directories with the container. # Each entry is "host_path:container_path" (standard Docker -v syntax). 
- # Example: ["/home/user/projects:/workspace/projects", "/data:/data"] + # Example: + # ["/home/user/projects:/workspace/projects", + # "/home/user/.hermes/cache/documents:/output"] + # For gateway MEDIA delivery, write inside Docker to /output/... and emit + # the host-visible path in MEDIA:, not the container path. "docker_volumes": [], # Explicit opt-in: mount the host cwd into /workspace for Docker sessions. # Default off because passing host directories into a sandbox weakens isolation. diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index 977d66fb3b..ddcdd1aaa0 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -107,6 +107,7 @@ async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkey assert state["gateway_state"] == "running" +<<<<<<< HEAD @pytest.mark.asyncio async def test_runner_records_connected_platform_state_on_success(monkeypatch, tmp_path): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) @@ -319,3 +320,23 @@ async def test_start_gateway_replace_clears_marker_on_permission_denied( assert ok is False # Marker must NOT be left behind assert not (tmp_path / ".gateway-takeover.json").exists() + + +def test_runner_warns_when_docker_gateway_lacks_explicit_output_mount(monkeypatch, tmp_path, caplog): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_DOCKER_VOLUMES", '["/etc/localtime:/etc/localtime:ro"]') + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="***") + }, + sessions_dir=tmp_path / "sessions", + ) + + with caplog.at_level("WARNING"): + GatewayRunner(config) + + assert any( + "host-visible output mount" in record.message + for record in caplog.records + ) diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py index 86e5cb30fb..2036f46a21 
100644 --- a/tests/gateway/test_telegram_documents.py +++ b/tests/gateway/test_telegram_documents.py @@ -483,6 +483,19 @@ class TestSendDocument: assert "not found" in result.error.lower() connected_adapter._bot.send_document.assert_not_called() + @pytest.mark.asyncio + async def test_send_document_workspace_path_has_docker_hint(self, connected_adapter): + """Container-local-looking paths get a more actionable Docker hint.""" + result = await connected_adapter.send_document( + chat_id="12345", + file_path="/workspace/report.txt", + ) + + assert result.success is False + assert "docker sandbox" in result.error.lower() + assert "host-visible path" in result.error.lower() + connected_adapter._bot.send_document.assert_not_called() + @pytest.mark.asyncio async def test_send_document_not_connected(self, adapter): """If bot is None, returns not connected error.""" diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index dbc6b0e47e..f91a25c384 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -257,7 +257,7 @@ terminal: docker_volumes: - "/home/user/projects:/workspace/projects" # Read-write (default) - "/home/user/datasets:/data:ro" # Read-only - - "/home/user/outputs:/outputs" # Agent writes, you read + - "/home/user/.hermes/cache/documents:/output" # Gateway-visible exports ``` This is useful for: @@ -265,6 +265,22 @@ This is useful for: - **Receiving files** from the agent (generated code, reports, exports) - **Shared workspaces** where both you and the agent access the same files +If you use a messaging gateway and want the agent to send generated files via +`MEDIA:/...`, prefer a dedicated host-visible export mount such as +`/home/user/.hermes/cache/documents:/output`. 
+ +- Write files inside Docker to `/output/...` +- Emit the **host path** in `MEDIA:`, for example: + `MEDIA:/home/user/.hermes/cache/documents/report.txt` +- Do **not** emit `/workspace/...` or `/output/...` unless that exact path also + exists for the gateway process on the host + +:::warning +YAML duplicate keys silently override earlier ones. If you already have a +`docker_volumes:` block, merge new mounts into the same list instead of adding +another `docker_volumes:` key later in the file. +::: + Can also be set via environment variable: `TERMINAL_DOCKER_VOLUMES='["/host:/container"]'` (JSON array). ### Docker Credential Forwarding diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index 6dbf9e61df..a92fc8d223 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -112,6 +112,38 @@ hermes gateway The bot should come online within seconds. Send it a message on Telegram to verify. +## Sending Generated Files from Docker-backed Terminals + +If your terminal backend is `docker`, keep in mind that Telegram attachments are +sent by the **gateway process**, not from inside the container. That means the +final `MEDIA:/...` path must be readable on the host where the gateway is +running. + +Common pitfall: + +- the agent writes a file inside Docker to `/workspace/report.txt` +- the model emits `MEDIA:/workspace/report.txt` +- Telegram delivery fails because `/workspace/report.txt` only exists inside the + container, not on the host + +Recommended pattern: + +```yaml +terminal: + backend: docker + docker_volumes: + - "/home/user/.hermes/cache/documents:/output" +``` + +Then: + +- write files inside Docker to `/output/...` +- emit the **host-visible** path in `MEDIA:`, for example: + `MEDIA:/home/user/.hermes/cache/documents/report.txt` + +If you already have a `docker_volumes:` section, add the new mount to the same +list. 
YAML duplicate keys silently override earlier ones. + ## Webhook Mode By default, Hermes connects to Telegram using **long polling** — the gateway makes outbound requests to Telegram's servers to fetch new updates. This works well for local and always-on deployments. From ff63e2e005ebbbfade9542437713b699624ed254 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 14:08:30 +0530 Subject: [PATCH 026/455] fix: tighten telegram docker-media salvage follow-ups Follow-up on top of the helix4u #6392 cherry-pick: - reuse one helper for actionable Docker-local file-not-found errors across document/image/video/audio local-media send paths - include /outputs/... alongside /output/... in the container-local path hint - soften the gateway startup warning so it does not imply custom host-visible mounts are broken; the warning now targets the specific risky pattern of emitting container-local MEDIA paths without an explicit export mount - add focused regressions for /outputs/... and non-document media hint coverage This keeps the salvage aligned with the actual MEDIA delivery problem on current main while reducing false-positive operator messaging. --- gateway/platforms/telegram.py | 30 ++++++++++++------- gateway/run.py | 14 +++++---- tests/gateway/test_runner_startup_failures.py | 1 - tests/gateway/test_telegram_documents.py | 24 +++++++++++++++ 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index d1935c8090..0b74c4e15f 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1657,6 +1657,21 @@ class TelegramAdapter(BasePlatformAdapter): except Exception as exc: logger.error("Failed to write update response from callback: %s", exc) + def _missing_media_path_error(self, label: str, path: str) -> str: + """Build an actionable file-not-found error for gateway MEDIA delivery. + + Paths like /workspace/... 
or /output/... often only exist inside the + Docker sandbox, while the gateway process runs on the host. + """ + error = f"{label} file not found: {path}" + if path.startswith(("/workspace/", "/output/", "/outputs/")): + error += ( + " (path may only exist inside the Docker sandbox. " + "Bind-mount a host directory and emit the host-visible " + "path in MEDIA: for gateway file delivery.)" + ) + return error + async def send_voice( self, chat_id: str, @@ -1673,7 +1688,7 @@ class TelegramAdapter(BasePlatformAdapter): try: import os if not os.path.exists(audio_path): - return SendResult(success=False, error=f"Audio file not found: {audio_path}") + return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) with open(audio_path, "rb") as audio_file: # .ogg files -> send as voice (round playable bubble) @@ -1722,7 +1737,7 @@ class TelegramAdapter(BasePlatformAdapter): try: import os if not os.path.exists(image_path): - return SendResult(success=False, error=f"Image file not found: {image_path}") + return SendResult(success=False, error=self._missing_media_path_error("Image", image_path)) _thread = self._metadata_thread_id(metadata) with open(image_path, "rb") as image_file: @@ -1759,14 +1774,7 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(file_path): - error = f"File not found: {file_path}" - if file_path.startswith(("/workspace/", "/output/")): - error += ( - " (path may only exist inside the Docker sandbox. 
" - "Bind-mount a host directory and emit the host-visible " - "path in MEDIA: for gateway file delivery.)" - ) - return SendResult(success=False, error=error) + return SendResult(success=False, error=self._missing_media_path_error("File", file_path)) display_name = file_name or os.path.basename(file_path) _thread = self._metadata_thread_id(metadata) @@ -1800,7 +1808,7 @@ class TelegramAdapter(BasePlatformAdapter): try: if not os.path.exists(video_path): - return SendResult(success=False, error=f"Video file not found: {video_path}") + return SendResult(success=False, error=self._missing_media_path_error("Video", video_path)) _thread = self._metadata_thread_id(metadata) with open(video_path, "rb") as f: diff --git a/gateway/run.py b/gateway/run.py index d7dcaf1451..37b2723213 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -589,7 +589,7 @@ class GatewayRunner: def __init__(self, config: Optional[GatewayConfig] = None): self.config = config or load_gateway_config() self.adapters: Dict[Platform, BasePlatformAdapter] = {} - self._warn_if_docker_media_delivery_is_likely_misconfigured() + self._warn_if_docker_media_delivery_is_risky() # Load ephemeral config from config.yaml / env vars. # Both are injected at API-call time only and never persisted. @@ -696,12 +696,14 @@ class GatewayRunner: self._background_tasks: set = set() - def _warn_if_docker_media_delivery_is_likely_misconfigured(self) -> None: - """Warn when Docker-backed gateway setups lack an obvious output bind mount. + def _warn_if_docker_media_delivery_is_risky(self) -> None: + """Warn when Docker-backed gateways lack an explicit export mount. MEDIA delivery happens in the gateway process, so paths emitted by the model must be readable from the host. A plain container-local path like - `/workspace/report.txt` often exists only inside Docker. + `/workspace/report.txt` or `/output/report.txt` often exists only inside + Docker, so users commonly need a dedicated export mount such as + `host-dir:/output`. 
""" if os.getenv("TERMINAL_ENV", "").strip().lower() != "docker": return @@ -737,8 +739,8 @@ class GatewayRunner: logger.warning( "Docker backend is enabled for the messaging gateway but no explicit host-visible " "output mount (for example '/home/user/.hermes/cache/documents:/output') is configured. " - "MEDIA file delivery can fail for files that only exist inside the container, such as " - "'/workspace/...'." + "This is fine if the model already emits host-visible paths, but MEDIA file delivery can fail " + "for container-local paths like '/workspace/...' or '/output/...'." ) diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index ddcdd1aaa0..96d5d4627b 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -107,7 +107,6 @@ async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkey assert state["gateway_state"] == "running" -<<<<<<< HEAD @pytest.mark.asyncio async def test_runner_records_connected_platform_state_on_success(monkeypatch, tmp_path): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) diff --git a/tests/gateway/test_telegram_documents.py b/tests/gateway/test_telegram_documents.py index 2036f46a21..3a68139fa9 100644 --- a/tests/gateway/test_telegram_documents.py +++ b/tests/gateway/test_telegram_documents.py @@ -496,6 +496,19 @@ class TestSendDocument: assert "host-visible path" in result.error.lower() connected_adapter._bot.send_document.assert_not_called() + @pytest.mark.asyncio + async def test_send_document_outputs_path_has_docker_hint(self, connected_adapter): + """Legacy /outputs paths also get the Docker hint.""" + result = await connected_adapter.send_document( + chat_id="12345", + file_path="/outputs/report.txt", + ) + + assert result.success is False + assert "docker sandbox" in result.error.lower() + assert "host-visible path" in result.error.lower() + 
connected_adapter._bot.send_document.assert_not_called() + @pytest.mark.asyncio async def test_send_document_not_connected(self, adapter): """If bot is None, returns not connected error.""" @@ -678,6 +691,17 @@ class TestSendVideo: assert result.success is False assert "not found" in result.error.lower() + @pytest.mark.asyncio + async def test_send_video_workspace_path_has_docker_hint(self, connected_adapter): + result = await connected_adapter.send_video( + chat_id="12345", + video_path="/workspace/video.mp4", + ) + + assert result.success is False + assert "docker sandbox" in result.error.lower() + assert "host-visible path" in result.error.lower() + @pytest.mark.asyncio async def test_send_video_not_connected(self, adapter): result = await adapter.send_video( From b05d30418d1acce913a1b9a768a3330cf63d8341 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Tue, 14 Apr 2026 00:09:43 -0600 Subject: [PATCH 027/455] docs: clarify profiles vs workspaces --- website/docs/reference/profile-commands.md | 4 +++ website/docs/user-guide/profiles.md | 42 +++++++++++++++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/website/docs/reference/profile-commands.md b/website/docs/reference/profile-commands.md index 8c8feafb51..e4f28e8346 100644 --- a/website/docs/reference/profile-commands.md +++ b/website/docs/reference/profile-commands.md @@ -81,6 +81,8 @@ Creates a new profile. | `--clone-from ` | Clone from a specific profile instead of the current one. Used with `--clone` or `--clone-all`. | | `--no-alias` | Skip wrapper script creation. | +Creating a profile does **not** make that profile directory the default project/workspace directory for terminal commands. If you want a profile to start in a specific project, set `terminal.cwd` in that profile's `config.yaml`. 
+ **Examples:** ```bash @@ -129,6 +131,8 @@ hermes profile show Displays details about a profile including its home directory, configured model, gateway status, skills count, and configuration file status. +This shows the profile's Hermes home directory, not the terminal working directory. Terminal commands start from `terminal.cwd` (or the launch directory on the local backend when `cwd: "."`). + | Argument | Description | |----------|-------------| | `` | Profile to inspect. | diff --git a/website/docs/user-guide/profiles.md b/website/docs/user-guide/profiles.md index 67609564f7..aef4d10b21 100644 --- a/website/docs/user-guide/profiles.md +++ b/website/docs/user-guide/profiles.md @@ -4,11 +4,11 @@ sidebar_position: 2 # Profiles: Running Multiple Agents -Run multiple independent Hermes agents on the same machine — each with its own config, API keys, memory, sessions, skills, and gateway. +Run multiple independent Hermes agents on the same machine — each with its own config, API keys, memory, sessions, skills, and gateway state. ## What are profiles? -A profile is a fully isolated Hermes environment. Each profile gets its own directory containing its own `config.yaml`, `.env`, `SOUL.md`, memories, sessions, skills, cron jobs, and state database. Profiles let you run separate agents for different purposes — a coding assistant, a personal bot, a research agent — without any cross-contamination. +A profile is a separate Hermes home directory. Each profile gets its own directory containing its own `config.yaml`, `.env`, `SOUL.md`, memories, sessions, skills, cron jobs, and state database. Profiles let you run separate agents for different purposes — a coding assistant, a personal bot, a research agent — without mixing up Hermes state. When you create a profile, it automatically becomes its own command. Create a profile called `coder` and you immediately have `coder chat`, `coder setup`, `coder gateway start`, etc. 
@@ -20,7 +20,7 @@ coder setup # configure API keys and model coder chat # start chatting ``` -That's it. `coder` is now a fully independent agent. It has its own config, its own memory, its own everything. +That's it. `coder` is now its own Hermes profile with its own config, memory, and state. ## Creating a profile @@ -104,6 +104,32 @@ The CLI always shows which profile is active: - **Banner**: Shows `Profile: coder` on startup - **`hermes profile`**: Shows current profile name, path, model, gateway status +## Profiles vs workspaces vs sandboxing + +Profiles are often confused with workspaces or sandboxes, but they are different things: + +- A **profile** gives Hermes its own state directory: `config.yaml`, `.env`, `SOUL.md`, sessions, memory, logs, cron jobs, and gateway state. +- A **workspace** or **working directory** is where terminal commands start. That is controlled separately by `terminal.cwd`. +- A **sandbox** is what limits filesystem access. Profiles do **not** sandbox the agent. + +On the default `local` terminal backend, the agent still has the same filesystem access as your user account. A profile does not stop it from accessing folders outside the profile directory. + +If you want a profile to start in a specific project folder, set an explicit absolute `terminal.cwd` in that profile's `config.yaml`: + +```yaml +terminal: + backend: local + cwd: /absolute/path/to/project +``` + +Using `cwd: "."` on the local backend means "the directory Hermes was launched from", not "the profile directory". + +Also note: + +- `SOUL.md` can guide the model, but it does not enforce a workspace boundary. +- Changes to `SOUL.md` take effect cleanly on a new session. Existing sessions may still be using the old prompt state. +- Asking the model "what directory are you in?" is not a reliable isolation test. If you need a predictable starting directory for tools, set `terminal.cwd` explicitly. 
+ ## Running gateways Each profile runs its own gateway as a separate process with its own bot token: @@ -151,6 +177,12 @@ coder config set model.model anthropic/claude-sonnet-4 echo "You are a focused coding assistant." > ~/.hermes/profiles/coder/SOUL.md ``` +If you want this profile to work in a specific project by default, also set its own `terminal.cwd`: + +```bash +coder config set terminal.cwd /absolute/path/to/project +``` + ## Updating `hermes update` pulls code once (shared) and syncs new bundled skills to **all** profiles automatically: @@ -201,6 +233,8 @@ Add the line to your `~/.bashrc` or `~/.zshrc` for persistent completion. Comple ## How it works -Profiles use the `HERMES_HOME` environment variable. When you run `coder chat`, the wrapper script sets `HERMES_HOME=~/.hermes/profiles/coder` before launching hermes. Since 119+ files in the codebase resolve paths via `get_hermes_home()`, everything automatically scopes to the profile's directory — config, sessions, memory, skills, state database, gateway PID, logs, and cron jobs. +Profiles use the `HERMES_HOME` environment variable. When you run `coder chat`, the wrapper script sets `HERMES_HOME=~/.hermes/profiles/coder` before launching hermes. Since 119+ files in the codebase resolve paths via `get_hermes_home()`, Hermes state automatically scopes to the profile's directory — config, sessions, memory, skills, state database, gateway PID, logs, and cron jobs. + +This is separate from terminal working directory. Tool execution starts from `terminal.cwd` (or the launch directory when `cwd: "."` on the local backend), not automatically from `HERMES_HOME`. The default profile is simply `~/.hermes` itself. No migration needed — existing installs work identically. 
From 150382e8b79018f0967724ee10403409fdec0060 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:21:55 -0600 Subject: [PATCH 028/455] fix(gateway): stop typing loops on session interrupt --- gateway/platforms/base.py | 38 ++++- gateway/run.py | 147 +++++++++++++++-- tests/gateway/test_pending_event_none.py | 32 +++- tests/gateway/test_run_progress_topics.py | 186 ++++++++++++++++++++++ tests/gateway/test_session_race_guard.py | 24 ++- tests/gateway/test_status_command.py | 47 ++++++ 6 files changed, 456 insertions(+), 18 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 645a642ba1..1f26ed854e 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1401,7 +1401,13 @@ class BasePlatformAdapter(ABC): return paths, cleaned - async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None: + async def _keep_typing( + self, + chat_id: str, + interval: float = 2.0, + metadata=None, + stop_event: asyncio.Event | None = None, + ) -> None: """ Continuously send typing indicator until cancelled. 
@@ -1415,9 +1421,18 @@ class BasePlatformAdapter(ABC): """ try: while True: + if stop_event is not None and stop_event.is_set(): + return if chat_id not in self._typing_paused: await self.send_typing(chat_id, metadata=metadata) - await asyncio.sleep(interval) + if stop_event is None: + await asyncio.sleep(interval) + continue + try: + await asyncio.wait_for(stop_event.wait(), timeout=interval) + except asyncio.TimeoutError: + continue + return except asyncio.CancelledError: pass # Normal cancellation when handler completes finally: @@ -1444,6 +1459,17 @@ class BasePlatformAdapter(ABC): """Resume typing indicator for a chat after approval resolves.""" self._typing_paused.discard(chat_id) + async def interrupt_session_activity(self, session_key: str, chat_id: str) -> None: + """Signal the active session loop to stop and clear typing immediately.""" + if session_key: + interrupt_event = self._active_sessions.get(session_key) + if interrupt_event is not None: + interrupt_event.set() + try: + await self.stop_typing(chat_id) + except Exception: + pass + # ── Processing lifecycle hooks ────────────────────────────────────────── # Subclasses override these to react to message processing events # (e.g. Discord adds 👀/✅/❌ reactions). 
@@ -1717,7 +1743,13 @@ class BasePlatformAdapter(ABC): # Start continuous typing indicator (refreshes every 2 seconds) _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None - typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata)) + typing_task = asyncio.create_task( + self._keep_typing( + event.source.chat_id, + metadata=_thread_metadata, + stop_event=interrupt_event, + ) + ) try: await self._run_processing_hook("on_processing_start", event) diff --git a/gateway/run.py b/gateway/run.py index 37b2723213..ed3b6b5ee3 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -402,6 +402,26 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None: return adapter.get_pending_message(session_key) +_CONTROL_INTERRUPT_MESSAGES = frozenset( + { + "stop requested", + "session reset requested", + "execution timed out (inactivity)", + "sse client disconnected", + "gateway shutting down", + "gateway restarting", + } +) + + +def _is_control_interrupt_message(message: Optional[str]) -> bool: + """Return True when an interrupt message is internal control flow.""" + if not message: + return False + normalized = " ".join(str(message).strip().split()).lower() + return normalized in _CONTROL_INTERRUPT_MESSAGES + + def _check_unavailable_skill(command_name: str) -> str | None: """Check if a command matches a known-but-inactive skill. @@ -630,6 +650,7 @@ class GatewayRunner: self._running_agents_ts: Dict[str, float] = {} # start timestamp per session self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt self._busy_ack_ts: Dict[str, float] = {} # last busy-ack timestamp per session (debounce) + self._session_run_generation: Dict[str, int] = {} # Cache AIAgent instances per session to preserve prompt caching. 
# Without this, a new AIAgent is created per message, rebuilding the @@ -3064,6 +3085,10 @@ class GatewayRunner: _quick_key[:30], _stale_age, _stale_idle, _raw_stale_timeout, _stale_detail, ) + self._invalidate_session_run_generation( + _quick_key, + reason="stale_running_agent_eviction", + ) self._release_running_agent_state(_quick_key) if _quick_key in self._running_agents: @@ -3091,7 +3116,13 @@ class GatewayRunner: if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: running_agent.interrupt("Stop requested") # Force-clean: remove the session lock regardless of agent state + self._invalidate_session_run_generation( + _quick_key, + reason="stop_command", + ) adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(_quick_key, source.chat_id) if adapter and hasattr(adapter, 'get_pending_message'): adapter.get_pending_message(_quick_key) # consume and discard self._pending_messages.pop(_quick_key, None) @@ -3111,7 +3142,13 @@ class GatewayRunner: if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: running_agent.interrupt("Session reset requested") # Clear any pending messages so the old text doesn't replay + self._invalidate_session_run_generation( + _quick_key, + reason="new_command", + ) adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(_quick_key, source.chat_id) if adapter and hasattr(adapter, 'get_pending_message'): adapter.get_pending_message(_quick_key) # consume and discard self._pending_messages.pop(_quick_key, None) @@ -3598,9 +3635,10 @@ class GatewayRunner: # same session — corrupting the transcript. 
self._running_agents[_quick_key] = _AGENT_PENDING_SENTINEL self._running_agents_ts[_quick_key] = time.time() + _run_generation = self._begin_session_run_generation(_quick_key) try: - return await self._handle_message_with_agent(event, source, _quick_key) + return await self._handle_message_with_agent(event, source, _quick_key, _run_generation) finally: # If _run_agent replaced the sentinel with a real agent and # then cleaned it up, this is a no-op. If we exited early @@ -3771,7 +3809,7 @@ class GatewayRunner: return message_text - async def _handle_message_with_agent(self, event, source, _quick_key: str): + async def _handle_message_with_agent(self, event, source, _quick_key: str, run_generation: int): """Inner handler that runs under the _running_agents sentinel guard.""" _msg_start_time = time.time() _platform_name = source.platform.value if hasattr(source.platform, "value") else str(source.platform) @@ -4246,6 +4284,7 @@ class GatewayRunner: source=source, session_id=session_entry.session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event.message_id, channel_prompt=event.channel_prompt, ) @@ -4258,6 +4297,17 @@ class GatewayRunner: except Exception: pass + if not self._is_session_run_current(_quick_key, run_generation): + logger.info( + "Discarding stale agent result for %s — generation %d is no longer current", + _quick_key[:20] if _quick_key else "?", + run_generation, + ) + _stale_adapter = self.adapters.get(source.platform) + if _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"): + _stale_adapter._post_delivery_callbacks.pop(_quick_key, None) + return None + response = agent_result.get("final_response") or "" # Convert the agent's internal "(empty)" sentinel into a @@ -4672,6 +4722,7 @@ class GatewayRunner: # Get existing session key session_key = self._session_key_for_source(source) + self._invalidate_session_run_generation(session_key, reason="session_reset") # Flush memories in the background 
(fire-and-forget) so the user # gets the "Session reset!" response immediately. @@ -4931,6 +4982,10 @@ class GatewayRunner: agent = self._running_agents.get(session_key) if agent is _AGENT_PENDING_SENTINEL: # Force-clean the sentinel so the session is unlocked. + self._invalidate_session_run_generation(session_key, reason="stop_command_pending") + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(session_key, source.chat_id) self._release_running_agent_state(session_key) logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20]) return "⚡ Stopped. The agent hadn't started yet — you can continue this session." @@ -4938,6 +4993,10 @@ class GatewayRunner: agent.interrupt("Stop requested") # Force-clean the session lock so a truly hung agent doesn't # keep it locked forever. + self._invalidate_session_run_generation(session_key, reason="stop_command_handler") + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(session_key, source.chat_id) self._release_running_agent_state(session_key) return "⚡ Stopped. You can continue this session." else: @@ -8385,6 +8444,43 @@ class GatewayRunner: if hasattr(self, "_busy_ack_ts"): self._busy_ack_ts.pop(session_key, None) + def _begin_session_run_generation(self, session_key: str) -> int: + """Claim a fresh run generation token for ``session_key``. + + Every top-level gateway turn gets a monotonically increasing token. + If a later command like /stop or /new invalidates that token while the + old worker is still unwinding, the late result can be recognized and + dropped instead of bleeding into the fresh session. 
+ """ + if not session_key: + return 0 + generations = self.__dict__.get("_session_run_generation") + if generations is None: + generations = {} + self._session_run_generation = generations + next_generation = int(generations.get(session_key, 0)) + 1 + generations[session_key] = next_generation + return next_generation + + def _invalidate_session_run_generation(self, session_key: str, *, reason: str = "") -> int: + """Invalidate any in-flight run token for ``session_key``.""" + generation = self._begin_session_run_generation(session_key) + if reason: + logger.info( + "Invalidated run generation for %s → %d (%s)", + session_key[:20], + generation, + reason, + ) + return generation + + def _is_session_run_current(self, session_key: str, generation: int) -> bool: + """Return True when ``generation`` is still current for ``session_key``.""" + if not session_key: + return True + generations = self.__dict__.get("_session_run_generation") or {} + return int(generations.get(session_key, 0)) == int(generation) + def _evict_cached_agent(self, session_key: str) -> None: """Remove a cached agent for a session (called on /new, /model, etc).""" _lock = getattr(self, "_agent_cache_lock", None) @@ -8807,6 +8903,7 @@ class GatewayRunner: source: SessionSource, session_id: str, session_key: str = None, + run_generation: Optional[int] = None, _interrupt_depth: int = 0, event_message_id: Optional[str] = None, channel_prompt: Optional[str] = None, @@ -8837,6 +8934,11 @@ class GatewayRunner: from run_agent import AIAgent import queue + + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) user_config = _load_gateway_config() platform_key = _platform_config_key(source.platform) @@ -8891,7 +8993,7 @@ class GatewayRunner: def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs): """Callback invoked by agent on tool lifecycle 
events.""" - if not progress_queue: + if not progress_queue or not _run_still_current(): return # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.) @@ -8996,6 +9098,14 @@ class GatewayRunner: while True: try: + if not _run_still_current(): + while not progress_queue.empty(): + try: + progress_queue.get_nowait() + except Exception: + break + return + raw = progress_queue.get_nowait() # Handle dedup messages: update last line with repeat counter @@ -9021,6 +9131,9 @@ class GatewayRunner: await asyncio.sleep(_remaining) continue + if not _run_still_current(): + return + if can_edit and progress_msg_id is not None: # Try to edit the existing progress message full_text = "\n".join(progress_lines) @@ -9056,7 +9169,8 @@ class GatewayRunner: # Restore typing indicator await asyncio.sleep(0.3) - await adapter.send_typing(source.chat_id, metadata=_progress_metadata) + if _run_still_current(): + await adapter.send_typing(source.chat_id, metadata=_progress_metadata) except queue.Empty: await asyncio.sleep(0.3) @@ -9100,6 +9214,8 @@ class GatewayRunner: _hooks_ref = self.hooks def _step_callback_sync(iteration: int, prev_tools: list) -> None: + if not _run_still_current(): + return try: # prev_tools may be list[str] or list[dict] with "name"/"result" # keys. 
Normalise to keep "tool_names" backward-compatible for @@ -9130,7 +9246,7 @@ class GatewayRunner: _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None def _status_callback_sync(event_type: str, message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -9261,12 +9377,16 @@ class GatewayRunner: metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None, ) if _want_stream_deltas: - _stream_delta_cb = _stream_consumer.on_delta + def _stream_delta_cb(text: str) -> None: + if _run_still_current(): + _stream_consumer.on_delta(text) stream_consumer_holder[0] = _stream_consumer except Exception as _sc_err: logger.debug("Could not set up stream consumer: %s", _sc_err) def _interim_assistant_cb(text: str, *, already_streamed: bool = False) -> None: + if not _run_still_current(): + return if _stream_consumer is not None: if already_streamed: _stream_consumer.on_segment_break() @@ -9370,7 +9490,7 @@ class GatewayRunner: _bg_review_pending_lock = threading.Lock() def _deliver_bg_review_message(message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return try: asyncio.run_coroutine_threadsafe( @@ -9394,7 +9514,7 @@ class GatewayRunner: # Background review delivery — send "💾 Memory updated" etc. 
to user def _bg_review_send(message: str) -> None: - if not _status_adapter: + if not _status_adapter or not _run_still_current(): return if not _bg_review_release.is_set(): with _bg_review_pending_lock: @@ -10076,7 +10196,15 @@ class GatewayRunner: if result and adapter and session_key: pending_event = _dequeue_pending_event(adapter, session_key) if result.get("interrupted") and not pending_event and result.get("interrupt_message"): - pending = result.get("interrupt_message") + interrupt_message = result.get("interrupt_message") + if _is_control_interrupt_message(interrupt_message): + logger.info( + "Ignoring control interrupt message for session %s: %s", + session_key[:20] if session_key else "?", + interrupt_message, + ) + else: + pending = interrupt_message elif pending_event: pending = pending_event.text or _build_media_placeholder(pending_event) logger.debug("Processing queued message after agent completion: '%s...'", pending[:40]) @@ -10229,6 +10357,7 @@ class GatewayRunner: source=next_source, session_id=session_id, session_key=session_key, + run_generation=run_generation, _interrupt_depth=_interrupt_depth + 1, event_message_id=next_message_id, channel_prompt=next_channel_prompt, diff --git a/tests/gateway/test_pending_event_none.py b/tests/gateway/test_pending_event_none.py index b2e1356fa1..e717c88296 100644 --- a/tests/gateway/test_pending_event_none.py +++ b/tests/gateway/test_pending_event_none.py @@ -1,13 +1,18 @@ -"""Tests for the pending_event None guard in recursive _run_agent calls. +"""Tests for pending follow-up extraction in recursive _run_agent calls. When pending_event is None (Path B: pending comes from interrupt_message), accessing pending_event.channel_prompt previously raised AttributeError. This verifies the fix: channel_prompt is captured inside the `if pending_event is not None:` block and falls back to None otherwise. 
+ +Also verifies that internal control interrupt reasons like "Stop requested" +do not get recycled into the pending-user-message follow-up path. """ from types import SimpleNamespace +from gateway.run import _is_control_interrupt_message + def _extract_channel_prompt(pending_event): """Reproduce the fixed logic from gateway/run.py. @@ -21,6 +26,15 @@ def _extract_channel_prompt(pending_event): return next_channel_prompt +def _extract_pending_text(interrupted, pending_event, interrupt_message): + """Reproduce the fixed pending-text selection from gateway/run.py.""" + if interrupted and pending_event is None and interrupt_message: + if _is_control_interrupt_message(interrupt_message): + return None + return interrupt_message + return None + + class TestPendingEventNoneChannelPrompt: """Guard against AttributeError when pending_event is None.""" @@ -40,3 +54,19 @@ class TestPendingEventNoneChannelPrompt: event = SimpleNamespace() result = _extract_channel_prompt(event) assert result is None + + +class TestControlInterruptMessages: + """Control interrupt reasons must not become follow-up user input.""" + + def test_stop_requested_is_not_treated_as_pending_user_message(self): + result = _extract_pending_text(True, None, "Stop requested") + assert result is None + + def test_session_reset_requested_is_not_treated_as_pending_user_message(self): + result = _extract_pending_text(True, None, "Session reset requested") + assert result is None + + def test_real_user_interrupt_message_still_requeues(self): + result = _extract_pending_text(True, None, "actually use postgres instead") + assert result == "actually use postgres instead" diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py index 4878f2faec..59e9fa0408 100644 --- a/tests/gateway/test_run_progress_topics.py +++ b/tests/gateway/test_run_progress_topics.py @@ -51,6 +51,9 @@ class ProgressCaptureAdapter(BasePlatformAdapter): async def send_typing(self, chat_id, 
metadata=None) -> None: self.typing.append({"chat_id": chat_id, "metadata": metadata}) + async def stop_typing(self, chat_id) -> None: + self.typing.append({"chat_id": chat_id, "metadata": {"stopped": True}}) + async def get_chat_info(self, chat_id: str): return {"id": chat_id} @@ -90,6 +93,40 @@ class LongPreviewAgent: } +class DelayedProgressAgent: + def __init__(self, **kwargs): + self.tool_progress_callback = kwargs.get("tool_progress_callback") + self.tools = [] + + def run_conversation(self, message, conversation_history=None, task_id=None): + self.tool_progress_callback("tool.started", "terminal", "first command", {}) + time.sleep(0.45) + self.tool_progress_callback("tool.started", "terminal", "second command", {}) + time.sleep(0.1) + return { + "final_response": "done", + "messages": [], + "api_calls": 1, + } + + +class DelayedInterimAgent: + def __init__(self, **kwargs): + self.interim_assistant_callback = kwargs.get("interim_assistant_callback") + self.tools = [] + + def run_conversation(self, message, conversation_history=None, task_id=None): + self.interim_assistant_callback("first interim") + time.sleep(0.45) + self.interim_assistant_callback("second interim") + time.sleep(0.1) + return { + "final_response": "done", + "messages": [], + "api_calls": 1, + } + + def _make_runner(adapter): gateway_run = importlib.import_module("gateway.run") GatewayRunner = gateway_run.GatewayRunner @@ -104,6 +141,7 @@ def _make_runner(adapter): runner._fallback_model = None runner._session_db = None runner._running_agents = {} + runner._session_run_generation = {} runner.hooks = SimpleNamespace(loaded_hooks=False) runner.config = SimpleNamespace( thread_sessions_per_user=False, @@ -744,6 +782,154 @@ async def test_base_processing_releases_post_delivery_callback_after_main_send() assert released == [True] +@pytest.mark.asyncio +async def test_run_agent_drops_tool_progress_after_generation_invalidation(monkeypatch, tmp_path): + import yaml + + (tmp_path / 
"config.yaml").write_text( + yaml.dump({"display": {"tool_progress": "all"}}), + encoding="utf-8", + ) + + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv) + + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = DelayedProgressAgent + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + import tools.terminal_tool # noqa: F401 - register terminal tool metadata + + adapter = ProgressCaptureAdapter(platform=Platform.DISCORD) + runner = _make_runner(adapter) + gateway_run = importlib.import_module("gateway.run") + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + + source = SessionSource( + platform=Platform.DISCORD, + chat_id="dm-1", + chat_type="dm", + thread_id=None, + ) + session_key = "agent:main:discord:dm:dm-1" + runner._session_run_generation[session_key] = 1 + + original_send = adapter.send + invalidated = {"done": False} + + async def send_and_invalidate(chat_id, content, reply_to=None, metadata=None): + result = await original_send(chat_id, content, reply_to=reply_to, metadata=metadata) + if "first command" in content and not invalidated["done"]: + invalidated["done"] = True + runner._invalidate_session_run_generation(session_key, reason="test_stop") + return result + + adapter.send = send_and_invalidate + + result = await runner._run_agent( + message="hello", + context_prompt="", + history=[], + source=source, + session_id="sess-progress-stop", + session_key=session_key, + run_generation=1, + ) + + all_progress_text = " ".join(call["content"] for call in adapter.sent) + all_progress_text += " ".join(call["content"] for call in adapter.edits) + assert result["final_response"] == "done" + assert 'first command' in all_progress_text + assert 'second command' not in all_progress_text + + +@pytest.mark.asyncio +async 
def test_run_agent_drops_interim_commentary_after_generation_invalidation(monkeypatch, tmp_path): + import yaml + + (tmp_path / "config.yaml").write_text( + yaml.dump({"display": {"tool_progress": "off", "interim_assistant_messages": True}}), + encoding="utf-8", + ) + + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv) + + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = DelayedInterimAgent + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + + adapter = ProgressCaptureAdapter(platform=Platform.DISCORD) + runner = _make_runner(adapter) + gateway_run = importlib.import_module("gateway.run") + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + + source = SessionSource( + platform=Platform.DISCORD, + chat_id="dm-2", + chat_type="dm", + thread_id=None, + ) + session_key = "agent:main:discord:dm:dm-2" + runner._session_run_generation[session_key] = 1 + + original_send = adapter.send + invalidated = {"done": False} + + async def send_and_invalidate(chat_id, content, reply_to=None, metadata=None): + result = await original_send(chat_id, content, reply_to=reply_to, metadata=metadata) + if content == "first interim" and not invalidated["done"]: + invalidated["done"] = True + runner._invalidate_session_run_generation(session_key, reason="test_stop") + return result + + adapter.send = send_and_invalidate + + result = await runner._run_agent( + message="hello", + context_prompt="", + history=[], + source=source, + session_id="sess-commentary-stop", + session_key=session_key, + run_generation=1, + ) + + sent_texts = [call["content"] for call in adapter.sent] + assert result["final_response"] == "done" + assert "first interim" in sent_texts + assert "second interim" not in sent_texts + + +@pytest.mark.asyncio +async def 
test_keep_typing_stops_immediately_when_interrupt_event_is_set(): + adapter = ProgressCaptureAdapter(platform=Platform.DISCORD) + stop_event = asyncio.Event() + + task = asyncio.create_task( + adapter._keep_typing( + "dm-typing-stop", + interval=30.0, + stop_event=stop_event, + ) + ) + await asyncio.sleep(0.05) + stop_event.set() + await asyncio.wait_for(task, timeout=0.5) + + normal_typing_calls = [ + call for call in adapter.typing if call.get("metadata") != {"stopped": True} + ] + stopped_calls = [ + call for call in adapter.typing if call.get("metadata") == {"stopped": True} + ] + assert len(normal_typing_calls) == 1 + assert len(stopped_calls) == 1 + + @pytest.mark.asyncio async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path): """Verbose mode with default tool_preview_length (0) should NOT truncate args. diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py index 8c26abec59..fe1ef011a3 100644 --- a/tests/gateway/test_session_race_guard.py +++ b/tests/gateway/test_session_race_guard.py @@ -24,10 +24,18 @@ class _FakeAdapter: def __init__(self): self._pending_messages = {} + self._active_sessions = {} + self.interrupted_sessions = [] async def send(self, chat_id, text, **kwargs): pass + async def interrupt_session_activity(self, session_key, chat_id): + self.interrupted_sessions.append((session_key, chat_id)) + event = self._active_sessions.get(session_key) + if event is not None: + event.set() + def _make_runner(): runner = object.__new__(GatewayRunner) @@ -37,6 +45,7 @@ def _make_runner(): runner.adapters = {Platform.TELEGRAM: _FakeAdapter()} runner._running_agents = {} runner._running_agents_ts = {} + runner._session_run_generation = {} runner._pending_messages = {} runner._pending_approvals = {} runner._voice_mode = {} @@ -81,7 +90,7 @@ async def test_sentinel_placed_before_agent_setup(): # Patch _handle_message_with_agent to capture state at entry sentinel_was_set = False - async def 
mock_inner(self_inner, ev, src, qk): + async def mock_inner(self_inner, ev, src, qk, generation): nonlocal sentinel_was_set sentinel_was_set = runner._running_agents.get(qk) is _AGENT_PENDING_SENTINEL return "ok" @@ -105,7 +114,7 @@ async def test_sentinel_cleaned_up_after_handler_returns(): event = _make_event() session_key = build_session_key(event.source) - async def mock_inner(self_inner, ev, src, qk): + async def mock_inner(self_inner, ev, src, qk, generation): return "ok" with patch.object(GatewayRunner, "_handle_message_with_agent", mock_inner): @@ -127,7 +136,7 @@ async def test_sentinel_cleaned_up_on_exception(): event = _make_event() session_key = build_session_key(event.source) - async def mock_inner(self_inner, ev, src, qk): + async def mock_inner(self_inner, ev, src, qk, generation): raise RuntimeError("boom") with patch.object(GatewayRunner, "_handle_message_with_agent", mock_inner): @@ -154,7 +163,7 @@ async def test_second_message_during_sentinel_queued_not_duplicate(): barrier = asyncio.Event() - async def slow_inner(self_inner, ev, src, qk): + async def slow_inner(self_inner, ev, src, qk, generation): # Simulate slow setup — wait until test tells us to proceed await barrier.wait() return "ok" @@ -333,7 +342,7 @@ async def test_stop_during_sentinel_force_cleans_session(): barrier = asyncio.Event() - async def slow_inner(self_inner, ev, src, qk): + async def slow_inner(self_inner, ev, src, qk, generation): await barrier.wait() return "ok" @@ -381,6 +390,7 @@ async def test_stop_hard_kills_running_agent(): fake_agent = MagicMock() fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0} runner._running_agents[session_key] = fake_agent + runner.adapters[Platform.TELEGRAM]._active_sessions[session_key] = asyncio.Event() # Send /stop stop_event = _make_event(text="/stop") @@ -393,6 +403,10 @@ async def test_stop_hard_kills_running_agent(): assert session_key not in runner._running_agents, ( "/stop must remove the agent from 
_running_agents so the session is unlocked" ) + assert runner.adapters[Platform.TELEGRAM].interrupted_sessions == [ + (session_key, "12345") + ] + assert runner.adapters[Platform.TELEGRAM]._active_sessions[session_key].is_set() # Must return a confirmation assert result is not None diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py index c4a64f30ab..3cdf637dd9 100644 --- a/tests/gateway/test_status_command.py +++ b/tests/gateway/test_status_command.py @@ -50,6 +50,7 @@ def _make_runner(session_entry: SessionEntry): runner.session_store.rewrite_transcript = MagicMock() runner.session_store.update_session = MagicMock() runner._running_agents = {} + runner._session_run_generation = {} runner._pending_messages = {} runner._pending_approvals = {} runner._session_db = MagicMock() @@ -223,6 +224,52 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch): ) +@pytest.mark.asyncio +async def test_handle_message_discards_stale_result_after_session_invalidation(monkeypatch): + import gateway.run as gateway_run + + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner = _make_runner(session_entry) + runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}] + session_key = session_entry.session_key + runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks = {session_key: object()} + + async def _stale_result(**kwargs): + runner._invalidate_session_run_generation(kwargs["session_key"], reason="test_stale_result") + return { + "final_response": "late reply", + "messages": [], + "tools": [], + "history_offset": 0, + "last_prompt_tokens": 80, + "input_tokens": 120, + "output_tokens": 45, + "model": "openai/test-model", + } + + runner._run_agent = AsyncMock(side_effect=_stale_result) + + monkeypatch.setattr(gateway_run, 
"_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + monkeypatch.setattr( + "agent.model_metadata.get_model_context_length", + lambda *_args, **_kwargs: 100000, + ) + + result = await runner._handle_message(_make_event("hello")) + + assert result is None + runner.session_store.append_to_transcript.assert_not_called() + runner.session_store.update_session.assert_not_called() + assert session_key not in runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks + + @pytest.mark.asyncio async def test_status_command_bypasses_active_session_guard(): From 8466268ca58fe1422cadcb6b134b18bc0860a597 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:32:49 -0600 Subject: [PATCH 029/455] fix(gateway): keep typing loop overrides backward-compatible --- gateway/platforms/base.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 1f26ed854e..dc0f22d2a3 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -6,6 +6,7 @@ and implement the required methods. 
""" import asyncio +import inspect import ipaddress import logging import os @@ -1743,11 +1744,17 @@ class BasePlatformAdapter(ABC): # Start continuous typing indicator (refreshes every 2 seconds) _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None + _keep_typing_kwargs = {"metadata": _thread_metadata} + try: + _keep_typing_sig = inspect.signature(self._keep_typing) + except (TypeError, ValueError): + _keep_typing_sig = None + if _keep_typing_sig is None or "stop_event" in _keep_typing_sig.parameters: + _keep_typing_kwargs["stop_event"] = interrupt_event typing_task = asyncio.create_task( self._keep_typing( event.source.chat_id, - metadata=_thread_metadata, - stop_event=interrupt_event, + **_keep_typing_kwargs, ) ) From 4b6ff0eb7fa287695fa147e7c7622dae4ca5dd51 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:05:14 +0530 Subject: [PATCH 030/455] fix: tighten gateway interrupt salvage follow-ups Follow-up on top of the helix4u #12388 cherry-picks: - make deferred post-delivery callbacks generation-aware end-to-end so stale runs cannot clear callbacks registered by a fresher run for the same session - bind callback ownership to the active session event at run start and snapshot that generation inside base adapter processing so later event mutation cannot retarget cleanup - pass run_generation through proxy mode and drop stale proxy streams / final results the same way local runs are dropped - centralize stop/new interrupt cleanup into one helper and replace the open-coded branches with shared logic - unify internal control interrupt reason strings via shared constants - remove the return from base.py's finally block so cleanup no longer swallows cancellation/exception flow - add focused regressions for generation forwarding, proxy stale suppression, and newer-callback preservation This addresses all review findings from the initial #12388 review while 
keeping the fix scoped to stale-output/typing-loop interrupt handling. --- gateway/platforms/base.py | 69 ++++++++-- gateway/run.py | 198 ++++++++++++++++++++------- tests/gateway/test_proxy_mode.py | 37 +++++ tests/gateway/test_status_command.py | 69 ++++++++++ 4 files changed, 315 insertions(+), 58 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index dc0f22d2a3..2b8536062c 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -881,10 +881,11 @@ class BasePlatformAdapter(ABC): # working on a task after --replace or manual restarts. self._background_tasks: set[asyncio.Task] = set() # One-shot callbacks to fire after the main response is delivered. - # Keyed by session_key. GatewayRunner uses this to defer - # background-review notifications ("💾 Skill created") until the - # primary reply has been sent. - self._post_delivery_callbacks: Dict[str, Callable] = {} + # Keyed by session_key. Values are either a bare callback (legacy) or + # a ``(generation, callback)`` tuple so GatewayRunner can make deferred + # deliveries generation-aware and avoid stale runs clearing callbacks + # registered by a fresher run for the same session. + self._post_delivery_callbacks: Dict[str, Any] = {} self._expected_cancelled_tasks: set[asyncio.Task] = set() self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None # Chats where auto-TTS on voice input is disabled (set by /voice off) @@ -1471,6 +1472,48 @@ class BasePlatformAdapter(ABC): except Exception: pass + def register_post_delivery_callback( + self, + session_key: str, + callback: Callable, + *, + generation: int | None = None, + ) -> None: + """Register a deferred callback to fire after the main response. + + ``generation`` lets callers tie the callback to a specific gateway run + generation so stale runs cannot clear callbacks owned by a fresher run. 
+ """ + if not session_key or not callable(callback): + return + if generation is None: + self._post_delivery_callbacks[session_key] = callback + else: + self._post_delivery_callbacks[session_key] = (int(generation), callback) + + def pop_post_delivery_callback( + self, + session_key: str, + *, + generation: int | None = None, + ) -> Callable | None: + """Pop a deferred callback, optionally requiring generation ownership.""" + if not session_key: + return None + entry = self._post_delivery_callbacks.get(session_key) + if entry is None: + return None + if isinstance(entry, tuple) and len(entry) == 2: + entry_generation, callback = entry + if generation is not None and int(entry_generation) != int(generation): + return None + self._post_delivery_callbacks.pop(session_key, None) + return callback if callable(callback) else None + if generation is not None: + return None + self._post_delivery_callbacks.pop(session_key, None) + return entry if callable(entry) else None + # ── Processing lifecycle hooks ────────────────────────────────────────── # Subclasses override these to react to message processing events # (e.g. Discord adds 👀/✅/❌ reactions). @@ -1741,6 +1784,7 @@ class BasePlatformAdapter(ABC): # Fall back to a new Event only if the entry was removed externally. interrupt_event = self._active_sessions.get(session_key) or asyncio.Event() self._active_sessions[session_key] = interrupt_event + callback_generation = getattr(interrupt_event, "_hermes_run_generation", None) # Start continuous typing indicator (refreshes every 2 seconds) _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None @@ -2015,7 +2059,14 @@ class BasePlatformAdapter(ABC): finally: # Fire any one-shot post-delivery callback registered for this # session (e.g. deferred background-review notifications). 
- _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) + _callback_generation = callback_generation + if hasattr(self, "pop_post_delivery_callback"): + _post_cb = self.pop_post_delivery_callback( + session_key, + generation=_callback_generation, + ) + else: + _post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None) if callable(_post_cb): try: _post_cb() @@ -2061,10 +2112,10 @@ class BasePlatformAdapter(ABC): pass # Leave _active_sessions[session_key] populated — the drain # task's own lifecycle will clean it up. - return - # Clean up session tracking - if session_key in self._active_sessions: - del self._active_sessions[session_key] + else: + # Clean up session tracking + if session_key in self._active_sessions: + del self._active_sessions[session_key] async def cancel_background_tasks(self) -> None: """Cancel any in-flight background message-processing tasks. diff --git a/gateway/run.py b/gateway/run.py index ed3b6b5ee3..60c57495b4 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -402,14 +402,21 @@ def _dequeue_pending_event(adapter, session_key: str) -> MessageEvent | None: return adapter.get_pending_message(session_key) +_INTERRUPT_REASON_STOP = "Stop requested" +_INTERRUPT_REASON_RESET = "Session reset requested" +_INTERRUPT_REASON_TIMEOUT = "Execution timed out (inactivity)" +_INTERRUPT_REASON_SSE_DISCONNECT = "SSE client disconnected" +_INTERRUPT_REASON_GATEWAY_SHUTDOWN = "Gateway shutting down" +_INTERRUPT_REASON_GATEWAY_RESTART = "Gateway restarting" + _CONTROL_INTERRUPT_MESSAGES = frozenset( { - "stop requested", - "session reset requested", - "execution timed out (inactivity)", - "sse client disconnected", - "gateway shutting down", - "gateway restarting", + _INTERRUPT_REASON_STOP.lower(), + _INTERRUPT_REASON_RESET.lower(), + _INTERRUPT_REASON_TIMEOUT.lower(), + _INTERRUPT_REASON_SSE_DISCONNECT.lower(), + _INTERRUPT_REASON_GATEWAY_SHUTDOWN.lower(), + _INTERRUPT_REASON_GATEWAY_RESTART.lower(), } ) @@ 
-2514,7 +2521,7 @@ class GatewayRunner: _sk[:20], _e, ) self._interrupt_running_agents( - "Gateway restarting" if self._restart_requested else "Gateway shutting down" + _INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN ) interrupt_deadline = asyncio.get_running_loop().time() + 5.0 while self._running_agents and asyncio.get_running_loop().time() < interrupt_deadline: @@ -3112,21 +3119,12 @@ class GatewayRunner: # _interrupt_requested. Force-clean _running_agents so the session # is unlocked and subsequent messages are processed normally. if _cmd_def_inner and _cmd_def_inner.name == "stop": - running_agent = self._running_agents.get(_quick_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - running_agent.interrupt("Stop requested") - # Force-clean: remove the session lock regardless of agent state - self._invalidate_session_run_generation( + await self._interrupt_and_clear_session( _quick_key, - reason="stop_command", + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command", ) - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, "interrupt_session_activity"): - await adapter.interrupt_session_activity(_quick_key, source.chat_id) - if adapter and hasattr(adapter, 'get_pending_message'): - adapter.get_pending_message(_quick_key) # consume and discard - self._pending_messages.pop(_quick_key, None) - self._release_running_agent_state(_quick_key) logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20]) return "⚡ Stopped. You can continue this session." @@ -3138,23 +3136,15 @@ class GatewayRunner: # doesn't get re-processed as a user message after the # interrupt completes. 
if _cmd_def_inner and _cmd_def_inner.name == "new": - running_agent = self._running_agents.get(_quick_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - running_agent.interrupt("Session reset requested") # Clear any pending messages so the old text doesn't replay - self._invalidate_session_run_generation( + await self._interrupt_and_clear_session( _quick_key, - reason="new_command", + source, + interrupt_reason=_INTERRUPT_REASON_RESET, + invalidation_reason="new_command", ) - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, "interrupt_session_activity"): - await adapter.interrupt_session_activity(_quick_key, source.chat_id) - if adapter and hasattr(adapter, 'get_pending_message'): - adapter.get_pending_message(_quick_key) # consume and discard - self._pending_messages.pop(_quick_key, None) # Clean up the running agent entry so the reset handler # doesn't think an agent is still active. - self._release_running_agent_state(_quick_key) return await self._handle_reset_command(event) # /queue — queue without interrupting @@ -4266,6 +4256,15 @@ class GatewayRunner: if message_text is None: return + # Bind this gateway run generation to the adapter's active-session + # event so deferred post-delivery callbacks can be released by the + # same run that registered them. 
+ self._bind_adapter_run_generation( + self.adapters.get(source.platform), + session_key, + run_generation, + ) + try: # Emit agent:start hook hook_ctx = { @@ -4304,7 +4303,12 @@ class GatewayRunner: run_generation, ) _stale_adapter = self.adapters.get(source.platform) - if _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"): + if getattr(type(_stale_adapter), "pop_post_delivery_callback", None) is not None: + _stale_adapter.pop_post_delivery_callback( + _quick_key, + generation=run_generation, + ) + elif _stale_adapter and hasattr(_stale_adapter, "_post_delivery_callbacks"): _stale_adapter._post_delivery_callbacks.pop(_quick_key, None) return None @@ -4982,22 +4986,23 @@ class GatewayRunner: agent = self._running_agents.get(session_key) if agent is _AGENT_PENDING_SENTINEL: # Force-clean the sentinel so the session is unlocked. - self._invalidate_session_run_generation(session_key, reason="stop_command_pending") - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, "interrupt_session_activity"): - await adapter.interrupt_session_activity(session_key, source.chat_id) - self._release_running_agent_state(session_key) + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_pending", + ) logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20]) return "⚡ Stopped. The agent hadn't started yet — you can continue this session." if agent: - agent.interrupt("Stop requested") # Force-clean the session lock so a truly hung agent doesn't # keep it locked forever. 
- self._invalidate_session_run_generation(session_key, reason="stop_command_handler") - adapter = self.adapters.get(source.platform) - if adapter and hasattr(adapter, "interrupt_session_activity"): - await adapter.interrupt_session_activity(session_key, source.chat_id) - self._release_running_agent_state(session_key) + await self._interrupt_and_clear_session( + session_key, + source, + interrupt_reason=_INTERRUPT_REASON_STOP, + invalidation_reason="stop_command_handler", + ) return "⚡ Stopped. You can continue this session." else: return "No active task to stop." @@ -8481,6 +8486,47 @@ class GatewayRunner: generations = self.__dict__.get("_session_run_generation") or {} return int(generations.get(session_key, 0)) == int(generation) + def _bind_adapter_run_generation( + self, + adapter: Any, + session_key: str, + generation: int | None, + ) -> None: + """Bind a gateway run generation to the adapter's active-session event.""" + if not adapter or not session_key or generation is None: + return + try: + interrupt_event = getattr(adapter, "_active_sessions", {}).get(session_key) + if interrupt_event is not None: + setattr(interrupt_event, "_hermes_run_generation", int(generation)) + except Exception: + pass + + async def _interrupt_and_clear_session( + self, + session_key: str, + source: SessionSource, + *, + interrupt_reason: str, + invalidation_reason: str, + release_running_state: bool = True, + ) -> None: + """Interrupt the current run and clear queued session state consistently.""" + if not session_key: + return + running_agent = self._running_agents.get(session_key) + if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: + running_agent.interrupt(interrupt_reason) + self._invalidate_session_run_generation(session_key, reason=invalidation_reason) + adapter = self.adapters.get(source.platform) + if adapter and hasattr(adapter, "interrupt_session_activity"): + await adapter.interrupt_session_activity(session_key, source.chat_id) + if adapter and 
hasattr(adapter, "get_pending_message"): + adapter.get_pending_message(session_key) # consume and discard + self._pending_messages.pop(session_key, None) + if release_running_state: + self._release_running_agent_state(session_key) + def _evict_cached_agent(self, session_key: str) -> None: """Remove a cached agent for a session (called on /new, /model, etc).""" _lock = getattr(self, "_agent_cache_lock", None) @@ -8662,6 +8708,7 @@ class GatewayRunner: source: "SessionSource", session_id: str, session_key: str = None, + run_generation: Optional[int] = None, event_message_id: Optional[str] = None, ) -> Dict[str, Any]: """Forward the message to a remote Hermes API server instead of @@ -8697,6 +8744,11 @@ class GatewayRunner: proxy_key = os.getenv("GATEWAY_PROXY_KEY", "").strip() + def _run_still_current() -> bool: + if run_generation is None or not session_key: + return True + return self._is_session_run_current(session_key, run_generation) + # Build messages in OpenAI chat format -------------------------- # # The remote api_server can maintain session continuity via @@ -8826,6 +8878,21 @@ class GatewayRunner: # Parse SSE stream buffer = "" async for chunk in resp.content.iter_any(): + if not _run_still_current(): + logger.info( + "Discarding stale proxy stream for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + "api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } text = chunk.decode("utf-8", errors="replace") buffer += text @@ -8875,6 +8942,21 @@ class GatewayRunner: stream_task.cancel() _elapsed = time.time() - _start + if not _run_still_current(): + logger.info( + "Discarding stale proxy result for %s — generation %d is no longer current", + session_key[:20] if session_key else "?", + run_generation or 0, + ) + return { + "final_response": "", + "messages": [], + 
"api_calls": 0, + "tools": [], + "history_offset": len(history), + "session_id": session_id, + "response_previewed": False, + } logger.info( "proxy response: url=%s session=%s time=%.1fs response=%d chars", proxy_url, (session_id or "")[:20], _elapsed, len(full_response), @@ -8929,6 +9011,7 @@ class GatewayRunner: source=source, session_id=session_id, session_key=session_key, + run_generation=run_generation, event_message_id=event_message_id, ) @@ -9527,9 +9610,16 @@ class GatewayRunner: # Register the release hook on the adapter so base.py's finally # block can fire it after delivering the main response. if _status_adapter and session_key: - _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) - if _pdc is not None: - _pdc[session_key] = _release_bg_review_messages + if getattr(type(_status_adapter), "register_post_delivery_callback", None) is not None: + _status_adapter.register_post_delivery_callback( + session_key, + _release_bg_review_messages, + generation=run_generation, + ) + else: + _pdc = getattr(_status_adapter, "_post_delivery_callbacks", None) + if _pdc is not None: + _pdc[session_key] = _release_bg_review_messages # Store agent reference for interrupt support agent_holder[0] = agent @@ -10131,7 +10221,7 @@ class GatewayRunner: # Interrupt the agent if it's still running so the thread # pool worker is freed. if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"): - _timed_out_agent.interrupt("Execution timed out (inactivity)") + _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT) _timeout_mins = int(_agent_timeout // 60) or 1 @@ -10309,7 +10399,17 @@ class GatewayRunner: # first response has been delivered. Pop from the # adapter's callback dict (prevents double-fire in # base.py's finally block) and call it. 
- if adapter and hasattr(adapter, "_post_delivery_callbacks"): + if getattr(type(adapter), "pop_post_delivery_callback", None) is not None: + _bg_cb = adapter.pop_post_delivery_callback( + session_key, + generation=run_generation, + ) + if callable(_bg_cb): + try: + _bg_cb() + except Exception: + pass + elif adapter and hasattr(adapter, "_post_delivery_callbacks"): _bg_cb = adapter._post_delivery_callbacks.pop(session_key, None) if callable(_bg_cb): try: diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py index f3024cb09f..11180639e8 100644 --- a/tests/gateway/test_proxy_mode.py +++ b/tests/gateway/test_proxy_mode.py @@ -19,6 +19,7 @@ def _make_runner(proxy_url=None): runner.config = MagicMock() runner.config.streaming = StreamingConfig() runner._running_agents = {} + runner._session_run_generation = {} runner._session_model_overrides = {} runner._agent_cache = {} runner._agent_cache_lock = None @@ -160,10 +161,12 @@ class TestRunAgentProxyDispatch: source=source, session_id="test-session-123", session_key="test-key", + run_generation=7, ) assert result["final_response"] == "Hello from remote!" 
runner._run_agent_via_proxy.assert_called_once() + assert runner._run_agent_via_proxy.call_args.kwargs["run_generation"] == 7 @pytest.mark.asyncio async def test_run_agent_skips_proxy_when_not_configured(self, monkeypatch): @@ -370,6 +373,40 @@ class TestRunAgentViaProxy: assert "session_id" in result assert result["session_id"] == "sess-123" + @pytest.mark.asyncio + async def test_proxy_stale_generation_returns_empty_result(self, monkeypatch): + monkeypatch.setenv("GATEWAY_PROXY_URL", "http://host:8642") + monkeypatch.delenv("GATEWAY_PROXY_KEY", raising=False) + runner = _make_runner() + source = _make_source() + runner._session_run_generation["test-key"] = 2 + + resp = _FakeSSEResponse( + status=200, + sse_chunks=[ + 'data: {"choices":[{"delta":{"content":"stale"}}]}\n\n', + "data: [DONE]\n\n", + ], + ) + session = _FakeSession(resp) + + with patch("gateway.run._load_gateway_config", return_value={}): + with _patch_aiohttp(session): + with patch("aiohttp.ClientTimeout"): + result = await runner._run_agent_via_proxy( + message="hi", + context_prompt="", + history=[], + source=source, + session_id="sess-123", + session_key="test-key", + run_generation=1, + ) + + assert result["final_response"] == "" + assert result["messages"] == [] + assert result["api_calls"] == 0 + @pytest.mark.asyncio async def test_no_auth_header_without_key(self, monkeypatch): monkeypatch.setenv("GATEWAY_PROXY_URL", "http://host:8642") diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py index 3cdf637dd9..50e1c52cc2 100644 --- a/tests/gateway/test_status_command.py +++ b/tests/gateway/test_status_command.py @@ -270,6 +270,75 @@ async def test_handle_message_discards_stale_result_after_session_invalidation(m assert session_key not in runner.adapters[Platform.TELEGRAM]._post_delivery_callbacks +@pytest.mark.asyncio +async def test_handle_message_stale_result_keeps_newer_generation_callback(monkeypatch): + import gateway.run as gateway_run + + class _Adapter: 
+ def __init__(self): + self._post_delivery_callbacks = {} + + async def send(self, *args, **kwargs): + return None + + def pop_post_delivery_callback(self, session_key, *, generation=None): + entry = self._post_delivery_callbacks.get(session_key) + if entry is None: + return None + if isinstance(entry, tuple): + entry_generation, callback = entry + if generation is not None and entry_generation != generation: + return None + self._post_delivery_callbacks.pop(session_key, None) + return callback + if generation is not None: + return None + return self._post_delivery_callbacks.pop(session_key, None) + + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner = _make_runner(session_entry) + runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}] + session_key = session_entry.session_key + adapter = _Adapter() + runner.adapters[Platform.TELEGRAM] = adapter + + async def _stale_result(**kwargs): + # Simulate a newer run claiming the callback slot before the stale run unwinds. 
+ runner._session_run_generation[session_key] = 2 + adapter._post_delivery_callbacks[session_key] = (2, lambda: None) + return { + "final_response": "late reply", + "messages": [], + "tools": [], + "history_offset": 0, + "last_prompt_tokens": 80, + "input_tokens": 120, + "output_tokens": 45, + "model": "openai/test-model", + } + + runner._run_agent = AsyncMock(side_effect=_stale_result) + + monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + monkeypatch.setattr( + "agent.model_metadata.get_model_context_length", + lambda *_args, **_kwargs: 100000, + ) + + result = await runner._handle_message(_make_event("hello")) + + assert result is None + assert session_key in adapter._post_delivery_callbacks + assert adapter._post_delivery_callbacks[session_key][0] == 2 + + @pytest.mark.asyncio async def test_status_command_bypasses_active_session_guard(): From 4f0e49dc7bd059fada5c6110b7bb14a6fb3b5037 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:34:02 +0530 Subject: [PATCH 031/455] chore: add sgaofen to AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index b153140057..9c04c1c6b3 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -77,6 +77,7 @@ AUTHOR_MAP = { "Asunfly@users.noreply.github.com": "Asunfly", "2500400+honghua@users.noreply.github.com": "honghua", "nish3451@users.noreply.github.com": "nish3451", + "135070653+sgaofen@users.noreply.github.com": "sgaofen", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", From cc59d133dc52197a0388f2f3b33911fc15c6c74e Mon Sep 17 00:00:00 2001 From: sgaofen <135070653+sgaofen@users.noreply.github.com> Date: Sun, 12 Apr 2026 15:30:16 -0700 Subject: [PATCH 032/455] fix(feishu): split fenced code blocks in post payload --- gateway/platforms/feishu.py | 64 
+++++++++++++++++++++++++++++++----- tests/gateway/test_feishu.py | 63 +++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 8 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 351337e827..6e27d33e09 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -430,23 +430,71 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int: def _build_markdown_post_payload(content: str) -> str: + rows = _build_markdown_post_rows(content) return json.dumps( { "zh_cn": { - "content": [ - [ - { - "tag": "md", - "text": content, - } - ] - ], + "content": rows, } }, ensure_ascii=False, ) +def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: + """Build Feishu post rows while isolating fenced code blocks. + + Feishu's `md` renderer can swallow trailing content when a fenced code block + appears inside one large markdown element. Splitting the reply at code + fences preserves the surrounding markdown while keeping the code block in a + dedicated row. 
+ """ + if not content: + return [[{"tag": "md", "text": ""}]] + if "```" not in content: + return [[{"tag": "md", "text": content}]] + + rows: List[List[Dict[str, str]]] = [] + current: List[str] = [] + in_code_block = False + + for raw_line in content.splitlines(): + line = raw_line.rstrip() + is_fence = line.strip().startswith("```") + + if is_fence: + if not in_code_block and current: + segment = "\n".join(current).strip() + if segment: + rows.append([{"tag": "md", "text": segment}]) + current = [] + current.append(line) + in_code_block = not in_code_block + if not in_code_block: + segment = "\n".join(current).strip() + if segment: + rows.append([{"tag": "md", "text": segment}]) + current = [] + continue + + current.append(line) + + if current: + segment = "\n".join(current).strip() + if segment: + rows.append([{"tag": "md", "text": segment}]) + + return rows or [[{"tag": "md", "text": content}]] + + +def parse_feishu_post_content(raw_content: str) -> FeishuPostParseResult: + try: + parsed = json.loads(raw_content) if raw_content else {} + except json.JSONDecodeError: + return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT) + return parse_feishu_post_payload(parsed) + + def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: resolved = _resolve_post_payload(payload) if not resolved: diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 661e37ec1a..47e5a94966 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -2370,6 +2370,69 @@ class TestAdapterBehavior(unittest.TestCase): elements = payload["zh_cn"]["content"][0] self.assertEqual(elements, [{"tag": "md", "text": "可以用 **粗体** 和 *斜体*。"}]) + @patch.dict(os.environ, {}, clear=True) + def test_send_splits_fenced_code_blocks_into_separate_post_rows(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + adapter = FeishuAdapter(PlatformConfig()) + captured = {} + + class _MessageAPI: + def 
create(self, request): + captured["request"] = request + return SimpleNamespace( + success=lambda: True, + data=SimpleNamespace(message_id="om_codeblock"), + ) + + adapter._client = SimpleNamespace( + im=SimpleNamespace( + v1=SimpleNamespace( + message=_MessageAPI(), + ) + ) + ) + + async def _direct(func, *args, **kwargs): + return func(*args, **kwargs) + + content = ( + "确认已入库 ✓\n" + "文件路径:`/root/.hermes/profiles/agent_cto/cron/jobs.json`\n" + "**解码后的内容:**\n" + "```json\n" + '{"cron": "list"}\n' + "```\n" + "后续说明仍应保留。" + ) + + with patch("gateway.platforms.feishu.asyncio.to_thread", side_effect=_direct): + result = asyncio.run( + adapter.send( + chat_id="oc_chat", + content=content, + ) + ) + + self.assertTrue(result.success) + self.assertEqual(captured["request"].request_body.msg_type, "post") + payload = json.loads(captured["request"].request_body.content) + rows = payload["zh_cn"]["content"] + self.assertEqual( + rows, + [ + [ + { + "tag": "md", + "text": "确认已入库 ✓\n文件路径:`/root/.hermes/profiles/agent_cto/cron/jobs.json`\n**解码后的内容:**", + } + ], + [{"tag": "md", "text": "```json\n{\"cron\": \"list\"}\n```"}], + [{"tag": "md", "text": "后续说明仍应保留。"}], + ], + ) + @patch.dict(os.environ, {}, clear=True) def test_send_falls_back_to_text_when_post_payload_is_rejected(self): from gateway.config import PlatformConfig From a9debf10ffd61e9e502a25b203987335671a805d Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:40:53 +0530 Subject: [PATCH 033/455] fix(feishu): harden fenced post row splitting --- gateway/platforms/feishu.py | 47 ++++++++++++++++++++---------------- tests/gateway/test_feishu.py | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 6e27d33e09..dc3d799c93 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -119,6 +119,8 @@ _MARKDOWN_HINT_RE = 
re.compile( re.MULTILINE, ) _MARKDOWN_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)") +_MARKDOWN_FENCE_OPEN_RE = re.compile(r"^```([^\n`]*)\s*$") +_MARKDOWN_FENCE_CLOSE_RE = re.compile(r"^```\s*$") _MENTION_RE = re.compile(r"@_user_\d+") _MULTISPACE_RE = re.compile(r"[ \t]{2,}") _POST_CONTENT_INVALID_RE = re.compile(r"content format of the post type is incorrect", re.IGNORECASE) @@ -445,9 +447,9 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: """Build Feishu post rows while isolating fenced code blocks. Feishu's `md` renderer can swallow trailing content when a fenced code block - appears inside one large markdown element. Splitting the reply at code - fences preserves the surrounding markdown while keeping the code block in a - dedicated row. + appears inside one large markdown element. Split the reply at real fence + lines so prose before/after the code block remains visible while code stays + in a dedicated row. """ if not content: return [[{"tag": "md", "text": ""}]] @@ -458,32 +460,35 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: current: List[str] = [] in_code_block = False + def _flush_current() -> None: + nonlocal current + if not current: + return + segment = "\n".join(current) + if segment.strip(): + rows.append([{"tag": "md", "text": segment}]) + current = [] + for raw_line in content.splitlines(): - line = raw_line.rstrip() - is_fence = line.strip().startswith("```") + stripped_line = raw_line.strip() + is_fence = bool( + _MARKDOWN_FENCE_CLOSE_RE.match(stripped_line) + if in_code_block + else _MARKDOWN_FENCE_OPEN_RE.match(stripped_line) + ) if is_fence: - if not in_code_block and current: - segment = "\n".join(current).strip() - if segment: - rows.append([{"tag": "md", "text": segment}]) - current = [] - current.append(line) + if not in_code_block: + _flush_current() + current.append(raw_line) in_code_block = not in_code_block if not in_code_block: - segment = "\n".join(current).strip() - if 
segment: - rows.append([{"tag": "md", "text": segment}]) - current = [] + _flush_current() continue - current.append(line) - - if current: - segment = "\n".join(current).strip() - if segment: - rows.append([{"tag": "md", "text": segment}]) + current.append(raw_line) + _flush_current() return rows or [[{"tag": "md", "text": content}]] diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 47e5a94966..d5511c064e 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -2433,6 +2433,48 @@ class TestAdapterBehavior(unittest.TestCase): ], ) + @patch.dict(os.environ, {}, clear=True) + def test_build_post_payload_keeps_fence_like_code_lines_inside_code_block(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + adapter = FeishuAdapter(PlatformConfig()) + payload = json.loads( + adapter._build_post_payload( + "before\n```python\n```oops\n```\nafter" + ) + ) + + self.assertEqual( + payload["zh_cn"]["content"], + [ + [{"tag": "md", "text": "before"}], + [{"tag": "md", "text": "```python\n```oops\n```"}], + [{"tag": "md", "text": "after"}], + ], + ) + + @patch.dict(os.environ, {}, clear=True) + def test_build_post_payload_preserves_trailing_spaces_in_code_block(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + adapter = FeishuAdapter(PlatformConfig()) + payload = json.loads( + adapter._build_post_payload( + "before\n```python\nline with two spaces \n```\nafter" + ) + ) + + self.assertEqual( + payload["zh_cn"]["content"], + [ + [{"tag": "md", "text": "before"}], + [{"tag": "md", "text": "```python\nline with two spaces \n```"}], + [{"tag": "md", "text": "after"}], + ], + ) + @patch.dict(os.environ, {}, clear=True) def test_send_falls_back_to_text_when_post_payload_is_rejected(self): from gateway.config import PlatformConfig From 957ca79e8ed2fd1377553d70b9a79232f84b122e Mon Sep 17 00:00:00 2001 From: kshitijk4poor 
<82637225+kshitijk4poor@users.noreply.github.com> Date: Sun, 19 Apr 2026 15:51:43 +0530 Subject: [PATCH 034/455] fix(feishu): drop dead helper and cover repeated fenced blocks --- gateway/platforms/feishu.py | 8 -------- tests/gateway/test_feishu.py | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index dc3d799c93..3b57db46d3 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -492,14 +492,6 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]: return rows or [[{"tag": "md", "text": content}]] -def parse_feishu_post_content(raw_content: str) -> FeishuPostParseResult: - try: - parsed = json.loads(raw_content) if raw_content else {} - except json.JSONDecodeError: - return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT) - return parse_feishu_post_payload(parsed) - - def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult: resolved = _resolve_post_payload(payload) if not resolved: diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index d5511c064e..14ed9e1715 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -2475,6 +2475,29 @@ class TestAdapterBehavior(unittest.TestCase): ], ) + @patch.dict(os.environ, {}, clear=True) + def test_build_post_payload_splits_multiple_fenced_code_blocks(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + adapter = FeishuAdapter(PlatformConfig()) + payload = json.loads( + adapter._build_post_payload( + "before\n```python\nprint(1)\n```\nmiddle\n```json\n{}\n```\nafter" + ) + ) + + self.assertEqual( + payload["zh_cn"]["content"], + [ + [{"tag": "md", "text": "before"}], + [{"tag": "md", "text": "```python\nprint(1)\n```"}], + [{"tag": "md", "text": "middle"}], + [{"tag": "md", "text": "```json\n{}\n```"}], + [{"tag": "md", "text": "after"}], + ], + ) + 
@patch.dict(os.environ, {}, clear=True) def test_send_falls_back_to_text_when_post_payload_is_rejected(self): from gateway.config import PlatformConfig From 66ee081dc181fc731994f50bb99b0a52a2761310 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 05:14:17 -0700 Subject: [PATCH 035/455] skills: move 7 niche mlops/mcp skills to optional (#12474) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Built-in → optional-skills/: mlops/training/peft → optional-skills/mlops/peft mlops/training/pytorch-fsdp → optional-skills/mlops/pytorch-fsdp mlops/models/clip → optional-skills/mlops/clip mlops/models/stable-diffusion → optional-skills/mlops/stable-diffusion mlops/models/whisper → optional-skills/mlops/whisper mlops/cloud/modal → optional-skills/mlops/modal mcp/mcporter → optional-skills/mcp/mcporter Built-in mlops training kept: axolotl, trl-fine-tuning, unsloth. Built-in mlops models kept: audiocraft, segment-anything. Built-in mlops evaluation/research/huggingface-hub/inference all kept. native-mcp stays built-in (documents the native MCP tool); mcporter was a redundant alternative CLI. Also: removed now-empty skills/mlops/cloud/ dir, refreshed skills/mlops/models/DESCRIPTION.md and skills/mcp/DESCRIPTION.md to match what's left, and synchronized both catalog pages (skills-catalog.md, optional-skills-catalog.md). 
--- .../mcp/mcporter/SKILL.md | 0 .../mlops}/clip/SKILL.md | 0 .../mlops}/clip/references/applications.md | 0 .../mlops}/modal/SKILL.md | 0 .../mlops}/modal/references/advanced-usage.md | 0 .../mlops}/modal/references/troubleshooting.md | 0 .../mlops}/peft/SKILL.md | 0 .../mlops}/peft/references/advanced-usage.md | 0 .../mlops}/peft/references/troubleshooting.md | 0 .../mlops}/pytorch-fsdp/SKILL.md | 0 .../mlops}/pytorch-fsdp/references/index.md | 0 .../mlops}/pytorch-fsdp/references/other.md | 0 .../mlops}/stable-diffusion/SKILL.md | 0 .../references/advanced-usage.md | 0 .../references/troubleshooting.md | 0 .../mlops}/whisper/SKILL.md | 0 .../mlops}/whisper/references/languages.md | 0 skills/mcp/DESCRIPTION.md | 2 +- skills/mlops/cloud/DESCRIPTION.md | 3 --- skills/mlops/models/DESCRIPTION.md | 2 +- .../docs/reference/optional-skills-catalog.md | 7 +++++++ website/docs/reference/skills-catalog.md | 16 +--------------- 22 files changed, 10 insertions(+), 20 deletions(-) rename {skills => optional-skills}/mcp/mcporter/SKILL.md (100%) rename {skills/mlops/models => optional-skills/mlops}/clip/SKILL.md (100%) rename {skills/mlops/models => optional-skills/mlops}/clip/references/applications.md (100%) rename {skills/mlops/cloud => optional-skills/mlops}/modal/SKILL.md (100%) rename {skills/mlops/cloud => optional-skills/mlops}/modal/references/advanced-usage.md (100%) rename {skills/mlops/cloud => optional-skills/mlops}/modal/references/troubleshooting.md (100%) rename {skills/mlops/training => optional-skills/mlops}/peft/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/peft/references/advanced-usage.md (100%) rename {skills/mlops/training => optional-skills/mlops}/peft/references/troubleshooting.md (100%) rename {skills/mlops/training => optional-skills/mlops}/pytorch-fsdp/SKILL.md (100%) rename {skills/mlops/training => optional-skills/mlops}/pytorch-fsdp/references/index.md (100%) rename {skills/mlops/training => 
optional-skills/mlops}/pytorch-fsdp/references/other.md (100%) rename {skills/mlops/models => optional-skills/mlops}/stable-diffusion/SKILL.md (100%) rename {skills/mlops/models => optional-skills/mlops}/stable-diffusion/references/advanced-usage.md (100%) rename {skills/mlops/models => optional-skills/mlops}/stable-diffusion/references/troubleshooting.md (100%) rename {skills/mlops/models => optional-skills/mlops}/whisper/SKILL.md (100%) rename {skills/mlops/models => optional-skills/mlops}/whisper/references/languages.md (100%) delete mode 100644 skills/mlops/cloud/DESCRIPTION.md diff --git a/skills/mcp/mcporter/SKILL.md b/optional-skills/mcp/mcporter/SKILL.md similarity index 100% rename from skills/mcp/mcporter/SKILL.md rename to optional-skills/mcp/mcporter/SKILL.md diff --git a/skills/mlops/models/clip/SKILL.md b/optional-skills/mlops/clip/SKILL.md similarity index 100% rename from skills/mlops/models/clip/SKILL.md rename to optional-skills/mlops/clip/SKILL.md diff --git a/skills/mlops/models/clip/references/applications.md b/optional-skills/mlops/clip/references/applications.md similarity index 100% rename from skills/mlops/models/clip/references/applications.md rename to optional-skills/mlops/clip/references/applications.md diff --git a/skills/mlops/cloud/modal/SKILL.md b/optional-skills/mlops/modal/SKILL.md similarity index 100% rename from skills/mlops/cloud/modal/SKILL.md rename to optional-skills/mlops/modal/SKILL.md diff --git a/skills/mlops/cloud/modal/references/advanced-usage.md b/optional-skills/mlops/modal/references/advanced-usage.md similarity index 100% rename from skills/mlops/cloud/modal/references/advanced-usage.md rename to optional-skills/mlops/modal/references/advanced-usage.md diff --git a/skills/mlops/cloud/modal/references/troubleshooting.md b/optional-skills/mlops/modal/references/troubleshooting.md similarity index 100% rename from skills/mlops/cloud/modal/references/troubleshooting.md rename to 
optional-skills/mlops/modal/references/troubleshooting.md diff --git a/skills/mlops/training/peft/SKILL.md b/optional-skills/mlops/peft/SKILL.md similarity index 100% rename from skills/mlops/training/peft/SKILL.md rename to optional-skills/mlops/peft/SKILL.md diff --git a/skills/mlops/training/peft/references/advanced-usage.md b/optional-skills/mlops/peft/references/advanced-usage.md similarity index 100% rename from skills/mlops/training/peft/references/advanced-usage.md rename to optional-skills/mlops/peft/references/advanced-usage.md diff --git a/skills/mlops/training/peft/references/troubleshooting.md b/optional-skills/mlops/peft/references/troubleshooting.md similarity index 100% rename from skills/mlops/training/peft/references/troubleshooting.md rename to optional-skills/mlops/peft/references/troubleshooting.md diff --git a/skills/mlops/training/pytorch-fsdp/SKILL.md b/optional-skills/mlops/pytorch-fsdp/SKILL.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/SKILL.md rename to optional-skills/mlops/pytorch-fsdp/SKILL.md diff --git a/skills/mlops/training/pytorch-fsdp/references/index.md b/optional-skills/mlops/pytorch-fsdp/references/index.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/index.md rename to optional-skills/mlops/pytorch-fsdp/references/index.md diff --git a/skills/mlops/training/pytorch-fsdp/references/other.md b/optional-skills/mlops/pytorch-fsdp/references/other.md similarity index 100% rename from skills/mlops/training/pytorch-fsdp/references/other.md rename to optional-skills/mlops/pytorch-fsdp/references/other.md diff --git a/skills/mlops/models/stable-diffusion/SKILL.md b/optional-skills/mlops/stable-diffusion/SKILL.md similarity index 100% rename from skills/mlops/models/stable-diffusion/SKILL.md rename to optional-skills/mlops/stable-diffusion/SKILL.md diff --git a/skills/mlops/models/stable-diffusion/references/advanced-usage.md 
b/optional-skills/mlops/stable-diffusion/references/advanced-usage.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/advanced-usage.md rename to optional-skills/mlops/stable-diffusion/references/advanced-usage.md diff --git a/skills/mlops/models/stable-diffusion/references/troubleshooting.md b/optional-skills/mlops/stable-diffusion/references/troubleshooting.md similarity index 100% rename from skills/mlops/models/stable-diffusion/references/troubleshooting.md rename to optional-skills/mlops/stable-diffusion/references/troubleshooting.md diff --git a/skills/mlops/models/whisper/SKILL.md b/optional-skills/mlops/whisper/SKILL.md similarity index 100% rename from skills/mlops/models/whisper/SKILL.md rename to optional-skills/mlops/whisper/SKILL.md diff --git a/skills/mlops/models/whisper/references/languages.md b/optional-skills/mlops/whisper/references/languages.md similarity index 100% rename from skills/mlops/models/whisper/references/languages.md rename to optional-skills/mlops/whisper/references/languages.md diff --git a/skills/mcp/DESCRIPTION.md b/skills/mcp/DESCRIPTION.md index 627c20ea1b..30a0660333 100644 --- a/skills/mcp/DESCRIPTION.md +++ b/skills/mcp/DESCRIPTION.md @@ -1,3 +1,3 @@ --- -description: Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Includes the built-in native MCP client (configure servers in config.yaml for automatic tool discovery) and the mcporter CLI bridge for ad-hoc server interaction. +description: Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. Documents the built-in native MCP client — configure servers in config.yaml for automatic tool discovery. 
--- diff --git a/skills/mlops/cloud/DESCRIPTION.md b/skills/mlops/cloud/DESCRIPTION.md deleted file mode 100644 index 32675823e0..0000000000 --- a/skills/mlops/cloud/DESCRIPTION.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -description: GPU cloud providers and serverless compute platforms for ML workloads. ---- diff --git a/skills/mlops/models/DESCRIPTION.md b/skills/mlops/models/DESCRIPTION.md index 8170b517f5..8f7e669562 100644 --- a/skills/mlops/models/DESCRIPTION.md +++ b/skills/mlops/models/DESCRIPTION.md @@ -1,3 +1,3 @@ --- -description: Specific model architectures and tools — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), audio generation (AudioCraft), and multimodal models (LLaVA). +description: Specific model architectures and tools — image segmentation (Segment Anything / SAM) and audio generation (AudioCraft / MusicGen). Additional model skills (CLIP, Stable Diffusion, Whisper, LLaVA) are available as optional skills. --- diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 044060e9dd..f5dd2ac5bf 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -83,6 +83,7 @@ hermes skills uninstall | Skill | Description | |-------|-------------| | **fastmcp** | Build, test, inspect, install, and deploy MCP servers with FastMCP in Python. Covers wrapping APIs or databases as MCP tools, exposing resources or prompts, and deployment. | +| **mcporter** | The `mcporter` CLI — list, configure, auth, and call MCP servers/tools directly (HTTP or stdio) from the terminal. Useful for ad-hoc MCP interactions; for always-on tool discovery use the built-in `native-mcp` client instead. | ## Migration @@ -98,6 +99,7 @@ The largest optional category — covers the full ML pipeline from data curation |-------|-------------| | **accelerate** | Simplest distributed training API. 4 lines to add distributed support to any PyTorch script. 
Unified API for DeepSpeed/FSDP/Megatron/DDP. | | **chroma** | Open-source embedding database. Store embeddings and metadata, perform vector and full-text search. Simple 4-function API for RAG and semantic search. | +| **clip** | OpenAI's vision-language model connecting images and text. Zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. | | **faiss** | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). | | **flash-attention** | Optimize transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Supports PyTorch SDPA, flash-attn library, H100 FP8, and sliding window. | | **guidance** | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance — Microsoft Research's constrained generation framework. | @@ -106,15 +108,20 @@ The largest optional category — covers the full ML pipeline from data curation | **instructor** | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, and stream partial results. | | **lambda-labs** | Reserved and on-demand GPU cloud instances for ML training and inference. SSH access, persistent filesystems, and multi-node clusters. | | **llava** | Large Language and Vision Assistant — visual instruction tuning and image-based conversations combining CLIP vision with LLaMA language models. | +| **modal** | Serverless GPU cloud platform for running ML workloads. On-demand GPU access without infrastructure management, ML model deployment as APIs, or batch jobs with automatic scaling. | | **nemo-curator** | GPU-accelerated data curation for LLM training. 
Fuzzy deduplication (16x faster), quality filtering (30+ heuristics), semantic dedup, PII redaction. Scales with RAPIDS. | +| **peft-fine-tuning** | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Train <1% of parameters with minimal accuracy loss for 7B–70B models on limited GPU memory. HuggingFace's official PEFT library. | | **pinecone** | Managed vector database for production AI. Auto-scaling, hybrid search (dense + sparse), metadata filtering, and low latency (under 100ms p95). | +| **pytorch-fsdp** | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP — parameter sharding, mixed precision, CPU offloading, FSDP2. | | **pytorch-lightning** | High-level PyTorch framework with Trainer class, automatic distributed training (DDP/FSDP/DeepSpeed), callbacks, and minimal boilerplate. | | **qdrant** | High-performance vector similarity search engine. Rust-powered with fast nearest neighbor search, hybrid search with filtering, and scalable vector storage. | | **saelens** | Train and analyze Sparse Autoencoders (SAEs) using SAELens to decompose neural network activations into interpretable features. | | **simpo** | Simple Preference Optimization — reference-free alternative to DPO with better performance (+6.4 pts on AlpacaEval 2.0). No reference model needed. | | **slime** | LLM post-training with RL using Megatron+SGLang framework. Custom data generation workflows and tight Megatron-LM integration for RL scaling. | +| **stable-diffusion-image-generation** | State-of-the-art text-to-image generation with Stable Diffusion via HuggingFace Diffusers. Text-to-image, image-to-image translation, inpainting, and custom diffusion pipelines. | | **tensorrt-llm** | Optimize LLM inference with NVIDIA TensorRT for maximum throughput. 10-100x faster than PyTorch on A100/H100 with quantization (FP8/INT4) and in-flight batching. | | **torchtitan** | PyTorch-native distributed LLM pretraining with 4D parallelism (FSDP2, TP, PP, CP). 
Scale from 8 to 512+ GPUs with Float8 and torch.compile. | +| **whisper** | OpenAI's general-purpose speech recognition. 99 languages, transcription, translation to English, and language ID. Six model sizes from tiny (39M) to large (1550M). Best for robust multilingual ASR. | ## Productivity diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 16be6a6581..ffe489d360 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -114,7 +114,6 @@ Skills for working with MCP (Model Context Protocol) servers, tools, and integra | Skill | Description | Path | |-------|-------------|------| -| `mcporter` | Use the mcporter CLI to list, configure, auth, and call MCP servers/tools directly (HTTP or stdio), including ad-hoc servers, config edits, and CLI/type generation. | `mcp/mcporter` | | `native-mcp` | Built-in MCP (Model Context Protocol) client that connects to external MCP servers, discovers their tools, and registers them as native Hermes Agent tools. Supports stdio and HTTP transports with automatic reconnection, security filtering, and zero-config tool injection. | `mcp/native-mcp` | ## media @@ -136,14 +135,6 @@ General-purpose ML operations tools — model hub management, dataset operations |-------|-------------|------| | `huggingface-hub` | Hugging Face Hub CLI (hf) — search, download, and upload models and datasets, manage repos, query datasets with SQL, deploy inference endpoints, manage Spaces and buckets. | `mlops/huggingface-hub` | -## mlops/cloud - -GPU cloud providers and serverless compute platforms for ML workloads. - -| Skill | Description | Path | -|-------|-------------|------| -| `modal-serverless-gpu` | Serverless GPU cloud platform for running ML workloads. Use when you need on-demand GPU access without infrastructure management, deploying ML models as APIs, or running batch jobs with automatic scaling. 
| `mlops/cloud/modal` | - ## mlops/evaluation Model evaluation benchmarks, experiment tracking, and interpretability tools. @@ -166,15 +157,12 @@ Model serving, quantization (GGUF/GPTQ), structured output, inference optimizati ## mlops/models -Specific model architectures — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), and audio generation (AudioCraft). +Specific model architectures — image segmentation (SAM) and audio generation (AudioCraft / MusicGen). Additional model skills (CLIP, Stable Diffusion, Whisper, LLaVA) are available as optional skills. | Skill | Description | Path | |-------|-------------|------| | `audiocraft-audio-generation` | PyTorch library for audio generation including text-to-music (MusicGen) and text-to-sound (AudioGen). Use when you need to generate music from text descriptions, create sound effects, or perform melody-conditioned music generation. | `mlops/models/audiocraft` | -| `clip` | OpenAI's model connecting vision and language. Enables zero-shot image classification, image-text matching, and cross-modal retrieval. Trained on 400M image-text pairs. Use for image search, content moderation, or vision-language tasks without fine-tuning. Best for general-pur… | `mlops/models/clip` | | `segment-anything-model` | Foundation model for image segmentation with zero-shot transfer. Use when you need to segment any object in images using points, boxes, or masks as prompts, or automatically generate all object masks in an image. | `mlops/models/segment-anything` | -| `stable-diffusion-image-generation` | State-of-the-art text-to-image generation with Stable Diffusion models via HuggingFace Diffusers. Use when generating images from text prompts, performing image-to-image translation, inpainting, or building custom diffusion pipelines. | `mlops/models/stable-diffusion` | -| `whisper` | OpenAI's general-purpose speech recognition model. Supports 99 languages, transcription, translation to English, and language identification. 
Six model sizes from tiny (39M params) to large (1550M params). Use for speech-to-text, podcast transcription, or multilingual audio pr… | `mlops/models/whisper` | ## mlops/research @@ -192,8 +180,6 @@ Fine-tuning, RLHF/DPO/GRPO training, distributed training frameworks, and optimi |-------|-------------|------| | `axolotl` | Expert guidance for fine-tuning LLMs with Axolotl - YAML configs, 100+ models, LoRA/QLoRA, DPO/KTO/ORPO/GRPO, multimodal support | `mlops/training/axolotl` | | `fine-tuning-with-trl` | Fine-tune LLMs using reinforcement learning with TRL - SFT for instruction tuning, DPO for preference alignment, PPO/GRPO for reward optimization, and reward model training. Use when need RLHF, align model with preferences, or train from human feedback. Works with HuggingFace … | `mlops/training/trl-fine-tuning` | -| `peft-fine-tuning` | Parameter-efficient fine-tuning for LLMs using LoRA, QLoRA, and 25+ methods. Use when fine-tuning large models (7B-70B) with limited GPU memory, when you need to train <1% of parameters with minimal accuracy loss, or for multi-adapter serving. 
HuggingFace's official library… | `mlops/training/peft` | -| `pytorch-fsdp` | Expert guidance for Fully Sharded Data Parallel training with PyTorch FSDP - parameter sharding, mixed precision, CPU offloading, FSDP2 | `mlops/training/pytorch-fsdp` | | `unsloth` | Expert guidance for fast fine-tuning with Unsloth - 2-5x faster training, 50-80% less memory, LoRA/QLoRA optimization | `mlops/training/unsloth` | ## note-taking From 206a449b2991bd9e2b943483ae785a96ec5ce6a2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 05:18:19 -0700 Subject: [PATCH 036/455] feat(webhook): direct delivery mode for zero-LLM push notifications (#12473) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit External services can now push plain-text notifications to a user's chat via the webhook adapter without invoking the agent. Set deliver_only=true on a route and the rendered prompt template becomes the literal message body — dispatched directly to the configured target (Telegram, Discord, Slack, GitHub PR comment, etc.). Reuses all existing webhook infrastructure: HMAC-SHA256 signature validation, per-route rate limiting, idempotency cache, body-size limits, template rendering with dot-notation, home-channel fallback. No new HTTP server, no new auth scheme, no new port. Use cases: Supabase/Firebase webhooks → user notifications, monitoring alert forwarding, inter-agent pings, background job completion alerts. Changes: - gateway/platforms/webhook.py: new _direct_deliver() helper + early dispatch branch in _handle_webhook when deliver_only=true. Startup validation rejects deliver_only with deliver=log. - hermes_cli/main.py + hermes_cli/webhook.go: --deliver-only flag on subscribe; list/show output marks direct-delivery routes. - website/docs/user-guide/messaging/webhooks.md: new Direct Delivery Mode section with config example, CLI example, response codes. 
- skills/devops/webhook-subscriptions/SKILL.md: document --deliver-only with use cases (bumped to v1.1.0). - tests/gateway/test_webhook_deliver_only.py: 14 new tests covering agent bypass, template rendering, status codes, HMAC still enforced, idempotency still applies, rate limit still applies, startup validation, and direct-deliver dispatch. Validation: 78 webhook tests pass (64 existing + 14 new). E2E verified with real aiohttp server + real urllib POST — agent not invoked, target adapter.send() called with rendered template, duplicate delivery_id suppressed. Closes the gap identified in PR #12117 (thanks to @H1an1 / Antenna team) without adding a second HTTP ingress server. --- gateway/platforms/webhook.py | 103 ++++ hermes_cli/main.py | 7 + hermes_cli/webhook.py | 16 +- skills/devops/webhook-subscriptions/SKILL.md | 29 +- tests/gateway/test_webhook_deliver_only.py | 473 ++++++++++++++++++ website/docs/user-guide/messaging/webhooks.md | 75 +++ 6 files changed, 699 insertions(+), 4 deletions(-) create mode 100644 tests/gateway/test_webhook_deliver_only.py diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index c37445b17e..9995ac3870 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -13,6 +13,10 @@ Each route defines: - skills: optional list of skills to load for the agent - deliver: where to send the response (github_comment, telegram, etc.) - deliver_extra: additional delivery config (repo, pr_number, chat_id) + - deliver_only: if true, skip the agent — the rendered prompt IS the + message that gets delivered. Use for external push notifications + (Supabase, monitoring alerts, inter-agent pings) where zero LLM cost + and sub-second delivery matter more than agent reasoning. Security: - HMAC secret is required per route (validated at startup) @@ -122,6 +126,19 @@ class WebhookAdapter(BasePlatformAdapter): f"For testing without auth, set secret to '{_INSECURE_NO_AUTH}'." 
) + # deliver_only routes bypass the agent — the POST body becomes a + # direct push notification via the configured delivery target. + # Validate up-front so misconfiguration surfaces at startup rather + # than on the first webhook POST. + if route.get("deliver_only"): + deliver = route.get("deliver", "log") + if not deliver or deliver == "log": + raise ValueError( + f"[webhook] Route '{name}' has deliver_only=true but " + f"deliver is '{deliver}'. Direct delivery requires a " + f"real target (telegram, discord, slack, github_comment, etc.)." + ) + app = web.Application() app.router.add_get("/health", self._handle_health) app.router.add_post("/webhooks/{route_name}", self._handle_webhook) @@ -419,6 +436,64 @@ class WebhookAdapter(BasePlatformAdapter): ) self._seen_deliveries[delivery_id] = now + # ── Direct delivery mode (deliver_only) ───────────────── + # Skip the agent entirely — the rendered prompt IS the message we + # deliver. Use case: external services (Supabase, monitoring, + # cron jobs, other agents) that need to push a plain notification + # to a user's chat with zero LLM cost. Reuses the same HMAC auth, + # rate limiting, idempotency, and template rendering as agent mode. 
+ if route_config.get("deliver_only"): + delivery = { + "deliver": route_config.get("deliver", "log"), + "deliver_extra": self._render_delivery_extra( + route_config.get("deliver_extra", {}), payload + ), + "payload": payload, + } + logger.info( + "[webhook] direct-deliver event=%s route=%s target=%s msg_len=%d delivery=%s", + event_type, + route_name, + delivery["deliver"], + len(prompt), + delivery_id, + ) + try: + result = await self._direct_deliver(prompt, delivery) + except Exception: + logger.exception( + "[webhook] direct-deliver failed route=%s delivery=%s", + route_name, + delivery_id, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + + if result.success: + return web.json_response( + { + "status": "delivered", + "route": route_name, + "target": delivery["deliver"], + "delivery_id": delivery_id, + }, + status=200, + ) + # Delivery attempted but target rejected it — surface as 502 + # with a generic error (don't leak adapter-level detail). + logger.warning( + "[webhook] direct-deliver target rejected route=%s target=%s error=%s", + route_name, + delivery["deliver"], + result.error, + ) + return web.json_response( + {"status": "error", "error": "Delivery failed", "delivery_id": delivery_id}, + status=502, + ) + # Use delivery_id in session key so concurrent webhooks on the # same route get independent agent runs (not queued/interrupted). session_chat_id = f"webhook:{route_name}:{delivery_id}" @@ -572,6 +647,34 @@ class WebhookAdapter(BasePlatformAdapter): # Response delivery # ------------------------------------------------------------------ + async def _direct_deliver( + self, content: str, delivery: dict + ) -> SendResult: + """Deliver *content* directly without invoking the agent. + + Used by ``deliver_only`` routes: the rendered template becomes the + literal message body, and we dispatch to the same delivery helpers + that the agent-mode ``send()`` flow uses. 
All target types that + work in agent mode work here — Telegram, Discord, Slack, GitHub + PR comments, etc. + """ + deliver_type = delivery.get("deliver", "log") + + if deliver_type == "log": + # Shouldn't reach here — startup validation rejects deliver_only + # with deliver=log — but guard defensively. + logger.info("[webhook] direct-deliver log-only: %s", content[:200]) + return SendResult(success=True) + + if deliver_type == "github_comment": + return await self._deliver_github_comment(content, delivery) + + # Fall through to the cross-platform dispatcher, which validates the + # target name and routes via the gateway runner. + return await self._deliver_cross_platform( + deliver_type, content, delivery + ) + async def _deliver_github_comment( self, content: str, delivery: dict ) -> SendResult: diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 7e0220d918..71fc6ae381 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -7002,6 +7002,13 @@ For more help on a command: wh_sub.add_argument( "--secret", default="", help="HMAC secret (auto-generated if omitted)" ) + wh_sub.add_argument( + "--deliver-only", + action="store_true", + help="Skip the agent — deliver the rendered prompt directly as the " + "message. Zero LLM cost. Requires --deliver to be a real target " + "(not 'log').", + ) webhook_subparsers.add_parser( "list", aliases=["ls"], help="List all dynamic subscriptions" diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py index 8ff135e29e..378f11b4a7 100644 --- a/hermes_cli/webhook.py +++ b/hermes_cli/webhook.py @@ -155,6 +155,15 @@ def _cmd_subscribe(args): "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), } + if getattr(args, "deliver_only", False): + if route["deliver"] == "log": + print( + "Error: --deliver-only requires --deliver to be a real target " + "(telegram, discord, slack, github_comment, etc.) — not 'log'." 
+ ) + return + route["deliver_only"] = True + if args.deliver_chat_id: route["deliver_extra"] = {"chat_id": args.deliver_chat_id} @@ -172,9 +181,12 @@ def _cmd_subscribe(args): else: print(" Events: (all)") print(f" Deliver: {route['deliver']}") + if route.get("deliver_only"): + print(" Mode: direct delivery (no agent, zero LLM cost)") if route.get("prompt"): prompt_preview = route["prompt"][:80] + ("..." if len(route["prompt"]) > 80 else "") - print(f" Prompt: {prompt_preview}") + label = "Message" if route.get("deliver_only") else "Prompt" + print(f" {label}: {prompt_preview}") print(f"\n Configure your service to POST to the URL above.") print(f" Use the secret for HMAC-SHA256 signature validation.") print(f" The gateway must be running to receive events (hermes gateway run).\n") @@ -192,6 +204,8 @@ def _cmd_list(args): for name, route in subs.items(): events = ", ".join(route.get("events", [])) or "(all)" deliver = route.get("deliver", "log") + if route.get("deliver_only"): + deliver = f"{deliver} (direct — no agent)" desc = route.get("description", "") print(f" ◆ {name}") if desc: diff --git a/skills/devops/webhook-subscriptions/SKILL.md b/skills/devops/webhook-subscriptions/SKILL.md index e5ab6d5880..dd20a19b41 100644 --- a/skills/devops/webhook-subscriptions/SKILL.md +++ b/skills/devops/webhook-subscriptions/SKILL.md @@ -1,10 +1,10 @@ --- name: webhook-subscriptions -description: Create and manage webhook subscriptions for event-driven agent activation. Use when the user wants external services to trigger agent runs automatically. -version: 1.0.0 +description: Create and manage webhook subscriptions for event-driven agent activation, or for direct push notifications (zero LLM cost). Use when the user wants external services to trigger agent runs OR push notifications to chats. 
+version: 1.1.0 metadata: hermes: - tags: [webhook, events, automation, integrations] + tags: [webhook, events, automation, integrations, notifications, push] --- # Webhook Subscriptions @@ -154,6 +154,29 @@ hermes webhook subscribe alerts \ --deliver origin ``` +### Direct delivery (no agent, zero LLM cost) + +For use cases where you just want to push a notification through to a user's chat — no reasoning, no agent loop — add `--deliver-only`. The rendered `--prompt` template becomes the literal message body and is dispatched directly to the target adapter. + +Use this for: +- External service push notifications (Supabase/Firebase webhooks → Telegram) +- Monitoring alerts that should forward verbatim +- Inter-agent pings where one agent is telling another agent's user something +- Any webhook where an LLM round trip would be wasted effort + +```bash +hermes webhook subscribe antenna-matches \ + --deliver telegram \ + --deliver-chat-id "123456789" \ + --deliver-only \ + --prompt "🎉 New match: {match.user_name} matched with you!" \ + --description "Antenna match notifications" +``` + +The POST returns `200 OK` on successful delivery, `502` on target failure — so upstream services can retry intelligently. HMAC auth, rate limits, and idempotency still apply. + +Requires `--deliver` to be a real target (telegram, discord, slack, github_comment, etc.) — `--deliver log` is rejected because log-only direct delivery is pointless. + ## Security - Each subscription gets an auto-generated HMAC-SHA256 secret (or provide your own with `--secret`) diff --git a/tests/gateway/test_webhook_deliver_only.py b/tests/gateway/test_webhook_deliver_only.py new file mode 100644 index 0000000000..d73a152015 --- /dev/null +++ b/tests/gateway/test_webhook_deliver_only.py @@ -0,0 +1,473 @@ +"""Tests for the webhook adapter's ``deliver_only`` route mode. 
+ +``deliver_only`` lets external services (Supabase webhooks, monitoring +alerts, background jobs, other agents) push plain-text notifications to +a user's chat via the webhook adapter WITHOUT invoking the agent. The +rendered prompt template becomes the literal message body. + +Covers: +- Agent is NOT invoked (``handle_message`` never called) +- Rendered content is delivered to the target platform adapter +- HTTP returns 200 OK on success, 502 on delivery failure +- Startup validation rejects ``deliver_only`` without a real delivery target +- HMAC auth, rate limiting, and idempotency still apply +""" + +import asyncio +import hashlib +import hmac +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from aiohttp import web +from aiohttp.test_utils import TestClient, TestServer + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import MessageEvent, SendResult +from gateway.platforms.webhook import WebhookAdapter, _INSECURE_NO_AUTH + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_adapter(routes, **extra_kw) -> WebhookAdapter: + extra = {"host": "0.0.0.0", "port": 0, "routes": routes} + extra.update(extra_kw) + config = PlatformConfig(enabled=True, extra=extra) + return WebhookAdapter(config) + + +def _create_app(adapter: WebhookAdapter) -> web.Application: + app = web.Application() + app.router.add_get("/health", adapter._handle_health) + app.router.add_post("/webhooks/{route_name}", adapter._handle_webhook) + return app + + +def _wire_mock_target(adapter: WebhookAdapter, platform_name: str = "telegram"): + """Attach a gateway_runner with a mocked target adapter.""" + mock_target = AsyncMock() + mock_target.send = AsyncMock(return_value=SendResult(success=True)) + + mock_runner = MagicMock() + mock_runner.adapters = {Platform(platform_name): mock_target} + 
mock_runner.config.get_home_channel.return_value = None + + adapter.gateway_runner = mock_runner + return mock_target + + +# =================================================================== +# Core behaviour: agent bypass +# =================================================================== + +class TestDeliverOnlyBypassesAgent: + """The whole point of the feature — handle_message must not be called.""" + + @pytest.mark.asyncio + async def test_post_delivers_directly_without_agent(self): + routes = { + "match-alert": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "12345"}, + "prompt": "{payload.user} matched with {payload.other}!", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + + # Guard: handle_message must NOT be called in deliver_only mode + handle_message_calls: list[MessageEvent] = [] + + async def _capture(event): + handle_message_calls.append(event) + + adapter.handle_message = _capture + + app = _create_app(adapter) + body = json.dumps( + {"payload": {"user": "alice", "other": "bob"}} + ).encode() + + async with TestClient(TestServer(app)) as cli: + resp = await cli.post( + "/webhooks/match-alert", + data=body, + headers={ + "Content-Type": "application/json", + "X-GitHub-Delivery": "delivery-1", + }, + ) + assert resp.status == 200 + data = await resp.json() + assert data["status"] == "delivered" + assert data["route"] == "match-alert" + assert data["target"] == "telegram" + + # Let any background tasks settle before asserting no agent call + await asyncio.sleep(0.05) + + # Agent was NOT invoked + assert handle_message_calls == [] + + # Target adapter.send() WAS called with the rendered template + mock_target.send.assert_awaited_once() + call_args = mock_target.send.await_args + chat_id_arg, content_arg = call_args.args[0], call_args.args[1] + assert chat_id_arg == "12345" + assert content_arg == "alice matched with bob!" 
+ + @pytest.mark.asyncio + async def test_template_rendering_works(self): + """Dot-notation template variables resolve in deliver_only mode.""" + routes = { + "alert": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "chat-1"}, + "prompt": "Build {build.number} status: {build.status}", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + app = _create_app(adapter) + + async with TestClient(TestServer(app)) as cli: + resp = await cli.post( + "/webhooks/alert", + json={"build": {"number": 77, "status": "FAILED"}}, + headers={"X-GitHub-Delivery": "d-render-1"}, + ) + assert resp.status == 200 + + mock_target.send.assert_awaited_once() + content_arg = mock_target.send.await_args.args[1] + assert content_arg == "Build 77 status: FAILED" + + @pytest.mark.asyncio + async def test_thread_id_passed_through(self): + """deliver_extra.thread_id flows through to the target adapter.""" + routes = { + "r": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1", "thread_id": "topic-42"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "d-thread-1"}, + ) + assert resp.status == 200 + + assert mock_target.send.await_args.kwargs["metadata"] == { + "thread_id": "topic-42" + } + + +# =================================================================== +# HTTP status codes +# =================================================================== + +class TestDeliverOnlyStatusCodes: + + @pytest.mark.asyncio + async def test_delivery_failure_returns_502(self): + """If the target adapter returns SendResult(success=False), 502.""" + routes = { + "r": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + 
"deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + mock_target.send = AsyncMock( + return_value=SendResult(success=False, error="rate limited by tg") + ) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "d-fail-1"}, + ) + assert resp.status == 502 + data = await resp.json() + # Generic error — no adapter-level detail leaks + assert data["error"] == "Delivery failed" + assert "rate limited" not in json.dumps(data) + + @pytest.mark.asyncio + async def test_delivery_exception_returns_502(self): + """If adapter.send() raises, we return 502 (not 500).""" + routes = { + "r": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + mock_target.send = AsyncMock(side_effect=RuntimeError("tg exploded")) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + resp = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "d-exc-1"}, + ) + assert resp.status == 502 + data = await resp.json() + assert data["error"] == "Delivery failed" + # Exception message must not leak + assert "exploded" not in json.dumps(data) + + @pytest.mark.asyncio + async def test_target_platform_not_connected_returns_502(self): + """deliver_only to a platform the gateway doesn't have → 502.""" + routes = { + "r": { + "secret": _INSECURE_NO_AUTH, + "deliver": "discord", # not configured in mock runner + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + _wire_mock_target(adapter, platform_name="telegram") # only TG wired + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as 
cli: + resp = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "d-no-platform-1"}, + ) + assert resp.status == 502 + + +# =================================================================== +# Startup validation +# =================================================================== + +class TestDeliverOnlyStartupValidation: + + @pytest.mark.asyncio + async def test_deliver_only_with_log_deliver_rejected(self): + """deliver_only=true + deliver=log is nonsense — reject at connect().""" + routes = { + "bad": { + "secret": _INSECURE_NO_AUTH, + "deliver": "log", + "deliver_only": True, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + with pytest.raises(ValueError, match="deliver_only=true but deliver is 'log'"): + await adapter.connect() + + @pytest.mark.asyncio + async def test_deliver_only_with_missing_deliver_rejected(self): + """deliver_only=true with no deliver field defaults to 'log' → reject.""" + routes = { + "bad": { + "secret": _INSECURE_NO_AUTH, + # no deliver field + "deliver_only": True, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + with pytest.raises(ValueError, match="deliver_only=true"): + await adapter.connect() + + @pytest.mark.asyncio + async def test_deliver_only_with_real_target_accepted(self): + """Sanity check — a valid deliver_only config passes validation.""" + routes = { + "good": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + # connect() does more than validation (binds a socket) — we just + # want to verify the validation doesn't raise. Call it and tear + # down immediately. 
+ try: + started = await adapter.connect() + if started: + await adapter.disconnect() + except ValueError: + pytest.fail("valid deliver_only config should not raise ValueError") + + +# =================================================================== +# Security + reliability invariants still hold +# =================================================================== + +class TestDeliverOnlySecurityInvariants: + + @pytest.mark.asyncio + async def test_hmac_still_enforced(self): + """deliver_only does NOT bypass HMAC validation.""" + secret = "real-secret-123" + routes = { + "r": { + "secret": secret, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + # No signature header → reject + resp = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "d-noauth-1"}, + ) + assert resp.status == 401 + + # Target never called + mock_target.send.assert_not_awaited() + + @pytest.mark.asyncio + async def test_idempotency_still_applies(self): + """Same delivery_id posted twice → second is suppressed.""" + routes = { + "r": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes) + mock_target = _wire_mock_target(adapter) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + r1 = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "dup-1"}, + ) + assert r1.status == 200 + + r2 = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "dup-1"}, + ) + # Existing webhook adapter treats duplicates as 200 + status=duplicate + assert r2.status == 200 + data = await r2.json() + assert data["status"] == "duplicate" + + # Target was called exactly once + 
assert mock_target.send.await_count == 1 + + @pytest.mark.asyncio + async def test_rate_limit_still_applies(self): + """Route-level rate limit caps deliver_only POSTs too.""" + routes = { + "r": { + "secret": _INSECURE_NO_AUTH, + "deliver": "telegram", + "deliver_only": True, + "deliver_extra": {"chat_id": "c-1"}, + "prompt": "hi", + } + } + adapter = _make_adapter(routes, rate_limit=2) + _wire_mock_target(adapter) + + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + for i in range(2): + r = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": f"rl-{i}"}, + ) + assert r.status == 200 + + # Third within the window → 429 + r3 = await cli.post( + "/webhooks/r", + json={}, + headers={"X-GitHub-Delivery": "rl-3"}, + ) + assert r3.status == 429 + + +# =================================================================== +# Unit: _direct_deliver dispatch +# =================================================================== + +class TestDirectDeliverUnit: + + @pytest.mark.asyncio + async def test_dispatches_to_cross_platform_for_messaging_targets(self): + adapter = _make_adapter({}) + mock_target = _wire_mock_target(adapter, "telegram") + + result = await adapter._direct_deliver( + "hello", + {"deliver": "telegram", "deliver_extra": {"chat_id": "c-1"}}, + ) + assert result.success is True + mock_target.send.assert_awaited_once_with( + "c-1", "hello", metadata=None + ) + + @pytest.mark.asyncio + async def test_dispatches_to_github_comment(self): + adapter = _make_adapter({}) + with patch.object( + adapter, "_deliver_github_comment", + new=AsyncMock(return_value=SendResult(success=True)), + ) as mock_gh: + result = await adapter._direct_deliver( + "review body", + { + "deliver": "github_comment", + "deliver_extra": {"repo": "org/r", "pr_number": "1"}, + }, + ) + assert result.success is True + mock_gh.assert_awaited_once() diff --git a/website/docs/user-guide/messaging/webhooks.md 
b/website/docs/user-guide/messaging/webhooks.md index bbf04bcb4f..2c60624fb6 100644 --- a/website/docs/user-guide/messaging/webhooks.md +++ b/website/docs/user-guide/messaging/webhooks.md @@ -72,6 +72,7 @@ Routes define how different webhook sources are handled. Each route is a named e | `skills` | No | List of skill names to load for the agent run. | | `deliver` | No | Where to send the response: `github_comment`, `telegram`, `discord`, `slack`, `signal`, `sms`, `whatsapp`, `matrix`, `mattermost`, `homeassistant`, `email`, `dingtalk`, `feishu`, `wecom`, `weixin`, `bluebubbles`, `qqbot`, or `log` (default). | | `deliver_extra` | No | Additional delivery config — keys depend on `deliver` type (e.g. `repo`, `pr_number`, `chat_id`). Values support the same `{dot.notation}` templates as `prompt`. | +| `deliver_only` | No | If `true`, skip the agent entirely — the rendered `prompt` template becomes the literal message that gets delivered. Zero LLM cost, sub-second delivery. See [Direct Delivery Mode](#direct-delivery-mode) for use cases. Requires `deliver` to be a real target (not `log`). | ### Full example @@ -240,6 +241,80 @@ For cross-platform delivery, the target platform must also be enabled and connec --- +## Direct Delivery Mode {#direct-delivery-mode} + +By default, every webhook POST triggers an agent run — the payload becomes a prompt, the agent processes it, and the agent's response is delivered. This costs LLM tokens on every event. + +For use cases where you just want to **push a plain notification** — no reasoning, no agent loop, just deliver the message — set `deliver_only: true` on the route. The rendered `prompt` template becomes the literal message body, and the adapter dispatches it directly to the configured delivery target. 
+ +### When to use direct delivery + +- **External service push** — Supabase/Firebase webhook fires on a database change → notify a user in Telegram instantly +- **Monitoring alerts** — Datadog/Grafana alert webhook → push to a Discord channel +- **Inter-agent pings** — Agent A notifies Agent B's user that a long-running task finished +- **Background job completion** — Cron job finishes → post result to Slack + +Benefits: + +- **Zero LLM tokens** — the agent is never invoked +- **Sub-second delivery** — a single adapter call, no reasoning loop +- **Same security as agent mode** — HMAC auth, rate limits, idempotency, and body-size limits all still apply +- **Synchronous response** — the POST returns `200 OK` once delivery succeeds, or `502` if the target rejects it, so your upstream service can retry intelligently + +### Example: Telegram push from Supabase + +```yaml +platforms: + webhook: + enabled: true + extra: + port: 8644 + secret: "global-secret" + routes: + antenna-matches: + secret: "antenna-webhook-secret" + deliver: "telegram" + deliver_only: true + prompt: "🎉 New match: {match.user_name} matched with you!" + deliver_extra: + chat_id: "{match.telegram_chat_id}" +``` + +Your Supabase edge function signs the payload with HMAC-SHA256 and POSTs to `https://your-server:8644/webhooks/antenna-matches`. The webhook adapter validates the signature, renders the template from the payload, delivers to Telegram, and returns `200 OK`. + +### Example: Dynamic subscription via CLI + +```bash +hermes webhook subscribe antenna-matches \ + --deliver telegram \ + --deliver-chat-id "123456789" \ + --deliver-only \ + --prompt "🎉 New match: {match.user_name} matched with you!" \ + --description "Antenna match notifications" +``` + +### Response codes + +| Status | Meaning | +|--------|---------| +| `200 OK` | Delivered successfully. 
Body: `{"status": "delivered", "route": "...", "target": "...", "delivery_id": "..."}` | +| `200 OK` (status=duplicate) | Duplicate `X-GitHub-Delivery` ID within the idempotency TTL (1 hour). Not re-delivered. | +| `401 Unauthorized` | HMAC signature invalid or missing. | +| `400 Bad Request` | Malformed JSON body. | +| `404 Not Found` | Unknown route name. | +| `413 Payload Too Large` | Body exceeded `max_body_bytes`. | +| `429 Too Many Requests` | Route rate limit exceeded. | +| `502 Bad Gateway` | Target adapter rejected the message or raised. The error is logged server-side; the response body is a generic `Delivery failed` to avoid leaking adapter internals. | + +### Configuration gotchas + +- `deliver_only: true` requires `deliver` to be a real target. `deliver: log` (or omitting `deliver`) is rejected at startup — the adapter refuses to start if it finds a misconfigured route. +- The `skills` field is ignored in direct delivery mode (no agent runs, so there's nothing to inject skills into). +- Template rendering uses the same `{dot.notation}` syntax as agent mode, including the `{__raw__}` token. +- Idempotency uses the same `X-GitHub-Delivery` / `X-Request-ID` header — retries with the same ID return `status=duplicate` and do NOT re-deliver. + +--- + ## Dynamic Subscriptions (CLI) {#dynamic-subscriptions} In addition to static routes in `config.yaml`, you can create webhook subscriptions dynamically using the `hermes webhook` CLI command. This is especially useful when the agent itself needs to set up event-driven triggers. From 7fa01fafa557f4cba59eb95a61a7343559bc2b44 Mon Sep 17 00:00:00 2001 From: Mibayy Date: Sun, 29 Mar 2026 22:48:28 -0700 Subject: [PATCH 037/455] feat: add maps skill (OpenStreetMap + Overpass + OSRM, no API key) Adds a maps optional skill with 8 commands, 44 POI categories, and zero external dependencies. Uses free open data: Nominatim, Overpass API, OSRM, and TimeAPI.io. 
Commands: search, reverse, nearby, distance, directions, timezone, area, bbox. Improvements over original PR #2015: - Fixed directory structure (optional-skills/productivity/maps/) - Fixed distance argparse (--to flag instead of broken dual nargs=+) - Fixed timezone (TimeAPI.io instead of broken worldtimeapi heuristic) - Expanded POI categories from 12 to 44 - Added directions command with turn-by-turn OSRM steps - Added area command (bounding box + dimensions for a named place) - Added bbox command (POI search within a geographic rectangle) - Added 23 unit tests - Improved haversine (atan2 for numerical stability) - Comprehensive SKILL.md with workflow examples Co-authored-by: Mibayy --- optional-skills/productivity/maps/SKILL.md | 153 +++ .../productivity/maps/scripts/maps_client.py | 1143 +++++++++++++++++ .../maps/tests/test_maps_client.py | 177 +++ 3 files changed, 1473 insertions(+) create mode 100644 optional-skills/productivity/maps/SKILL.md create mode 100644 optional-skills/productivity/maps/scripts/maps_client.py create mode 100644 optional-skills/productivity/maps/tests/test_maps_client.py diff --git a/optional-skills/productivity/maps/SKILL.md b/optional-skills/productivity/maps/SKILL.md new file mode 100644 index 0000000000..59e0359d56 --- /dev/null +++ b/optional-skills/productivity/maps/SKILL.md @@ -0,0 +1,153 @@ +--- +name: maps +description: > + Geocoding, reverse geocoding, nearby POI search (44 categories), + distance/routing, turn-by-turn directions, timezone lookup, bounding box + search, and area info. Uses OpenStreetMap + Overpass + OSRM. Free, no API key. +version: 1.1.0 +author: Mibayy +license: MIT +metadata: + hermes: + tags: [maps, geocoding, places, routing, distance, directions, openstreetmap, nominatim, overpass, osrm] + category: productivity + requires_toolsets: [terminal] +--- + +# Maps Skill + +Location intelligence using free, open data sources. 
8 commands, 44 POI +categories, zero dependencies (Python stdlib only), no API key required. + +Data sources: OpenStreetMap/Nominatim, Overpass API, OSRM, TimeAPI.io. + +## When to Use + +- User wants coordinates for a place name +- User has coordinates and wants the address +- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc. +- User wants driving/walking/cycling distance or travel time +- User wants turn-by-turn directions between two places +- User wants timezone information for a location +- User wants to search for POIs within a geographic area + +## Prerequisites + +Python 3.8+ (stdlib only — no pip installs needed). + +Script path after install: `~/.hermes/skills/maps/scripts/maps_client.py` + +## Commands + +```bash +MAPS=~/.hermes/skills/maps/scripts/maps_client.py +``` + +### search — Geocode a place name + +```bash +python3 $MAPS search "Eiffel Tower" +python3 $MAPS search "1600 Pennsylvania Ave, Washington DC" +``` + +Returns: lat, lon, display name, type, bounding box, importance score. + +### reverse — Coordinates to address + +```bash +python3 $MAPS reverse 48.8584 2.2945 +``` + +Returns: full address breakdown (street, city, state, country, postcode). + +### nearby — Find places by category + +```bash +python3 $MAPS nearby 48.8584 2.2945 restaurant --limit 10 +python3 $MAPS nearby 40.7128 -74.0060 hospital --radius 2000 +python3 $MAPS nearby 51.5074 -0.1278 cafe --limit 5 --radius 300 +``` + +44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket, +atm, gas_station, parking, museum, park, school, university, bank, police, +fire_station, library, airport, train_station, bus_stop, church, mosque, +synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office, +convenience_store, bakery, bookshop, laundry, car_wash, car_rental, +bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub. 
+ +### distance — Travel distance and time + +```bash +python3 $MAPS distance "Paris" --to "Lyon" +python3 $MAPS distance "New York" --to "Boston" --mode driving +python3 $MAPS distance "Big Ben" --to "Tower Bridge" --mode walking +``` + +Modes: driving (default), walking, cycling. Returns road distance, duration, +and straight-line distance for comparison. + +### directions — Turn-by-turn navigation + +```bash +python3 $MAPS directions "Eiffel Tower" --to "Louvre Museum" --mode walking +python3 $MAPS directions "JFK Airport" --to "Times Square" --mode driving +``` + +Returns numbered steps with instruction, distance, duration, road name, and +maneuver type (turn, depart, arrive, etc.). + +### timezone — Timezone for coordinates + +```bash +python3 $MAPS timezone 48.8584 2.2945 +python3 $MAPS timezone 35.6762 139.6503 +``` + +Returns timezone name, UTC offset, and current local time. + +### area — Bounding box and area for a place + +```bash +python3 $MAPS area "Manhattan, New York" +python3 $MAPS area "London" +``` + +Returns bounding box coordinates, width/height in km, and approximate area. +Useful as input for the bbox command. + +### bbox — Search within a bounding box + +```bash +python3 $MAPS bbox 40.75 -74.00 40.77 -73.98 restaurant --limit 20 +``` + +Finds POIs within a geographic rectangle. Use `area` first to get the +bounding box coordinates for a named place. + +## Workflow Examples + +**"Find Italian restaurants near the Colosseum":** +1. `search "Colosseum Rome"` → get lat/lon +2. `nearby LAT LON restaurant --radius 500` + +**"How do I walk from hotel to conference center?":** +1. `directions "Hotel Name" --to "Conference Center" --mode walking` + +**"What restaurants are in downtown Seattle?":** +1. `area "Downtown Seattle"` → get bounding box +2. 
`bbox S W N E restaurant --limit 30` + +## Pitfalls + +- Nominatim ToS: max 1 req/s (handled automatically by the script) +- `nearby` requires lat/lon — use `search` first to get coordinates +- OSRM routing coverage is best for Europe and North America +- Overpass API can be slow during peak hours (script retries automatically) +- `distance` and `directions` use `--to` flag for the destination (not positional) + +## Verification + +```bash +python3 ~/.hermes/skills/maps/scripts/maps_client.py search "Statue of Liberty" +# Should return lat ~40.689, lon ~-74.044 +``` diff --git a/optional-skills/productivity/maps/scripts/maps_client.py b/optional-skills/productivity/maps/scripts/maps_client.py new file mode 100644 index 0000000000..c271570f99 --- /dev/null +++ b/optional-skills/productivity/maps/scripts/maps_client.py @@ -0,0 +1,1143 @@ +#!/usr/bin/env python3 +""" +maps_client.py - CLI tool for maps, geocoding, routing, POI search, and more. +Uses only Python stdlib. Data from OpenStreetMap/Nominatim, Overpass API, OSRM, +and TimeAPI.io. 
+ +Commands: + search - Geocode a place name to coordinates + reverse - Reverse geocode coordinates to an address + nearby - Find nearby POIs by category + distance - Road distance and travel time between two places + directions - Turn-by-turn directions between two places + timezone - Timezone info for coordinates + bbox - Find POIs within a bounding box + area - Get bounding box and area info for a named place +""" + +import argparse +import json +import math +import os +import sys +import time +import urllib.error +import urllib.parse +import urllib.request + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +USER_AGENT = "HermesAgent/1.0 (contact: hermes@agent.ai)" +DATA_SOURCE = "OpenStreetMap/Nominatim" + +NOMINATIM_SEARCH = "https://nominatim.openstreetmap.org/search" +NOMINATIM_REVERSE = "https://nominatim.openstreetmap.org/reverse" +OVERPASS_API = "https://overpass-api.de/api/interpreter" +OSRM_BASE = "https://router.project-osrm.org/route/v1" +TIMEAPI_BASE = "https://timeapi.io/api/timezone/coordinate" + +# Seconds to sleep between Nominatim requests (ToS requirement) +NOMINATIM_RATE_LIMIT = 1.0 + +# Maximum retries for HTTP errors +MAX_RETRIES = 3 +RETRY_DELAY = 2.0 # seconds + +# Category -> (OSM tag key, OSM tag value) +CATEGORY_TAGS = { + # Food & Drink + "restaurant": ("amenity", "restaurant"), + "cafe": ("amenity", "cafe"), + "bar": ("amenity", "bar"), + "bakery": ("shop", "bakery"), + "convenience_store": ("shop", "convenience"), + # Health + "hospital": ("amenity", "hospital"), + "pharmacy": ("amenity", "pharmacy"), + "dentist": ("amenity", "dentist"), + "doctor": ("amenity", "doctors"), + "veterinary": ("amenity", "veterinary"), + # Accommodation + "hotel": ("tourism", "hotel"), + # Shopping & Services + "supermarket": ("shop", "supermarket"), + "bookshop": ("shop", "books"), + "laundry": ("shop", "laundry"), + # Finance + 
"atm": ("amenity", "atm"), + "bank": ("amenity", "bank"), + # Transport + "gas_station": ("amenity", "fuel"), + "parking": ("amenity", "parking"), + "airport": ("aeroway", "aerodrome"), + "train_station": ("railway", "station"), + "bus_stop": ("highway", "bus_stop"), + "taxi": ("amenity", "taxi"), + "car_wash": ("amenity", "car_wash"), + "car_rental": ("amenity", "car_rental"), + "bicycle_rental": ("amenity", "bicycle_rental"), + # Culture & Entertainment + "museum": ("tourism", "museum"), + "cinema": ("amenity", "cinema"), + "theatre": ("amenity", "theatre"), + "nightclub": ("amenity", "nightclub"), + "zoo": ("tourism", "zoo"), + # Education + "school": ("amenity", "school"), + "university": ("amenity", "university"), + "library": ("amenity", "library"), + # Public Services + "police": ("amenity", "police"), + "fire_station": ("amenity", "fire_station"), + "post_office": ("amenity", "post_office"), + # Religion + "church": ("amenity", "place_of_worship"), # refined by religion tag + "mosque": ("amenity", "place_of_worship"), + "synagogue": ("amenity", "place_of_worship"), + # Recreation + "park": ("leisure", "park"), + "gym": ("leisure", "fitness_centre"), + "swimming_pool": ("leisure", "swimming_pool"), + "playground": ("leisure", "playground"), + "stadium": ("leisure", "stadium"), +} + +# Religion-specific overrides for place_of_worship categories +RELIGION_FILTER = { + "church": "christian", + "mosque": "muslim", + "synagogue": "jewish", +} + +VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys()) + +OSRM_PROFILES = { + "driving": "driving", + "walking": "foot", + "cycling": "bike", +} + +# --------------------------------------------------------------------------- +# Output helpers +# --------------------------------------------------------------------------- + +def print_json(data): + """Print data as pretty-printed JSON to stdout.""" + print(json.dumps(data, indent=2, ensure_ascii=False)) + + +def error_exit(message, code=1): + """Print an error result as JSON and 
def http_get(url, params=None, retries=MAX_RETRIES, silent=False):
    """
    GET *url* (with optional query *params*) and return the parsed JSON body.

    Sends the required User-Agent header, retries transient failures
    (429/502/503/504, network errors, bad JSON) with linear backoff, and on
    permanent failure either raises RuntimeError (silent=True) or prints a
    JSON error object and exits the process (silent=False).
    """
    full_url = url if not params else url + "?" + urllib.parse.urlencode(params)
    request = urllib.request.Request(full_url, headers={"User-Agent": USER_AGENT})

    last_error = None
    for attempt in range(1, retries + 1):
        try:
            with urllib.request.urlopen(request, timeout=15) as response:
                return json.loads(response.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:
            last_error = f"HTTP {exc.code}: {exc.reason} for {full_url}"
            if exc.code not in (429, 503, 502, 504):
                # Non-transient status: retrying cannot help.
                if silent:
                    raise RuntimeError(last_error)
                error_exit(last_error)
            time.sleep(RETRY_DELAY * attempt)  # linear backoff
        except urllib.error.URLError as exc:
            last_error = f"URL error: {exc.reason}"
            time.sleep(RETRY_DELAY * attempt)
        except json.JSONDecodeError as exc:
            last_error = f"JSON parse error: {exc}"
            time.sleep(RETRY_DELAY * attempt)

    msg = f"Request failed after {retries} attempts. Last error: {last_error}"
    if silent:
        raise RuntimeError(msg)
    error_exit(msg)
+ urllib.parse.urlencode(params) + + req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT}) + + last_error = None + for attempt in range(1, retries + 1): + try: + with urllib.request.urlopen(req, timeout=15) as resp: + return resp.read().decode("utf-8") + except urllib.error.HTTPError as exc: + last_error = f"HTTP {exc.code}: {exc.reason} for {url}" + if exc.code in (429, 503, 502, 504): + time.sleep(RETRY_DELAY * attempt) + else: + if silent: + raise RuntimeError(last_error) + error_exit(last_error) + except urllib.error.URLError as exc: + last_error = f"URL error: {exc.reason}" + time.sleep(RETRY_DELAY * attempt) + + msg = f"Request failed after {retries} attempts. Last error: {last_error}" + if silent: + raise RuntimeError(msg) + error_exit(msg) + + +def http_post(url, data_str, retries=MAX_RETRIES): + """ + Perform an HTTP POST with a plain-text body (for Overpass QL). + Returns parsed JSON. + """ + encoded = data_str.encode("utf-8") + req = urllib.request.Request( + url, + data=encoded, + headers={ + "User-Agent": USER_AGENT, + "Content-Type": "application/x-www-form-urlencoded", + }, + ) + + last_error = None + for attempt in range(1, retries + 1): + try: + with urllib.request.urlopen(req, timeout=30) as resp: + raw = resp.read().decode("utf-8") + return json.loads(raw) + except urllib.error.HTTPError as exc: + last_error = f"HTTP {exc.code}: {exc.reason}" + if exc.code in (429, 503, 502, 504): + time.sleep(RETRY_DELAY * attempt) + else: + error_exit(last_error) + except urllib.error.URLError as exc: + last_error = f"URL error: {exc.reason}" + time.sleep(RETRY_DELAY * attempt) + except json.JSONDecodeError as exc: + last_error = f"JSON parse error: {exc}" + time.sleep(RETRY_DELAY * attempt) + + error_exit(f"POST failed after {retries} attempts. 
def haversine_m(lat1, lon1, lat2, lon2):
    """Great-circle distance in metres between two lat/lon points (Haversine)."""
    earth_radius_m = 6_371_000  # mean Earth radius
    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2
    half_dlon = math.radians(lon2 - lon1) / 2
    chord = (math.sin(half_dlat) ** 2
             + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(half_dlon) ** 2)
    # atan2 form is numerically stable even for near-antipodal points.
    return 2 * earth_radius_m * math.atan2(math.sqrt(chord), math.sqrt(1 - chord))


def nominatim_search(query, limit=5):
    """Geocode a free-text query via Nominatim; returns the raw result list."""
    time.sleep(NOMINATIM_RATE_LIMIT)  # Nominatim ToS: max 1 request/second
    return http_get(
        NOMINATIM_SEARCH,
        params={
            "q": query,
            "format": "json",
            "limit": limit,
            "addressdetails": 1,
        },
    )


def nominatim_reverse(lat, lon):
    """Reverse geocode lat/lon via Nominatim; returns a single result dict."""
    time.sleep(NOMINATIM_RATE_LIMIT)  # Nominatim ToS: max 1 request/second
    return http_get(
        NOMINATIM_REVERSE,
        params={
            "lat": lat,
            "lon": lon,
            "format": "json",
            "addressdetails": 1,
        },
    )
def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
                          religion=None):
    """Overpass QL query: nodes + ways carrying a tag within *radius* metres."""
    religion_part = f'["religion"="{religion}"]' if religion else ""
    selector = (f'["{tag_key}"="{tag_val}"]{religion_part}'
                f'(around:{radius},{lat},{lon})')
    query_lines = [
        '[out:json][timeout:25];',
        '(',
        f' node{selector};',
        f' way{selector};',
        ');',
        f'out center {limit};',
    ]
    return "\n".join(query_lines) + "\n"


def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit,
                        religion=None):
    """Overpass QL query: nodes + ways carrying a tag inside a bounding box."""
    religion_part = f'["religion"="{religion}"]' if religion else ""
    selector = (f'["{tag_key}"="{tag_val}"]{religion_part}'
                f'({south},{west},{north},{east})')
    query_lines = [
        '[out:json][timeout:25];',
        '(',
        f' node{selector};',
        f' way{selector};',
        ');',
        f'out center {limit};',
    ]
    return "\n".join(query_lines) + "\n"
+ """ + places = [] + for el in elements: + # Ways have a "center" sub-dict; nodes have lat/lon directly + if el["type"] == "way": + center = el.get("center", {}) + el_lat = center.get("lat") + el_lon = center.get("lon") + else: + el_lat = el.get("lat") + el_lon = el.get("lon") + + if el_lat is None or el_lon is None: + continue + + tags = el.get("tags", {}) + name = tags.get("name") or tags.get("name:en") or "" + + # Build a short address from available tags + addr_parts = [] + for part_key in ("addr:housenumber", "addr:street", "addr:city"): + val = tags.get(part_key) + if val: + addr_parts.append(val) + address_str = ", ".join(addr_parts) if addr_parts else "" + + place = { + "name": name, + "address": address_str, + "lat": el_lat, + "lon": el_lon, + "osm_type": el.get("type", ""), + "osm_id": el.get("id", ""), + "tags": { + k: v for k, v in tags.items() + if k not in ("name", "name:en", + "addr:housenumber", "addr:street", "addr:city") + }, + } + + if ref_lat is not None and ref_lon is not None: + dist_m = haversine_m(ref_lat, ref_lon, el_lat, el_lon) + place["distance_m"] = round(dist_m, 1) + + places.append(place) + + # Sort by distance if available + if places and "distance_m" in places[0]: + places.sort(key=lambda p: p["distance_m"]) + + return places + + +# --------------------------------------------------------------------------- +# Command: search +# --------------------------------------------------------------------------- + +def cmd_search(args): + """Geocode a place name and return top results.""" + query = " ".join(args.query) + raw = nominatim_search(query, limit=5) + + if not raw: + print_json({ + "query": query, + "results": [], + "count": 0, + "data_source": DATA_SOURCE, + }) + return + + results = [] + for item in raw: + bb = item.get("boundingbox", []) + results.append({ + "name": item.get("name") or item.get("display_name", ""), + "display_name": item.get("display_name", ""), + "lat": float(item["lat"]), + "lon": float(item["lon"]), + 
def cmd_reverse(args):
    """Reverse geocode a lat/lon pair into a structured address (prints JSON)."""
    try:
        lat, lon = float(args.lat), float(args.lon)
    except ValueError:
        error_exit("LAT and LON must be numeric values.")

    if not -90 <= lat <= 90:
        error_exit("Latitude must be between -90 and 90.")
    if not -180 <= lon <= 180:
        error_exit("Longitude must be between -180 and 180.")

    data = nominatim_reverse(lat, lon)
    if "error" in data:
        error_exit(f"Reverse geocode failed: {data['error']}")

    addr = data.get("address", {})
    # Nominatim reports the locality under city, town, or village
    # depending on the place size; collapse to a single "city" field.
    locality = addr.get("city") or addr.get("town") or addr.get("village", "")
    address_out = {
        "house_number": addr.get("house_number", ""),
        "road": addr.get("road", ""),
        "neighbourhood": addr.get("neighbourhood", ""),
        "suburb": addr.get("suburb", ""),
        "city": locality,
        "county": addr.get("county", ""),
        "state": addr.get("state", ""),
        "postcode": addr.get("postcode", ""),
        "country": addr.get("country", ""),
        "country_code": addr.get("country_code", ""),
    }

    print_json({
        "lat": lat,
        "lon": lon,
        "display_name": data.get("display_name", ""),
        "address": address_out,
        "osm_type": data.get("osm_type", ""),
        "osm_id": data.get("osm_id", ""),
        "data_source": DATA_SOURCE,
    })
def cmd_nearby(args):
    """Find nearby POIs of one category via the Overpass API (prints JSON)."""
    try:
        lat, lon = float(args.lat), float(args.lon)
    except ValueError:
        error_exit("LAT and LON must be numeric values.")

    category = args.category.lower()
    if category not in CATEGORY_TAGS:
        error_exit(
            f"Unknown category '{category}'. "
            f"Valid categories: {', '.join(VALID_CATEGORIES)}"
        )

    radius, limit = int(args.radius), int(args.limit)
    if radius <= 0:
        error_exit("Radius must be a positive integer (metres).")
    if limit <= 0:
        error_exit("Limit must be a positive integer.")

    tag_key, tag_val = CATEGORY_TAGS[category]
    # place_of_worship categories are narrowed further by a religion tag.
    query = build_overpass_nearby(
        tag_key, tag_val, lat, lon, radius, limit,
        religion=RELIGION_FILTER.get(category),
    )

    response = http_post(OVERPASS_API, "data=" + urllib.parse.quote(query))
    places = parse_overpass_elements(
        response.get("elements", []), ref_lat=lat, ref_lon=lon
    )
    for place in places:
        place["category"] = category

    print_json({
        "center_lat": lat,
        "center_lon": lon,
        "category": category,
        "radius_m": radius,
        "count": len(places),
        "results": places,
        "data_source": DATA_SOURCE,
    })
Choose from: {', '.join(OSRM_PROFILES)}") + + # Geocode origin and destination + o_lat, o_lon, o_name = geocode_single(origin_query) + d_lat, d_lon, d_name = geocode_single(destination_query) + + profile = OSRM_PROFILES[mode] + url = ( + f"{OSRM_BASE}/{profile}/" + f"{o_lon},{o_lat};{d_lon},{d_lat}" + f"?overview=false&steps=false" + ) + + osrm_data = http_get(url) + + if osrm_data.get("code") != "Ok": + error_exit( + f"OSRM routing failed: " + f"{osrm_data.get('message', osrm_data.get('code', 'unknown error'))}" + ) + + routes = osrm_data.get("routes", []) + if not routes: + error_exit("No route found between the two locations.") + + route = routes[0] + distance_m = route.get("distance", 0) + duration_s = route.get("duration", 0) + distance_km = round(distance_m / 1000, 3) + duration_min = round(duration_s / 60, 2) + + # Straight-line distance for reference + straight_m = haversine_m(o_lat, o_lon, d_lat, d_lon) + + print_json({ + "origin": { + "query": origin_query, + "display_name": o_name, + "lat": o_lat, + "lon": o_lon, + }, + "destination": { + "query": destination_query, + "display_name": d_name, + "lat": d_lat, + "lon": d_lon, + }, + "mode": mode, + "distance_km": distance_km, + "distance_m": round(distance_m, 1), + "duration_minutes": duration_min, + "duration_seconds": round(duration_s, 1), + "straight_line_km": round(straight_m / 1000, 3), + "data_source": DATA_SOURCE, + }) + + +# --------------------------------------------------------------------------- +# Command: directions +# --------------------------------------------------------------------------- + +def _format_duration(seconds): + """Format seconds into a human-readable string.""" + if seconds < 60: + return f"{round(seconds)}s" + minutes = seconds / 60 + if minutes < 60: + return f"{round(minutes, 1)} min" + hours = int(minutes // 60) + remaining = round(minutes % 60) + return f"{hours}h {remaining}min" + + +def _format_distance(metres): + """Format metres into a human-readable string.""" + if 
metres < 1000: + return f"{round(metres)} m" + return f"{round(metres / 1000, 2)} km" + + +def cmd_directions(args): + """Get turn-by-turn directions between two places via OSRM.""" + origin_query = " ".join(args.origin) + destination_query = " ".join(args.to) + mode = args.mode.lower() + + if mode not in OSRM_PROFILES: + error_exit(f"Invalid mode '{mode}'. Choose from: {', '.join(OSRM_PROFILES)}") + + # Geocode origin and destination + o_lat, o_lon, o_name = geocode_single(origin_query) + d_lat, d_lon, d_name = geocode_single(destination_query) + + profile = OSRM_PROFILES[mode] + url = ( + f"{OSRM_BASE}/{profile}/" + f"{o_lon},{o_lat};{d_lon},{d_lat}" + f"?overview=false&steps=true" + ) + + osrm_data = http_get(url) + + if osrm_data.get("code") != "Ok": + error_exit( + f"OSRM routing failed: " + f"{osrm_data.get('message', osrm_data.get('code', 'unknown error'))}" + ) + + routes = osrm_data.get("routes", []) + if not routes: + error_exit("No route found between the two locations.") + + route = routes[0] + distance_m = route.get("distance", 0) + duration_s = route.get("duration", 0) + + # Extract steps from all legs + steps = [] + step_num = 0 + for leg in route.get("legs", []): + for step in leg.get("steps", []): + maneuver = step.get("maneuver", {}) + step_dist = step.get("distance", 0) + step_dur = step.get("duration", 0) + step_name = step.get("name", "") + modifier = maneuver.get("modifier", "") + m_type = maneuver.get("type", "") + + # Build instruction text + if m_type == "depart": + instruction = f"Depart on {step_name}" if step_name else "Depart" + elif m_type == "arrive": + instruction = "Arrive at destination" + elif m_type == "turn": + instruction = f"Turn {modifier} onto {step_name}" if step_name else f"Turn {modifier}" + elif m_type == "new name": + instruction = f"Continue onto {step_name}" if step_name else "Continue" + elif m_type == "merge": + instruction = f"Merge {modifier} onto {step_name}" if step_name else f"Merge {modifier}" + elif m_type == 
"fork": + instruction = f"Take the {modifier} fork onto {step_name}" if step_name else f"Take the {modifier} fork" + elif m_type == "roundabout": + instruction = f"Enter roundabout, exit onto {step_name}" if step_name else "Enter roundabout" + elif m_type == "rotary": + instruction = f"Enter rotary, exit onto {step_name}" if step_name else "Enter rotary" + elif m_type == "end of road": + instruction = f"At end of road, turn {modifier} onto {step_name}" if step_name else f"At end of road, turn {modifier}" + elif m_type == "continue": + instruction = f"Continue {modifier} on {step_name}" if step_name else f"Continue {modifier}" + elif m_type == "on ramp": + instruction = f"Take ramp onto {step_name}" if step_name else "Take ramp" + elif m_type == "off ramp": + instruction = f"Take exit onto {step_name}" if step_name else "Take exit" + else: + instruction = f"{m_type} {modifier} {step_name}".strip() + + step_num += 1 + steps.append({ + "step": step_num, + "instruction": instruction, + "distance": _format_distance(step_dist), + "distance_m": round(step_dist, 1), + "duration": _format_duration(step_dur), + "duration_s": round(step_dur, 1), + "road_name": step_name, + "maneuver": m_type, + }) + + print_json({ + "origin": { + "query": origin_query, + "display_name": o_name, + "lat": o_lat, + "lon": o_lon, + }, + "destination": { + "query": destination_query, + "display_name": d_name, + "lat": d_lat, + "lon": d_lon, + }, + "mode": mode, + "total_distance": _format_distance(distance_m), + "total_distance_m": round(distance_m, 1), + "total_duration": _format_duration(duration_s), + "total_duration_s": round(duration_s, 1), + "steps": steps, + "step_count": len(steps), + "data_source": DATA_SOURCE, + }) + + +# --------------------------------------------------------------------------- +# Command: timezone +# --------------------------------------------------------------------------- + +def cmd_timezone(args): + """ + Get timezone information for a lat/lon coordinate. 
+ + Strategy: + 1. Try TimeAPI.io (free, no key, supports coordinate-based lookup). + 2. Fallback: derive UTC offset approximation from longitude. + """ + try: + lat = float(args.lat) + lon = float(args.lon) + except ValueError: + error_exit("LAT and LON must be numeric values.") + + if not (-90 <= lat <= 90): + error_exit("Latitude must be between -90 and 90.") + if not (-180 <= lon <= 180): + error_exit("Longitude must be between -180 and 180.") + + timezone_str = None + timezone_src = None + current_time = None + utc_offset = None + + # --- Strategy 1: TimeAPI.io coordinate lookup --- + try: + params = {"latitude": lat, "longitude": lon} + tz_data = http_get(TIMEAPI_BASE, params=params, silent=True) + if isinstance(tz_data, dict): + timezone_str = tz_data.get("timeZone") + current_time = tz_data.get("currentLocalTime") + # Build utc_offset from currentUtcOffset if available + offset_info = tz_data.get("currentUtcOffset", {}) + if isinstance(offset_info, dict): + oh = offset_info.get("hours", 0) + om = abs(offset_info.get("minutes", 0)) + os_ = offset_info.get("seconds", 0) + sign = "+" if oh >= 0 else "-" + utc_offset = f"{sign}{abs(oh):02d}:{om:02d}" + elif tz_data.get("standardUtcOffset"): + offset_info2 = tz_data["standardUtcOffset"] + if isinstance(offset_info2, dict): + oh = offset_info2.get("hours", 0) + om = abs(offset_info2.get("minutes", 0)) + sign = "+" if oh >= 0 else "-" + utc_offset = f"{sign}{abs(oh):02d}:{om:02d}" + timezone_src = "timeapi.io" + except (RuntimeError, KeyError, TypeError): + pass # API may be down; continue to fallback + + # --- Strategy 2: longitude-based UTC offset approximation --- + if not timezone_str: + approx_offset_h = round(lon / 15) + if approx_offset_h >= 0: + utc_offset = f"+{approx_offset_h:02d}:00" + else: + utc_offset = f"-{abs(approx_offset_h):02d}:00" + timezone_str = f"UTC{utc_offset}" + timezone_src = "longitude approximation (longitude/15)" + + print_json({ + "lat": lat, + "lon": lon, + "timezone": timezone_str, 
+ "utc_offset": utc_offset, + "current_time": current_time, + "source": timezone_src, + "data_source": DATA_SOURCE, + }) + + +# --------------------------------------------------------------------------- +# Command: bbox +# --------------------------------------------------------------------------- + +def cmd_bbox(args): + """Find POIs within a bounding box using the Overpass API.""" + try: + lat1 = float(args.lat1) + lon1 = float(args.lon1) + lat2 = float(args.lat2) + lon2 = float(args.lon2) + except ValueError: + error_exit("All coordinate arguments must be numeric values.") + + # Normalize: south/west < north/east + south = min(lat1, lat2) + north = max(lat1, lat2) + west = min(lon1, lon2) + east = max(lon1, lon2) + + category = args.category.lower() + if category not in CATEGORY_TAGS: + error_exit( + f"Unknown category '{category}'. " + f"Valid categories: {', '.join(VALID_CATEGORIES)}" + ) + + limit = int(args.limit) + if limit <= 0: + error_exit("Limit must be a positive integer.") + + tag_key, tag_val = CATEGORY_TAGS[category] + religion = RELIGION_FILTER.get(category) + query = build_overpass_bbox(tag_key, tag_val, south, west, north, east, + limit, religion=religion) + + post_data = "data=" + urllib.parse.quote(query) + raw = http_post(OVERPASS_API, post_data) + + elements = raw.get("elements", []) + + # Use center of bbox as reference for distance sorting + center_lat = (south + north) / 2 + center_lon = (west + east) / 2 + places = parse_overpass_elements(elements, ref_lat=center_lat, + ref_lon=center_lon) + + for p in places: + p["category"] = category + + print_json({ + "bounding_box": { + "south": south, + "west": west, + "north": north, + "east": east, + }, + "category": category, + "count": len(places), + "results": places, + "data_source": DATA_SOURCE, + }) + + +# --------------------------------------------------------------------------- +# Command: area +# --------------------------------------------------------------------------- + +def 
cmd_area(args): + """Get bounding box and area info for a named place.""" + query = " ".join(args.place) + raw = nominatim_search(query, limit=1) + + if not raw: + error_exit(f"Could not find place: {query}") + + item = raw[0] + bb = item.get("boundingbox", []) + + if len(bb) < 4: + error_exit(f"No bounding box data available for: {query}") + + min_lat = float(bb[0]) + max_lat = float(bb[1]) + min_lon = float(bb[2]) + max_lon = float(bb[3]) + + # Approximate area in km² using the bounding box + # Width in km at the average latitude + avg_lat = (min_lat + max_lat) / 2 + height_km = haversine_m(min_lat, min_lon, max_lat, min_lon) / 1000 + width_km = haversine_m(avg_lat, min_lon, avg_lat, max_lon) / 1000 + approx_area_km2 = round(height_km * width_km, 3) + + print_json({ + "query": query, + "display_name": item.get("display_name", ""), + "lat": float(item["lat"]), + "lon": float(item["lon"]), + "type": item.get("type", ""), + "category": item.get("category", ""), + "bounding_box": { + "south": min_lat, + "north": max_lat, + "west": min_lon, + "east": max_lon, + }, + "dimensions": { + "width_km": round(width_km, 3), + "height_km": round(height_km, 3), + }, + "approx_area_km2": approx_area_km2, + "osm_type": item.get("osm_type", ""), + "osm_id": item.get("osm_id", ""), + "data_source": DATA_SOURCE, + }) + + +# --------------------------------------------------------------------------- +# CLI setup +# --------------------------------------------------------------------------- + +def build_parser(): + parser = argparse.ArgumentParser( + prog="maps_client.py", + description=( + "CLI maps tool: geocoding, reverse geocoding, POI search, " + "routing, directions, timezone, and area lookup. " + "Powered by OpenStreetMap, OSRM, Overpass, and TimeAPI.io. " + "No API keys required." 
+ ), + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=( + "Examples:\n" + " maps_client.py search Times Square\n" + " maps_client.py reverse 40.758 -73.985\n" + " maps_client.py nearby 40.758 -73.985 restaurant --radius 800\n" + " maps_client.py distance New York --to Los Angeles --mode driving\n" + " maps_client.py directions Paris --to Berlin --mode driving\n" + " maps_client.py timezone 48.8566 2.3522\n" + " maps_client.py bbox 40.70 -74.02 40.78 -73.95 restaurant\n" + " maps_client.py area Manhattan" + ), + ) + sub = parser.add_subparsers(dest="command", required=True, + metavar="COMMAND") + + # -- search -- + p_search = sub.add_parser( + "search", + help="Geocode a place name to coordinates.", + description="Search for a place by name and return coordinates and details.", + ) + p_search.add_argument( + "query", nargs="+", + help="Place name or address to search.", + ) + + # -- reverse -- + p_reverse = sub.add_parser( + "reverse", + help="Reverse geocode coordinates to an address.", + description="Convert latitude/longitude coordinates to a human-readable address.", + ) + p_reverse.add_argument("lat", help="Latitude (decimal degrees).") + p_reverse.add_argument("lon", help="Longitude (decimal degrees).") + + # -- nearby -- + p_nearby = sub.add_parser( + "nearby", + help="Find nearby places of a given category.", + description=( + "Find points of interest near a location using the Overpass API.\n" + f"Categories: {', '.join(VALID_CATEGORIES)}" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p_nearby.add_argument("lat", help="Center latitude (decimal degrees).") + p_nearby.add_argument("lon", help="Center longitude (decimal degrees).") + p_nearby.add_argument( + "category", + help="POI category (use --help to see full list).", + ) + p_nearby.add_argument( + "--radius", "-r", + default=500, type=int, metavar="METRES", + help="Search radius in metres (default: 500).", + ) + p_nearby.add_argument( + "--limit", "-n", + default=10, 
type=int, metavar="N", + help="Maximum number of results (default: 10).", + ) + + # -- distance -- + p_dist = sub.add_parser( + "distance", + help="Calculate road distance and travel time.", + description=( + "Calculate road distance and estimated travel time between two places.\n" + "Example: maps_client.py distance New York --to Los Angeles" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p_dist.add_argument( + "origin", nargs="+", + help="Origin address or place name.", + ) + p_dist.add_argument( + "--to", nargs="+", required=True, metavar="DEST", + help="Destination address or place name (required).", + ) + p_dist.add_argument( + "--mode", "-m", + default="driving", + choices=list(OSRM_PROFILES.keys()), + help="Travel mode (default: driving).", + ) + + # -- directions -- + p_dir = sub.add_parser( + "directions", + help="Get turn-by-turn directions between two places.", + description=( + "Get step-by-step navigation directions between two places.\n" + "Example: maps_client.py directions Paris --to Berlin --mode driving" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p_dir.add_argument( + "origin", nargs="+", + help="Origin address or place name.", + ) + p_dir.add_argument( + "--to", nargs="+", required=True, metavar="DEST", + help="Destination address or place name (required).", + ) + p_dir.add_argument( + "--mode", "-m", + default="driving", + choices=list(OSRM_PROFILES.keys()), + help="Travel mode (default: driving).", + ) + + # -- timezone -- + p_tz = sub.add_parser( + "timezone", + help="Get timezone information for coordinates.", + description="Look up timezone and current local time for a lat/lon coordinate.", + ) + p_tz.add_argument("lat", help="Latitude (decimal degrees).") + p_tz.add_argument("lon", help="Longitude (decimal degrees).") + + # -- bbox -- + p_bbox = sub.add_parser( + "bbox", + help="Find POIs within a bounding box.", + description=( + "Search for points of interest within a geographic bounding box.\n" 
+ "Tip: use the 'area' command to find bounding boxes for named places.\n" + f"Categories: {', '.join(VALID_CATEGORIES)}" + ), + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + p_bbox.add_argument("lat1", help="First corner latitude.") + p_bbox.add_argument("lon1", help="First corner longitude.") + p_bbox.add_argument("lat2", help="Second corner latitude.") + p_bbox.add_argument("lon2", help="Second corner longitude.") + p_bbox.add_argument("category", help="POI category to search for.") + p_bbox.add_argument( + "--limit", "-n", + default=20, type=int, metavar="N", + help="Maximum number of results (default: 20).", + ) + + # -- area -- + p_area = sub.add_parser( + "area", + help="Get bounding box and area info for a named place.", + description=( + "Look up a place by name and return its bounding box, dimensions, " + "and approximate area. Useful as input to the 'bbox' command." + ), + ) + p_area.add_argument( + "place", nargs="+", + help="Place name to look up (e.g., 'Manhattan' or 'downtown Seattle').", + ) + + return parser + + +def main(): + parser = build_parser() + args = parser.parse_args() + + dispatch = { + "search": cmd_search, + "reverse": cmd_reverse, + "nearby": cmd_nearby, + "distance": cmd_distance, + "directions": cmd_directions, + "timezone": cmd_timezone, + "bbox": cmd_bbox, + "area": cmd_area, + } + + handler = dispatch.get(args.command) + if handler is None: + error_exit(f"Unknown command: {args.command}") + + handler(args) + + +if __name__ == "__main__": + main() diff --git a/optional-skills/productivity/maps/tests/test_maps_client.py b/optional-skills/productivity/maps/tests/test_maps_client.py new file mode 100644 index 0000000000..0400d51b7d --- /dev/null +++ b/optional-skills/productivity/maps/tests/test_maps_client.py @@ -0,0 +1,177 @@ +"""Unit tests for maps_client.py pure functions.""" + +import json +import math +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest + +# Add the 
scripts directory to the path so we can import maps_client +SCRIPTS_DIR = str(Path(__file__).resolve().parent.parent / "scripts") +sys.path.insert(0, SCRIPTS_DIR) + +import maps_client as mc + + +# ── Haversine ──────────────────────────────────────────────────────────── + + +class TestHaversine: + def test_same_point_is_zero(self): + assert mc.haversine_m(48.8584, 2.2945, 48.8584, 2.2945) == 0.0 + + def test_known_distance_paris_lyon(self): + # Paris to Lyon is ~393 km straight line + dist = mc.haversine_m(48.8566, 2.3522, 45.7640, 4.8357) + assert 390_000 < dist < 400_000 + + def test_antipodal_points(self): + # North pole to south pole ~20,000 km + dist = mc.haversine_m(90, 0, -90, 0) + assert 20_000_000 < dist < 20_100_000 + + def test_equator_quarter(self): + # 0,0 to 0,90 is ~10,000 km + dist = mc.haversine_m(0, 0, 0, 90) + assert 10_000_000 < dist < 10_100_000 + + def test_symmetry(self): + d1 = mc.haversine_m(40.7128, -74.0060, 51.5074, -0.1278) + d2 = mc.haversine_m(51.5074, -0.1278, 40.7128, -74.0060) + assert d1 == pytest.approx(d2) + + +# ── Overpass query builder ─────────────────────────────────────────────── + + +class TestBuildOverpassQuery: + def test_basic_query_structure(self): + q = mc.build_overpass_nearby("amenity", "restaurant", 48.85, 2.29, 500, 10) + assert "[out:json]" in q + assert '"amenity"="restaurant"' in q + assert "around:500,48.85,2.29" in q + assert "out center 10" in q + + def test_contains_node_and_way(self): + q = mc.build_overpass_nearby("tourism", "hotel", 40.0, -74.0, 1000, 5) + assert "node[" in q + assert "way[" in q + + def test_bbox_query_structure(self): + q = mc.build_overpass_bbox("amenity", "cafe", 40.75, -74.00, 40.77, -73.98, 20) + assert "[out:json]" in q + assert '"amenity"="cafe"' in q + assert "40.75,-74.0,40.77,-73.98" in q + + +# ── Category validation ────────────────────────────────────────────────── + + +class TestCategories: + def test_original_12_categories_exist(self): + original = [ + "restaurant", 
"cafe", "bar", "hospital", "pharmacy", "hotel", + "supermarket", "atm", "gas_station", "parking", "museum", "park", + ] + for cat in original: + assert cat in mc.CATEGORY_TAGS, f"Missing original category: {cat}" + + def test_new_categories_exist(self): + new_cats = [ + "school", "university", "bank", "police", "fire_station", + "library", "airport", "train_station", "bus_stop", "dentist", + "doctor", "cinema", "theatre", "gym", "post_office", + "convenience_store", "bakery", "nightclub", "zoo", "playground", + ] + for cat in new_cats: + assert cat in mc.CATEGORY_TAGS, f"Missing new category: {cat}" + + def test_all_categories_have_valid_tags(self): + for cat, tag in mc.CATEGORY_TAGS.items(): + assert isinstance(tag, tuple), f"{cat}: tag should be tuple" + assert len(tag) == 2, f"{cat}: tag should be (key, value)" + assert isinstance(tag[0], str) and isinstance(tag[1], str) + + def test_at_least_40_categories(self): + assert len(mc.CATEGORY_TAGS) >= 40 + + +# ── OSRM profiles ──────────────────────────────────────────────────────── + + +class TestOSRMProfiles: + def test_driving_walking_cycling(self): + assert "driving" in mc.OSRM_PROFILES + assert "walking" in mc.OSRM_PROFILES + assert "cycling" in mc.OSRM_PROFILES + + def test_profile_mappings(self): + assert mc.OSRM_PROFILES["driving"] == "driving" + assert mc.OSRM_PROFILES["walking"] == "foot" + assert mc.OSRM_PROFILES["cycling"] == "bike" + + +# ── Argparse ───────────────────────────────────────────────────────────── + + +class TestArgparse: + def test_distance_uses_to_flag(self): + """The distance command should use --to, not two positional nargs='+'.""" + parser = mc.build_parser() + args = parser.parse_args(["distance", "Paris", "--to", "Lyon"]) + assert args.command == "distance" + assert args.origin == ["Paris"] + assert args.to == ["Lyon"] + + def test_distance_multiword_origin(self): + parser = mc.build_parser() + args = parser.parse_args(["distance", "New", "York", "--to", "Boston"]) + assert 
args.origin == ["New", "York"] + assert args.to == ["Boston"] + + def test_directions_uses_to_flag(self): + parser = mc.build_parser() + args = parser.parse_args(["directions", "Big Ben", "--to", "Tower Bridge"]) + assert args.command == "directions" + + def test_search_accepts_query(self): + parser = mc.build_parser() + args = parser.parse_args(["search", "Eiffel", "Tower"]) + assert args.command == "search" + assert args.query == ["Eiffel", "Tower"] + + def test_nearby_accepts_category(self): + parser = mc.build_parser() + args = parser.parse_args(["nearby", "48.85", "2.29", "restaurant"]) + assert args.command == "nearby" + assert args.category == "restaurant" + + def test_bbox_accepts_coordinates(self): + parser = mc.build_parser() + args = parser.parse_args(["bbox", "40.75", "-74.00", "40.77", "-73.98", "cafe"]) + assert args.command == "bbox" + assert args.category == "cafe" + + def test_area_accepts_query(self): + parser = mc.build_parser() + args = parser.parse_args(["area", "Manhattan"]) + assert args.command == "area" + + +# ── Output helpers ─────────────────────────────────────────────────────── + + +class TestOutputHelpers: + def test_print_json_outputs_valid_json(self, capsys): + mc.print_json({"key": "value", "num": 42}) + captured = capsys.readouterr() + data = json.loads(captured.out) + assert data["key"] == "value" + assert data["num"] == 42 + + def test_error_exit_outputs_error_json(self): + with pytest.raises(SystemExit) as exc_info: + mc.error_exit("something went wrong") + assert exc_info.value.code == 1 From de491fdf0e4a35a91b447f8f077af4961a59b7b3 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 30 Mar 2026 00:10:04 -0700 Subject: [PATCH 038/455] chore: remove unit tests from maps skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skills are self-contained scripts — they don't need test suites in the repo. 
--- .../maps/tests/test_maps_client.py | 177 ------------------ 1 file changed, 177 deletions(-) delete mode 100644 optional-skills/productivity/maps/tests/test_maps_client.py diff --git a/optional-skills/productivity/maps/tests/test_maps_client.py b/optional-skills/productivity/maps/tests/test_maps_client.py deleted file mode 100644 index 0400d51b7d..0000000000 --- a/optional-skills/productivity/maps/tests/test_maps_client.py +++ /dev/null @@ -1,177 +0,0 @@ -"""Unit tests for maps_client.py pure functions.""" - -import json -import math -import sys -from pathlib import Path -from unittest.mock import patch, MagicMock - -import pytest - -# Add the scripts directory to the path so we can import maps_client -SCRIPTS_DIR = str(Path(__file__).resolve().parent.parent / "scripts") -sys.path.insert(0, SCRIPTS_DIR) - -import maps_client as mc - - -# ── Haversine ──────────────────────────────────────────────────────────── - - -class TestHaversine: - def test_same_point_is_zero(self): - assert mc.haversine_m(48.8584, 2.2945, 48.8584, 2.2945) == 0.0 - - def test_known_distance_paris_lyon(self): - # Paris to Lyon is ~393 km straight line - dist = mc.haversine_m(48.8566, 2.3522, 45.7640, 4.8357) - assert 390_000 < dist < 400_000 - - def test_antipodal_points(self): - # North pole to south pole ~20,000 km - dist = mc.haversine_m(90, 0, -90, 0) - assert 20_000_000 < dist < 20_100_000 - - def test_equator_quarter(self): - # 0,0 to 0,90 is ~10,000 km - dist = mc.haversine_m(0, 0, 0, 90) - assert 10_000_000 < dist < 10_100_000 - - def test_symmetry(self): - d1 = mc.haversine_m(40.7128, -74.0060, 51.5074, -0.1278) - d2 = mc.haversine_m(51.5074, -0.1278, 40.7128, -74.0060) - assert d1 == pytest.approx(d2) - - -# ── Overpass query builder ─────────────────────────────────────────────── - - -class TestBuildOverpassQuery: - def test_basic_query_structure(self): - q = mc.build_overpass_nearby("amenity", "restaurant", 48.85, 2.29, 500, 10) - assert "[out:json]" in q - assert 
'"amenity"="restaurant"' in q - assert "around:500,48.85,2.29" in q - assert "out center 10" in q - - def test_contains_node_and_way(self): - q = mc.build_overpass_nearby("tourism", "hotel", 40.0, -74.0, 1000, 5) - assert "node[" in q - assert "way[" in q - - def test_bbox_query_structure(self): - q = mc.build_overpass_bbox("amenity", "cafe", 40.75, -74.00, 40.77, -73.98, 20) - assert "[out:json]" in q - assert '"amenity"="cafe"' in q - assert "40.75,-74.0,40.77,-73.98" in q - - -# ── Category validation ────────────────────────────────────────────────── - - -class TestCategories: - def test_original_12_categories_exist(self): - original = [ - "restaurant", "cafe", "bar", "hospital", "pharmacy", "hotel", - "supermarket", "atm", "gas_station", "parking", "museum", "park", - ] - for cat in original: - assert cat in mc.CATEGORY_TAGS, f"Missing original category: {cat}" - - def test_new_categories_exist(self): - new_cats = [ - "school", "university", "bank", "police", "fire_station", - "library", "airport", "train_station", "bus_stop", "dentist", - "doctor", "cinema", "theatre", "gym", "post_office", - "convenience_store", "bakery", "nightclub", "zoo", "playground", - ] - for cat in new_cats: - assert cat in mc.CATEGORY_TAGS, f"Missing new category: {cat}" - - def test_all_categories_have_valid_tags(self): - for cat, tag in mc.CATEGORY_TAGS.items(): - assert isinstance(tag, tuple), f"{cat}: tag should be tuple" - assert len(tag) == 2, f"{cat}: tag should be (key, value)" - assert isinstance(tag[0], str) and isinstance(tag[1], str) - - def test_at_least_40_categories(self): - assert len(mc.CATEGORY_TAGS) >= 40 - - -# ── OSRM profiles ──────────────────────────────────────────────────────── - - -class TestOSRMProfiles: - def test_driving_walking_cycling(self): - assert "driving" in mc.OSRM_PROFILES - assert "walking" in mc.OSRM_PROFILES - assert "cycling" in mc.OSRM_PROFILES - - def test_profile_mappings(self): - assert mc.OSRM_PROFILES["driving"] == "driving" - assert 
mc.OSRM_PROFILES["walking"] == "foot" - assert mc.OSRM_PROFILES["cycling"] == "bike" - - -# ── Argparse ───────────────────────────────────────────────────────────── - - -class TestArgparse: - def test_distance_uses_to_flag(self): - """The distance command should use --to, not two positional nargs='+'.""" - parser = mc.build_parser() - args = parser.parse_args(["distance", "Paris", "--to", "Lyon"]) - assert args.command == "distance" - assert args.origin == ["Paris"] - assert args.to == ["Lyon"] - - def test_distance_multiword_origin(self): - parser = mc.build_parser() - args = parser.parse_args(["distance", "New", "York", "--to", "Boston"]) - assert args.origin == ["New", "York"] - assert args.to == ["Boston"] - - def test_directions_uses_to_flag(self): - parser = mc.build_parser() - args = parser.parse_args(["directions", "Big Ben", "--to", "Tower Bridge"]) - assert args.command == "directions" - - def test_search_accepts_query(self): - parser = mc.build_parser() - args = parser.parse_args(["search", "Eiffel", "Tower"]) - assert args.command == "search" - assert args.query == ["Eiffel", "Tower"] - - def test_nearby_accepts_category(self): - parser = mc.build_parser() - args = parser.parse_args(["nearby", "48.85", "2.29", "restaurant"]) - assert args.command == "nearby" - assert args.category == "restaurant" - - def test_bbox_accepts_coordinates(self): - parser = mc.build_parser() - args = parser.parse_args(["bbox", "40.75", "-74.00", "40.77", "-73.98", "cafe"]) - assert args.command == "bbox" - assert args.category == "cafe" - - def test_area_accepts_query(self): - parser = mc.build_parser() - args = parser.parse_args(["area", "Manhattan"]) - assert args.command == "area" - - -# ── Output helpers ─────────────────────────────────────────────────────── - - -class TestOutputHelpers: - def test_print_json_outputs_valid_json(self, capsys): - mc.print_json({"key": "value", "num": 42}) - captured = capsys.readouterr() - data = json.loads(captured.out) - assert 
data["key"] == "value" - assert data["num"] == 42 - - def test_error_exit_outputs_error_json(self): - with pytest.raises(SystemExit) as exc_info: - mc.error_exit("something went wrong") - assert exc_info.value.code == 1 From ea0bd81b84e460368c35432472ef6e8cbdf6c541 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 19 Apr 2026 05:17:39 -0700 Subject: [PATCH 039/455] feat(skills): consolidate find-nearby into maps as a single location skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit find-nearby and the (new) maps optional skill both used OpenStreetMap's Overpass + Nominatim to answer the same question — 'what's near this location?' — so shipping both would be duplicate code for overlapping capability. Consolidate into one active-by-default skill at skills/productivity/maps/ that is a strict superset of find-nearby. Moves + deletions: - optional-skills/productivity/maps/ → skills/productivity/maps/ (active, no install step needed) - skills/leisure/find-nearby/ → DELETED (fully superseded) Upgrades to maps_client.py so it covers everything find-nearby did: - Overpass server failover — tries overpass-api.de then overpass.kumi.systems so a single-mirror outage doesn't break the skill (new overpass_query helper, used by both nearby and bbox) - nearby now accepts --near "
" as a shortcut that auto-geocodes, so one command replaces the old 'search → copy coords → nearby' chain - nearby now accepts --category (repeatable) for multi-type queries in one call (e.g. --category restaurant --category bar), results merged and deduped by (osm_type, osm_id), sorted by distance, capped at --limit - Each nearby result now includes maps_url (clickable Google Maps search link) and directions_url (Google Maps directions from the search point — only when a ref point is known) - Promoted commonly-useful OSM tags to top-level fields on each result: cuisine, hours (opening_hours), phone, website — instead of forcing callers to dig into the raw tags dict SKILL.md: - Version bumped 1.1.0 → 1.2.0, description rewritten to lead with capability surface - New 'Working With Telegram Location Pins' section replacing find-nearby's equivalent workflow - metadata.hermes.supersedes: [find-nearby] so tooling can flag any lingering references to the old skill External references updated: - optional-skills/productivity/telephony/SKILL.md — related_skills find-nearby → maps - website/docs/reference/skills-catalog.md — removed the (now-empty) 'leisure' section, added 'maps' row under productivity - website/docs/user-guide/features/cron.md — find-nearby example usages swapped to maps - tests/tools/test_cronjob_tools.py, tests/hermes_cli/test_cron.py, tests/cron/test_scheduler.py — fixture string values swapped - cli.py:5290 — /cron help-hint example swapped Not touched: - RELEASE_v0.2.0.md — historical record, left intact E2E-verified live (Nominatim + Overpass, one query each): - nearby --near "Times Square" --category restaurant --category bar → 3 results, sorted by distance, all with maps_url, directions_url, cuisine, phone, website where OSM had the tags All 111 targeted tests pass across tests/cron/, tests/tools/, tests/hermes_cli/. 
--- cli.py | 2 +- .../productivity/telephony/SKILL.md | 2 +- skills/leisure/find-nearby/SKILL.md | 69 ------- .../find-nearby/scripts/find_nearby.py | 184 ------------------ .../productivity/maps/SKILL.md | 81 ++++++-- .../productivity/maps/scripts/maps_client.py | 168 +++++++++++++--- tests/cron/test_scheduler.py | 8 +- tests/hermes_cli/test_cron.py | 8 +- tests/tools/test_cronjob_tools.py | 10 +- website/docs/reference/skills-catalog.md | 9 +- website/docs/user-guide/features/cron.md | 12 +- 11 files changed, 222 insertions(+), 331 deletions(-) delete mode 100644 skills/leisure/find-nearby/SKILL.md delete mode 100644 skills/leisure/find-nearby/scripts/find_nearby.py rename {optional-skills => skills}/productivity/maps/SKILL.md (53%) rename {optional-skills => skills}/productivity/maps/scripts/maps_client.py (86%) diff --git a/cli.py b/cli.py index e814e35b12..0e5e9ff660 100644 --- a/cli.py +++ b/cli.py @@ -5287,7 +5287,7 @@ class HermesCLI: print(" /cron list") print(' /cron add "every 2h" "Check server status" [--skill blogwatcher]') print(' /cron edit --schedule "every 4h" --prompt "New task"') - print(" /cron edit --skill blogwatcher --skill find-nearby") + print(" /cron edit --skill blogwatcher --skill maps") print(" /cron edit --remove-skill blogwatcher") print(" /cron edit --clear-skills") print(" /cron pause ") diff --git a/optional-skills/productivity/telephony/SKILL.md b/optional-skills/productivity/telephony/SKILL.md index c74a369209..6c457592a9 100644 --- a/optional-skills/productivity/telephony/SKILL.md +++ b/optional-skills/productivity/telephony/SKILL.md @@ -7,7 +7,7 @@ license: MIT metadata: hermes: tags: [telephony, phone, sms, mms, voice, twilio, bland.ai, vapi, calling, texting] - related_skills: [find-nearby, google-workspace, agentmail] + related_skills: [maps, google-workspace, agentmail] category: productivity --- diff --git a/skills/leisure/find-nearby/SKILL.md b/skills/leisure/find-nearby/SKILL.md deleted file mode 100644 index 
f0ecdbf531..0000000000 --- a/skills/leisure/find-nearby/SKILL.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -name: find-nearby -description: Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed. -version: 1.0.0 -metadata: - hermes: - tags: [location, maps, nearby, places, restaurants, local] - related_skills: [] ---- - -# Find Nearby — Local Place Discovery - -Find restaurants, cafes, bars, pharmacies, and other places near any location. Uses OpenStreetMap (free, no API keys). Works with: - -- **Coordinates** from Telegram location pins (latitude/longitude in conversation) -- **Addresses** ("near 123 Main St, Springfield") -- **Cities** ("restaurants in downtown Austin") -- **Zip codes** ("pharmacies near 90210") -- **Landmarks** ("cafes near Times Square") - -## Quick Reference - -```bash -# By coordinates (from Telegram location pin or user-provided) -python3 SKILL_DIR/scripts/find_nearby.py --lat --lon --type restaurant --radius 1500 - -# By address, city, or landmark (auto-geocoded) -python3 SKILL_DIR/scripts/find_nearby.py --near "Times Square, New York" --type cafe - -# Multiple place types -python3 SKILL_DIR/scripts/find_nearby.py --near "downtown austin" --type restaurant --type bar --limit 10 - -# JSON output -python3 SKILL_DIR/scripts/find_nearby.py --near "90210" --type pharmacy --json -``` - -### Parameters - -| Flag | Description | Default | -|------|-------------|---------| -| `--lat`, `--lon` | Exact coordinates | — | -| `--near` | Address, city, zip, or landmark (geocoded) | — | -| `--type` | Place type (repeatable for multiple) | restaurant | -| `--radius` | Search radius in meters | 1500 | -| `--limit` | Max results | 15 | -| `--json` | Machine-readable JSON output | off | - -### Common Place Types - -`restaurant`, `cafe`, `bar`, `pub`, `fast_food`, `pharmacy`, `hospital`, `bank`, `atm`, `fuel`, `parking`, `supermarket`, 
`convenience`, `hotel` - -## Workflow - -1. **Get the location.** Look for coordinates (`latitude: ... / longitude: ...`) from a Telegram pin, or ask the user for an address/city/zip. - -2. **Ask for preferences** (only if not already stated): place type, how far they're willing to go, any specifics (cuisine, "open now", etc.). - -3. **Run the script** with appropriate flags. Use `--json` if you need to process results programmatically. - -4. **Present results** with names, distances, and Google Maps links. If the user asked about hours or "open now," check the `hours` field in results — if missing or unclear, verify with `web_search`. - -5. **For directions**, use the `directions_url` from results, or construct: `https://www.google.com/maps/dir/?api=1&origin=,&destination=,` - -## Tips - -- If results are sparse, widen the radius (1500 → 3000m) -- For "open now" requests: check the `hours` field in results, cross-reference with `web_search` for accuracy since OSM hours aren't always complete -- Zip codes alone can be ambiguous globally — prompt the user for country/state if results look wrong -- The script uses OpenStreetMap data which is community-maintained; coverage varies by region diff --git a/skills/leisure/find-nearby/scripts/find_nearby.py b/skills/leisure/find-nearby/scripts/find_nearby.py deleted file mode 100644 index 9d7fed78f4..0000000000 --- a/skills/leisure/find-nearby/scripts/find_nearby.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python3 -"""Find nearby places using OpenStreetMap (Overpass + Nominatim). No API keys needed. 
- -Usage: - # By coordinates - python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --radius 1500 - - # By address/city/zip (auto-geocoded) - python find_nearby.py --near "Times Square, New York" --type cafe --radius 1000 - python find_nearby.py --near "90210" --type pharmacy - - # Multiple types - python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --type bar - - # JSON output for programmatic use - python find_nearby.py --near "downtown las vegas" --type restaurant --json -""" - -import argparse -import json -import math -import sys -import urllib.parse -import urllib.request -from typing import Any - -OVERPASS_URLS = [ - "https://overpass-api.de/api/interpreter", - "https://overpass.kumi.systems/api/interpreter", -] -NOMINATIM_URL = "https://nominatim.openstreetmap.org/search" -USER_AGENT = "HermesAgent/1.0 (find-nearby skill)" -TIMEOUT = 15 - - -def _http_get(url: str) -> Any: - req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT}) - with urllib.request.urlopen(req, timeout=TIMEOUT) as r: - return json.loads(r.read()) - - -def _http_post(url: str, data: str) -> Any: - req = urllib.request.Request( - url, data=data.encode(), headers={"User-Agent": USER_AGENT} - ) - with urllib.request.urlopen(req, timeout=TIMEOUT) as r: - return json.loads(r.read()) - - -def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float: - """Distance in meters between two coordinates.""" - R = 6_371_000 - rlat1, rlat2 = math.radians(lat1), math.radians(lat2) - dlat = math.radians(lat2 - lat1) - dlon = math.radians(lon2 - lon1) - a = math.sin(dlat / 2) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2) ** 2 - return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) - - -def geocode(query: str) -> tuple[float, float]: - """Convert address/city/zip to coordinates via Nominatim.""" - params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1}) - results = _http_get(f"{NOMINATIM_URL}?{params}") - if not 
results: - print(f"Error: Could not geocode '{query}'. Try a more specific address.", file=sys.stderr) - sys.exit(1) - return float(results[0]["lat"]), float(results[0]["lon"]) - - -def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, limit: int = 15) -> list[dict]: - """Query Overpass for nearby amenities.""" - # Build Overpass QL query - type_filters = "".join( - f'nwr["amenity"="{t}"](around:{radius},{lat},{lon});' for t in types - ) - query = f"[out:json][timeout:{TIMEOUT}];({type_filters});out center tags;" - - # Try each Overpass server - data = None - for url in OVERPASS_URLS: - try: - data = _http_post(url, f"data={urllib.parse.quote(query)}") - break - except Exception: - continue - - if not data: - return [] - - # Parse results - places = [] - for el in data.get("elements", []): - tags = el.get("tags", {}) - name = tags.get("name") - if not name: - continue - - # Get coordinates (nodes have lat/lon directly, ways/relations use center) - plat = el.get("lat") or (el.get("center", {}) or {}).get("lat") - plon = el.get("lon") or (el.get("center", {}) or {}).get("lon") - if plat is None or plon is None: - continue - - dist = haversine(lat, lon, plat, plon) - - place = { - "name": name, - "type": tags.get("amenity", ""), - "distance_m": round(dist), - "lat": plat, - "lon": plon, - "maps_url": f"https://www.google.com/maps/search/?api=1&query={plat},{plon}", - "directions_url": f"https://www.google.com/maps/dir/?api=1&origin={lat},{lon}&destination={plat},{plon}", - } - - # Add useful optional fields - if tags.get("cuisine"): - place["cuisine"] = tags["cuisine"] - if tags.get("opening_hours"): - place["hours"] = tags["opening_hours"] - if tags.get("phone"): - place["phone"] = tags["phone"] - if tags.get("website"): - place["website"] = tags["website"] - if tags.get("addr:street"): - addr_parts = [tags.get("addr:housenumber", ""), tags.get("addr:street", "")] - if tags.get("addr:city"): - addr_parts.append(tags["addr:city"]) - 
place["address"] = " ".join(p for p in addr_parts if p) - - places.append(place) - - # Sort by distance, limit results - places.sort(key=lambda p: p["distance_m"]) - return places[:limit] - - -def main(): - parser = argparse.ArgumentParser(description="Find nearby places via OpenStreetMap") - parser.add_argument("--lat", type=float, help="Latitude") - parser.add_argument("--lon", type=float, help="Longitude") - parser.add_argument("--near", type=str, help="Address, city, or zip code (geocoded automatically)") - parser.add_argument("--type", action="append", dest="types", default=[], help="Place type (restaurant, cafe, bar, pharmacy, etc.)") - parser.add_argument("--radius", type=int, default=1500, help="Search radius in meters (default: 1500)") - parser.add_argument("--limit", type=int, default=15, help="Max results (default: 15)") - parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON") - args = parser.parse_args() - - # Resolve coordinates - if args.near: - lat, lon = geocode(args.near) - elif args.lat is not None and args.lon is not None: - lat, lon = args.lat, args.lon - else: - print("Error: Provide --lat/--lon or --near", file=sys.stderr) - sys.exit(1) - - if not args.types: - args.types = ["restaurant"] - - places = find_nearby(lat, lon, args.types, args.radius, args.limit) - - if args.json_output: - print(json.dumps({"origin": {"lat": lat, "lon": lon}, "results": places, "count": len(places)}, indent=2)) - else: - if not places: - print(f"No {'/'.join(args.types)} found within {args.radius}m") - return - print(f"Found {len(places)} places within {args.radius}m:\n") - for i, p in enumerate(places, 1): - dist_str = f"{p['distance_m']}m" if p["distance_m"] < 1000 else f"{p['distance_m']/1000:.1f}km" - print(f" {i}. 
{p['name']} ({p['type']}) — {dist_str}") - if p.get("cuisine"): - print(f" Cuisine: {p['cuisine']}") - if p.get("hours"): - print(f" Hours: {p['hours']}") - if p.get("address"): - print(f" Address: {p['address']}") - print(f" Map: {p['maps_url']}") - print() - - -if __name__ == "__main__": - main() diff --git a/optional-skills/productivity/maps/SKILL.md b/skills/productivity/maps/SKILL.md similarity index 53% rename from optional-skills/productivity/maps/SKILL.md rename to skills/productivity/maps/SKILL.md index 59e0359d56..9eded20866 100644 --- a/optional-skills/productivity/maps/SKILL.md +++ b/skills/productivity/maps/SKILL.md @@ -1,17 +1,20 @@ --- name: maps description: > - Geocoding, reverse geocoding, nearby POI search (44 categories), - distance/routing, turn-by-turn directions, timezone lookup, bounding box - search, and area info. Uses OpenStreetMap + Overpass + OSRM. Free, no API key. -version: 1.1.0 + Location intelligence — geocode a place, reverse-geocode coordinates, + find nearby places (44 POI categories), driving/walking/cycling + distance + time, turn-by-turn directions, timezone lookup, bounding + box + area for a named place, and POI search within a rectangle. + Uses OpenStreetMap + Overpass + OSRM. Free, no API key. +version: 1.2.0 author: Mibayy license: MIT metadata: hermes: - tags: [maps, geocoding, places, routing, distance, directions, openstreetmap, nominatim, overpass, osrm] + tags: [maps, geocoding, places, routing, distance, directions, nearby, location, openstreetmap, nominatim, overpass, osrm] category: productivity requires_toolsets: [terminal] + supersedes: [find-nearby] --- # Maps Skill @@ -21,21 +24,26 @@ categories, zero dependencies (Python stdlib only), no API key required. Data sources: OpenStreetMap/Nominatim, Overpass API, OSRM, TimeAPI.io. 
+This skill supersedes the old `find-nearby` skill — all of find-nearby's +functionality is covered by the `nearby` command below, with the same +`--near ""` shortcut and multi-category support. + ## When to Use -- User wants coordinates for a place name -- User has coordinates and wants the address -- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc. -- User wants driving/walking/cycling distance or travel time -- User wants turn-by-turn directions between two places -- User wants timezone information for a location -- User wants to search for POIs within a geographic area +- User sends a Telegram location pin (latitude/longitude in the message) → `nearby` +- User wants coordinates for a place name → `search` +- User has coordinates and wants the address → `reverse` +- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc. → `nearby` +- User wants driving/walking/cycling distance or travel time → `distance` +- User wants turn-by-turn directions between two places → `directions` +- User wants timezone information for a location → `timezone` +- User wants to search for POIs within a geographic area → `area` + `bbox` ## Prerequisites Python 3.8+ (stdlib only — no pip installs needed). -Script path after install: `~/.hermes/skills/maps/scripts/maps_client.py` +Script path: `~/.hermes/skills/maps/scripts/maps_client.py` ## Commands @@ -63,9 +71,16 @@ Returns: full address breakdown (street, city, state, country, postcode). 
### nearby — Find places by category ```bash +# By coordinates (from a Telegram location pin, for example) python3 $MAPS nearby 48.8584 2.2945 restaurant --limit 10 python3 $MAPS nearby 40.7128 -74.0060 hospital --radius 2000 -python3 $MAPS nearby 51.5074 -0.1278 cafe --limit 5 --radius 300 + +# By address / city / zip / landmark — --near auto-geocodes +python3 $MAPS nearby --near "Times Square, New York" --category cafe +python3 $MAPS nearby --near "90210" --category pharmacy + +# Multiple categories merged into one query +python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10 ``` 44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket, @@ -75,6 +90,11 @@ synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office, convenience_store, bakery, bookshop, laundry, car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub. +Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`, +`maps_url` (clickable Google Maps link), `directions_url` (Google Maps +directions from the search point), and promoted tags when available — +`cuisine`, `hours` (opening_hours), `phone`, `website`. + ### distance — Travel distance and time ```bash @@ -124,11 +144,31 @@ python3 $MAPS bbox 40.75 -74.00 40.77 -73.98 restaurant --limit 20 Finds POIs within a geographic rectangle. Use `area` first to get the bounding box coordinates for a named place. +## Working With Telegram Location Pins + +When a user sends a location pin, the message contains `latitude:` and +`longitude:` fields. Extract those and pass them straight to `nearby`: + +```bash +# User sent a pin at 36.17, -115.14 and asked "find cafes nearby" +python3 $MAPS nearby 36.17 -115.14 cafe --radius 1500 +``` + +Present results as a numbered list with names, distances, and the +`maps_url` field so the user gets a tap-to-open link in chat. For "open +now?" 
questions, check the `hours` field; if missing or unclear, verify +with `web_search` since OSM hours are community-maintained and not always +current. + ## Workflow Examples **"Find Italian restaurants near the Colosseum":** -1. `search "Colosseum Rome"` → get lat/lon -2. `nearby LAT LON restaurant --radius 500` +1. `nearby --near "Colosseum Rome" --category restaurant --radius 500` + — one command, auto-geocoded + +**"What's near this location pin they sent?":** +1. Extract lat/lon from the Telegram message +2. `nearby LAT LON cafe --radius 1500` **"How do I walk from hotel to conference center?":** 1. `directions "Hotel Name" --to "Conference Center" --mode walking` @@ -140,14 +180,19 @@ bounding box coordinates for a named place. ## Pitfalls - Nominatim ToS: max 1 req/s (handled automatically by the script) -- `nearby` requires lat/lon — use `search` first to get coordinates +- `nearby` requires lat/lon OR `--near "PLACE
"` — one of the two is needed - OSRM routing coverage is best for Europe and North America -- Overpass API can be slow during peak hours (script retries automatically) +- Overpass API can be slow during peak hours; the script automatically + falls back between mirrors (overpass-api.de → overpass.kumi.systems) - `distance` and `directions` use `--to` flag for the destination (not positional) +- If a zip code alone gives ambiguous results globally, include country/state ## Verification ```bash python3 ~/.hermes/skills/maps/scripts/maps_client.py search "Statue of Liberty" # Should return lat ~40.689, lon ~-74.044 + +python3 ~/.hermes/skills/maps/scripts/maps_client.py nearby --near "Times Square" --category restaurant --limit 3 +# Should return a list of restaurants within ~500m of Times Square ``` diff --git a/optional-skills/productivity/maps/scripts/maps_client.py b/skills/productivity/maps/scripts/maps_client.py similarity index 86% rename from optional-skills/productivity/maps/scripts/maps_client.py rename to skills/productivity/maps/scripts/maps_client.py index c271570f99..db0de82d6d 100644 --- a/optional-skills/productivity/maps/scripts/maps_client.py +++ b/skills/productivity/maps/scripts/maps_client.py @@ -34,7 +34,14 @@ DATA_SOURCE = "OpenStreetMap/Nominatim" NOMINATIM_SEARCH = "https://nominatim.openstreetmap.org/search" NOMINATIM_REVERSE = "https://nominatim.openstreetmap.org/reverse" -OVERPASS_API = "https://overpass-api.de/api/interpreter" +# Public Overpass endpoints. We try them in order so a single server +# outage doesn't break the skill — kumi.systems is a well-known mirror. +OVERPASS_URLS = [ + "https://overpass-api.de/api/interpreter", + "https://overpass.kumi.systems/api/interpreter", +] +# Backward-compat alias for any caller that imports OVERPASS_API directly.
+OVERPASS_API = OVERPASS_URLS[0] OSRM_BASE = "https://router.project-osrm.org/route/v1" TIMEAPI_BASE = "https://timeapi.io/api/timezone/coordinate" @@ -246,6 +253,30 @@ def http_post(url, data_str, retries=MAX_RETRIES): error_exit(f"POST failed after {retries} attempts. Last error: {last_error}") +def overpass_query(query): + """POST an Overpass QL query, trying each URL in OVERPASS_URLS in turn. + + A single public Overpass mirror can be rate-limited or down; trying the + next mirror before giving up turns a flaky outage into a retry. Returns + parsed JSON. Falls through to error_exit if every mirror fails. + """ + post_data = "data=" + urllib.parse.quote(query) + last_error = None + for url in OVERPASS_URLS: + try: + return http_post(url, post_data, retries=1) + except SystemExit: + # error_exit inside http_post — keep trying the next mirror. + last_error = f"mirror {url} exhausted retries" + continue + except Exception as exc: + last_error = f"{url}: {exc}" + continue + error_exit( + f"All Overpass mirrors failed. Last error: {last_error or 'unknown'}" + ) + + # --------------------------------------------------------------------------- # Geo math # --------------------------------------------------------------------------- @@ -379,6 +410,9 @@ def parse_overpass_elements(elements, ref_lat=None, ref_lon=None): "lon": el_lon, "osm_type": el.get("type", ""), "osm_id": el.get("id", ""), + # Clickable Google Maps link so the agent can render a tap-to-open + # URL in chat without composing one downstream. + "maps_url": f"https://www.google.com/maps/search/?api=1&query={el_lat},{el_lon}", "tags": { k: v for k, v in tags.items() if k not in ("name", "name:en", @@ -386,9 +420,27 @@ def parse_overpass_elements(elements, ref_lat=None, ref_lon=None): }, } + # Promote commonly-useful tags to top-level fields so agents can + # reference them without digging into the raw ``tags`` dict. 
+ for src_key, dst_key in ( + ("cuisine", "cuisine"), + ("opening_hours", "hours"), + ("phone", "phone"), + ("website", "website"), + ): + val = tags.get(src_key) + if val: + place[dst_key] = val + if ref_lat is not None and ref_lon is not None: dist_m = haversine_m(ref_lat, ref_lon, el_lat, el_lon) place["distance_m"] = round(dist_m, 1) + # With a reference point we can also hand back a directions URL. + place["directions_url"] = ( + f"https://www.google.com/maps/dir/?api=1" + f"&origin={ref_lat},{ref_lon}" + f"&destination={el_lat},{el_lon}" + ) places.append(place) @@ -499,47 +551,84 @@ def cmd_reverse(args): # --------------------------------------------------------------------------- def cmd_nearby(args): - """Find nearby POIs using the Overpass API.""" - try: - lat = float(args.lat) - lon = float(args.lon) - except ValueError: - error_exit("LAT and LON must be numeric values.") + """Find nearby POIs using the Overpass API. - category = args.category.lower() - if category not in CATEGORY_TAGS: + Accepts either explicit coordinates (``lat``/``lon``) or a free-form + address via ``--near`` (auto-geocoded through Nominatim). Supports + multiple categories in one call — results are merged, deduplicated + by ``osm_type+osm_id``, sorted by distance. + """ + # Resolve the center point. --near takes precedence if provided so the + # agent can ask "cafes near Times Square" in one command without having + # to geocode first. + if getattr(args, "near", None): + near_query = " ".join(args.near).strip() if isinstance(args.near, list) else str(args.near).strip() + if not near_query: + error_exit("--near must be a non-empty address or place name.") + lat, lon, _ = geocode_single(near_query) + else: + try: + lat = float(args.lat) + lon = float(args.lon) + except (TypeError, ValueError): + error_exit("Provide numeric LAT and LON, or use --near \"PLACE
\".") + + # Categories: support both legacy single positional ``category`` and the + # new repeatable ``--category`` flag. Users can ask for multiple place + # types in one query. + categories = [] + if getattr(args, "category_list", None): + categories.extend(args.category_list) + if getattr(args, "category", None): + categories.append(args.category) + # Deduplicate, preserve order, lower-case. + categories = list(dict.fromkeys(c.lower() for c in categories if c)) + if not categories: + error_exit("Provide at least one category (positional or --category).") + unknown = [c for c in categories if c not in CATEGORY_TAGS] + if unknown: error_exit( - f"Unknown category '{category}'. " + f"Unknown categor{'ies' if len(unknown) > 1 else 'y'} " + f"{', '.join(repr(c) for c in unknown)}. " f"Valid categories: {', '.join(VALID_CATEGORIES)}" ) radius = int(args.radius) limit = int(args.limit) - if radius <= 0: error_exit("Radius must be a positive integer (metres).") if limit <= 0: error_exit("Limit must be a positive integer.") - tag_key, tag_val = CATEGORY_TAGS[category] - religion = RELIGION_FILTER.get(category) - query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit, - religion=religion) + # Query each category against the Overpass fallback chain, merge results, + # dedupe by OSM identity so POIs tagged under multiple categories don't + # appear twice. + merged = {} + for category in categories: + tag_key, tag_val = CATEGORY_TAGS[category] + religion = RELIGION_FILTER.get(category) + query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit, + religion=religion) + raw = overpass_query(query) + elements = raw.get("elements", []) + for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon): + place["category"] = category + key = (place.get("osm_type", ""), place.get("osm_id", "")) + # Prefer the entry that actually has a distance_m attached (first + # pass through the ref_lat/ref_lon branch), then first-seen wins.
+ if key not in merged: + merged[key] = place - post_data = "data=" + urllib.parse.quote(query) - raw = http_post(OVERPASS_API, post_data) - - elements = raw.get("elements", []) - places = parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon) - - # Add category to each result - for p in places: - p["category"] = category + # Sort merged by distance when we have ref lat/lon, then cap at ``limit``. + places = sorted( + merged.values(), + key=lambda p: p.get("distance_m", float("inf")), + )[:limit] print_json({ "center_lat": lat, "center_lon": lon, - "category": category, + "categories": categories, "radius_m": radius, "count": len(places), "results": places, @@ -861,8 +950,7 @@ def cmd_bbox(args): query = build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit, religion=religion) - post_data = "data=" + urllib.parse.quote(query) - raw = http_post(OVERPASS_API, post_data) + raw = overpass_query(query) elements = raw.get("elements", []) @@ -998,15 +1086,33 @@ def build_parser(): help="Find nearby places of a given category.", description=( "Find points of interest near a location using the Overpass API.\n" + "Provide either LAT/LON, or use --near \"PLACE
\" to auto-geocode.\n" + "Categories can be specified positionally OR repeated via --category\n" + "to merge multiple types in one query (e.g. --category bar --category cafe).\n" f"Categories: {', '.join(VALID_CATEGORIES)}" ), formatter_class=argparse.RawDescriptionHelpFormatter, ) - p_nearby.add_argument("lat", help="Center latitude (decimal degrees).") - p_nearby.add_argument("lon", help="Center longitude (decimal degrees).") p_nearby.add_argument( - "category", - help="POI category (use --help to see full list).", + "lat", nargs="?", default=None, + help="Center latitude (decimal degrees). Omit if using --near.", + ) + p_nearby.add_argument( + "lon", nargs="?", default=None, + help="Center longitude (decimal degrees). Omit if using --near.", + ) + p_nearby.add_argument( + "category", nargs="?", default=None, + help="POI category (use --help for full list). Omit if using --category flags.", + ) + p_nearby.add_argument( + "--near", nargs="+", metavar="PLACE", + help="Address, city, or landmark to search around (geocoded via Nominatim).", + ) + p_nearby.add_argument( + "--category", action="append", dest="category_list", default=[], + metavar="CAT", + help="POI category (repeatable — adds a type to the search).", ) p_nearby.add_argument( "--radius", "-r", diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index b889ede372..c083a4a80e 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1024,7 +1024,7 @@ class TestRunJobSkillBacked: "id": "multi-skill-job", "name": "multi skill test", "prompt": "Combine the results.", - "skills": ["blogwatcher", "find-nearby"], + "skills": ["blogwatcher", "maps"], } fake_db = MagicMock() @@ -1057,12 +1057,12 @@ class TestRunJobSkillBacked: assert error is None assert final_response == "ok" assert skill_view_mock.call_count == 2 - assert [call.args[0] for call in
skill_view_mock.call_args_list] == ["blogwatcher", "maps"] prompt_arg = mock_agent.run_conversation.call_args.args[0] - assert prompt_arg.index("blogwatcher") < prompt_arg.index("find-nearby") + assert prompt_arg.index("blogwatcher") < prompt_arg.index("maps") assert "Instructions for blogwatcher." in prompt_arg - assert "Instructions for find-nearby." in prompt_arg + assert "Instructions for maps." in prompt_arg assert "Combine the results." in prompt_arg diff --git a/tests/hermes_cli/test_cron.py b/tests/hermes_cli/test_cron.py index 9ae9204827..8593195a1b 100644 --- a/tests/hermes_cli/test_cron.py +++ b/tests/hermes_cli/test_cron.py @@ -54,12 +54,12 @@ class TestCronCommandLifecycle: deliver=None, repeat=None, skill=None, - skills=["find-nearby", "blogwatcher"], + skills=["maps", "blogwatcher"], clear_skills=False, ) ) updated = get_job(job["id"]) - assert updated["skills"] == ["find-nearby", "blogwatcher"] + assert updated["skills"] == ["maps", "blogwatcher"] assert updated["name"] == "Edited Job" assert updated["prompt"] == "Revised prompt" assert updated["schedule_display"] == "every 120m" @@ -95,7 +95,7 @@ class TestCronCommandLifecycle: deliver=None, repeat=None, skill=None, - skills=["blogwatcher", "find-nearby"], + skills=["blogwatcher", "maps"], ) ) out = capsys.readouterr().out @@ -103,5 +103,5 @@ class TestCronCommandLifecycle: jobs = list_jobs() assert len(jobs) == 1 - assert jobs[0]["skills"] == ["blogwatcher", "find-nearby"] + assert jobs[0]["skills"] == ["blogwatcher", "maps"] assert jobs[0]["name"] == "Skill combo" diff --git a/tests/tools/test_cronjob_tools.py b/tests/tools/test_cronjob_tools.py index dd6b0101b1..38fc12cc8c 100644 --- a/tests/tools/test_cronjob_tools.py +++ b/tests/tools/test_cronjob_tools.py @@ -192,23 +192,23 @@ class TestUnifiedCronjobTool: result = json.loads( cronjob( action="create", - skills=["blogwatcher", "find-nearby"], + skills=["blogwatcher", "maps"], prompt="Use both skills and combine the result.", schedule="every 
1h", name="Combo job", ) ) assert result["success"] is True - assert result["skills"] == ["blogwatcher", "find-nearby"] + assert result["skills"] == ["blogwatcher", "maps"] listing = json.loads(cronjob(action="list")) - assert listing["jobs"][0]["skills"] == ["blogwatcher", "find-nearby"] + assert listing["jobs"][0]["skills"] == ["blogwatcher", "maps"] def test_multi_skill_default_name_prefers_prompt_when_present(self): result = json.loads( cronjob( action="create", - skills=["blogwatcher", "find-nearby"], + skills=["blogwatcher", "maps"], prompt="Use both skills and combine the result.", schedule="every 1h", ) @@ -220,7 +220,7 @@ class TestUnifiedCronjobTool: created = json.loads( cronjob( action="create", - skills=["blogwatcher", "find-nearby"], + skills=["blogwatcher", "maps"], prompt="Use both skills and combine the result.", schedule="every 1h", ) diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index ffe489d360..46c29929f9 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -100,14 +100,6 @@ GitHub workflow skills for managing repositories, pull requests, code reviews, i | `github-pr-workflow` | Full pull request lifecycle — create branches, commit changes, open PRs, monitor CI status, auto-fix failures, and merge. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-pr-workflow` | | `github-repo-management` | Clone, create, fork, configure, and manage GitHub repositories. Manage remotes, secrets, releases, and workflows. Works with gh CLI or falls back to git + GitHub REST API via curl. | `github/github-repo-management` | -## leisure - -Skills for discovery and everyday tasks. - -| Skill | Description | Path | -|-------|-------------|------| -| `find-nearby` | Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. 
No API keys needed. | `leisure/find-nearby` | - ## mcp Skills for working with MCP (Model Context Protocol) servers, tools, and integrations. @@ -198,6 +190,7 @@ Skills for document creation, presentations, spreadsheets, and other productivit |-------|-------------|------| | `google-workspace` | Gmail, Calendar, Drive, Contacts, Sheets, and Docs integration for Hermes. Uses Hermes-managed OAuth2 setup, prefers the Google Workspace CLI (`gws`) when available for broader API coverage, and falls back to the Python client libraries otherwise. | `productivity/google-workspace` | | `linear` | Manage Linear issues, projects, and teams via the GraphQL API. Create, update, search, and organize issues. Uses API key auth (no OAuth needed). All operations via curl — no dependencies. | `productivity/linear` | +| `maps` | Location intelligence — geocode, reverse-geocode, nearby POI search (44 categories, coordinates or address via `--near`), driving/walking/cycling distance + time, turn-by-turn directions, timezone, bounding box + area, POI search in a rectangle. Uses OpenStreetMap + Overpass + OSRM. No API key needed. Telegram location-pin friendly. | `productivity/maps` | | `nano-pdf` | Edit PDFs with natural-language instructions using the nano-pdf CLI. Modify text, fix typos, update titles, and make content changes to specific pages without manual editing. | `productivity/nano-pdf` | | `notion` | Notion API for creating and managing pages, databases, and blocks via curl. Search, create, update, and query Notion workspaces directly from the terminal. | `productivity/notion` | | `ocr-and-documents` | Extract text from PDFs and scanned documents. Use web_extract for remote URLs, pymupdf for local text-based PDFs, marker-pdf for OCR/scanned docs. For DOCX use python-docx, for PPTX see the powerpoint skill. 
| `productivity/ocr-and-documents` | diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index 222c00827c..4628fcc639 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -30,7 +30,7 @@ Cron-run sessions cannot recursively create more cron jobs. Hermes disables cron /cron add 30m "Remind me to check the build" /cron add "every 2h" "Check server status" /cron add "every 1h" "Summarize new feed items" --skill blogwatcher -/cron add "every 1h" "Use both skills and combine the result" --skill blogwatcher --skill find-nearby +/cron add "every 1h" "Use both skills and combine the result" --skill blogwatcher --skill maps ``` ### From the standalone CLI @@ -40,7 +40,7 @@ hermes cron create "every 2h" "Check server status" hermes cron create "every 1h" "Summarize new feed items" --skill blogwatcher hermes cron create "every 1h" "Use both skills and combine the result" \ --skill blogwatcher \ - --skill find-nearby \ + --skill maps \ --name "Skill combo" ``` @@ -77,7 +77,7 @@ Skills are loaded in order. The prompt becomes the task instruction layered on t ```python cronjob( action="create", - skills=["blogwatcher", "find-nearby"], + skills=["blogwatcher", "maps"], prompt="Look for new local events and interesting nearby places, then combine them into one short brief.", schedule="every 6h", name="Local brief", @@ -95,7 +95,7 @@ You do not need to delete and recreate jobs just to change them. ```bash /cron edit --schedule "every 4h" /cron edit --prompt "Use the revised task" -/cron edit --skill blogwatcher --skill find-nearby +/cron edit --skill blogwatcher --skill maps /cron edit --remove-skill blogwatcher /cron edit --clear-skills ``` @@ -105,8 +105,8 @@ You do not need to delete and recreate jobs just to change them. 
```bash hermes cron edit --schedule "every 4h" hermes cron edit --prompt "Use the revised task" -hermes cron edit --skill blogwatcher --skill find-nearby -hermes cron edit --add-skill find-nearby +hermes cron edit --skill blogwatcher --skill maps +hermes cron edit --add-skill maps hermes cron edit --remove-skill blogwatcher hermes cron edit --clear-skills ``` From a3b76ae36d37124638b3e547b608b266f230c679 Mon Sep 17 00:00:00 2001 From: Teknium Date: Sun, 19 Apr 2026 05:19:51 -0700 Subject: [PATCH 040/455] chore(attribution): add AUTHOR_MAP entry for Mibayy Adds the Mibayy noreply email to the AUTHOR_MAP so CI attribution checks pass for the #3884 maps skill feat commit (7fa01faf). --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 9c04c1c6b3..a20c3c134f 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -77,6 +77,7 @@ AUTHOR_MAP = { "Asunfly@users.noreply.github.com": "Asunfly", "2500400+honghua@users.noreply.github.com": "honghua", "nish3451@users.noreply.github.com": "nish3451", + "Mibayy@users.noreply.github.com": "Mibayy", "135070653+sgaofen@users.noreply.github.com": "sgaofen", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", From d5fc8a5e00dfd396cd188f605ff2abc76fce3c2e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 05:19:57 -0700 Subject: [PATCH 041/455] fix(tui): reject /model and agent-mutating slash passthroughs while running (#12548) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit agent.switch_model() mutates self.model, self.provider, self.base_url, self.api_key, self.api_mode, and rebuilds self.client / self._anthropic_client in place. The worker thread running agent.run_conversation reads those fields on every iteration. 
A concurrent config.set key=model or slash- worker-mirrored /model / /personality / /prompt / /compress can send an HTTP request with mismatched model + base_url (or the old client keeps running against a new endpoint) — 400/404s the user never asked for. Fix: same pattern as the session.undo / session.compress guards (PR #12416) and the gateway runner's running-agent /model guard (PR #12334). Reject with 4009 'session busy' when session.running is True. Two call sites guarded: - config.set with key=model: primary /model entry point from Ink - _mirror_slash_side_effects for model / personality / prompt / compress: slash-worker passthrough path that applies live-agent side effects Idle sessions still switch models normally — regression guard test verifies this. Tests (tests/test_tui_gateway_server.py): 4 new cases. - test_config_set_model_rejects_while_running - test_config_set_model_allowed_when_idle (regression guard) - test_mirror_slash_side_effects_rejects_mutating_commands_while_running - test_mirror_slash_side_effects_allowed_when_idle (regression guard) Validated: against unpatched server.py, the two 'rejects_while_running' tests fail with the exact race they assert against. With the fix all 4 pass. Live E2E against the live Python environment confirmed both guards enforce 4009 / 'session busy' exactly as designed. 
--- tests/test_tui_gateway_server.py | 121 +++++++++++++++++++++++++++++++ tui_gateway/server.py | 24 ++++++ 2 files changed, 145 insertions(+) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 07a68ac9e9..c0f5239035 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -828,3 +828,124 @@ def test_respond_unpacks_sid_tuple_correctly(): server._pending.pop("rid-x", None) server._answers.pop("rid-x", None) + + +# --------------------------------------------------------------------------- +# /model switch and other agent-mutating commands must reject while the +# session is running. agent.switch_model() mutates self.model, self.provider, +# self.base_url, self.client etc. in place — the worker thread running +# agent.run_conversation is reading those on every iteration. Same class of +# bug as the session.undo / session.compress mid-run silent-drop; same fix +# pattern: reject with 4009 while running. +# --------------------------------------------------------------------------- + + +def test_config_set_model_rejects_while_running(monkeypatch): + """/model via config.set must reject during an in-flight turn.""" + seen = {"called": False} + + def _fake_apply(sid, session, raw): + seen["called"] = True + return {"value": raw, "warning": ""} + + monkeypatch.setattr(server, "_apply_model_switch", _fake_apply) + + server._sessions["sid"] = _session(running=True) + try: + resp = server.handle_request({ + "id": "1", "method": "config.set", + "params": {"session_id": "sid", "key": "model", "value": "anthropic/claude-sonnet-4.6"}, + }) + assert resp.get("error") + assert resp["error"]["code"] == 4009 + assert "session busy" in resp["error"]["message"] + assert not seen["called"], ( + "_apply_model_switch was called mid-turn — would race with " + "the worker thread reading agent.model / agent.client" + ) + finally: + server._sessions.pop("sid", None) + + +def 
test_config_set_model_allowed_when_idle(monkeypatch): + """Regression guard: idle sessions can still switch models.""" + seen = {"called": False} + + def _fake_apply(sid, session, raw): + seen["called"] = True + return {"value": "newmodel", "warning": ""} + + monkeypatch.setattr(server, "_apply_model_switch", _fake_apply) + + server._sessions["sid"] = _session(running=False) + try: + resp = server.handle_request({ + "id": "1", "method": "config.set", + "params": {"session_id": "sid", "key": "model", "value": "newmodel"}, + }) + assert resp.get("result") + assert resp["result"]["value"] == "newmodel" + assert seen["called"] + finally: + server._sessions.pop("sid", None) + + +def test_mirror_slash_side_effects_rejects_mutating_commands_while_running(monkeypatch): + """Slash worker passthrough (e.g. /model, /personality, /prompt, + /compress) must reject during an in-flight turn. Same race as + config.set — mutates live agent state while run_conversation is + reading it.""" + import types + + applied = {"model": False, "compress": False} + + def _fake_apply_model(sid, session, arg): + applied["model"] = True + return {"value": arg, "warning": ""} + + def _fake_compress(session, focus): + applied["compress"] = True + return (0, {}) + + monkeypatch.setattr(server, "_apply_model_switch", _fake_apply_model) + monkeypatch.setattr(server, "_compress_session_history", _fake_compress) + + session = _session(running=True) + session["agent"] = types.SimpleNamespace(model="x") + + for cmd, expected_name in [ + ("/model new/model", "model"), + ("/personality default", "personality"), + ("/prompt", "prompt"), + ("/compress", "compress"), + ]: + warning = server._mirror_slash_side_effects("sid", session, cmd) + assert "session busy" in warning, ( + f"{cmd} should have returned busy warning, got: {warning!r}" + ) + assert f"/{expected_name}" in warning + + # None of the mutating side-effect helpers should have fired. 
+ assert not applied["model"], "model switch fired despite running session" + assert not applied["compress"], "compress fired despite running session" + + +def test_mirror_slash_side_effects_allowed_when_idle(monkeypatch): + """Regression guard: idle session still runs the side effects.""" + import types + + applied = {"model": False} + + def _fake_apply_model(sid, session, arg): + applied["model"] = True + return {"value": arg, "warning": ""} + + monkeypatch.setattr(server, "_apply_model_switch", _fake_apply_model) + + session = _session(running=False) + session["agent"] = types.SimpleNamespace(model="x") + + warning = server._mirror_slash_side_effects("sid", session, "/model foo") + # Should NOT contain "session busy" — the switch went through. + assert "session busy" not in warning + assert applied["model"] diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 921f868a3c..00f8346191 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1743,6 +1743,19 @@ def _(rid, params: dict) -> dict: if not value: return _err(rid, 4002, "model value required") if session: + # Reject during an in-flight turn. agent.switch_model() + # mutates self.model / self.provider / self.base_url / + # self.client in place; the worker thread running + # agent.run_conversation is reading those on every + # iteration. A mid-turn swap can send an HTTP request + # with the new base_url but old model (or vice versa), + # producing 400/404s the user never asked for. Parity + # with the gateway's running-agent /model guard. 
+ if session.get("running"): + return _err( + rid, 4009, + "session busy — /interrupt the current turn before switching models", + ) result = _apply_model_switch(params.get("session_id", ""), session, value) else: result = _apply_model_switch("", {"agent": None}, value) @@ -2446,6 +2459,17 @@ def _mirror_slash_side_effects(sid: str, session: dict, command: str) -> str: return "" name, arg, agent = parts[0], (parts[1].strip() if len(parts) > 1 else ""), session.get("agent") + # Reject agent-mutating commands during an in-flight turn. These + # all do read-then-mutate on live agent/session state that the + # worker thread running agent.run_conversation is using. Parity + # with the session.compress / session.undo guards and the gateway + # runner's running-agent /model guard. + _MUTATES_WHILE_RUNNING = {"model", "personality", "prompt", "compress"} + if name in _MUTATES_WHILE_RUNNING and session.get("running"): + return ( + f"session busy — /interrupt the current turn before running /{name}" + ) + try: if name == "model" and arg and agent: result = _apply_model_switch(sid, session, arg) From 37524a574ec94adcd40e65d4cbb847e84153aa92 Mon Sep 17 00:00:00 2001 From: Teknium Date: Thu, 9 Apr 2026 03:16:04 -0700 Subject: [PATCH 042/455] docs: add PR review guides, rework quickstart, slim down installation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two complementary GitHub PR review guides from contest submissions: - Cron-based PR review agent (from PR #5836 by @dieutx) — polls on a schedule, no server needed, teaches skills + memory authoring - Webhook-based PR review (from PR #6503 by @gaijinkush) — real-time via GitHub webhooks, documents previously undocumented webhook feature Both guides are cross-linked so users can pick the approach that fits. 
Reworks quickstart.md by integrating the best content from PR #5744 by @aidil2105: - Opinionated decision table ('The fastest path') - Common failure modes table with causes and fixes - Recovery toolkit sequence - Session lifecycle verification step - Better first-chat guidance with example prompts Slims down installation.md: - Removes 10-step manual/dev install section (already covered in developer-guide/contributing.md) - Links to Contributing guide for dev setup - Keeps focused on the automated installer + prerequisites + troubleshooting --- website/docs/getting-started/installation.md | 199 +---------- website/docs/getting-started/quickstart.md | 255 ++++++++------ website/docs/guides/github-pr-review-agent.md | 300 ++++++++++++++++ .../docs/guides/webhook-github-pr-review.md | 329 ++++++++++++++++++ website/sidebars.ts | 2 + 5 files changed, 784 insertions(+), 301 deletions(-) create mode 100644 website/docs/guides/github-pr-review-agent.md create mode 100644 website/docs/guides/webhook-github-pr-review.md diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index a28b1256e6..219c1e7d55 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -6,7 +6,7 @@ description: "Install Hermes Agent on Linux, macOS, WSL2, or Android via Termux" # Installation -Get Hermes Agent up and running in under two minutes with the one-line installer, or follow the manual steps for full control. +Get Hermes Agent up and running in under two minutes with the one-line installer. ## Quick Install @@ -82,202 +82,9 @@ If you use Nix (on NixOS, macOS, or Linux), there's a dedicated setup path with --- -## Manual Installation +## Manual / Developer Installation -If you prefer full control over the installation process, follow these steps. 
- -### Step 1: Clone the Repository - -Clone with `--recurse-submodules` to pull the required submodules: - -```bash -git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git -cd hermes-agent -``` - -If you already cloned without `--recurse-submodules`: -```bash -git submodule update --init --recursive -``` - -### Step 2: Install uv & Create Virtual Environment - -```bash -# Install uv (if not already installed) -curl -LsSf https://astral.sh/uv/install.sh | sh - -# Create venv with Python 3.11 (uv downloads it if not present — no sudo needed) -uv venv venv --python 3.11 -``` - -:::tip -You do **not** need to activate the venv to use `hermes`. The entry point has a hardcoded shebang pointing to the venv Python, so it works globally once symlinked. -::: - -### Step 3: Install Python Dependencies - -```bash -# Tell uv which venv to install into -export VIRTUAL_ENV="$(pwd)/venv" - -# Install with all extras -uv pip install -e ".[all]" -``` - -If you only want the core agent (no Telegram/Discord/cron support): -```bash -uv pip install -e "." -``` - -
-Optional extras breakdown - -| Extra | What it adds | Install command | -|-------|-------------|-----------------| -| `all` | Everything below | `uv pip install -e ".[all]"` | -| `messaging` | Telegram, Discord & Slack gateway | `uv pip install -e ".[messaging]"` | -| `cron` | Cron expression parsing for scheduled tasks | `uv pip install -e ".[cron]"` | -| `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` | -| `modal` | Modal cloud execution backend | `uv pip install -e ".[modal]"` | -| `tts-premium` | ElevenLabs premium voices | `uv pip install -e ".[tts-premium]"` | -| `voice` | CLI microphone input + audio playback | `uv pip install -e ".[voice]"` | -| `pty` | PTY terminal support | `uv pip install -e ".[pty]"` | -| `termux` | Tested Android / Termux bundle (`cron`, `cli`, `pty`, `mcp`, `honcho`, `acp`) | `python -m pip install -e ".[termux]" -c constraints-termux.txt` | -| `honcho` | AI-native memory (Honcho integration) | `uv pip install -e ".[honcho]"` | -| `mcp` | Model Context Protocol support | `uv pip install -e ".[mcp]"` | -| `homeassistant` | Home Assistant integration | `uv pip install -e ".[homeassistant]"` | -| `acp` | ACP editor integration support | `uv pip install -e ".[acp]"` | -| `slack` | Slack messaging | `uv pip install -e ".[slack]"` | -| `dev` | pytest & test utilities | `uv pip install -e ".[dev]"` | - -You can combine extras: `uv pip install -e ".[messaging,cron]"` - -:::tip Termux users -`.[all]` is not currently available on Android because the `voice` extra pulls `faster-whisper`, which depends on `ctranslate2` wheels that are not published for Android. Use `.[termux]` for the tested mobile install path, then add individual extras only as needed. -::: - -
- -### Step 4: Install Optional Submodules (if needed) - -```bash -# RL training backend (optional) -uv pip install -e "./tinker-atropos" -``` - -Both are optional — if you skip them, the corresponding toolsets simply won't be available. - -### Step 5: Install Node.js Dependencies (Optional) - -Only needed for **browser automation** (Browserbase-powered) and **WhatsApp bridge**: - -```bash -npm install -``` - -### Step 6: Create the Configuration Directory - -```bash -# Create the directory structure -mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills,pairing,hooks,image_cache,audio_cache,whatsapp/session} - -# Copy the example config file -cp cli-config.yaml.example ~/.hermes/config.yaml - -# Create an empty .env file for API keys -touch ~/.hermes/.env -``` - -### Step 7: Add Your API Keys - -Open `~/.hermes/.env` and add at minimum an LLM provider key: - -```bash -# Required — at least one LLM provider: -OPENROUTER_API_KEY=sk-or-v1-your-key-here - -# Optional — enable additional tools: -FIRECRAWL_API_KEY=fc-your-key # Web search & scraping (or self-host, see docs) -FAL_KEY=your-fal-key # Image generation (FLUX) -``` - -Or set them via the CLI: -```bash -hermes config set OPENROUTER_API_KEY sk-or-v1-your-key-here -``` - -### Step 8: Add `hermes` to Your PATH - -```bash -mkdir -p ~/.local/bin -ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes -``` - -If `~/.local/bin` isn't on your PATH, add it to your shell config: - -```bash -# Bash -echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc && source ~/.bashrc - -# Zsh -echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.zshrc && source ~/.zshrc - -# Fish -fish_add_path $HOME/.local/bin -``` - -### Step 9: Configure Your Provider - -```bash -hermes model # Select your LLM provider and model -``` - -### Step 10: Verify the Installation - -```bash -hermes version # Check that the command is available -hermes doctor # Run diagnostics to verify everything is working -hermes status # Check your configuration 
-hermes chat -q "Hello! What tools do you have available?" -``` - ---- - -## Quick-Reference: Manual Install (Condensed) - -For those who just want the commands: - -```bash -# Install uv -curl -LsSf https://astral.sh/uv/install.sh | sh - -# Clone & enter -git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git -cd hermes-agent - -# Create venv with Python 3.11 -uv venv venv --python 3.11 -export VIRTUAL_ENV="$(pwd)/venv" - -# Install everything -uv pip install -e ".[all]" -uv pip install -e "./tinker-atropos" -npm install # optional, for browser tools and WhatsApp - -# Configure -mkdir -p ~/.hermes/{cron,sessions,logs,memories,skills,pairing,hooks,image_cache,audio_cache,whatsapp/session} -cp cli-config.yaml.example ~/.hermes/config.yaml -touch ~/.hermes/.env -echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env - -# Make hermes available globally -mkdir -p ~/.local/bin -ln -sf "$(pwd)/venv/bin/hermes" ~/.local/bin/hermes - -# Verify -hermes doctor -hermes -``` +If you want to clone the repo and install from source — for contributing, running from a specific branch, or having full control over the virtual environment — see the [Development Setup](../developer-guide/contributing.md#development-setup) section in the Contributing guide. --- diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 8a39c49f1e..b67f63ae36 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -1,12 +1,35 @@ --- sidebar_position: 1 title: "Quickstart" -description: "Your first conversation with Hermes Agent — from install to chatting in 2 minutes" +description: "Your first conversation with Hermes Agent — from install to chatting in under 5 minutes" --- # Quickstart -This guide walks you through installing Hermes Agent, setting up a provider, and having your first conversation. By the end, you'll know the key features and how to explore further. 
+This guide gets you from zero to a working Hermes setup that survives real use. Install, choose a provider, verify a working chat, and know exactly what to do when something breaks. + +## Who this is for + +- Brand new and want the shortest path to a working setup +- Switching providers and don't want to lose time to config mistakes +- Setting up Hermes for a team, bot, or always-on workflow +- Tired of "it installed, but it still does nothing" + +## The fastest path + +Pick the row that matches your goal: + +| Goal | Do this first | Then do this | +|---|---|---| +| I just want Hermes working on my machine | `hermes setup` | Run a real chat and verify it responds | +| I already know my provider | `hermes model` | Save the config, then start chatting | +| I want a bot or always-on setup | `hermes gateway setup` after CLI works | Connect Telegram, Discord, Slack, or another platform | +| I want a local or self-hosted model | `hermes model` → custom endpoint | Verify the endpoint, model name, and context length | +| I want multi-provider fallback | `hermes model` first | Add routing and fallback only after the base chat works | + +**Rule of thumb:** if Hermes cannot complete a normal chat, do not add more features yet. Get one clean conversation working first, then layer on gateway, cron, skills, voice, or routing. + +--- ## 1. Install Hermes Agent @@ -31,86 +54,109 @@ After it finishes, reload your shell: source ~/.bashrc # or source ~/.zshrc ``` -## 2. Set Up a Provider +For detailed installation options, prerequisites, and troubleshooting, see the [Installation guide](./installation.md). -The installer configures your LLM provider automatically. To change it later, use one of these commands: +## 2. Choose a Provider + +The single most important setup step. 
Use `hermes model` to walk through the choice interactively: ```bash -hermes model # Choose your LLM provider and model -hermes tools # Configure which tools are enabled -hermes setup # Or configure everything at once +hermes model ``` -`hermes model` walks you through selecting an inference provider: +Good defaults: -| Provider | What it is | How to set up | -|----------|-----------|---------------| -| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` | -| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` | -| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key | -| **OpenRouter** | Multi-provider routing across many models | Enter your API key | -| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` | -| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` | -| **Kimi / Moonshot China** | China-region Moonshot endpoint | Set `KIMI_CN_API_KEY` | -| **Arcee AI** | Trinity models | Set `ARCEEAI_API_KEY` | -| **Xiaomi MiMo** | Xiaomi MiMo models via [platform.xiaomimimo.com](https://platform.xiaomimimo.com) | Set `XIAOMI_API_KEY` | -| **AWS Bedrock** | Anthropic Claude, Amazon Nova, DeepSeek v3.2, and Meta Llama via AWS | Standard boto3 auth (`AWS_PROFILE` or `AWS_ACCESS_KEY_ID` + `AWS_REGION`) | -| **Qwen Portal (OAuth)** | Qwen 3.5 / Qwen-Coder models via Alibaba's consumer Qwen Portal | OAuth via `hermes model` (optional: `HERMES_QWEN_BASE_URL`) | -| **MiniMax** | International MiniMax endpoint | Set `MINIMAX_API_KEY` | -| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` | -| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` | -| **Hugging Face** | 20+ open models via unified router (Qwen, DeepSeek, Kimi, etc.) 
| Set `HF_TOKEN` | -| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` | -| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` | -| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` | -| **DeepSeek** | Direct DeepSeek API access | Set `DEEPSEEK_API_KEY` | -| **NVIDIA NIM** | Nemotron models via build.nvidia.com or local NIM | Set `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | -| **Ollama Cloud** | Managed Ollama catalog without local GPU | Set `OLLAMA_API_KEY` (or pick **Ollama Cloud** in `hermes model`) | -| **Google Gemini (OAuth)** | Gemini via Cloud Code Assist — free and paid tiers | OAuth via `hermes model` (optional: `HERMES_GEMINI_PROJECT_ID` for paid tiers) | -| **xAI (Grok)** | Grok 4 models via Responses API + prompt caching | Set `XAI_API_KEY` (alias: `grok`) | -| **GitHub Copilot** | GitHub Copilot subscription (GPT-5.x, Claude, Gemini, etc.) | OAuth via `hermes model`, or `COPILOT_GITHUB_TOKEN` / `GH_TOKEN` | -| **GitHub Copilot ACP** | Copilot ACP agent backend (spawns local `copilot` CLI) | `hermes model` (requires `copilot` CLI + `copilot login`) | -| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` | -| **Custom Endpoint** | VLLM, SGLang, Ollama, or any OpenAI-compatible API | Set base URL + API key | +| Situation | Recommended path | +|---|---| +| Least friction | Nous Portal or OpenRouter | +| You already have Claude or Codex auth | Anthropic or OpenAI Codex | +| You want local/private inference | Ollama or any custom OpenAI-compatible endpoint | +| You want multi-provider routing | OpenRouter | +| You have a custom GPU server | vLLM, SGLang, LiteLLM, or any OpenAI-compatible endpoint | + +For most first-time users: choose a provider, accept the defaults unless you know why you're changing them. The full provider catalog with env vars and setup steps lives on the [Providers](../integrations/providers.md) page. 
:::caution Minimum context: 64K tokens Hermes Agent requires a model with at least **64,000 tokens** of context. Models with smaller windows cannot maintain enough working memory for multi-step tool-calling workflows and will be rejected at startup. Most hosted models (Claude, GPT, Gemini, Qwen, DeepSeek) meet this easily. If you're running a local model, set its context size to at least 64K (e.g. `--ctx-size 65536` for llama.cpp or `-c 65536` for Ollama). ::: :::tip -You can switch providers at any time with `hermes model` — no code changes, no lock-in. When configuring a custom endpoint, Hermes will prompt for the context window size and auto-detect it when possible. See [Context Length Detection](../integrations/providers.md#context-length-detection) for details. +You can switch providers at any time with `hermes model` — no lock-in. For a full list of all supported providers and setup details, see [AI Providers](../integrations/providers.md). ::: -## 3. Start Chatting +### How settings are stored + +Hermes separates secrets from normal config: + +- **Secrets and tokens** → `~/.hermes/.env` +- **Non-secret settings** → `~/.hermes/config.yaml` + +The easiest way to set values correctly is through the CLI: + +```bash +hermes config set model anthropic/claude-opus-4.6 +hermes config set terminal.backend docker +hermes config set OPENROUTER_API_KEY sk-or-... +``` + +The right value goes to the right file automatically. + +## 3. Run Your First Chat ```bash hermes # classic CLI hermes --tui # modern TUI (recommended) ``` -That's it! You'll see a welcome banner with your model, available tools, and skills. Type a message and press Enter. +You'll see a welcome banner with your model, available tools, and skills. Use a prompt that's specific and easy to verify: :::tip Pick your interface Hermes ships with two terminal interfaces: the classic `prompt_toolkit` CLI and a newer [TUI](../user-guide/tui.md) with modal overlays, mouse selection, and non-blocking input. 
Both share the same sessions, slash commands, and config — try each with `hermes` vs `hermes --tui`. ::: ``` -❯ What can you help me with? +Summarize this repo in 5 bullets and tell me what the main entrypoint is. ``` -The agent has access to tools for web search, file operations, terminal commands, and more — all out of the box. +``` +Check my current directory and tell me what looks like the main project file. +``` -## 4. Try Key Features +``` +Help me set up a clean GitHub PR workflow for this codebase. +``` -### Ask it to use the terminal +**What success looks like:** + +- The banner shows your chosen model/provider +- Hermes replies without error +- It can use a tool if needed (terminal, file read, web search) +- The conversation continues normally for more than one turn + +If that works, you're past the hardest part. + +## 4. Verify Sessions Work + +Before moving on, make sure resume works: + +```bash +hermes --continue # Resume the most recent session +hermes -c # Short form +``` + +That should bring you back to the session you just had. If it doesn't, check whether you're in the same profile and whether the session actually saved. This matters later when you're juggling multiple setups or machines. + +## 5. Try Key Features + +### Use the terminal ``` ❯ What's my disk usage? Show the top 5 largest directories. ``` -The agent will run terminal commands on your behalf and show you the results. +The agent runs terminal commands on your behalf and shows results. -### Use slash commands +### Slash commands Type `/` to see an autocomplete dropdown of all commands: @@ -128,22 +174,27 @@ Press `Alt+Enter` or `Ctrl+J` to add a new line. Great for pasting code or writi ### Interrupt the agent -If the agent is taking too long, just type a new message and press Enter — it interrupts the current task and switches to your new instructions. `Ctrl+C` also works. 
+If the agent is taking too long, type a new message and press Enter — it interrupts the current task and switches to your new instructions. `Ctrl+C` also works. -### Resume a session +## 6. Add the Next Layer -When you exit, hermes prints a resume command: +Only after the base chat works. Pick what you need: + +### Bot or shared assistant ```bash -hermes --continue # Resume the most recent session -hermes -c # Short form +hermes gateway setup # Interactive platform configuration ``` -## 5. Explore Further +Connect [Telegram](/docs/user-guide/messaging/telegram), [Discord](/docs/user-guide/messaging/discord), [Slack](/docs/user-guide/messaging/slack), [WhatsApp](/docs/user-guide/messaging/whatsapp), [Signal](/docs/user-guide/messaging/signal), [Email](/docs/user-guide/messaging/email), or [Home Assistant](/docs/user-guide/messaging/homeassistant). -Here are some things to try next: +### Automation and tools -### Set up a sandboxed terminal +- `hermes tools` — tune tool access per platform +- `hermes skills` — browse and install reusable workflows +- Cron — only after your bot or CLI setup is stable + +### Sandboxed terminal For safety, run the agent in a Docker container or on a remote server: @@ -152,71 +203,25 @@ hermes config set terminal.backend docker # Docker isolation hermes config set terminal.backend ssh # Remote server ``` -### Connect messaging platforms - -Chat with Hermes from your phone or other surfaces via Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant: - -```bash -hermes gateway setup # Interactive platform configuration -``` - -### Add voice mode - -Want microphone input in the CLI or spoken replies in messaging? +### Voice mode ```bash pip install "hermes-agent[voice]" # Includes faster-whisper for free local speech-to-text ``` -Then start Hermes and enable it inside the CLI: +Then in the CLI: `/voice on`. Press `Ctrl+B` to record. See [Voice Mode](../user-guide/features/voice-mode.md). 
-```text -/voice on -``` - -Press `Ctrl+B` to record, or use `/voice tts` to have Hermes speak its replies. See [Voice Mode](../user-guide/features/voice-mode.md) for the full setup across CLI, Telegram, Discord, and Discord voice channels. - -### Schedule automated tasks - -``` -❯ Every morning at 9am, check Hacker News for AI news and send me a summary on Telegram. -``` - -The agent will set up a cron job that runs automatically via the gateway. - -### Browse and install skills +### Skills ```bash hermes skills search kubernetes -hermes skills search react --source skills-sh -hermes skills search https://mintlify.com/docs --source well-known hermes skills install openai/skills/k8s -hermes skills install official/security/1password -hermes skills install skills-sh/vercel-labs/json-render/json-render-react --force ``` -Tips: -- Use `--source skills-sh` to search the public `skills.sh` directory. -- Use `--source well-known` with a docs/site URL to discover skills from `/.well-known/skills/index.json`. -- Use `--force` only after reviewing a third-party skill. It can override non-dangerous policy blocks, but not a `dangerous` scan verdict. +Or use `/skills` inside a chat session. -Or use the `/skills` slash command inside chat. - -### Use Hermes inside an editor via ACP - -Hermes can also run as an ACP server for ACP-compatible editors like VS Code, Zed, and JetBrains: - -```bash -pip install -e '.[acp]' -hermes acp -``` - -See [ACP Editor Integration](../user-guide/features/acp.md) for setup details. - -### Try MCP servers - -Connect to external tools via the Model Context Protocol: +### MCP servers ```yaml # Add to ~/.hermes/config.yaml @@ -228,6 +233,43 @@ mcp_servers: GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_xxx" ``` +### Editor integration (ACP) + +```bash +pip install -e '.[acp]' +hermes acp +``` + +See [ACP Editor Integration](../user-guide/features/acp.md). 
+ +--- + +## Common Failure Modes + +These are the problems that waste the most time: + +| Symptom | Likely cause | Fix | +|---|---|---| +| Hermes opens but gives empty or broken replies | Provider auth or model selection is wrong | Run `hermes model` again and confirm provider, model, and auth | +| Custom endpoint "works" but returns garbage | Wrong base URL, model name, or not actually OpenAI-compatible | Verify the endpoint in a separate client first | +| Gateway starts but nobody can message it | Bot token, allowlist, or platform setup is incomplete | Re-run `hermes gateway setup` and check `hermes gateway status` | +| `hermes --continue` can't find old session | Switched profiles or session never saved | Check `hermes sessions list` and confirm you're in the right profile | +| Model unavailable or odd fallback behavior | Provider routing or fallback settings are too aggressive | Keep routing off until the base provider is stable | +| `hermes doctor` flags config problems | Config values are missing or stale | Fix the config, retest a plain chat before adding features | + +## Recovery Toolkit + +When something feels off, use this order: + +1. `hermes doctor` +2. `hermes model` +3. `hermes setup` +4. `hermes sessions list` +5. `hermes --continue` +6. `hermes gateway status` + +That sequence gets you from "broken vibes" back to a known state fast. 
+ --- ## Quick Reference @@ -249,3 +291,6 @@ mcp_servers: - **[Configuration](../user-guide/configuration.md)** — Customize your setup - **[Messaging Gateway](../user-guide/messaging/index.md)** — Connect Telegram, Discord, Slack, WhatsApp, Signal, Email, or Home Assistant - **[Tools & Toolsets](../user-guide/features/tools.md)** — Explore available capabilities +- **[AI Providers](../integrations/providers.md)** — Full provider list and setup details +- **[Skills System](../user-guide/features/skills.md)** — Reusable workflows and knowledge +- **[Tips & Best Practices](../guides/tips.md)** — Power user tips diff --git a/website/docs/guides/github-pr-review-agent.md b/website/docs/guides/github-pr-review-agent.md new file mode 100644 index 0000000000..530d8d6df0 --- /dev/null +++ b/website/docs/guides/github-pr-review-agent.md @@ -0,0 +1,300 @@ +--- +sidebar_position: 10 +title: "Tutorial: GitHub PR Review Agent" +description: "Build an automated AI code reviewer that monitors your repos, reviews pull requests, and delivers feedback — hands-free" +--- + +# Tutorial: Build a GitHub PR Review Agent + +**The problem:** Your team opens PRs faster than you can review them. PRs sit for days waiting for eyeballs. Junior devs merge bugs because nobody had time to check. You spend your mornings catching up on diffs instead of building. + +**The solution:** An AI agent that watches your repos around the clock, reviews every new PR for bugs, security issues, and code quality, and sends you a summary — so you only spend time on PRs that actually need human judgment. 
+ +**What you'll build:** + +``` +┌──────────────┐ ┌───────────────┐ ┌──────────────┐ ┌──────────────┐ +│ Cron Timer │────▶│ Hermes Agent │────▶│ GitHub API │────▶│ Review to │ +│ (every 2h) │ │ + gh CLI │ │ (PR diffs) │ │ Telegram/ │ +│ │ │ + skill │ │ │ │ Discord/ │ +│ │ │ + memory │ │ │ │ local file │ +└──────────────┘ └───────────────┘ └──────────────┘ └──────────────┘ +``` + +This guide uses **cron jobs** to poll for PRs on a schedule — no server or public endpoint needed. Works behind NAT and firewalls. + +:::tip Want real-time reviews instead? +If you have a public endpoint available, check out [Automated GitHub PR Comments with Webhooks](./webhook-github-pr-review.md) — GitHub pushes events to Hermes instantly when PRs are opened or updated. +::: + +--- + +## Prerequisites + +- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation) +- **Gateway running** for cron jobs: + ```bash + hermes gateway install # Install as a service + # or + hermes gateway # Run in foreground + ``` +- **GitHub CLI (`gh`) installed and authenticated**: + ```bash + # Install + brew install gh # macOS + sudo apt install gh # Ubuntu/Debian + + # Authenticate + gh auth login + ``` +- **Messaging configured** (optional) — [Telegram](/docs/user-guide/messaging/telegram) or [Discord](/docs/user-guide/messaging/discord) + +:::tip No messaging? No problem +Use `deliver: "local"` to save reviews to `~/.hermes/cron/output/`. Great for testing before wiring up notifications. +::: + +--- + +## Step 1: Verify the Setup + +Make sure Hermes can access GitHub. Start a chat: + +```bash +hermes +``` + +Test with a simple command: + +``` +Run: gh pr list --repo NousResearch/hermes-agent --state open --limit 3 +``` + +You should see a list of open PRs. If this works, you're ready. + +--- + +## Step 2: Try a Manual Review + +Still in the chat, ask Hermes to review a real PR: + +``` +Review this pull request. 
Read the diff, check for bugs, security issues, +and code quality. Be specific about line numbers and quote problematic code. + +Run: gh pr diff 3888 --repo NousResearch/hermes-agent +``` + +Hermes will: +1. Execute `gh pr diff` to fetch the code changes +2. Read through the entire diff +3. Produce a structured review with specific findings + +If you're happy with the quality, time to automate it. + +--- + +## Step 3: Create a Review Skill + +A skill gives Hermes consistent review guidelines that persist across sessions and cron runs. Without one, review quality varies. + +```bash +mkdir -p ~/.hermes/skills/code-review +``` + +Create `~/.hermes/skills/code-review/SKILL.md`: + +```markdown +--- +name: code-review +description: Review pull requests for bugs, security issues, and code quality +--- + +# Code Review Guidelines + +When reviewing a pull request: + +## What to Check +1. **Bugs** — Logic errors, off-by-one, null/undefined handling +2. **Security** — Injection, auth bypass, secrets in code, SSRF +3. **Performance** — N+1 queries, unbounded loops, memory leaks +4. **Style** — Naming conventions, dead code, missing error handling +5. **Tests** — Are changes tested? Do tests cover edge cases? + +## Output Format +For each finding: +- **File:Line** — exact location +- **Severity** — Critical / Warning / Suggestion +- **What's wrong** — one sentence +- **Fix** — how to fix it + +## Rules +- Be specific. Quote the problematic code. +- Don't flag style nitpicks unless they affect readability. +- If the PR looks good, say so. Don't invent problems. +- End with: APPROVE / REQUEST_CHANGES / COMMENT +``` + +Verify it loaded — start `hermes` and you should see `code-review` in the skills list at startup. + +--- + +## Step 4: Teach It Your Conventions + +This is what makes the reviewer actually useful. Start a session and teach Hermes your team's standards: + +``` +Remember: In our backend repo, we use Python with FastAPI. 
+All endpoints must have type annotations and Pydantic models. +We don't allow raw SQL — only SQLAlchemy ORM. +Test files go in tests/ and must use pytest fixtures. +``` + +``` +Remember: In our frontend repo, we use TypeScript with React. +No `any` types allowed. All components must have props interfaces. +We use React Query for data fetching, never useEffect for API calls. +``` + +These memories persist forever — the reviewer will enforce your conventions without being told each time. + +--- + +## Step 5: Create the Automated Cron Job + +Now wire it all together. Create a cron job that runs every 2 hours: + +```bash +hermes cron create "0 */2 * * *" \ + "Check for new open PRs and review them. + +Repos to monitor: +- myorg/backend-api +- myorg/frontend-app + +Steps: +1. Run: gh pr list --repo REPO --state open --limit 5 --json number,title,author,createdAt +2. For each PR created or updated in the last 4 hours: + - Run: gh pr diff NUMBER --repo REPO + - Review the diff using the code-review guidelines +3. Format output as: + +## PR Reviews — today + +### [repo] #[number]: [title] +**Author:** [name] | **Verdict:** APPROVE/REQUEST_CHANGES/COMMENT +[findings] + +If no new PRs found, say: No new PRs to review." \ + --name "pr-review" \ + --deliver telegram \ + --skill code-review +``` + +Verify it's scheduled: + +```bash +hermes cron list +``` + +### Other useful schedules + +| Schedule | When | +|----------|------| +| `0 */2 * * *` | Every 2 hours | +| `0 9,13,17 * * 1-5` | Three times a day, weekdays only | +| `0 9 * * 1` | Weekly Monday morning roundup | +| `30m` | Every 30 minutes (high-traffic repos) | + +--- + +## Step 6: Run It On Demand + +Don't want to wait for the schedule? 
Trigger it manually: + +```bash +hermes cron run pr-review +``` + +Or from within a chat session: + +``` +/cron run pr-review +``` + +--- + +## Going Further + +### Post Reviews Directly to GitHub + +Instead of delivering to Telegram, have the agent comment on the PR itself: + +Add this to your cron prompt: + +``` +After reviewing, post your review: +- For issues: gh pr review NUMBER --repo REPO --comment --body "YOUR_REVIEW" +- For critical issues: gh pr review NUMBER --repo REPO --request-changes --body "YOUR_REVIEW" +- For clean PRs: gh pr review NUMBER --repo REPO --approve --body "Looks good" +``` + +:::caution +Make sure `gh` has a token with `repo` scope. Reviews are posted as whoever `gh` is authenticated as. +::: + +### Weekly PR Dashboard + +Create a Monday morning overview of all your repos: + +```bash +hermes cron create "0 9 * * 1" \ + "Generate a weekly PR dashboard: +- myorg/backend-api +- myorg/frontend-app +- myorg/infra + +For each repo show: +1. Open PR count and oldest PR age +2. PRs merged this week +3. Stale PRs (older than 5 days) +4. PRs with no reviewer assigned + +Format as a clean summary." \ + --name "weekly-dashboard" \ + --deliver telegram +``` + +### Multi-Repo Monitoring + +Scale up by adding more repos to the prompt. The agent processes them sequentially — no extra setup needed. + +--- + +## Troubleshooting + +### "gh: command not found" +The gateway runs in a minimal environment. Ensure `gh` is in the system PATH and restart the gateway. + +### Reviews are too generic +1. Add the `code-review` skill (Step 3) +2. Teach Hermes your conventions via memory (Step 4) +3. The more context it has about your stack, the better the reviews + +### Cron job doesn't run +```bash +hermes gateway status # Is the gateway running? +hermes cron list # Is the job enabled? +``` + +### Rate limits +GitHub allows 5,000 API requests/hour for authenticated users. Each PR review uses ~3-5 requests (list + diff + optional comments). 
Even reviewing 100 PRs/day stays well within limits. + +--- + +## What's Next? + +- **[Webhook-Based PR Reviews](./webhook-github-pr-review.md)** — get instant reviews when PRs are opened (requires a public endpoint) +- **[Daily Briefing Bot](/docs/guides/daily-briefing-bot)** — combine PR reviews with your morning news digest +- **[Build a Plugin](/docs/guides/build-a-hermes-plugin)** — wrap the review logic into a shareable plugin +- **[Profiles](/docs/user-guide/profiles)** — run a dedicated reviewer profile with its own memory and config +- **[Fallback Providers](/docs/user-guide/features/fallback-providers)** — ensure reviews run even when one provider is down diff --git a/website/docs/guides/webhook-github-pr-review.md b/website/docs/guides/webhook-github-pr-review.md new file mode 100644 index 0000000000..b0dd15ecea --- /dev/null +++ b/website/docs/guides/webhook-github-pr-review.md @@ -0,0 +1,329 @@ +--- +sidebar_position: 11 +sidebar_label: "GitHub PR Reviews via Webhook" +title: "Automated GitHub PR Comments with Webhooks" +description: "Connect Hermes to GitHub so it automatically fetches PR diffs, reviews code changes, and posts comments — triggered by webhooks with no manual prompting" +--- + +# Automated GitHub PR Comments with Webhooks + +This guide walks you through connecting Hermes Agent to GitHub so it automatically fetches a pull request's diff, analyzes the code changes, and posts a comment — triggered by a webhook event with no manual prompting. + +When a PR is opened or updated, GitHub sends a webhook POST to your Hermes instance. Hermes runs the agent with a prompt that instructs it to retrieve the diff via the `gh` CLI, and the response is posted back to the PR thread. + +:::tip Want a simpler setup without a public endpoint? 
+If you don't have a public URL or just want to get started quickly, check out [Build a GitHub PR Review Agent](./github-pr-review-agent.md) — uses cron jobs to poll for PRs on a schedule, works behind NAT and firewalls. +::: + +:::info Reference docs +For the full webhook platform reference (all config options, delivery types, dynamic subscriptions, security model) see [Webhooks](/docs/user-guide/messaging/webhooks). +::: + +:::warning Prompt injection risk +Webhook payloads contain attacker-controlled data — PR titles, commit messages, and descriptions can contain malicious instructions. When your webhook endpoint is exposed to the internet, run the gateway in a sandboxed environment (Docker, SSH backend). See the [security section](#security-notes) below. +::: + +--- + +## Prerequisites + +- Hermes Agent installed and running (`hermes gateway`) +- [`gh` CLI](https://cli.github.com/) installed and authenticated on the gateway host (`gh auth login`) +- A publicly reachable URL for your Hermes instance (see [Local testing with ngrok](#local-testing-with-ngrok) if running locally) +- Admin access to the GitHub repository (required to manage webhooks) + +--- + +## Step 1 — Enable the webhook platform + +Add the following to your `~/.hermes/config.yaml`: + +```yaml +platforms: + webhook: + enabled: true + extra: + port: 8644 # default; change if another service occupies this port + rate_limit: 30 # max requests per minute per route (not a global cap) + + routes: + github-pr-review: + secret: "your-webhook-secret-here" # must match the GitHub webhook secret exactly + events: + - pull_request + + # The agent is instructed to fetch the actual diff before reviewing. + # {number} and {repository.full_name} are resolved from the GitHub payload. + prompt: | + A pull request event was received (action: {action}). 
+ + PR #{number}: {pull_request.title} + Author: {pull_request.user.login} + Branch: {pull_request.head.ref} → {pull_request.base.ref} + Description: {pull_request.body} + URL: {pull_request.html_url} + + If the action is "closed" or "labeled", stop here and do not post a comment. + + Otherwise: + 1. Run: gh pr diff {number} --repo {repository.full_name} + 2. Review the code changes for correctness, security issues, and clarity. + 3. Write a concise, actionable review comment and post it. + + deliver: github_comment + deliver_extra: + repo: "{repository.full_name}" + pr_number: "{number}" +``` + +**Key fields:** + +| Field | Description | +|---|---| +| `secret` (route-level) | HMAC secret for this route. Falls back to `extra.secret` global if omitted. | +| `events` | List of `X-GitHub-Event` header values to accept. Empty list = accept all. | +| `prompt` | Template; `{field}` and `{nested.field}` resolve from the GitHub payload. | +| `deliver` | `github_comment` posts via `gh pr comment`. `log` just writes to the gateway log. | +| `deliver_extra.repo` | Resolves to e.g. `org/repo` from the payload. | +| `deliver_extra.pr_number` | Resolves to the PR number from the payload. | + +:::note The payload does not contain code +The GitHub webhook payload includes PR metadata (title, description, branch names, URLs) but **not the diff**. The prompt above instructs the agent to run `gh pr diff` to fetch the actual changes. The `terminal` tool is included in the default `hermes-webhook` toolset, so no extra configuration is needed. +::: + +--- + +## Step 2 — Start the gateway + +```bash +hermes gateway +``` + +You should see: + +``` +[webhook] Listening on 0.0.0.0:8644 — routes: github-pr-review +``` + +Verify it's running: + +```bash +curl http://localhost:8644/health +# {"status": "ok", "platform": "webhook"} +``` + +--- + +## Step 3 — Register the webhook on GitHub + +1. Go to your repository → **Settings** → **Webhooks** → **Add webhook** +2. 
Fill in: + - **Payload URL:** `https://your-public-url.example.com/webhooks/github-pr-review` + - **Content type:** `application/json` + - **Secret:** the same value you set for `secret` in the route config + - **Which events?** → Select individual events → check **Pull requests** +3. Click **Add webhook** + +GitHub will immediately send a `ping` event to confirm the connection. It is safely ignored — `ping` is not in your `events` list — and returns `{"status": "ignored", "event": "ping"}`. It is only logged at DEBUG level, so it won't appear in the console at the default log level. + +--- + +## Step 4 — Open a test PR + +Create a branch, push a change, and open a PR. Within 30–90 seconds (depending on PR size and model), Hermes should post a review comment. + +To follow the agent's progress in real time: + +```bash +tail -f "${HERMES_HOME:-$HOME/.hermes}/logs/gateway.log" +``` + +--- + +## Local testing with ngrok + +If Hermes is running on your laptop, use [ngrok](https://ngrok.com/) to expose it: + +```bash +ngrok http 8644 +``` + +Copy the `https://...ngrok-free.app` URL and use it as your GitHub Payload URL. On the free ngrok tier the URL changes each time ngrok restarts — update your GitHub webhook each session. Paid ngrok accounts get a static domain. + +You can smoke-test a static route directly with `curl` — no GitHub account or real PR needed. + +:::tip Use `deliver: log` when testing locally +Change `deliver: github_comment` to `deliver: log` in your config while testing. Otherwise the agent will attempt to post a comment to the fake `org/repo#99` repo in the test payload, which will fail. Switch back to `deliver: github_comment` once you're satisfied with the prompt output. 
+::: + +```bash +SECRET="your-webhook-secret-here" +BODY='{"action":"opened","number":99,"pull_request":{"title":"Test PR","body":"Adds a feature.","user":{"login":"testuser"},"head":{"ref":"feat/x"},"base":{"ref":"main"},"html_url":"https://github.com/org/repo/pull/99"},"repository":{"full_name":"org/repo"}}' +SIG=$(printf '%s' "$BODY" | openssl dgst -sha256 -hmac "$SECRET" -hex | awk '{print "sha256="$2}') + +curl -s -X POST http://localhost:8644/webhooks/github-pr-review \ + -H "Content-Type: application/json" \ + -H "X-GitHub-Event: pull_request" \ + -H "X-Hub-Signature-256: $SIG" \ + -d "$BODY" +# Expected: {"status":"accepted","route":"github-pr-review","event":"pull_request","delivery_id":"..."} +``` + +Then watch the agent run: +```bash +tail -f "${HERMES_HOME:-$HOME/.hermes}/logs/gateway.log" +``` + +:::note +`hermes webhook test <name>` only works for **dynamic subscriptions** created with `hermes webhook subscribe`. It does not read routes from `config.yaml`. +::: + +--- + +## Filtering to specific actions + +GitHub sends `pull_request` events for many actions: `opened`, `synchronize`, `reopened`, `closed`, `labeled`, etc. The `events` list filters only by the `X-GitHub-Event` header value — it cannot filter by action sub-type at the routing level. + +The prompt in Step 1 already handles this by instructing the agent to stop early for `closed` and `labeled` events. + +:::warning The agent still runs and consumes tokens +The "stop here" instruction prevents a meaningful review, but the agent still runs to completion for every `pull_request` event regardless of action. GitHub webhooks can only filter by event type (`pull_request`, `push`, `issues`, etc.) — not by action sub-type (`opened`, `closed`, `labeled`). There is no routing-level filter for sub-actions. For high-volume repos, accept this cost or filter upstream with a GitHub Actions workflow that calls your webhook URL conditionally. +::: + +> There is no Jinja2 or conditional template syntax. 
`{field}` and `{nested.field}` are the only substitutions supported. Anything else is passed verbatim to the agent. + +--- + +## Using a skill for consistent review style + +Load a [Hermes skill](/docs/user-guide/features/skills) to give the agent a consistent review persona. Add `skills` to your route inside `platforms.webhook.extra.routes` in `config.yaml`: + +```yaml +platforms: + webhook: + enabled: true + extra: + routes: + github-pr-review: + secret: "your-webhook-secret-here" + events: [pull_request] + prompt: | + A pull request event was received (action: {action}). + PR #{number}: {pull_request.title} by {pull_request.user.login} + URL: {pull_request.html_url} + + If the action is "closed" or "labeled", stop here and do not post a comment. + + Otherwise: + 1. Run: gh pr diff {number} --repo {repository.full_name} + 2. Review the diff using your review guidelines. + 3. Write a concise, actionable review comment and post it. + skills: + - review + deliver: github_comment + deliver_extra: + repo: "{repository.full_name}" + pr_number: "{number}" +``` + +> **Note:** Only the first skill in the list that is found is loaded. Hermes does not stack multiple skills — subsequent entries are ignored. + +--- + +## Sending responses to Slack or Discord instead + +Replace the `deliver` and `deliver_extra` fields inside your route with your target platform: + +```yaml +# Inside platforms.webhook.extra.routes.<route-name>: + +# Slack +deliver: slack +deliver_extra: + chat_id: "C0123456789" # Slack channel ID (omit to use the configured home channel) + +# Discord +deliver: discord +deliver_extra: + chat_id: "987654321012345678" # Discord channel ID (omit to use home channel) +``` + +The target platform must also be enabled and connected in the gateway. If `chat_id` is omitted, the response is sent to that platform's configured home channel. 
+ +Valid `deliver` values: `log` · `github_comment` · `telegram` · `discord` · `slack` · `signal` · `sms` + +--- + +## GitLab support + +The same adapter works with GitLab. GitLab uses `X-Gitlab-Token` for authentication (plain string match, not HMAC) — Hermes handles both automatically. + +For event filtering, GitLab sets `X-GitLab-Event` to values like `Merge Request Hook`, `Push Hook`, `Pipeline Hook`. Use the exact header value in `events`: + +```yaml +events: + - Merge Request Hook +``` + +GitLab payload fields differ from GitHub's — e.g. `{object_attributes.title}` for the MR title and `{object_attributes.iid}` for the MR number. The easiest way to discover the full payload structure is GitLab's **Test** button in your webhook settings, combined with the **Recent Deliveries** log. Alternatively, omit `prompt` from your route config — Hermes will then pass the full payload as formatted JSON directly to the agent, and the agent's response (visible in the gateway log with `deliver: log`) will describe its structure. + +--- + +## Security notes + +- **Never use `INSECURE_NO_AUTH`** in production — it disables signature validation entirely. It is only for local development. +- **Rotate your webhook secret** periodically and update it in both GitHub (webhook settings) and your `config.yaml`. +- **Rate limiting** is 30 req/min per route by default (configurable via `extra.rate_limit`). Exceeding it returns `429`. +- **Duplicate deliveries** (webhook retries) are deduplicated via a 1-hour idempotency cache. The cache key is `X-GitHub-Delivery` if present, then `X-Request-ID`, then a millisecond timestamp. When neither delivery ID header is set, retries are **not** deduplicated. +- **Prompt injection:** PR titles, descriptions, and commit messages are attacker-controlled. Malicious PRs could attempt to manipulate the agent's actions. Run the gateway in a sandboxed environment (Docker, VM) when exposed to the public internet. 
+ +--- + +## Troubleshooting + +| Symptom | Check | +|---|---| +| `401 Invalid signature` | Secret in config.yaml doesn't match GitHub webhook secret | +| `404 Unknown route` | Route name in the URL doesn't match the key in `routes:` | +| `429 Rate limit exceeded` | 30 req/min per route exceeded — common when re-delivering test events from GitHub's UI; wait a minute or raise `extra.rate_limit` | +| No comment posted | `gh` not installed, not on PATH, or not authenticated (`gh auth login`) | +| Agent runs but no comment | Check the gateway log — if the agent output was empty or just "SKIP", delivery is still attempted | +| Port already in use | Change `extra.port` in config.yaml | +| Agent runs but reviews only the PR description | The prompt isn't including the `gh pr diff` instruction — the diff is not in the webhook payload | +| Can't see the ping event | Ignored events return `{"status":"ignored","event":"ping"}` at DEBUG log level only — check GitHub's delivery log (repo → Settings → Webhooks → your webhook → Recent Deliveries) | + +**GitHub's Recent Deliveries tab** (repo → Settings → Webhooks → your webhook) shows the exact request headers, payload, HTTP status, and response body for every delivery. It is the fastest way to diagnose failures without touching your server logs. 
+ +--- + +## Full config reference + +```yaml +platforms: + webhook: + enabled: true + extra: + host: "0.0.0.0" # bind address (default: 0.0.0.0) + port: 8644 # listen port (default: 8644) + secret: "" # optional global fallback secret + rate_limit: 30 # requests per minute per route + max_body_bytes: 1048576 # payload size limit in bytes (default: 1 MB) + + routes: + <route-name>: + secret: "required-per-route" + events: [] # [] = accept all; otherwise list X-GitHub-Event values + prompt: "" # {field} / {nested.field} resolved from payload + skills: [] # first matching skill is loaded (only one) + deliver: "log" # log | github_comment | telegram | discord | slack | signal | sms + deliver_extra: {} # repo + pr_number for github_comment; chat_id for others +``` + +--- + +## What's Next? + +- **[Cron-Based PR Reviews](./github-pr-review-agent.md)** — poll for PRs on a schedule, no public endpoint needed +- **[Webhook Reference](/docs/user-guide/messaging/webhooks)** — full config reference for the webhook platform +- **[Build a Plugin](/docs/guides/build-a-hermes-plugin)** — package review logic into a shareable plugin +- **[Profiles](/docs/user-guide/profiles)** — run a dedicated reviewer profile with its own memory and config diff --git a/website/sidebars.ts b/website/sidebars.ts index c84184c4e6..d57a71dcc2 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -162,6 +162,8 @@ const sidebars: SidebarsConfig = { 'guides/cron-troubleshooting', 'guides/work-with-skills', 'guides/delegation-patterns', + 'guides/github-pr-review-agent', + 'guides/webhook-github-pr-review', 'guides/migrate-from-openclaw', 'guides/aws-bedrock', ], From c567adb58abbaa0fd1f775ec27d1754efacca83c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 05:35:45 -0700 Subject: [PATCH 043/455] fix(tui): session.create build thread must clean up if session.close races (#12555) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit When a user hits /new or /resume before the previous session finishes initializing, session.close runs while the previous session.create's _build thread is still constructing the agent. session.close pops _sessions[sid] and closes whatever slash_worker it finds (None at that point — _build hasn't installed it yet), then returns. _build keeps running in the background, installs the slash_worker subprocess and registers an approval-notify callback on a session dict that's now unreachable via _sessions. The subprocess leaks until process exit; the notify callback lingers in the global registry. Fix: _build now tracks what it allocates (worker, notify_registered) and checks in its finally block whether _sessions[sid] still points to the session it's building for. If not, the build was orphaned by a racing close, so clean up the subprocess and unregister the notify ourselves. tui_gateway/server.py: - _build reads _sessions.get(sid) safely (returns early if already gone) - tracks allocated worker + notify registration - finally checks orphan status and cleans up Tests (tests/test_tui_gateway_server.py): 2 new cases. - test_session_create_close_race_does_not_orphan_worker: slow _make_agent, close mid-build, verify worker.close() and unregister_gateway_notify both fire from the build thread's cleanup path. - test_session_create_no_race_keeps_worker_alive: regression guard — happy path does NOT over-eagerly clean up a live worker. Validated: against the unpatched code, the race test fails with 'orphan worker was not cleaned up — closed_workers=[]'. Live E2E against the live Python environment confirmed the cleanup fires exactly when the race happens. 
--- tests/test_tui_gateway_server.py | 159 +++++++++++++++++++++++++++++++ tui_gateway/server.py | 39 +++++++- 2 files changed, 196 insertions(+), 2 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index c0f5239035..533516b95d 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -949,3 +949,162 @@ def test_mirror_slash_side_effects_allowed_when_idle(monkeypatch): # Should NOT contain "session busy" — the switch went through. assert "session busy" not in warning assert applied["model"] + + +# --------------------------------------------------------------------------- +# session.create / session.close race: fast /new churn must not orphan the +# slash_worker subprocess or the global approval-notify registration. +# --------------------------------------------------------------------------- + + +def test_session_create_close_race_does_not_orphan_worker(monkeypatch): + """Regression guard: if session.close runs while session.create's + _build thread is still constructing the agent, the build thread + must detect the orphan and clean up the slash_worker + notify + registration it's about to install. 
Without the cleanup those + resources leak — the subprocess stays alive until atexit and the + notify callback lingers in the global registry.""" + import threading + + closed_workers: list[str] = [] + unregistered_keys: list[str] = [] + + class _FakeWorker: + def __init__(self, key, model): + self.key = key + self._closed = False + + def close(self): + self._closed = True + closed_workers.append(self.key) + + class _FakeAgent: + def __init__(self): + self.model = "x" + self.provider = "openrouter" + self.base_url = "" + self.api_key = "" + + # Make _build block until we release it — simulates slow agent init + release_build = threading.Event() + + def _slow_make_agent(sid, key): + release_build.wait(timeout=3.0) + return _FakeAgent() + + # Stub everything _build touches + monkeypatch.setattr(server, "_make_agent", _slow_make_agent) + monkeypatch.setattr(server, "_SlashWorker", _FakeWorker) + monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None)) + monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"}) + monkeypatch.setattr(server, "_probe_credentials", lambda _a: None) + monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None) + monkeypatch.setattr(server, "_emit", lambda *a, **kw: None) + + # Shim register/unregister to observe leaks + import tools.approval as _approval + monkeypatch.setattr(_approval, "register_gateway_notify", + lambda key, cb: None) + monkeypatch.setattr(_approval, "unregister_gateway_notify", + lambda key: unregistered_keys.append(key)) + monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None) + + # Start: session.create spawns _build thread, returns synchronously + resp = server.handle_request({ + "id": "1", "method": "session.create", "params": {"cols": 80}, + }) + assert resp.get("result"), f"got error: {resp.get('error')}" + sid = resp["result"]["session_id"] + + # Build thread is blocked in _slow_make_agent. 
Close the session + # NOW — this pops _sessions[sid] before _build can install the + # worker/notify. + close_resp = server.handle_request({ + "id": "2", "method": "session.close", "params": {"session_id": sid}, + }) + assert close_resp.get("result", {}).get("closed") is True + + # At this point session.close saw slash_worker=None (not yet + # installed) so it didn't close anything. Release the build thread + # and let it finish — it should detect the orphan and clean up the + # worker it just allocated + unregister the notify. + release_build.set() + + # Give the build thread a moment to run through its finally. + for _ in range(100): + if closed_workers: + break + import time + time.sleep(0.02) + + assert len(closed_workers) == 1, ( + f"orphan worker was not cleaned up — closed_workers={closed_workers}" + ) + # Notify may be unregistered by both session.close (unconditional) + # and the orphan-cleanup path; the key guarantee is that the build + # thread does at least one unregister call (any prior close + # already popped the callback; the duplicate is a no-op). 
+ assert len(unregistered_keys) >= 1, ( + f"orphan notify registration was not unregistered — " + f"unregistered_keys={unregistered_keys}" + ) + + +def test_session_create_no_race_keeps_worker_alive(monkeypatch): + """Regression guard: when session.close does NOT race, the build + thread must install the worker + notify normally and leave them + alone (no over-eager cleanup).""" + closed_workers: list[str] = [] + unregistered_keys: list[str] = [] + + class _FakeWorker: + def __init__(self, key, model): + self.key = key + + def close(self): + closed_workers.append(self.key) + + class _FakeAgent: + def __init__(self): + self.model = "x" + self.provider = "openrouter" + self.base_url = "" + self.api_key = "" + + monkeypatch.setattr(server, "_make_agent", lambda sid, key: _FakeAgent()) + monkeypatch.setattr(server, "_SlashWorker", _FakeWorker) + monkeypatch.setattr(server, "_get_db", lambda: types.SimpleNamespace(create_session=lambda *a, **kw: None)) + monkeypatch.setattr(server, "_session_info", lambda _a: {"model": "x"}) + monkeypatch.setattr(server, "_probe_credentials", lambda _a: None) + monkeypatch.setattr(server, "_wire_callbacks", lambda _sid: None) + monkeypatch.setattr(server, "_emit", lambda *a, **kw: None) + + import tools.approval as _approval + monkeypatch.setattr(_approval, "register_gateway_notify", lambda key, cb: None) + monkeypatch.setattr(_approval, "unregister_gateway_notify", + lambda key: unregistered_keys.append(key)) + monkeypatch.setattr(_approval, "load_permanent_allowlist", lambda: None) + + resp = server.handle_request({ + "id": "1", "method": "session.create", "params": {"cols": 80}, + }) + sid = resp["result"]["session_id"] + + # Wait for the build to finish (ready event inside session dict). + session = server._sessions[sid] + session["agent_ready"].wait(timeout=2.0) + + # Build finished without a close race — nothing should have been + # cleaned up by the orphan check. 
+ assert closed_workers == [], ( + f"build thread closed its own worker despite no race: {closed_workers}" + ) + assert unregistered_keys == [], ( + f"build thread unregistered its own notify despite no race: {unregistered_keys}" + ) + + # Session should have the live worker installed. + assert session.get("slash_worker") is not None + + # Cleanup + server._sessions.pop(sid, None) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 00f8346191..70dff3b17b 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1088,7 +1088,23 @@ def _(rid, params: dict) -> dict: } def _build() -> None: - session = _sessions[sid] + session = _sessions.get(sid) + if session is None: + # session.close ran before the build thread got scheduled. + ready.set() + return + + # Track what we allocate so we can clean up if session.close + # races us to the finish line. session.close pops _sessions[sid] + # unconditionally and tries to close the slash_worker it finds; + # if _build is still mid-construction when close runs, close + # finds slash_worker=None / notify unregistered and returns + # cleanly — leaving us, the build thread, to later install the + # worker + notify on an orphaned session dict. The finally + # block below detects the orphan and cleans up instead of + # leaking a subprocess and a global notify registration. 
+ worker = None + notify_registered = False try: tokens = _set_session_context(key) try: @@ -1100,13 +1116,15 @@ def _(rid, params: dict) -> dict: session["agent"] = agent try: - session["slash_worker"] = _SlashWorker(key, getattr(agent, "model", _resolve_model())) + worker = _SlashWorker(key, getattr(agent, "model", _resolve_model())) + session["slash_worker"] = worker except Exception: pass try: from tools.approval import register_gateway_notify, load_permanent_allowlist register_gateway_notify(key, lambda data: _emit("approval.request", sid, data)) + notify_registered = True load_permanent_allowlist() except Exception: pass @@ -1122,6 +1140,23 @@ def _(rid, params: dict) -> dict: session["agent_error"] = str(e) _emit("error", sid, {"message": f"agent init failed: {e}"}) finally: + # Orphan check: if session.close raced us and popped + # _sessions[sid] while we were building, the dict we just + # populated is unreachable. Clean up the subprocess and + # the global notify registration ourselves — session.close + # couldn't see them at the time it ran. + if _sessions.get(sid) is not session: + if worker is not None: + try: + worker.close() + except Exception: + pass + if notify_registered: + try: + from tools.approval import unregister_gateway_notify + unregister_gateway_notify(key) + except Exception: + pass ready.set() threading.Thread(target=_build, daemon=True).start() From a521005fe5e5885b23c878a5c5fdc2e1b361a4da Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 19 Apr 2026 05:45:59 -0700 Subject: [PATCH 044/455] fix(discord): close two low-severity adapter races (#12558) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two small races in gateway/platforms/discord.py, bundled together since they're adjacent in the adapter and both narrow in impact. 1. on_message vs _resolve_allowed_usernames (startup window) DISCORD_ALLOWED_USERS accepts both numeric IDs and raw usernames. 
At connect-time, _resolve_allowed_usernames walks the bot's guilds (fetch_members can take multiple seconds) to swap usernames for IDs. on_message can fire during that window; _is_allowed_user compares the numeric author.id against a set that may still contain raw usernames — legitimate users get silently rejected for a few seconds after every reconnect. Fix: on_message awaits _ready_event (with a 30s timeout) when it isn't already set. on_ready sets the event after the resolve completes. In steady state this is a no-op (event already set); only the startup / reconnect window ever blocks. 2. join_voice_channel check-and-connect The existing-connection check at _voice_clients.get() and the channel.connect() call straddled an await boundary with no lock. Two concurrent /voice channel invocations could both see None and both call connect(); discord.py raises ClientException ("Already connected") on the loser. Same race class for leave running concurrently with _voice_timeout_handler. Fix: per-guild asyncio.Lock (_voice_locks dict with lazy alloc via _voice_lock_for). join_voice_channel and leave_voice_channel both run their body under the lock. Sequential within a guild, still fully concurrent across guilds. Both: LOW severity. The first only affects username-based allowlists on fast-follow-up messages at startup; the second is a narrow exception on simultaneous voice commands. Bundled so the adapter gets a single coherent polish pass. Tests (tests/gateway/test_discord_race_polish.py): 2 regression cases. - test_concurrent_joins_do_not_double_connect: two concurrent join_voice_channel calls on the same guild result in exactly one channel.connect() invocation. - test_on_message_blocks_until_ready_event_set: asserts the expected wait pattern is present in on_message (source inspection, since full discord.py client setup isn't practical here). Regression-guard validated: against unpatched gateway/platforms/discord.py both tests fail. With the fix they pass. 
Full Discord suite (118 tests) green. --- gateway/platforms/discord.py | 116 +++++++++++++------- tests/gateway/test_discord_race_polish.py | 122 ++++++++++++++++++++++ 2 files changed, 201 insertions(+), 37 deletions(-) create mode 100644 tests/gateway/test_discord_race_polish.py diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 1ec831b66d..fce7ece414 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -498,6 +498,7 @@ class DiscordAdapter(BasePlatformAdapter): self._allowed_role_ids: set = set() # For DISCORD_ALLOWED_ROLES filtering # Voice channel state (per-guild) self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient + self._voice_locks: Dict[int, asyncio.Lock] = {} # guild_id -> serialize join/leave # Text batching: merge rapid successive messages (Telegram-style) self._text_batch_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_DELAY_SECONDS", "0.6")) self._text_batch_split_delay_seconds = float(os.getenv("HERMES_DISCORD_TEXT_BATCH_SPLIT_DELAY_SECONDS", "2.0")) @@ -636,6 +637,30 @@ class DiscordAdapter(BasePlatformAdapter): @self._client.event async def on_message(message: DiscordMessage): + # Wait for on_ready to finish resolving username-based + # allowlist entries. Without this block, messages + # arriving between Discord's READY event and the end + # of _resolve_allowed_usernames compare author IDs + # (numeric) against a set that may still contain raw + # usernames (strings) from DISCORD_ALLOWED_USERS — + # legitimate users get silently rejected for the first + # few seconds after every reconnect. The wait is a + # near-instant no-op in steady state (_ready_event is + # already set); only the startup / reconnect window + # ever blocks. 
+ if not adapter_self._ready_event.is_set(): + try: + await asyncio.wait_for( + adapter_self._ready_event.wait(), + timeout=30.0, + ) + except asyncio.TimeoutError: + logger.warning( + "[%s] on_message timed out waiting for _ready_event; " + "allowlist check may use pre-resolved entries", + adapter_self.name, + ) + # Dedup: Discord RESUME replays events after reconnects (#4777) if adapter_self._dedup.is_duplicate(str(message.id)): return @@ -1231,57 +1256,74 @@ class DiscordAdapter(BasePlatformAdapter): # Voice channel methods (join / leave / play) # ------------------------------------------------------------------ + def _voice_lock_for(self, guild_id: int) -> "asyncio.Lock": + """Return the per-guild lock, creating it on first use. + + Voice join/leave/move must be serialized per guild — without + this, two concurrent /voice channel invocations both see + _voice_clients.get(guild_id) return None, both call + channel.connect(), and discord.py raises ClientException + ('Already connected') on the loser. + """ + lock = self._voice_locks.get(guild_id) + if lock is None: + lock = asyncio.Lock() + self._voice_locks[guild_id] = lock + return lock + async def join_voice_channel(self, channel) -> bool: """Join a Discord voice channel. Returns True on success.""" if not self._client or not DISCORD_AVAILABLE: return False guild_id = channel.guild.id - # Already connected in this guild? - existing = self._voice_clients.get(guild_id) - if existing and existing.is_connected(): - if existing.channel.id == channel.id: + async with self._voice_lock_for(guild_id): + # Already connected in this guild? 
+ existing = self._voice_clients.get(guild_id) + if existing and existing.is_connected(): + if existing.channel.id == channel.id: + self._reset_voice_timeout(guild_id) + return True + await existing.move_to(channel) self._reset_voice_timeout(guild_id) return True - await existing.move_to(channel) + + vc = await channel.connect() + self._voice_clients[guild_id] = vc self._reset_voice_timeout(guild_id) + + # Start voice receiver (Phase 2: listen to users) + try: + receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) + receiver.start() + self._voice_receivers[guild_id] = receiver + self._voice_listen_tasks[guild_id] = asyncio.ensure_future( + self._voice_listen_loop(guild_id) + ) + except Exception as e: + logger.warning("Voice receiver failed to start: %s", e) + return True - vc = await channel.connect() - self._voice_clients[guild_id] = vc - self._reset_voice_timeout(guild_id) - - # Start voice receiver (Phase 2: listen to users) - try: - receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids) - receiver.start() - self._voice_receivers[guild_id] = receiver - self._voice_listen_tasks[guild_id] = asyncio.ensure_future( - self._voice_listen_loop(guild_id) - ) - except Exception as e: - logger.warning("Voice receiver failed to start: %s", e) - - return True - async def leave_voice_channel(self, guild_id: int) -> None: """Disconnect from the voice channel in a guild.""" - # Stop voice receiver first - receiver = self._voice_receivers.pop(guild_id, None) - if receiver: - receiver.stop() - listen_task = self._voice_listen_tasks.pop(guild_id, None) - if listen_task: - listen_task.cancel() + async with self._voice_lock_for(guild_id): + # Stop voice receiver first + receiver = self._voice_receivers.pop(guild_id, None) + if receiver: + receiver.stop() + listen_task = self._voice_listen_tasks.pop(guild_id, None) + if listen_task: + listen_task.cancel() - vc = self._voice_clients.pop(guild_id, None) - if vc and vc.is_connected(): - await 
vc.disconnect() - task = self._voice_timeout_tasks.pop(guild_id, None) - if task: - task.cancel() - self._voice_text_channels.pop(guild_id, None) - self._voice_sources.pop(guild_id, None) + vc = self._voice_clients.pop(guild_id, None) + if vc and vc.is_connected(): + await vc.disconnect() + task = self._voice_timeout_tasks.pop(guild_id, None) + if task: + task.cancel() + self._voice_text_channels.pop(guild_id, None) + self._voice_sources.pop(guild_id, None) # Maximum seconds to wait for voice playback before giving up PLAYBACK_TIMEOUT = 120 diff --git a/tests/gateway/test_discord_race_polish.py b/tests/gateway/test_discord_race_polish.py new file mode 100644 index 0000000000..a0f900aea6 --- /dev/null +++ b/tests/gateway/test_discord_race_polish.py @@ -0,0 +1,122 @@ +"""Regression tests for the Discord adapter race-polish fix. + +Two races are addressed: +1. on_message allowlist check racing on_ready's _resolve_allowed_usernames + resolution window. Username-based entries in DISCORD_ALLOWED_USERS + appear in the set as raw strings for several seconds after + connect/reconnect; author.id is always numeric, so legitimate users + are silently rejected until resolution finishes. +2. join_voice_channel check-and-connect: concurrent /voice channel + invocations both see _voice_clients.get(guild_id) is None, both call + channel.connect(), second raises ClientException ('Already connected'). 
+""" + +import asyncio +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from gateway.config import Platform, PlatformConfig + + +def _make_adapter(): + """Bare DiscordAdapter for testing — object.__new__ pattern per AGENTS.md.""" + from gateway.platforms.discord import DiscordAdapter + + adapter = object.__new__(DiscordAdapter) + adapter._platform = Platform.DISCORD + adapter.config = PlatformConfig(enabled=True, token="t") + adapter._ready_event = asyncio.Event() + adapter._allowed_user_ids = set() + adapter._allowed_role_ids = set() + adapter._voice_clients = {} + adapter._voice_locks = {} + adapter._voice_receivers = {} + adapter._voice_listen_tasks = {} + adapter._voice_timeout_tasks = {} + adapter._voice_text_channels = {} + adapter._voice_sources = {} + adapter._client = MagicMock() + return adapter + + +class TestJoinVoiceSerialization: + @pytest.mark.asyncio + async def test_concurrent_joins_do_not_double_connect(self): + """Two concurrent join_voice_channel calls on the same guild + must serialize through the per-guild lock — only ONE + channel.connect() actually fires; the second sees the + _voice_clients entry the first just installed.""" + adapter = _make_adapter() + + connect_count = [0] + connect_event = asyncio.Event() + + class FakeVC: + def __init__(self, channel): + self.channel = channel + + def is_connected(self): + return True + + async def move_to(self, _channel): + return None + + async def disconnect(self): + return None + + async def slow_connect(self): + connect_count[0] += 1 + # Widen the race window + await connect_event.wait() + return FakeVC(self) + + channel = MagicMock() + channel.id = 111 + channel.guild.id = 42 + channel.connect = lambda: slow_connect(channel) + + # Swap out VoiceReceiver so it doesn't try to set up real audio + from gateway.platforms import discord as discord_mod + with patch.object(discord_mod, "VoiceReceiver", MagicMock(return_value=MagicMock(start=lambda: None))): + with 
patch.object(discord_mod.asyncio, "ensure_future", lambda _c: asyncio.create_task(asyncio.sleep(0))): + # Fire two joins concurrently + t1 = asyncio.create_task(adapter.join_voice_channel(channel)) + t2 = asyncio.create_task(adapter.join_voice_channel(channel)) + # Let them run until they're blocked on our event + await asyncio.sleep(0.05) + # Release connect so both can finish + connect_event.set() + r1, r2 = await asyncio.gather(t1, t2) + + assert connect_count[0] == 1, ( + f"Expected exactly 1 channel.connect() call, got {connect_count[0]} — " + "per-guild voice lock is not serializing join_voice_channel" + ) + assert r1 is True and r2 is True + assert 42 in adapter._voice_clients + + +class TestOnMessageWaitsForReadyEvent: + @pytest.mark.asyncio + async def test_on_message_blocks_until_ready_event_set(self): + """A message arriving before on_ready finishes + _resolve_allowed_usernames must wait, not proceed with a + half-resolved allowlist.""" + # This is an integration-style check — we pull out the + # on_message handler by asserting the source contains the + # expected wait pattern. A full end-to-end test would require + # setting up the discord.py client machinery, which is not + # practical here. + import inspect + from gateway.platforms import discord as discord_mod + + src = inspect.getsource(discord_mod.DiscordAdapter.connect) + assert "_ready_event.is_set()" in src, ( + "on_message must gate on _ready_event so username-based " + "allowlist entries are resolved before the allowlist check" + ) + assert "await asyncio.wait_for(" in src and "_ready_event.wait()" in src, ( + "Expected asyncio.wait_for(_ready_event.wait(), timeout=...) 
" + "pattern in on_message" + ) From a6fe5d08727c9bb2486709ba3357137fbb49a321 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sun, 19 Apr 2026 07:47:15 -0500 Subject: [PATCH 045/455] fix(tui-gateway): dispatch slow RPC handlers on a thread pool (#12546) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stdin-read loop in entry.py calls handle_request() inline, so the five handlers that can block for seconds to minutes (slash.exec, cli.exec, shell.exec, session.resume, session.branch) freeze the dispatcher. While one is running, any inbound RPC — notably approval.respond and session.interrupt — sits unread in the pipe buffer and lands only after the slow handler returns. Route only those five onto a small ThreadPoolExecutor; every other handler stays on the main thread so the fast-path ordering is unchanged and the audit surface stays small. write_json is already _stdout_lock-guarded, so concurrent response writes are safe. Pool size defaults to 4 (overridable via HERMES_TUI_RPC_POOL_WORKERS). - add _LONG_HANDLERS set + ThreadPoolExecutor + atexit shutdown - new dispatch(req) function: pool for long handlers, inline for rest - _run_and_emit wraps pool work in a try/except so a misbehaving handler still surfaces as a JSON-RPC error instead of silently dying in a worker - entry.py swaps handle_request → dispatch - 5 new tests: sync path still inline, long handlers emit via stdout, fast handler not blocked behind slow one, handler exceptions map to error responses, non-long methods always take the sync path Manual repro confirms the fix: shell.exec(sleep 3) + terminal.resize sent back-to-back now returns the resize response at t=0s while the sleep finishes independently at t=3s. Before, both landed together at t=3s. Fixes #12546. 
--- tests/tui_gateway/test_protocol.py | 79 ++++++++++++++++++++++++++++++ tui_gateway/entry.py | 4 +- tui_gateway/server.py | 51 +++++++++++++++++++ 3 files changed, 132 insertions(+), 2 deletions(-) diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 926dfadf17..da154cc168 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -4,6 +4,7 @@ import io import json import sys import threading +import time from unittest.mock import MagicMock, patch import pytest @@ -432,3 +433,81 @@ def test_command_dispatch_returns_skill_payload(server): assert result["type"] == "skill" assert result["message"] == fake_msg assert result["name"] == "hermes-agent-dev" + + +# ── dispatch(): pool routing for long handlers (#12546) ────────────── + + +def test_dispatch_runs_short_handlers_inline(server): + """Non-long handlers return their response synchronously from dispatch().""" + server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True}) + + resp = server.dispatch({"id": "r1", "method": "fast.ping", "params": {}}) + + assert resp == {"jsonrpc": "2.0", "id": "r1", "result": {"pong": True}} + + +def test_dispatch_offloads_long_handlers_and_emits_via_stdout(capture): + """Long handlers run on the pool and write their response via write_json.""" + server, buf = capture + server._methods["slash.exec"] = lambda rid, params: server._ok(rid, {"output": "hi"}) + + resp = server.dispatch({"id": "r2", "method": "slash.exec", "params": {}}) + assert resp is None + + for _ in range(50): + if buf.getvalue(): + break + time.sleep(0.01) + + written = json.loads(buf.getvalue()) + assert written == {"jsonrpc": "2.0", "id": "r2", "result": {"output": "hi"}} + + +def test_dispatch_long_handler_does_not_block_fast_handler(server): + """A slow long handler must not prevent a concurrent fast handler from completing.""" + released = threading.Event() + server._methods["slash.exec"] = lambda rid, params: 
(released.wait(timeout=5), server._ok(rid, {"done": True}))[1] + server._methods["fast.ping"] = lambda rid, params: server._ok(rid, {"pong": True}) + + t0 = time.monotonic() + assert server.dispatch({"id": "slow", "method": "slash.exec", "params": {}}) is None + + fast_resp = server.dispatch({"id": "fast", "method": "fast.ping", "params": {}}) + fast_elapsed = time.monotonic() - t0 + + assert fast_resp["result"] == {"pong": True} + assert fast_elapsed < 0.5, f"fast handler blocked for {fast_elapsed:.2f}s behind slow handler" + + released.set() + + +def test_dispatch_long_handler_exception_produces_error_response(capture): + """An exception inside a pool-dispatched handler still yields a JSON-RPC error.""" + server, buf = capture + + def boom(rid, params): + raise RuntimeError("kaboom") + + server._methods["slash.exec"] = boom + + server.dispatch({"id": "r3", "method": "slash.exec", "params": {}}) + + for _ in range(50): + if buf.getvalue(): + break + time.sleep(0.01) + + written = json.loads(buf.getvalue()) + assert written["id"] == "r3" + assert written["error"]["code"] == -32000 + assert "kaboom" in written["error"]["message"] + + +def test_dispatch_unknown_long_method_still_goes_inline(server): + """Method name not in _LONG_HANDLERS takes the sync path even if handler is slow.""" + server._methods["some.method"] = lambda rid, params: server._ok(rid, {"ok": True}) + + resp = server.dispatch({"id": "r4", "method": "some.method", "params": {}}) + + assert resp["result"] == {"ok": True} diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index a9667528de..d2b82b9dab 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -2,7 +2,7 @@ import json import signal import sys -from tui_gateway.server import handle_request, resolve_skin, write_json +from tui_gateway.server import dispatch, resolve_skin, write_json signal.signal(signal.SIGPIPE, signal.SIG_DFL) signal.signal(signal.SIGINT, signal.SIG_IGN) @@ -28,7 +28,7 @@ def main(): sys.exit(0) continue - resp 
= handle_request(req) + resp = dispatch(req) if resp is not None: if not write_json(resp): sys.exit(0) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 70dff3b17b..6d0dbea659 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1,4 +1,5 @@ import atexit +import concurrent.futures import copy import json import os @@ -36,6 +37,29 @@ _cfg_cache: dict | None = None _cfg_mtime: float | None = None _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOUT_S", "45") or 45)) +# ── Async RPC dispatch (#12546) ────────────────────────────────────── +# A handful of handlers block the dispatcher loop in entry.py for seconds +# to minutes (slash.exec, cli.exec, shell.exec, session.resume, +# session.branch). While they're running, inbound RPCs — notably +# approval.respond and session.interrupt — sit unread in the stdin pipe. +# We route only those slow handlers onto a small thread pool; everything +# else stays on the main thread so ordering stays sane for the fast path. +# write_json is already _stdout_lock-guarded, so concurrent response +# writes are safe. +_LONG_HANDLERS = frozenset({ + "cli.exec", + "session.branch", + "session.resume", + "shell.exec", + "slash.exec", +}) +_RPC_POOL_WORKERS = max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)) +_pool = concurrent.futures.ThreadPoolExecutor( + max_workers=_RPC_POOL_WORKERS, + thread_name_prefix="tui-rpc", +) +atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True)) + # Reserve real stdout for JSON-RPC only; redirect Python's stdout to stderr # so stray print() from libraries/tools becomes harmless gateway.stderr instead # of corrupting the JSON protocol. @@ -200,6 +224,33 @@ def handle_request(req: dict) -> dict | None: return fn(req.get("id"), req.get("params", {})) +def _run_and_emit(req: dict) -> None: + """Run a handler on the RPC pool and write its response directly. 
+ + Catches any unexpected exception so a misbehaving handler can't kill + the worker thread silently — the caller still sees a JSON-RPC error. + """ + try: + resp = handle_request(req) + except Exception as exc: + resp = _err(req.get("id"), -32000, f"handler error: {exc}") + if resp is not None: + write_json(resp) + + +def dispatch(req: dict) -> dict | None: + """Route an inbound RPC — long handlers to the pool, everything else inline. + + Returns the response for sync-dispatched requests so the caller + (entry.py) can write it. Returns None when the request has been + scheduled on the pool; the worker writes the response itself. + """ + if req.get("method", "") in _LONG_HANDLERS: + _pool.submit(_run_and_emit, req) + return None + return handle_request(req) + + def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None: ready = session.get("agent_ready") if ready is not None and not ready.wait(timeout=timeout): From ab6eaaff2610ec236edbbe4d7729c103b816e573 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sun, 19 Apr 2026 07:53:01 -0500 Subject: [PATCH 046/455] chore(tui-gateway): inline one-off RPC_POOL_WORKERS, compact _LONG_HANDLERS --- tui_gateway/server.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 6d0dbea659..41d93db442 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -46,16 +46,10 @@ _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOU # else stays on the main thread so ordering stays sane for the fast path. # write_json is already _stdout_lock-guarded, so concurrent response # writes are safe. 
-_LONG_HANDLERS = frozenset({ - "cli.exec", - "session.branch", - "session.resume", - "shell.exec", - "slash.exec", -}) -_RPC_POOL_WORKERS = max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)) +_LONG_HANDLERS = frozenset({"cli.exec", "session.branch", "session.resume", "shell.exec", "slash.exec"}) + _pool = concurrent.futures.ThreadPoolExecutor( - max_workers=_RPC_POOL_WORKERS, + max_workers=max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)), thread_name_prefix="tui-rpc", ) atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True)) From 596280a40bc2807641a42625d172d97af30a841c Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sun, 19 Apr 2026 07:54:16 -0500 Subject: [PATCH 047/455] =?UTF-8?q?chore(tui):=20/clean=20pass=20=E2=80=94?= =?UTF-8?q?=20inline=20one-off=20locals,=20tighten=20ConfirmPrompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - providers.ts: drop the `dup` intermediate, fold the ternary inline - paths.ts (fmtCwdBranch): inline `b` into the `tag` template - prompts.tsx (ConfirmPrompt): hoist a single `lower = ch.toLowerCase()`, collapse the three early-return branches into two, drop the redundant bounds checks on arrow-key handlers (setSel is idempotent at 0/1), inline the `confirmLabel`/`cancelLabel` defaults at the use site - modelPicker.tsx / config/env.ts / providers.test.ts: auto-formatter reflows picked up by `npm run fix` - useInputHandlers.ts: drop the stray blank line that was tripping perfectionist/sort-imports (pre-existing lint error) --- ui-tui/src/__tests__/providers.test.ts | 9 +++++--- ui-tui/src/app/useInputHandlers.ts | 1 - ui-tui/src/components/modelPicker.tsx | 10 +++++++-- ui-tui/src/components/prompts.tsx | 30 ++++++++------------------ ui-tui/src/config/env.ts | 4 +--- ui-tui/src/domain/paths.ts | 3 +-- ui-tui/src/domain/providers.ts | 12 +++-------- 7 files changed, 28 insertions(+), 41 deletions(-) diff --git 
a/ui-tui/src/__tests__/providers.test.ts b/ui-tui/src/__tests__/providers.test.ts index a46102e893..2dfd76d022 100644 --- a/ui-tui/src/__tests__/providers.test.ts +++ b/ui-tui/src/__tests__/providers.test.ts @@ -4,9 +4,12 @@ import { providerDisplayNames } from '../domain/providers.js' describe('providerDisplayNames', () => { it('returns bare names when all are unique', () => { - expect(providerDisplayNames([{ name: 'Anthropic', slug: 'anthropic' }, { name: 'OpenAI', slug: 'openai' }])).toEqual( - ['Anthropic', 'OpenAI'] - ) + expect( + providerDisplayNames([ + { name: 'Anthropic', slug: 'anthropic' }, + { name: 'OpenAI', slug: 'openai' } + ]) + ).toEqual(['Anthropic', 'OpenAI']) }) it('appends slug to every collision so the disambiguation is symmetric', () => { diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts index b71a1dc392..258cf7cee3 100644 --- a/ui-tui/src/app/useInputHandlers.ts +++ b/ui-tui/src/app/useInputHandlers.ts @@ -7,7 +7,6 @@ import type { SudoRespondResponse, VoiceRecordResponse } from '../gatewayTypes.js' - import { writeOsc52Clipboard } from '../lib/osc52.js' import { getInputSelection } from './inputSelectionStore.js' diff --git a/ui-tui/src/components/modelPicker.tsx b/ui-tui/src/components/modelPicker.tsx index 406047bc11..5ee19e407c 100644 --- a/ui-tui/src/components/modelPicker.tsx +++ b/ui-tui/src/components/modelPicker.tsx @@ -181,7 +181,10 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke const idx = off + i return ( - + {providerIdx === idx ? '▸ ' : ' '} {i + 1}. {row} @@ -212,7 +215,10 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke const idx = off + i return ( - + {modelIdx === idx ? '▸ ' : ' '} {i + 1}. 
{row} diff --git a/ui-tui/src/components/prompts.tsx b/ui-tui/src/components/prompts.tsx index cd9c3a2d1d..f9d00dbfe3 100644 --- a/ui-tui/src/components/prompts.tsx +++ b/ui-tui/src/components/prompts.tsx @@ -155,31 +155,21 @@ export function ConfirmPrompt({ onCancel, onConfirm, req, t }: ConfirmPromptProp const [sel, setSel] = useState(0) useInput((ch, key) => { - if (key.escape || (key.ctrl && ch.toLowerCase() === 'c')) { - onCancel() - - return - } - const lower = ch.toLowerCase() + if (key.escape || (key.ctrl && lower === 'c') || lower === 'n') { + return onCancel() + } + if (lower === 'y') { - onConfirm() - - return + return onConfirm() } - if (lower === 'n') { - onCancel() - - return - } - - if (key.upArrow && sel > 0) { + if (key.upArrow) { setSel(0) } - if (key.downArrow && sel < 1) { + if (key.downArrow) { setSel(1) } @@ -189,12 +179,10 @@ export function ConfirmPrompt({ onCancel, onConfirm, req, t }: ConfirmPromptProp }) const accent = req.danger ? t.color.error : t.color.warn - const confirmLabel = req.confirmLabel ?? 'Yes' - const cancelLabel = req.cancelLabel ?? 'No' const rows = [ - { color: t.color.cornsilk, label: cancelLabel }, - { color: req.danger ? t.color.error : t.color.cornsilk, label: confirmLabel } + { color: t.color.cornsilk, label: req.cancelLabel ?? 'No' }, + { color: req.danger ? t.color.error : t.color.cornsilk, label: req.confirmLabel ?? 'Yes' } ] return ( diff --git a/ui-tui/src/config/env.ts b/ui-tui/src/config/env.ts index 999607dacf..60f1e80c53 100644 --- a/ui-tui/src/config/env.ts +++ b/ui-tui/src/config/env.ts @@ -1,5 +1,3 @@ export const STARTUP_RESUME_ID = (process.env.HERMES_TUI_RESUME ?? '').trim() export const MOUSE_TRACKING = !/^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_DISABLE_MOUSE ?? '').trim()) -export const NO_CONFIRM_DESTRUCTIVE = /^(?:1|true|yes|on)$/i.test( - (process.env.HERMES_TUI_NO_CONFIRM ?? 
'').trim() -) +export const NO_CONFIRM_DESTRUCTIVE = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_TUI_NO_CONFIRM ?? '').trim()) diff --git a/ui-tui/src/domain/paths.ts b/ui-tui/src/domain/paths.ts index 6b95dcbac1..43c023b6ba 100644 --- a/ui-tui/src/domain/paths.ts +++ b/ui-tui/src/domain/paths.ts @@ -10,8 +10,7 @@ export const fmtCwdBranch = (cwd: string, branch: null | string, max = 40) => { return shortCwd(cwd, max) } - const b = branch.length > 16 ? `…${branch.slice(-15)}` : branch - const tag = ` (${b})` + const tag = ` (${branch.length > 16 ? `…${branch.slice(-15)}` : branch})` return `${shortCwd(cwd, Math.max(8, max - tag.length))}${tag}` } diff --git a/ui-tui/src/domain/providers.ts b/ui-tui/src/domain/providers.ts index 02cc99b922..83ac016ff1 100644 --- a/ui-tui/src/domain/providers.ts +++ b/ui-tui/src/domain/providers.ts @@ -5,13 +5,7 @@ export const providerDisplayNames = (providers: readonly { name: string; slug: s counts.set(p.name, (counts.get(p.name) ?? 0) + 1) } - return providers.map(p => { - const dup = (counts.get(p.name) ?? 0) > 1 - - if (!dup || !p.slug || p.slug === p.name) { - return p.name - } - - return `${p.name} (${p.slug})` - }) + return providers.map(p => + (counts.get(p.name) ?? 0) > 1 && p.slug && p.slug !== p.name ? 
`${p.name} (${p.slug})` : p.name + ) } From 393175e60ce119f654d15dad489a8e282a532d24 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sun, 19 Apr 2026 07:58:33 -0500 Subject: [PATCH 048/455] =?UTF-8?q?chore(tui-gateway):=20inline=20=5Frun?= =?UTF-8?q?=5Fand=5Femit=20=E2=80=94=20one-off=20wrapper,=20belongs=20insi?= =?UTF-8?q?de=20dispatch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tui_gateway/server.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 41d93db442..3a48e381e8 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -218,31 +218,27 @@ def handle_request(req: dict) -> dict | None: return fn(req.get("id"), req.get("params", {})) -def _run_and_emit(req: dict) -> None: - """Run a handler on the RPC pool and write its response directly. - - Catches any unexpected exception so a misbehaving handler can't kill - the worker thread silently — the caller still sees a JSON-RPC error. - """ - try: - resp = handle_request(req) - except Exception as exc: - resp = _err(req.get("id"), -32000, f"handler error: {exc}") - if resp is not None: - write_json(resp) - - def dispatch(req: dict) -> dict | None: - """Route an inbound RPC — long handlers to the pool, everything else inline. + """Route inbound RPCs — long handlers to the pool, everything else inline. - Returns the response for sync-dispatched requests so the caller - (entry.py) can write it. Returns None when the request has been - scheduled on the pool; the worker writes the response itself. + Returns a response dict when handled inline. Returns None when the + handler was scheduled on the pool; the worker writes its own + response via write_json when done. 
""" - if req.get("method", "") in _LONG_HANDLERS: - _pool.submit(_run_and_emit, req) - return None - return handle_request(req) + if req.get("method") not in _LONG_HANDLERS: + return handle_request(req) + + def run(): + try: + resp = handle_request(req) + except Exception as exc: + resp = _err(req.get("id"), -32000, f"handler error: {exc}") + if resp is not None: + write_json(resp) + + _pool.submit(run) + + return None def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None: From d32e8d2ace98a24ce22d014ddf8da44812aee37a Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Sun, 19 Apr 2026 08:56:29 -0500 Subject: [PATCH 049/455] =?UTF-8?q?fix(tui):=20drain=20message=20queue=20o?= =?UTF-8?q?n=20every=20busy=20=E2=86=92=20false=20transition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the queue only drained inside the message.complete event handler, so anything enqueued while a shell.exec (!sleep, !cmd) or a failed agent turn was running would stay stuck forever — neither of those paths emits message.complete. After Ctrl+C an interrupted session would also orphan the queue because idle() flips busy=false locally without going through message.complete. Single source of truth: a useEffect that watches ui.busy. When the session is settled (sid present, busy false, not editing a queue item), pull one message and send it. Covers agent turn end, interrupt, shell.exec completion, error recovery, and the original startup hydration (first-sid case) all at once. Dropped the now-redundant dequeue/sendQueued from createGatewayEventHandler.message.complete and the accompanying GatewayEventHandlerContext.composer field — the effect handles it. 
--- ui-tui/src/app/createGatewayEventHandler.ts | 11 ----------- ui-tui/src/app/interfaces.ts | 5 ----- ui-tui/src/app/useMainApp.ts | 15 ++++++--------- 3 files changed, 6 insertions(+), 25 deletions(-) diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 699a3794de..8f45bb3d7e 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -46,7 +46,6 @@ const pushNote = pushUnique(6) const pushTool = pushUnique(8) export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: GatewayEvent) => void { - const { dequeue, queueEditRef, sendQueued } = ctx.composer const { rpc } = ctx.gateway const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session const { bellOnComplete, stdout, sys } = ctx.system @@ -394,16 +393,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: patchUiState(state => ({ ...state, usage: { ...state.usage, ...ev.payload!.usage } })) } - if (queueEditRef.current !== null) { - return - } - - const next = dequeue() - - if (next) { - sendQueued(next) - } - return } diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts index 353c56535b..af13e047c7 100644 --- a/ui-tui/src/app/interfaces.ts +++ b/ui-tui/src/app/interfaces.ts @@ -193,11 +193,6 @@ export interface InputHandlerResult { } export interface GatewayEventHandlerContext { - composer: { - dequeue: () => string | undefined - queueEditRef: MutableRefObject - sendQueued: (text: string) => void - } gateway: GatewayServices session: { STARTUP_RESUME_ID: string diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index fb48badea9..e0c18dec64 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -380,12 +380,13 @@ export function useMainApp(gw: GatewayClient) { sys }) - const prevSidRef = useRef(null) + // Drain one queued message whenever the session settles (busy → 
false): + // agent turn ends, interrupt, shell.exec finishes, error recovered, or the + // session first comes up with pre-queued messages. Without this, shell.exec + // and error paths never emit message.complete, so anything enqueued while + // `!sleep` / a failed turn was running would stay stuck forever. useEffect(() => { - const prev = prevSidRef.current - prevSidRef.current = ui.sid - - if (prev !== null || !ui.sid || ui.busy || composerRefs.queueEditRef.current !== null) { + if (!ui.sid || ui.busy || composerRefs.queueEditRef.current !== null) { return } @@ -416,7 +417,6 @@ export function useMainApp(gw: GatewayClient) { const onEvent = useMemo( () => createGatewayEventHandler({ - composer: { dequeue: composerActions.dequeue, queueEditRef: composerRefs.queueEditRef, sendQueued }, gateway, session: { STARTUP_RESUME_ID, @@ -432,11 +432,8 @@ export function useMainApp(gw: GatewayClient) { [ appendMessage, bellOnComplete, - composerActions, - composerRefs, gateway, panel, - sendQueued, session.newSession, session.resetSession, session.resumeById, From 923539a46b801a1ba993fae13f3a02eb91d51c7b Mon Sep 17 00:00:00 2001 From: Austin Pickett Date: Sun, 19 Apr 2026 10:48:56 -0400 Subject: [PATCH 050/455] fix: add nous-research/ui package --- .gitignore | 5 + ui-tui/package-lock.json | 38 ++- web/package-lock.json | 236 ++++++++++++++++++- web/package.json | 4 + web/public/fonts/CourierPrime-Bold.woff2 | Bin 11588 -> 0 bytes web/public/fonts/CourierPrime-Regular.woff2 | Bin 11192 -> 0 bytes web/src/App.tsx | 107 +++++---- web/src/components/Backdrop.tsx | 77 +++++++ web/src/components/LanguageSwitcher.tsx | 2 +- web/src/components/OAuthLoginModal.tsx | 2 +- web/src/components/OAuthProvidersCard.tsx | 8 +- web/src/components/ThemeSwitcher.tsx | 124 ++++++---- web/src/components/ui/button.tsx | 2 +- web/src/components/ui/card.tsx | 2 +- web/src/components/ui/label.tsx | 2 +- web/src/components/ui/tabs.tsx | 2 +- web/src/index.css | 242 +++++++------------- 
web/src/lib/api.ts | 24 +- web/src/main.tsx | 2 +- web/src/pages/StatusPage.tsx | 2 +- web/src/plugins/registry.ts | 2 - web/src/themes/context.tsx | 213 +++++++---------- web/src/themes/index.ts | 4 +- web/src/themes/presets.ts | 215 ++++------------- web/src/themes/types.ts | 66 +++--- web/vite.config.ts | 54 ++++- 26 files changed, 798 insertions(+), 637 deletions(-) delete mode 100644 web/public/fonts/CourierPrime-Bold.woff2 delete mode 100644 web/public/fonts/CourierPrime-Regular.woff2 create mode 100644 web/src/components/Backdrop.tsx diff --git a/.gitignore b/.gitignore index e516d154f3..8b455cf506 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,11 @@ environments/benchmarks/evals/ # Web UI build output hermes_cli/web_dist/ +# Web UI assets — synced from @nous-research/ui at build time via +# `npm run sync-assets` (see web/package.json). +web/public/fonts/ +web/public/ds-assets/ + # Release script temp files .release_notes.md mini-swe-agent/ diff --git a/ui-tui/package-lock.json b/ui-tui/package-lock.json index 0b33e6e334..1e8e5cfa4f 100644 --- a/ui-tui/package-lock.json +++ b/ui-tui/package-lock.json @@ -89,6 +89,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -318,29 +319,6 @@ "node": ">=6.9.0" } }, - "node_modules/@emnapi/core": { - "version": "1.9.2", - "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.2.tgz", - "integrity": "sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/wasi-threads": "1.2.1", - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/runtime": { - "version": "1.9.2", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz", - "integrity": 
"sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, "node_modules/@emnapi/wasi-threads": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", @@ -1484,6 +1462,7 @@ "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.19.0" } @@ -1494,6 +1473,7 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1504,6 +1484,7 @@ "integrity": "sha512-eSkwoemjo76bdXl2MYqtxg51HNwUSkWfODUOQ3PaTLZGh9uIWWFZIjyjaJnex7wXDu+TRx+ATsnSxdN9YWfRTQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.12.2", "@typescript-eslint/scope-manager": "8.58.1", @@ -1533,6 +1514,7 @@ "integrity": "sha512-gGkiNMPqerb2cJSVcruigx9eHBlLG14fSdPdqMoOcBfh+vvn4iCq2C8MzUB89PrxOXk0y3GZ1yIWb9aOzL93bw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.58.1", "@typescript-eslint/types": "8.58.1", @@ -1850,6 +1832,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2185,6 +2168,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -2870,6 +2854,7 @@ "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", 
"@eslint-community/regexpp": "^4.12.1", @@ -3765,6 +3750,7 @@ "resolved": "https://registry.npmjs.org/ink-text-input/-/ink-text-input-6.0.0.tgz", "integrity": "sha512-Fw64n7Yha5deb1rHY137zHTAbSTNelUKuB5Kkk2HACXEtwIHBCf9OH2tP/LQ9fRYTl1F0dZgbW0zPnZk6FA9Lw==", "license": "MIT", + "peer": true, "dependencies": { "chalk": "^5.3.0", "type-fest": "^4.18.2" @@ -5121,6 +5107,7 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5220,6 +5207,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz", "integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -5992,6 +5980,7 @@ "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.27.0", "get-tsconfig": "^4.7.5" @@ -6118,6 +6107,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -6227,6 +6217,7 @@ "integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "lightningcss": "^1.32.0", "picomatch": "^4.0.4", @@ -6635,6 +6626,7 @@ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/web/package-lock.json b/web/package-lock.json index 71ca2c7a7e..47c6595ab6 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -8,6 +8,7 @@ "name": "web", 
"version": "0.0.0", "dependencies": { + "@nous-research/ui": "^0.3.0", "@tailwindcss/vite": "^4.2.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", @@ -64,6 +65,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -985,6 +987,66 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@nanostores/react": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@nanostores/react/-/react-1.1.0.tgz", + "integrity": "sha512-MbH35fjhcf7LAubYX5vhOChYUfTLzNLqH/mBGLVsHkcvjy0F8crO1WQwdmQ2xKbAmtpalDa2zBt3Hlg5kqr8iw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "engines": { + "node": "^20.0.0 || >=22.0.0" + }, + "peerDependencies": { + "nanostores": "^1.2.0", + "react": ">=18.0.0" + } + }, + "node_modules/@nous-research/ui": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@nous-research/ui/-/ui-0.3.0.tgz", + "integrity": "sha512-konGgtV9lkzqYkWuoUGnROqavq1svTnGbERLKItvEXmsRz4xRtbAMHI8rK6sjGpHDpwvOUN3olcOhRLTGuVfcA==", + "license": "MIT", + "dependencies": { + "@nanostores/react": "^1.0.0", + "class-variance-authority": "^0.7.1", + "clsx": "^2.1.1", + "nanostores": "^1.0.1", + "sanitize-html": "^2.16.0", + "tailwind-merge": "^3.3.1", + "tw-animate-css": "^1.4.0" + }, + "peerDependencies": { + "@observablehq/plot": "^0.6.17", + "@react-three/fiber": "^9.4.0", + "gsap": "^3.13.0", + "leva": "^0.10.1", + "react": "^19.0.0", + "react-dom": "^19.0.0", + "three": "^0.180.0" + }, + "peerDependenciesMeta": { + "@observablehq/plot": { + "optional": true + }, + "@react-three/fiber": { + "optional": true + }, + "gsap": { + "optional": true + }, + "leva": { + "optional": true + }, + "three": { + "optional": true + } + } + }, "node_modules/@rolldown/pluginutils": { "version": 
"1.0.0-rc.3", "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.3.tgz", @@ -1638,6 +1700,7 @@ "integrity": "sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.16.0" } @@ -1648,6 +1711,7 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1707,6 +1771,7 @@ "integrity": "sha512-XZzOmihLIr8AD1b9hL9ccNMzEMWt/dE2u7NyTY9jJG6YNiNthaD5XtUHVF2uCXZ15ng+z2hT3MVuxnUYhq6k1g==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.57.0", "@typescript-eslint/types": "8.57.0", @@ -1984,6 +2049,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2092,6 +2158,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -2269,6 +2336,15 @@ "dev": true, "license": "MIT" }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/detect-libc": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", @@ -2278,6 +2354,73 @@ "node": ">=8" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", 
+ "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/dom-serializer/node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/electron-to-chromium": { "version": "1.5.313", "resolved": 
"https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.313.tgz", @@ -2298,6 +2441,18 @@ "node": ">=10.13.0" } }, + "node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/esbuild": { "version": "0.27.4", "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.4.tgz", @@ -2353,7 +2508,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", - "dev": true, "license": "MIT", "engines": { "node": ">=10" @@ -2368,6 +2522,7 @@ "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -2718,6 +2873,25 @@ "hermes-estree": "0.25.1" } }, + "node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -2778,6 +2952,15 @@ "node": ">=0.10.0" } }, + 
"node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -3223,6 +3406,22 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/nanostores": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/nanostores/-/nanostores-1.3.0.tgz", + "integrity": "sha512-XPUa/jz+P1oJvN9VBxw4L9MtdFfaH3DAryqPssqhb2kXjmb9npz0dly6rCsgFWOPr4Yg9mTfM3MDZgZZ+7A3lA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "peer": true, + "engines": { + "node": "^20.0.0 || >=22.0.0" + } + }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -3300,6 +3499,12 @@ "node": ">=6" } }, + "node_modules/parse-srcset": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/parse-srcset/-/parse-srcset-1.0.2.tgz", + "integrity": "sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==", + "license": "MIT" + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -3331,6 +3536,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -3391,6 +3597,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", "integrity": "sha512-9nfp2hYpCwOjAN+8TZFGhtWEwgvWHXqESH8qT89AT/lWklpLON22Lc8pEtnpsZz7VmawabSU0gCjnj8aC0euHQ==", 
"license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -3400,6 +3607,7 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.4.tgz", "integrity": "sha512-AXJdLo8kgMbimY95O2aKQqsz2iWi9jMgKJhRBAxECE4IFxfcazB2LmzloIoibJI3C12IlY20+KFaLv+71bUJeQ==", "license": "MIT", + "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -3509,6 +3717,20 @@ "fsevents": "~2.3.2" } }, + "node_modules/sanitize-html": { + "version": "2.17.3", + "resolved": "https://registry.npmjs.org/sanitize-html/-/sanitize-html-2.17.3.tgz", + "integrity": "sha512-Kn4srCAo2+wZyvCNKCSyB2g8RQ8IkX/gQs2uqoSRNu5t9I2qvUyAVvRDiFUVAiX3N3PNuwStY0eNr+ooBHVWEg==", + "license": "MIT", + "dependencies": { + "deepmerge": "^4.2.2", + "escape-string-regexp": "^4.0.0", + "htmlparser2": "^10.1.0", + "is-plain-object": "^5.0.0", + "parse-srcset": "^1.0.2", + "postcss": "^8.3.11" + } + }, "node_modules/scheduler": { "version": "0.27.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", @@ -3647,6 +3869,15 @@ "typescript": ">=4.8.4" } }, + "node_modules/tw-animate-css": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.4.0.tgz", + "integrity": "sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/Wombosvideo" + } + }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", @@ -3666,6 +3897,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -3751,6 +3983,7 @@ "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.1.tgz", "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==", "license": "MIT", + 
"peer": true, "dependencies": { "esbuild": "^0.27.0", "fdir": "^6.5.0", @@ -3872,6 +4105,7 @@ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/web/package.json b/web/package.json index 09675d283f..e10a10127e 100644 --- a/web/package.json +++ b/web/package.json @@ -4,12 +4,16 @@ "version": "0.0.0", "type": "module", "scripts": { + "sync-assets": "rm -rf public/fonts public/ds-assets && cp -r node_modules/@nous-research/ui/dist/fonts public/fonts && cp -r node_modules/@nous-research/ui/dist/assets public/ds-assets", + "predev": "npm run sync-assets", + "prebuild": "npm run sync-assets", "dev": "vite", "build": "tsc -b && vite build", "lint": "eslint .", "preview": "vite preview" }, "dependencies": { + "@nous-research/ui": "^0.3.0", "@tailwindcss/vite": "^4.2.1", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", diff --git a/web/public/fonts/CourierPrime-Bold.woff2 b/web/public/fonts/CourierPrime-Bold.woff2 deleted file mode 100644 index 4f6d5e9c863cad49d54112e119f708ed9f644d74..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11588 zcmV-KExXcpPew8T0RR9104+oS3;+NC0BSG*04(hQ0RR9100000000000000000000 z0000Sf;0wT0D=Y)2nvC`RDqWO3xh%c0X7081A{^YAO(aR2ZT%;#ZzfLfR%=rSX2Ms|wyPJk4C4O2{mVuN(2{xSU>0i9&{5DX8&w$EXZ>=F^OwlT(Ih*8^NNL9K+ zdR24^uK%{vBtF1FleBwnR(NDj^5oe8m@8AQbcf4b0`fdd-Q63O>YWKf@}0#gi7vg5 z20Z{~J%|OcPb@$XG-oIS+NU6o#geK87y5z(stQNF@n$JYo}r9`-Z+yPYg3oH^z-zC z?9G;1B#K*Ea*D$+GFEur{FvH5vO3P(Sf4CfZf=Yi8sHhLu?2cB`p%IkT)FWmkEZ62~ORHKHt#pxj zd>8r{l!#;}Hz9~swST$Xs?6C6K=+ayJp8K8vj(EU*C2ph`LXpXO~VBMX&Ij%F=3Gz z=cquimtm)-dh9&tuC3be9h!FTWP4o&phMR(`MM@_kXBv3e)$Z5r%MBdthLvKIp2jz zv|iSy`n-b$-WqRhD}H#FdP8$!3wRkR@fuZA3!|r=yp1|c4WW-xfKiA3o9R}BBMp0BOO{miE0@0 z6p;^%VRy0!wJG@&6m@2;A|Ol^L>O*>9hR`*V_~AmU`K#Ps#VikDMB##MG|_mV5S-l 
z`4I}?y_@(+cZzYmwA2JmRf++%TIwFBGFQ1x&l*!3KYK0fSdq9l>kZ+)*|_+rUBAO z$V{<9WB5Sg_)OTO>@=k)Ol>q}bf_Xsa7nY->*E7>L_QNd(Klr0LB&R8o(8vO;sSJD9Efq= zuZ$b$)pa`bT$k~a!mMQjB|<}r(wg*J2h+$&0lknC$%>v1u;B{=BSPNhZ-A`p&)^I5 zKwJL}6$doG5`421B))*pTym-06>JjZ`rin?%2|TWp?jnqJyP)$#3AIZKMEeI06!20 zLvh4iKmdswS)}2VAX+F2vrh!x^e;=~^FoN=8FgQSa*Vt=5w^M)@*i~KS(a^;Nyqqz zA6n?rh8>8HSn{7KCurrm{nQ_v0<0BvJ2HE$F&qPSp)n=!fEI$~5+-d4tu=3S#~hHi zNxEb-ZRA$)XyP!;OiPs<>ob=S?gsRr$|}RIq}zhjE+*3l=tvS-8olC=5b0flg(24s%aC5T~OaETL5a+I|@Y zRoA;AU15-Y*QOZ;zWsauc6zM)oJ)^f)XkjPJ9?^YZ*}i96`B|;|H{gb64UHFfpEiu zY(v2N|IMSFS22QG-}1_NT}v(4l6;E(mU2EIInzG@f znPX-tUnAENp_!kY6S~H;bbeqaQ;i+%^LCYE8DzGBjyi}KhZpA&QnjKG><9n&lF~@3 z_)fAMnHZ)K*`8*TA~O?Ko(HYT9Kc0MQ!R7KwvR&?v2XZ4R%?twSwSWlDb$w+7n-;;C-T!hu677S=Xb9 zR7G=d(vl8FvP79{UdvS+35r_eIleeEBPfzgX%#a>Jaq<6HF~TuXVH#Zxg(lKMEQ+- zB!xW#MISj@F=~27@91gLbQ>est!8IXTp#RNWbKQaaT~K(N~q7l0*ZFYX_4?s*4?ri z-<&GlzOkiGO+8H!U+=DCrh4kGfGELU1o1JdCbG04Imhs3vb|GjU6XQ;=yk_Cy$5a> zw>#|_ZJ14=c*|E`Iw&4EEFNKzP%z-kPN?ivv>#nUjZfx-1M>Ehrm7d%jTqKDsvN>J z`DDzk<99x4$eJ=V2cSm6kBOL5*^2rJw%SRJz6d^E5kEwnfZLpd6l%^?mZ(+YWqVLA#bEwz|I+;v~8S<4)c<6lMWf-~O)Q4j3x@YE1)wFn| z6v-92#-@*Hh$rxGx@Q}v=m^X_=4Nq1>?<1A9p&L3coVNe84^&NN23)^pJ+(nI^sbk z0x3^Oy%2QYC=LQuBH#c|in=3<)XFauga!dtr$e11`Goq=2ql7k=+)qIe@xhi*ej~V zxJj7tZC5x0vb5rUQ(=FyD9Z}ab%{e7%e*k~CBg~;!hp<44T2K_jSTuizQ8jw+%f2= zP+=Q0n+>Kj`^s!k1QQwBxz-IS&IC=zup216U^p-ZSb-i`MgPM?Oa~xSm=2V0Wx0cl zfX#6~nL9f=sgk~mV8E2bA+rd<@SYxE$%d%sMJZ{^Y*J}V5FBGq1r*&9v{@mF6!kNNK?zJnk}Hq zz~e*}Bz^g$PaiEMVEHn=ku^>xwAOhPl{uVA_hw%!< zX9|3T;$y zXeuo$X8%V;@jLWYm|$nFrV zDeGEiHM{k>z7-i#Cgos^f>2(+97%%G$RJLm3=m!McvQ?#55@tC%CQdeZU2kXDc7nAjSr-GhI~(`{%|oMPeUx0ZxlDMmi<6lPGcp&w-Fs=s6uS@Gf*U6J`E zi2@L+^sKYSUc3%N8CnD_D*qUSTj1HZcFf#Kb?21EQL=LegH`^9tUqqB3mwv=5w2E5^{=Y9XIpIDsZiYG?lqe-F5n<1FbN&iq(kTm^}?v z>6?3_#E^QvatZ+96dF~jkSQgb^ih;F-Iis&@^AgCVHOU0??5KHiXSlV7SnDq>$-?c z6^Uh{&jiVA7}V`7B^AuRUjH@B8fZABzZ_XF*P>|c9!(%vlc&OwS9arUp!l(9tw$!v zrp3EeouC-XV4BciGUZRcIpoz?b9b!Ib;{_oPNquT5(7v1a+=X&oLGkKfqjcL^#z%T 
z7Aw%9Eai$GS(nB45J)6dzhTI6tvJ-I1Ai^{mvB5YU+K*9mzAp8VvWySvSrnJYgV?g z`KP~Tv7ZB9pmM>S(FN(${r}3WW$E-YPlNFWG_dI;so2Udm10&)1F4{kSJhdm4mE)NH{ z5XzpBbWFul9U^enAYiMEP#N))!7F~%l{2Kf3hC1gDCU=w8X*}FlKEIrVn7e7>ARJF zUQ{0a5|s!M;5)=%Y==)1qYo86)063kg_Yf6n$NNCtd!xT;pUAA;D<`Y=wVO8Od+*s zL5Rdfly=U5_t3Zmla_9&$@I%^KMw2yS8{?-&MTK&MBsUfcnk7`One!=df`V9uU^JU zZT&(u)Fm!#TNpB1Yh-F^))l%hJQjp@nOT||tu@oIK9;C!NVRlmdR(B%0Z??mcwxMi zp(!b$Z%>RD>gUzwlOb5>A><9Q_;iP0>OQsQHidFqiQeh=x&aOSu$q7`0efvpA$Hb! zs~PtfAIwa&@|sCo(0hDeH*R|8{oZD`^tiUP?M~l+&GxZa^m`4`<#-8vX4R}dWDvlX zLieF5nx{tqHyOLN+A!QM0-so(CW&ny6_)87g(Y5I-Mwn5h6rdU`(l11yr=m~n1XEV zJ~lEJH`W<85|`AlJ2-Qd$~V$+cr6Z37kKnKIR+E@=UTS`^h-^BXlT7a04z8bLks?_j8T`kl?$SbZjM zS6uA&%+$oKiM}zFbs3T!lkw#S?mBpmvriz`OZ4vib-+Bnq=o#*-byBKWy{IxARJu& z1?84GMkrmDr?ZXCmAV_8hYx`XXA3!dvUE{KkGe!Zzuzppd6O|?ekKqPHaV;G5#IX< zq(5ukQA}Cn8|s2j5LEEA#)35T^0wKo4o)G8kt=b;^X`hz9v*jjct=St169b;an1AP z5XqwEX#oU%QWv--Sf-SBKEOF<^;aa?z)+wB!^?;K>ide(47f%xAOWMi8{q#dQ)6TK zJ9I#t!z2c&lYyg;SbhSn>Jv4!e~}CS1kl$+MEi(^E|JtflZX7JDc35n6)vX7?f3q*pD*pQeUc%H>N1&1fb1@eNTgT-M&tBAIioFlCk*-gOFfa_}ByWyw! zDmyjWThQp^FegT5z=t=`ae=ds2f3$LKQo4b??u%q7Hb}!HC#%ym(|T=CLXQ0_V2C5 z)LF97ImVTgujFmv&C=r3*r+5)bV?e$$B8X8!xPPJLNd@fI(va{f{kxFf%rYFWwZHu zNvn5!@pMYsxV)tF(2ews8_s31Q+wQ|v+mDQize10d&UwSX?!x-O&CXF#^of031xAy zpl_j4-f*F{S(`r})Pr!xul3j(q@bD$%yTFD1TFd zT)y{8Y)cEUc*+a&dQzcqLTtS>*?>)6HM}R$;Q;|rVWFYGf-^6E>|;s1Kp=^Yi{l_q zijM<%4^8hT@Z=e-M#_;qfyWRJ1$mo{%91fl#5$QH=m$noLOlwi7&+}fPV$EBRW&Dn zYAivJ3kI% zROHjz7Mg*QArzke7XWU-_|V<-_wEMK$u3z!a8j62_~xpZtoqWN_*+|Aq9yjF_@EH9 zdGO!^KfYIkrTu4&!zrjVydv@Nc$%X1PH)+lzOtBuDw(3A+Obs6h`BM})b#LC-d?O& z^`T|Mr_rTZ&G{! 
zha2OU-23J1`EEf9FXJp2T+}>+H$qxI#mxtuSy-9Xz4^-g?C|xT_&wxg%3nqlTk(Re z`@%xr!pJ`Z9aeL*tyb>_1`o#62S7X7fLf^j>ulAZpT~{=bsc=vQ8o>kwmG{;YcFVX zckDVlJG<-5j$AN!sak}|7H8wI^8e4U6#uwB6SiN6rnsE{U>URok2La(N)Tj^cUc>a znx+2rgc_Ro;}s3ahMPwK`=!RLCXDXRaXHEP`Qmik*tt*I(H-8-Z|0QQ`KOlc+G}DQs?lXIZ`#Q2JGZ+4DWsyH91e z*m=cPAc;zz!^?%Zo#-(q~eKg&gO54#|o{@p#J`@i(es}j%=%|HDeR&#S` z`)6QeBn)}>k94hUzA`_*`D%AJ1?5Cnx`B6)>fNmi7vA0edGN3}^4VAPA^VO94+twO zt0Xd6RZ`^*MsDiRnV#;@<{pu`b~GK=qU6 z@GpP)#{4aCDQvh%Ey0+Rc0OqlX#q|G`n*5PNqrMW^kjpp3vQR^ywG_3+dy_&NLUy? zgKV0HMgo zYYH+s_Wdzw{KVkn@5dTke8B_od#Ft|3MoEaMHg;l_VfVD++98t^&2T9DvC-{Qn*e* z6w2YM#pmM(Ssj_Yzti7;(|OAmlhvBK_7;f`Ghb%%mEnmXOzbb*TQ7RjNn%6hOsn@i z-1ks@ic4M>=A>snsgPP%6F*S72Y<_Cb|rB|Y2k>5d1@_JQxTyI*>rETig0UVAS9gy#<#PU$O z2AItM-eTSFB-mZCOV~#bp;0M3DmS=|agg0?y#aVy@mgrqsnt)^e4!!dSw{!36f~#q zNN|scf%NDyM@HLRT_JWcGRN1yEHV;U3fL?Oi>eTb2-8k15YVT71_v5eZx<(iq>E2T z0gG8C6f))yNQiJREzA{TOd^P^ZAoKNcdbv(@T=OoXFWaj&mY&c?dOy+?AR9FV7wNx z@H@VVH;@*JagW%MklMpxQLCR5N^jMR9z_!2WE1zL2j~U$LQ~=ELFfk5WEN2uBinJ5 zam2vtuiO3B)`i0v_}j$H;b*m_PdD{cKKrS5==h4uwv#3y^2j()A0-ZXcWlI3_stQ} z2D%#@QX3EWd0F-=yyh|d^~{O+6HAKoIT1R0{M-z1_@v$u;V$4oTIkKS@2mJ*7vDU5 z6L$kI#eQR;Z+-~H7^+S)%c=`9bFv*;eMOBIs`4JRw*$+_Mvs-E4GIN4P%=B0;a4aU zVfs~O1Melg7W%pJ_=Kb*B6MrU9--U$5mGWmo)H1e92sk$R7JUHoyjqPA9+*b4|biT zpZG7iGdD6G`N8g90MbJDbnesPV+^lj@a{jK7h_wezps7__XlpHETc5x?dwe2e)V@o za$t4dz_y?egX;J#v(B^8hQip=6LS_-`XboM?Za_B)1B*jZVwi2sFlxoUye84m&$dlO^y*9JdfYg*)FeyR?M zPK5!hR+lef25F%u0_FLgnV_XoCzQ@)X2>Jcm*Wh(gqr3hBc%E@mRYH#H=Zi(QVTY8 zSMZLD+wSm5-W!d%scA_v$Xuti-8*g@pWHYSlrvNolBn7d5P~#rK4@!&LYOdp8 z#wp4^!O)Wa7Zz(3F1Yz{!p!k=v+y~&I_^1ZNXrMEBRjQ_(&?$o3@oV(B0_b3A^Iu{#?cME%D%tan}^sY6D zt&o_JKAXAsWz^nyG|Gj<>H~s`y(3ix=YWiVL~#rIz9ogrA{5 zYJBo^1vVT)(v@baliNeVO*vszmn6m)sT)Li2LzuV!A4Y&h3Bh88j&Og( zqrud#P#QBmar;C<1(?YGH8UPtyjg8*7Sz&`L@q1@79mJkTYYnJY@VhjT2hgmCTVPZ z4GcYBtU<~pkcx^xz}lL{=R>%P4PEjgBQ_Ttupr0AQoOu?rJBKjES_LSy2;1Y462iR zzrNJ6t?>;LHMWjv@T;`<$mXeJN;Y|mguB$@1Ig#o)p>ocgSN-7=u!DEOUpUe2XJt# 
zGo(y(OYKYRUZ3)g5fxnB#+7wJ@2|OAPl@zLk0TKnSjYc-5Y2+umrnCvst5j8(LK-z z6wc$tXNG-w-(gQcFKYDn4x*>02Qd;G8=DaZp$j$+U#QH#*W8S@3ssGcVQ})5jUg2> z%YO7)Q_C}_3rzK!BW##_abi>yb@7?x)vRw?aQd{B=~{HZrGO$E6vu4O&u1)OhB83= zHG^9^mh85m(Ke}6iU=E*O^m$NPUkoGHLWigs_4upx3Z#h=~2;jPO5d5caQ8aw|5K1 zY_|@qJ5*tBpXKhE<8pzXX`TiK7le!OV*zf2vkk4hNg`>LXfY_RVJx-Q|Eg5hzgcSQ z?qdtRaWhBfL$rBE8te}9~56X)YW3g`PKhz~eMms0CGE$VhK@xbW_X8QjpnLp+ zp1=fQk}-$ey^iv}R=abE)Q4VBkbyR#>Pui6yZpyqNC;>vW3;)VqMG?vaB6V<@rEA+ zyKwC|C)D=CA59Rn>>4biKh9O+nT)O^lPl2z)*{K7OA6!!W0{MM>X4oBK{JWD5bs?5qe(p3cjJ5`BU$b$@<{*SkA?3L}Er z!ZTiH!RV+o4sw78-6u8+b-&|X_xe|D9Xm=K+{nqts|HdT^Hl~@mKHhFH?W?s|9M;O^pd_>`m-TP4H zHgc@|7$6>=dYZXomU~g&0QBh%pE&b6@6G>a0LfUKnGf^0MRWp9R3gmUhY0GqR{$W`s}PaDI<} z3;L6mVA{T8+DIpAB^%{d7Xm+U6Iuj1I0Rbw=Y~VA4DWuL8|>!>mFnp9)6&rVevJF}0dGXL2%A8De#~09W#a!*&+oiTJ&zuT z!UJ5iNHhmpfJheo!K`58^s`9l_JsAJDYmdd-QMsV3cxYx^y|~zB zV($KI2M39BF1O9y+$0xVV;C@PT=ngN(M#XC> zT$>Yz6<9(zcU+Vb9SfZeV4Z<_4EuvcF%eO*MfuSZ>R_sXz(^U4s7dGrWFKCj|+?Go1}*Ult$@WNW9s;B)wI9cei`J52fXluk@ zMsD^ZE1Zivn>g$ku{SbVoV>|*g(c7zebx_>xvp`H^ zCZ-67Bbi2~&rJ)gjMt#q5C{6r7Lv4Wu`= z9|>ey_*0M)n&NPF`|scEfc9mkS052f`?gt|@lw+hY9^Mp-yyroIhi>;OY8C3(usH7 zu5iLv7^~jc0^@tdM^8)SXu6BwULfi}RA`nyI#8ajb}0DneeBwxKR=Jt{?Ix4w&%x< zr`1)yqxo^cYrcSX9rdu$ClP8KHPS?rqN`pRpq(FGz@kO@+j$L<^Vbrv>A<1Uvx0-Lc?enS-uSgHz^c=pB`xQvE( z9w4h{S;^&A3IxD{V=9yfJy`S_Or;1?u;sAIIVXn!t$(FDi$vbWep4{L}Cc6xIP z@9-N#^o;N7?*cs+@0T9-*1gZC>8`c-b>zfH`-4~Nekm;|xt~-?V}>)6(cXTV08oS} zXhz5&tMndxG9YcP#=vcCR~NH8cmBEa?;SIj=+OpaR;-G;jf^|rBSB@qqp?xXGk;DZ`I{jhCygA>v9!!NE*U{QHuEQuo}1(5%#ulj zubaEScNd%g6N-?hDU@mZ>ID0*RO(mzglhX~3gtAsm3G$qN8Is;D7>H^ilS1ZXf$BK zu^2J|i5tDJi<7Ndy|7sc{O>t-&!2wA(}0_KH}nV@c~vQVNgz7#)M9ADQ#`?i{sh`x z!)We~+DVa;?~ooT$9=&wWBTnZbuxKTl;gs%eF|+q2vW-UkMp}?>)5p{XeS%At%QlL z@EU)YO80*`+$1+>t?%v=14lT;+okQnSTm?(x&a(8tj8$Oho4`{5f7aS!xi=x5y*qn%^j5ngv?^mTp&&*b zCunuJ2XIQqvn>_vF=saGl#o`?F$`9RR{*EB&I+a{vnQaP@(UEO70xIESaDT$0IS10 zfKw>L2tVy`&`F}LnmH8Tgi9cvua96pD@-@I1Dednh 
z;(^PnO17bE?uhqn&+M;ozHq5t{Ad!`JZoS-REq>};_hU8Sl$^XdZEXVMx4v1inp3u>F(G!M5`a_A zmj=CWmYG;ifUIv*tmc%QJcKN6ua?+wN=)refFD5expqV&fNSDp&CzRPJk9Hih(@?3 zHJVy8jR39*JLEHTERs=d7L%z&&NW4V98Ij)PbG4$2~)e%%<_>CXe3&8l%5(#sAP;w z&Wvnm9(z2=ztF7uJ!$cBz}G~0Qq(VDrlygQ{DXON);ZpGa$xLsu^(j>V7{lO_3>D$ zvEL6wO#ygxYkrlV{eSw9ufOCk>!=f$RU{|?0)N3-mtU>t{jqBm;$0y~ts7 zBj^fpMpj8*PMJs=KDg)Dqt&e4PN;IoUh^gm=A4vMWx`f>5c(&c9DavXD7u=;K+llA z7(npt<^k#Gwu|mNww!bYVo%7N>(f{uXQM%yW@8JY-0L1bcJ<>>F`~4ZI6`CTocazA zdmf|tm7g`{YGr6pHmBl!u_uPK@k*8Ypm}a0C_&3GCuQZFqzaK`#$B#|`WeLsb%k^Q z+=S$mQE<110?@D9Nfj&dyWurx=EkHNKtcV4Xl#*)lM7&DPk_7;881w9CAG4Hbu!vG zOmav}y0j8Y%lZ9`DR)(hMj-xbDV>ysX*l-=h8=16j3L7s)~nnj=btr zxpWpYY(qsT))!(iiCN;5!(v(+JXA&*ZI3guk$BZ9=FB3;FQ{EZ%KzgM^2o%aOH-CjDm`W&YcHO3|_qX zVDjb1A4>qXKpb2_g7NT$2qh3EoRCO_NKwS1#fTLrUV=nPB&3q1NF|d-rA4b#2EEp% zSGQe8ba|>-Uv$LToHeX-!BX_t?+c&#ll`V$bH$8w8I&^JkmaiDZqvHymS1JN?~c0; z%JDl}JoLbVTyMSCDPNufMGDF7RcyTyr4-7QsZi;SDmALrs#oWveHv`AQKNT0n03hE ztmUES7CoVM#BnEFbks4Iw3~F=8S{Gl76cE1lv*4$k#&MVMvD;}twsiq{ZN*N)EFk~kZCe&CHMq}FE5{~AAV6d{Q4>-&X z$FXaQV*17}GH*-OyCcc6IK&Cx12?lj_=c{#&iWA=`y`k6ujT*x+?n0|DY~DDlJY58 zFb?NYNoh1ff_uWDLY^VmW~VZzOc9|K6e(i{f)$iefMOyRAc`WWV8zfGwN31;T{>4T zovZvgUG~pKf4KcO5C81v?o9I1()zrMqLq-vA|*p762(H{{0bp0!5UIY_5$E&P79Q@ zyH1ckZ$-j2w-lt zt2L|FPTLTnx+fUW_CN>sD^Ul;OAo9%*S_rw)1kt%gyT|C+)QEDQ2l$GW~*=Q8dw+7 zg>?i)4_IZCD^pY{$|tb2U;9@S$+!y2z%uW#yhm#tAap9@YHhDscRtW@h`3_zQ#9$j zE86_;)THvQ4G+K)cw;kGrhI)_o^qm$*XOunEL>Yj-m`U1hcDN$;9|W3ykK8nvh;Yw z@4=OJX%)nod2-+=TuwWXpoFa?lMTw$3Xxak~A`# zJ&#WYA-m=I#ei{?r`G@!6qD(rWD+PahGGVCqX|A(~UG&y`Pv6Yy>J2XS3Q8VCsMTfVY?_EjE-J;ANJ;!<36| zUhO1uz!}z!V{`Vu*@^b3kuGB1%+fCUIN%~<7T5!im@&~n{QBa#g9HN3TBA)S%=kau zi0e{4swWpHk{!u`L?+QmEK(4OPr9m;$p|v~f7nK{$!5Eqh#065(1*Qs6@$ z-*G7@4vdd39sT|M-~XK{`|P#bPLqc8@_K%^v(GLA6HnK`ZTH`2YWJ1U&?;4?oX%4f zDpjdg;~^6!!7iXx^zsDw3Y?q*aQy^;HUp{-)KkV#SUogY(ptLb$kfJH8X=~FK^&Mt zujfhAt(qWDB%_tt0eOKqTnvuBEyG{}mliBGTB{9faR891QH%2q%2+B1{4hYyctP*^f&d2~ zzA6*y<-_{OSk|~_+ZiG@m?;{eqeO(kvd85Txe6q-2#3@S)`NYDx?Q^Ih|+N6ofGK` 
zqou~_pmPwLF`r|60hkGd(`giqO(Is9-)U<=^#rNct9f>!>LnM^+hYO2@!cCKfnyk% znNEV?M?YtVZ+Q_Fh7U&SIlzd}y5jMpHyY$}iB=ZcSb~oP`6qFL*I1#NvT;*(RBd0a zvMoxh+oecy+c4-(w>mOxf{8d~up#Ae5sl#Z>Cwhg)8R8@GrbP+f~YIWl-G#U@I^Ew zfaA(ZAyb?ZhYK&&tYB`Ib$mrtPDLVG!wBoxk>Yuo<;Js#>|H0k82MNQibAuuXx159 zlaOv6!9k?*@ueO@8LVaN@TiF5dNl!;zsFoCgXpF;OWl|hLRu^}mL-c}Pa4$(+d z8?FIlD2gGTZ8|dc^sh{|#T3t2{>OZ)nD5o}{I9@JsAP|PGz2D+xX=LbZOYsUdA_!`=M0x^0R<%p8czv2jl~3okX)#Al4$ zWO#JpS*AH3jfhTR?RbMVelzqAY7I=ZuynO*&&IWT>nuX}o#|pihk;D=+Ju!Ovtvxg z87q!=WzA-0YkKk;x5XkqzGIFd3`04sqRA-bjYQPea`&<=@RzXT`tXOcYg8vY|NdY6 zP>NBFNQ>EWI9N0i5fRXAS@UA4p~l?#Eyra zxCjjKXCs43169KSxR;nqRu`4vX(B;uItw#*T4Y~9^-j*Vp; zsXcGwidFkl9eQi!jYm}AzPCQ__vn^uiVj?Is@uQOL>}+ z6OO9ieM*v!rttLn2qt(I`TCbxpVVPT)&h_$IDxX~xuj}RTvNgE$;^u}Q| zaqzgwmFxQmxAu%)PaQ!T?x~@BSU^q09gcNw+Ff5v42b}ZU%W6m9sa(qBCu(d$*TPr z5x&kB)>zBT@OHe|HM$I2Eunb7o_Y~?)w)_-_gE7BnCWZS+%pUY>Gk)b`alEqLd*K0 z4f25VO~Ls#5N>lj?oaETpybYiOg#dc7*5L{+D$dsH9}j5m(JGWsUJI%%yec(x8ggV zMA!S%YJOh&w>3V)PnPBAhomI*Xc6fJnCccdtkWzb_Y#l!IA8{m?zo{|*=a`5_?ULh zuoqTxR|=9JhB1(Jut<`zghS|PI;T)o3&*NFt&Y{Tb{Dd7f%`m1u*ufX`g;*J(|bkL zdQ?eOg~(3TR*Xzl-hI1q+wh~4-OJCL8eMMAzedi;Hb#7%O~T-QqX&HZcuhpwn=-Cn z-ins;J|4%V6f?_?XsoCgXHpI#HNH6v7m=(P842D0Y0B0*EGkyZlq*gmh>e}ighsq< zQuM=r6%o4ZXqdm`oOhF2a+2aZ0r;$;W4>%@tD1Gib9urXQi8Q zE(T`*DQBh`C^uXhi}Qh0h&ZN|rU5N@B+zkD4Wh-}2u1)XazcdRiHZcb@mf}NXz0B! zzU7_4{O}You|Cr<6i!cK7%WP}xl#EU{twShS=|{IsOxVyw3J!foWH@o{k4SO&?<|( zI91a$#!uxP?QtRp%|@>Zv~*!EP1NahB#nfCjG4_QPv}Do&guL7^ThT>se^XXqy$|W zE)s>ntZU-3+YBeibNXR;TjZcLu2+wiQ|ZzkoP(pF)>Y}mW}p@JOha7@GJqhVxMciBN=mtCyYBP~utOavmI4rZO<<-Gy!5EP z-A2~HEZ74RqIf%L0?}p@6BG?thTQs_xcp{bx{_Yqqv)QgAE5O}!8^sUy%7Lh=jX1x z_rUA?n-n{5?MMBW`{(l0X}e1fKvqMZ_@adX{Zu7AS$%G^QYq9*W@wbUYCp1Nv{8*;c>`oD}kv&6>#w&~1~qGDAnnOvFwoMrhZW zi+U@E1q0ph0F?!EGj|@a1hHjI5u_S~%2a=*T@i0n4<5b^Znr1raQo<8Xs3ej>5$bD!Z}Y@bkW$Py|{IrFGoPPq?AJSU{DOZFB_? 
zq?|^Hh!dAq0J3WFkqDhXxgG6$1P8_`55bIY7|tG)hSXYHFPS7_)e(BuB*k!>ApmzG zQPA$hB7ohA$G|#{#(B^K-9!ea6Pq}po+O~$`$NwimOdeWU(WLcpMSK=>n#~;s@aNQ zuoqM9(45FWdkqvnv`HWkLP5~r6PDf3;_XqJn)Kj1(n9foZvW9#!4hv@Dd4g3P3;W4 z#Bn!xLyxnCjXL4p>`guBS{2*8I;f6WM;3FF5;!%NAXCJOVDTCki&w`346As*!n-Jp ztom<_m7KCT;pF2H(9jiq`rNxrdY2OnypvK~ME2mL$G(=VS0uGi_R;l8UfHEX+n;i_S;RqdNr zZ|`$H(w`#W)Obw#n6A>Z;ovwWYUcg+JHYb}@CEM_}< zj`)v2?t8*u@q*DMs&q&ju$~p z7XobyTZ(YNtxkhjGthLy)g3m_f&6;WAI~_4xngJc&fM!9-XCiK$+wf_G4dprOL_qt zR^AzB=QHGU;lH0CUw?{MN$l_!NWw{+89vs*{F-cvWG>#LPG9_5V}N`45by?(h6`t`R> zB}rWJOz){=OB;ZDJ>3u!(-0dQ(_kRMwCxj3Si9OT47Sp5JS=6CfU6K(M{?+Nj2--^b?7UEX9k2nJyalYZYsb7BZ>1Y@fUfxCV2U{q;H_cIHx(Pr(M>Q=V-J! zau%h=J6m0o%sg`Cj2o%BhT*-ckqpsLW`6#9DO;xN$w*o=ELPmU6H>(U@en6^`BqA_ z5E>x*#YDfeuFD;W!L9~JkHA(y7xJO-i<_^&tdfR?qkFeLx?#_A|vE%GK(q|D(KqPHo<7`y~qzr*lF1rEYkm)CK zU|<;yrurnt9zXb_A#7lp+dt#@(M*Va)=TDA^f_QBbwzvrB9m;eXZe$|W8_cK(eO!t zK`Rf%TDZ8G7i%e47q@Fb6w_x$3Ngoy6{1g%tXOe6AvA+2h0OwEwT5@AzhAZ0B|>2a zs-Y1A3w)tKSBrFtc|5{x7#q?@h{CU!qh@6sd*DQ;ltm z&)vpHoAPG=V)4N4k1Bz5(eKb`;;z z!G9<6GwU^t0HrKcK~BIzM)9 zcw|^fd1!omnSZE~9}!Ye8Xy2>>-ZcEmt(ipu}x`DoyEao@w%TM^_S0fuRNy#l-JWc zQ*(wh6EmA)oTaYXL0N%n>vWc0uVBssS?+UZOV5wH-6%qpG zWCZ|}g0Senw7y}vVN+3_)-NUo!3>GXEe{d{6#aBw7kg8&mX9;4E{%EE5R|ojbH%8d zUD2pjb;R^{S$zO$>m)gfaBA7eS-Wp>LL8Mi*#YBEcR9E>I3DSn{R~Q=aQG|DEq{OB4*BEk6?N`=xJQ=7r+1Q+MVo!cuL@SbAW=5Fh zK<#VNhU^nJE*NTm+i`9~5c=Sr;PK@2fUYR7PzUc+39FOqSxyB~>p1KH2Kgwo%tQvY z+Y>)8Bz>;gAp$51KpiyB+}&|4wf#7uW10Z8f$?!+((1f;zW#TWQ|XXal`*_KvDSYd zzSdal+_A;if6nVFa?+)`tcrP~WnHT(Z|-jx;6SE{l-O!{boe3K25y#mdztihUtij- z&1Gu&!N4d#Ym|eMh;>+yvLA8u>)wqtqK_S2f%z~xoV_O8F6e)PCleT6x*Mx(a0PgQjs%s1~PQe1t~7|kn%V$a$lw_KYEMLefdoeJ6hqAb?3)uGm`2SkpB}7auZ>w=0nW+QV{AnVEEER1C zJFCGbO*>JRbGxrka%+2;W?x;v&p$4xJ!e5$=FLjP9>pT;abijN zkzFgCQq?qZZdo+N(=CDc)PR;blx(9cb!1ynIUC6*57mmAkN6Drv6 zmWw7%d~Dw6qC=9x=y92jQA7?k+W1iH7Ti&1*o5kz7@pw2-ea%9F6Xuv*53t2>ok$_ zhC+0uKlGC9$<}JB^HW`iPthWCrg(#LtDp(7Y2Q#F3yWyVkWAR6TV 
zWL)MOjB7H7q|F1v`fonEe&vetH~YhGD1KmR=gQkApxA}M;LSg)93z2K0=iTYWLf8+JIBq7$;-KmXV@2cC6vaU*$i2s{wWA3=J zCcpm~fI>@Ofd1-}r$e?4Cv6D1&F z;m9@sfjWJ&pC!mxW_vspgGJ6}etN#%fy`MX7L$71c3B2X(0`KvPB;G9<>t1_>Yf#O z06_PU28pjt$HDLwkEC|QdlqCQA@hG-`P9sgZGA#6rFhRgHb=^SmKb#^p*VkxpVpGO z;pNc5jjNd{d1^^b+-BEkss+p`X9t|Z6)!*InRoi{RzYfvLX>%VQof`) z+I;iY6ZPly+D8Ke3f5k|Y+G7bqKGy}-Z)co@U=;J5@*sS6bu&@5|AJxqbZp?ESCd7 zWd^WxQ_IRd8ycR~$jZnaPDvTf%@d9Er{qc}10sEe2x7-OEakf}_#v-+)uV>0$6Z}D zkD*#`#sE7wWIUvsDI|AtQcC8O!-&EXx24I{%0JP(wR^%@|Ib$@nM*U2x<>&uo0`>^ z%q?vrUnAc!AJ{T7C_U)Oqg%iz$*s07hvVrty^4nK>a-7sR>6aEzq-42^`?59Mvf{U zsy2XWKCOlM3uBq*1M0F1jSS6C4>gi5$AviL2=ZMDxdhFv+ji{O4j>|@s_h~}i~ySfeKSL@cEtv|2P&iC{vn6O@8Y!YyLGBW`{kde_9 zBk0M^1%NL$17y&oIohx6+zF*TbEP24D1V(afIg-4Zbo8(D5XCyZFiA_G}xO2XAd|C z)|)zR!bkt*dU#`3%;4d*@D_v~?TD)vMscPOrkM&mf`CM8BZ@=dgb#$LAL;vMiIHQkf)wh05Kssb`I)xq>4$oQr9pnx=?-C_ zN13XClpdvlx~%5p?=MyY^4rwoy80*G-3qpRXD1cp8vB!lBQhBRE}xf&b_P1esgBrm z@`z#ON_fv*9hq^w%(bhm2v^3nvdQjYxmp5acaPSTPw||q=!o`cn$2ZsfaGw!e!k*d zBBEOP^*v1vo|O)N@=9xO8I6hq`uLPa=Q!OG$GMGpYJM20y&jsnjBlO`tweB*JFqmP zZEsAidOUbmtMfTia(1Nk=Z3;AcjOG?5$t;0!I!rvVZvXz8YuC1G3+l~4A77U_Zd7UBby`i+i3|a> zONwuKs;_hbY;Lc+e%ImLcQ-ZO48hCrcVVm1D9!M0WOtfB2po+2Mv1%d&Zwp@haDI> zFm~X8=W@EV<;*(-5c*xDx>XU|=M@=_;rF-QGe+ct9|2asNr&aQ1hA7>OIq06uO?qi zgvWScnfzp$UzETo@wXT5EciPZs{bg$@Wx1XYu)fzq{fVt?8V{cKuUeH4g9W>>)~Y? 
zg7>!a&PcFWb%-zWHZM%JnBfS2xTxjuOt$6n-~)PCItz5Pbh)~tMb&|fS~Iz66r@VC zu8{?8pqBP&FwmvdR`@1^u5V3?aP`^2Llj$Rt-V#*%F@*++U}MHJ<1XiL$scLa_kxq z7R!kg5D+JCc5VjMl~P|YZB2y4Sfcb%(k&HX^~%yRD|=|IE#=U&gIE1`w9lYx!doq^ zhGI0OeGOD-BTKUmQe_%tCTG-E2NvD@8G`?33fCWNq8y!5BE-L9Fzg=$SdnL3yF?Hq zcjY*j#*pZN8-ix9>2%{uTHfkcm7SYWSju;f_rCCK%Om>Ch({W%Pprtl(bQBhf5K2_ zy)C5n4mPwTq%ov;giKmX)Tj%rhNCQ6eQQ^{&+XgR@E*M+!O9qKqt9e_ih;tP9{1xj z%!-Ff@T~9`doCAwyAcu|*5S_)o;1ud?Ec=K2ppfJs(wa|jrU&(h25U|j|J)+aV+VDdWOM*bYUYAIipcX)~4cIP6bXk2SJIZ}xwuw_!$xTQ=z~$rOXwVTFzv@lx#y?-Y$N73?%rD1m zo-a(yIG;3&n z)5nYqpm(Otw;1sg6RKT0zJ0ryLMgYSwQ9bp{6$}1`Ac)HT#4u(Bkx!~} zW-6;xJ6WXF0wq*cJ}xS)urJe-A@&OkOEISBq@fUS8zjceFce!#tR@l)iTep7gmo%J zdO+iH*94y&FuwC@2z#JpA_Hh!r<-GAo8#jN3?!Ha(A&6lIyV3c1`jifp%Zw*Br*vB_s&y#SvdFas_sOP^s_ihU~_0!CukM@8ht|K0@yx zAwSkF8D(3*d7&8X1>1++Vt}0Qw=y`gII>0?%t1%XW2?5bZHP6J_GQ}J>Kz=REw1H6 zKS>@tl$-6F;o}}g;9{MVQv$<-(n1769w9uYCs{*`U$KfD1v-oENV3H}qMfwj5gvw1 zuf-$(&itQoaJx0dlk(Kr)cGl8RaoAkntTPcP)C?@Ys4<2fJTxJmcn?@&Y~YVsUV zQPYz-vRy^RvYqBCE>60g#YM7RT}HI1Lxa^35y2(ukjSXwkfe%ylGMeaQ2@^epijE_ z`nXNZ_Qc$?!hXD9gDU!~vDMnf+ctO$I>0_I6otGWDD#$EV&iTj5Dzy0P)mOHClCTq znWHG9gTpv#Gzis9vPr59sebhmP5cR^&zyx=KCz5@{My&7IYl{uZt!q@>Jma4Lcjar z#&h-N1cXr|;B%>%0SAWxOf3!y8Y!XLMxBh!QAGq?+G@=~4>*AIMV-bwSQ4bjV#-32 z!sP9xt)X@HP*;q#?4JEn!(a~1BQDX8=`V1Pb*K8^2neUhC@){{7;g@*$`>8)XN{KH zS^QH)2xe>{T4LjFs^+$qm2ELmWzYU^%Y$Y^AUbT=-TIcTHU{GDz;a+*8_7d7XU2~I z%zF5OpK~?T9msqc^*Q~(sEldbVwAhzY$lNQal$boSc;Be9Tqm1&e>!sK&NZQrZ)KH zjm1lMnOT?E)lfSx+SufgzRW(IQFa4!h&|?XTF35AjT{GK)_D|WVgiGzf-Fl~MI(R+ z>NR`BoAf5GDDqjGd9B+mwMp?BEe>%m+TQj@S_8+8S*aRrqb4yytHmRy{7&iJ6c0yN zeY}_)RJs(oqfGneIvP`WfvSy(;z9k}iY>{B13B4={aeHpZ#y5TNub2H&bW?!&RDlV zZu`e1= z*eP%EjUE@R^5U+JPZU&yhgXPp{4seSlkcAw=aJ%c%M(kej4}FNUjJx%?gkN_$a~^z_~GlrF@U@InGur?Iq!B;71Iy>kVcEfcZ8= z$bzL=aCckn_4{S*?FvTP=)+S}5g%REUi3A?9EPYIEk`)*8}PpxPv;Pk59`UcxcZPB z{-^YCxlO@K#;^3n2|A@x0$M&h+foAJYey?`A=xS*fULM-U=>=C@5@mHo0bs_roFf@ zxpvE0=qE}YWs+i$+=DAZKh0UHt6lzm`DKB> 
zcCPxJxu3QAfPdg}@pkEW=6^PjSsZ)Y*8n|wv|*XajJ?n3SzDC+)2dYpuqB0N{XIxt zP{ViT$9Aq8TmRSl_jr3_>tef|BWfZ9oh888G3S`U(g#MeX6^>9c3n z)j0|vx?n+QBUl6k;KjYd$OF}E3Pt$)0}lRt5w`w>pTlt<{wAPKd8c+++jMxRfL}>NIC{nxIODM+!Q%k+IqtStjWyj zUbj_#npti6latd`@``%ONeDPesb4|9ns59ka$`L$kk4JRlZ9-_X4B~k1`mLJlHNTsI-DBR`%69>Bv%L0sbzuAleb#p{lc~Fmp#h zl>ZkDm9p5^IqKa{Z@3)KVf_g>@*g4IDr0eIqZbVSIPC$~Kx04y^#*X|E})$22%S+i zs8!Mzwj<>c8FeTxmVxQ(BGErPp7bw_aZ^<|eO~RdJ`V}>b@8fymIJL)yX*6Sv2|mg zrB!Ng`huFN&x6s|#fSb`z$()?xuyYXj2$p`z?cEp`5Hkrra}M^UsaL;P17ZM&KmQ= zn5x9{i!?e3Dq8XZQ=C0oPE~GmFK-FfEa|&JpjJr}vnNS%Tn$;Xp^^v?6*VV8(&Z#o zD^2&W=j``H)t6eDrei`~FRlQ8yA+oP12jvp2swR)zQSGwRWpU%dPTUB)SXM=|LO(v zBfMbVH|~a|zL8h-As0LwYqn<@db=n0mgwTlZ~y8~J)JM3I(^aq_1v!aMKA;OjY;g( zf2CQweP(%{UG=%1g91I72W{Fw2UB_d7YfnTw&y|Gz~-rfvz`c|4K3w~jE~+n2TQl9 z>E}V(Km}VxLk$9LfGb@H(9dGpS2nc+t*1RGasL^uAM;2AXgw#ct5EGg>osBt#pjcU zR>IL&B8bvkK_eE8szrd-1L&ClQOKAXj$E9>|NkbG&>g665X&X@&eTsG*B1@E1RK}A z5$Mf{WYm%F)uyZm#JWEp{VzBB$85WMwW}lhBojc(uKh32*5%lQe7j_00RX;y=*@2H zPfXOmrGJ0r-3z4$v!+pefS)_%>@QvX9~`5v@e>o*YtTK~cmKQVASPoL;+kToO+>Hv z?{k-&@WhPMj83>}!JIRBo(>Hr?DHQcD3MAcjAf8_g^t#A5GkDq9={E0MS@0{LhMMa zOHq0^t`evnO_L`5W;97K;}o6)<5z;#@PA{YgDOO0Mj|myIZ*b#7|MH1uF(of+dCLv zRKM!c8nW*uLW*ZLd!!}LlM^2w2L?i=%mMVy0Tef4Gh&%wfx+*ki@g328ZO$|I4}b} z!}fp$22V&94I`%Mk%ftLD$cdkq%<%vF;>~0!>Dz@nd#D~B-NSTGW}_pATq<~iBanT zFP--&8{A_8SSe*LAEb4sEoOi)sZWw%CSCaJn697u-56VfES@i@p!ZQ4iS4QSpiA+EX&UmO=sbMJE&!>Sc9FNSu&SP|?sibK%O(a_&5M^1|Sa={!EiL5OlP`bS~iEv<3~hBMaRU( z#U}`aqQoR|a!P7idWIxZnw6cCn>Vi2N+%6@r%j)3>uuHLkru{Cqc=x~^PaUfSp5C_ z7JFQE*(Em_?DXi`E$v} W$IF>aZ(pA4YyQ(diTN+Q1ONc)ajHWA diff --git a/web/src/App.tsx b/web/src/App.tsx index b07608c311..74d225b497 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -6,6 +6,9 @@ import { Sparkles, Terminal, Globe, Database, Shield, Wrench, Zap, Heart, Star, Code, Eye, } from "lucide-react"; +import { SelectionSwitcher } from "@nous-research/ui/ui/components/selection-switcher"; +import { cn } from "@/lib/utils"; +import { Backdrop } from 
"@/components/Backdrop"; import StatusPage from "@/pages/StatusPage"; import ConfigPage from "@/pages/ConfigPage"; import EnvPage from "@/pages/EnvPage"; @@ -20,17 +23,6 @@ import { useI18n } from "@/i18n"; import { usePlugins } from "@/plugins"; import type { RegisteredPlugin } from "@/plugins"; -// --------------------------------------------------------------------------- -// Built-in nav items -// --------------------------------------------------------------------------- - -interface NavItem { - path: string; - label: string; - labelKey?: string; - icon: React.ComponentType<{ className?: string }>; -} - const BUILTIN_NAV: NavItem[] = [ { path: "/", labelKey: "status", label: "Status", icon: Activity }, { path: "/sessions", labelKey: "sessions", label: "Sessions", icon: MessageSquare }, @@ -42,11 +34,8 @@ const BUILTIN_NAV: NavItem[] = [ { path: "/env", labelKey: "keys", label: "Keys", icon: KeyRound }, ]; -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -/** Map of icon names plugins can use. Covers common choices without importing all of lucide. */ +// Plugins can reference any of these by name in their manifest — keeps bundle +// size sane vs. importing the full lucide-react set. const ICON_MAP: Record> = { Activity, BarChart3, Clock, FileText, KeyRound, MessageSquare, Package, Settings, Puzzle, @@ -54,12 +43,10 @@ const ICON_MAP: Record> = { Wrench, Zap, Heart, Star, Code, Eye, }; -/** Resolve a Lucide icon name to a component, fallback to Puzzle. */ function resolveIcon(name: string): React.ComponentType<{ className?: string }> { return ICON_MAP[name] ?? Puzzle; } -/** Insert plugin nav items at the position specified in their manifest. 
*/ function buildNavItems(builtIn: NavItem[], plugins: RegisteredPlugin[]): NavItem[] { const items = [...builtIn]; @@ -89,10 +76,6 @@ function buildNavItems(builtIn: NavItem[], plugins: RegisteredPlugin[]): NavItem return items; } -// --------------------------------------------------------------------------- -// App -// --------------------------------------------------------------------------- - export default function App() { const { t } = useI18n(); const { plugins } = usePlugins(); @@ -103,15 +86,26 @@ export default function App() { ); return ( -
-
-
+
+ + -
-
-
- - Hermes Agent +
+
+
+ + Hermes +
+ Agent
@@ -122,22 +116,36 @@ export default function App() { to={path} end={path === "/"} className={({ isActive }) => - `group relative inline-flex items-center gap-1 sm:gap-1.5 border-r border-border px-2.5 sm:px-4 py-2 font-display text-[0.65rem] sm:text-[0.8rem] tracking-[0.12em] uppercase whitespace-nowrap transition-colors cursor-pointer shrink-0 focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring ${ + cn( + "group relative inline-flex items-center gap-1.5 shrink-0", + "border-r border-current/20 px-2.5 sm:px-4 py-2", + "font-mondwest text-[0.65rem] sm:text-[0.8rem] tracking-[0.12em]", + "whitespace-nowrap transition-colors cursor-pointer", + "focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-midground", isActive - ? "text-foreground" - : "text-muted-foreground hover:text-foreground" - }` + ? "text-midground" + : "opacity-60 hover:opacity-100", + ) } > {({ isActive }) => ( <> - + {labelKey ? (t.app.nav as Record)[labelKey] ?? label : label} - + + + {isActive && ( - + )} )} @@ -145,17 +153,17 @@ export default function App() { ))} -
+
- + {t.app.webUi}
-
+
} /> } /> @@ -166,7 +174,6 @@ export default function App() { } /> } /> - {/* Plugin routes */} {plugins.map(({ manifest, component: PluginComponent }) => (
-